07.c ``stats.2dbin`` with fake data
-----------------------------------

Use binned_statistic_2d and display using heatmap. GENERATED FROM PYTHON SOURCE LINES 8-175 .. image-sg:: /_examples/matplotlib/images/sphx_glr_plot_main07_c_2dbin_fake_001.png :alt: C-Reactive Protein, count, median :srcset: /_examples/matplotlib/images/sphx_glr_plot_main07_c_2dbin_fake_001.png :class: sphx-glr-single-img .. rst-class:: sphx-glr-script-out Out: .. code-block:: none [1 1 1 1 2 2 2 3] [1 1 2 2 3 3 4 1] [1 1 5 6 7 8 7 7] Scipy: [0.5 1.5 2.5 3.5 4.5 5.5 6.5] [1. 1.03030303 1.06060606 1.09090909 1.12121212 1.15151515 1.18181818 1.21212121 1.24242424 1.27272727 1.3030303 1.33333333 1.36363636 1.39393939 1.42424242 1.45454545 1.48484848 1.51515152 1.54545455 1.57575758 1.60606061 1.63636364 1.66666667 1.6969697 1.72727273 1.75757576 1.78787879 1.81818182 1.84848485 1.87878788 1.90909091 1.93939394 1.96969697 2. 2.03030303 2.06060606 2.09090909 2.12121212 2.15151515 2.18181818 2.21212121 2.24242424 2.27272727 2.3030303 2.33333333 2.36363636 2.39393939 2.42424242 2.45454545 2.48484848 2.51515152 2.54545455 2.57575758 2.60606061 2.63636364 2.66666667 2.6969697 2.72727273 2.75757576 2.78787879 2.81818182 2.84848485 2.87878788 2.90909091 2.93939394 2.96969697 3. 3.03030303 3.06060606 3.09090909 3.12121212 3.15151515 3.18181818 3.21212121 3.24242424 3.27272727 3.3030303 3.33333333 3.36363636 3.39393939 3.42424242 3.45454545 3.48484848 3.51515152 3.54545455 3.57575758 3.60606061 3.63636364 3.66666667 3.6969697 3.72727273 3.75757576 3.78787879 3.81818182 3.84848485 3.87878788 3.90909091 3.93939394 3.96969697 4. ] [[2. 0. 1. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 
0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [2. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 2. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 0. 0. 0. 0. 0.] [0. 1. 0. 0. 0. 
0.]] [1.01515152 1.04545455 1.07575758 1.10606061 1.13636364 1.16666667 1.1969697 1.22727273 1.25757576 1.28787879 1.31818182 1.34848485 1.37878788 1.40909091 1.43939394 1.46969697 1.5 1.53030303 1.56060606 1.59090909 1.62121212 1.65151515 1.68181818 1.71212121 1.74242424 1.77272727 1.8030303 1.83333333 1.86363636 1.89393939 1.92424242 1.95454545 1.98484848 2.01515152 2.04545455 2.07575758 2.10606061 2.13636364 2.16666667 2.1969697 2.22727273 2.25757576 2.28787879 2.31818182 2.34848485 2.37878788 2.40909091 2.43939394 2.46969697 2.5 2.53030303 2.56060606 2.59090909 2.62121212 2.65151515 2.68181818 2.71212121 2.74242424 2.77272727 2.8030303 2.83333333 2.86363636 2.89393939 2.92424242 2.95454545 2.98484848 3.01515152 3.04545455 3.07575758 3.10606061 3.13636364 3.16666667 3.1969697 3.22727273 3.25757576 3.28787879 3.31818182 3.34848485 3.37878788 3.40909091 3.43939394 3.46969697 3.5 3.53030303 3.56060606 3.59090909 3.62121212 3.65151515 3.68181818 3.71212121 3.74242424 3.77272727 3.8030303 3.83333333 3.86363636 3.89393939 3.92424242 3.95454545 3.98484848] [1. 2. 3. 4. 5. 6.] '\nimport sys\nsys.exit()\n# Compute bin statistic (count and median)\n\n# Plot\nplt.figure()\nsns.violinplot(data=data, x="timestep", y="shap_values", inner="box")\nplt.figure()\nplt.tight_layout()\nsns.violinplot(data=data, x="timestep", y="feature_values", inner="box")\nplt.figure()\nsns.histplot(data=data, x="timestep", shrink=.8)\n\n\n# Plot hist\nf1 = plt.hist2d(data.timestep, data.feature_values, bins=30, cmap=\'Reds\')\ncb = plt.colorbar()\ncb.set_label(\'counts in bin\')\nplt.title(\'Counts (square bin)\')\nplt.show()\n' | .. 
code-block:: default
    :lineno-start: 9


    # Libraries
    import seaborn as sns
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt

    from scipy import stats
    from matplotlib.colors import LogNorm


    def data_shap():
        """Load the SHAP dataset from ``../../datasets/shap/shap.csv``.

        Returns
        -------
        tuple
            The ``timestep``, ``shap_values`` and ``feature_values``
            columns, followed by the complete DataFrame.
        """
        data = pd.read_csv('../../datasets/shap/shap.csv')
        return data.timestep, \
               data.shap_values, \
               data.feature_values, \
               data


    def data_manual():
        """Return a small hand-written dataset.

        Returns
        -------
        tuple
            The arrays ``x``, ``y`` and ``z`` (eight values each) and
            ``None`` in the position where ``data_shap`` returns the
            DataFrame.
        """
        # Fixed (not random) values, so the printed output is reproducible.
        x = np.array([1, 1, 1, 1, 2, 2, 2, 3])
        y = np.array([1, 1, 2, 2, 3, 3, 4, 1])
        z = np.array([1, 1, 5, 6, 7, 8, 7, 7])
        return x, y, z, None


    # Create data
    x, y, z, data = data_manual()

    print(x)
    print(y)
    print(z)

    """
    # With pandas
    v = z
    vals, bins = np.histogram(v)
    a = pd.Series(v).groupby(pd.cut(v, bins)).median()

    print("\nPandas:")
    print(bins)
    print(vals)
    print(a)
    """

    # Bin edges.
    #   binx: 0.5, 1.5, ..., 6.5 (six unit-width bins centred on 1..6).
    #   biny: 100 equally spaced edges (99 bins) from y.min() to y.max().
    binx = np.linspace(0, 6, 7) + 0.5
    biny = np.linspace(y.min(), y.max(), 100)

    # Compute binned statistics (count and median).
    #
    # NOTE: the data are passed swapped (x=y, y=x), so the first dimension
    # of the result (rows) is ``y`` binned with ``biny`` and the second
    # dimension (columns) is ``x`` binned with ``binx``.
    r1 = stats.binned_statistic_2d(x=y, y=x, values=z,
        statistic='count', bins=[biny, binx],
        expand_binnumbers=False)

    r2 = stats.binned_statistic_2d(x=y, y=x, values=z,
        statistic='median', bins=[biny, binx],
        expand_binnumbers=False)

    # Compute centres. Because of the swap above, ``x_center`` holds the
    # centres of the ``biny`` bins (rows) and ``y_center`` the centres of
    # the ``binx`` bins (columns).
    x_center = (r1.x_edge[:-1] + r1.x_edge[1:]) / 2
    y_center = (r1.y_edge[:-1] + r1.y_edge[1:]) / 2

    # Show
    print("\nScipy:")
    print(binx)
    print(biny)
    print(r1.statistic)
    print(x_center)
    print(y_center)

    # Reverse the row order (axis 0) of both matrices; the y tick labels
    # passed to the heatmaps below are reversed accordingly.
    flip1 = np.flip(r1.statistic, 0)
    flip2 = np.flip(r2.statistic, 0)
    #flip1 = r1.statistic
    #flip2 = r2.statistic

    # Display
    fig, axs = plt.subplots(nrows=1, ncols=2,
        sharey=False, sharex=False, figsize=(8, 7))

    # Counts, drawn with a logarithmic colour normalisation.
    sns.heatmap(flip1, annot=False, linewidth=0.5,
                xticklabels=y_center.astype(int),
                yticklabels=x_center.round(2)[::-1],  # Because of flip
                cmap='Blues', ax=axs[0],
                norm=LogNorm())

    # Medians.
    sns.heatmap(flip2, annot=False, linewidth=0.5,
                xticklabels=y_center.astype(int),
                yticklabels=x_center.round(2)[::-1],  # Because of flip
                cmap='coolwarm', ax=axs[1], zorder=1,
                vmin=None, vmax=None, center=None, robust=True)

    # If robust=True and vmin or vmax are absent, the colormap range
    # is computed with robust quantiles instead of the extreme values.

    """
    sns.violinplot(x=x, y=y, saturation=0.5, fliersize=0.1,
                   linewidth=0.5, color='green', ax=axs[2],
                   zorder=3, width=0.5)
    """

    # Configure ax0
    axs[0].set_title('count')
    axs[0].set_xlabel('timestep')
    axs[0].set_ylabel('shap')
    axs[0].locator_params(axis='y', nbins=10)
    #axs[0].set_aspect('equal', 'box')

    # Configure ax1
    axs[1].set_title('median')
    axs[1].set_xlabel('timestep')
    axs[1].set_ylabel('shap')
    axs[1].locator_params(axis='y', nbins=10)
    #axs[1].set_aspect('equal', 'box')
    #axs[1].invert_yaxis()

    # Generic
    plt.suptitle('C-Reactive Protein')

    """
    # Set axes manually
    #plt.set_xticks()
    #plt.setp(axs[1].get_yticklabels()[::1], visible=False)
    #plt.setp(axs[1].get_yticklabels()[::5], visible=True)
    from matplotlib import ticker
    axs[1].xaxis.set_major_locator(ticker.MultipleLocator(1.00))
    axs[1].xaxis.set_minor_locator(ticker.MultipleLocator(0.25))
    """

    plt.tight_layout()
    plt.show()

    # NOTE: the string below is the script's final expression; its repr
    # appears at the end of the output recorded for this example, so it
    # is kept verbatim.
    """
    import sys
    sys.exit()
    # Compute bin statistic (count and median)

    # Plot
    plt.figure()
    sns.violinplot(data=data, x="timestep", y="shap_values", inner="box")
    plt.figure()
    plt.tight_layout()
    sns.violinplot(data=data, x="timestep", y="feature_values", inner="box")
    plt.figure()
    sns.histplot(data=data, x="timestep", shrink=.8)


    # Plot hist
    f1 = plt.hist2d(data.timestep, data.feature_values, bins=30, cmap='Reds')
    cb = plt.colorbar()
    cb.set_label('counts in bin')
    plt.title('Counts (square bin)')
    plt.show()
    """