Note
Click here to download the full example code
07.c stats.2dbin
with fake data
Use binned_statistic_2d and display using heatmap.
Out:
[1 1 1 1 2 2 2 3]
[1 1 2 2 3 3 4 1]
[1 1 5 6 7 8 7 7]
Scipy:
[0.5 1.5 2.5 3.5 4.5 5.5 6.5]
[1. 1.03030303 1.06060606 1.09090909 1.12121212 1.15151515
1.18181818 1.21212121 1.24242424 1.27272727 1.3030303 1.33333333
1.36363636 1.39393939 1.42424242 1.45454545 1.48484848 1.51515152
1.54545455 1.57575758 1.60606061 1.63636364 1.66666667 1.6969697
1.72727273 1.75757576 1.78787879 1.81818182 1.84848485 1.87878788
1.90909091 1.93939394 1.96969697 2. 2.03030303 2.06060606
2.09090909 2.12121212 2.15151515 2.18181818 2.21212121 2.24242424
2.27272727 2.3030303 2.33333333 2.36363636 2.39393939 2.42424242
2.45454545 2.48484848 2.51515152 2.54545455 2.57575758 2.60606061
2.63636364 2.66666667 2.6969697 2.72727273 2.75757576 2.78787879
2.81818182 2.84848485 2.87878788 2.90909091 2.93939394 2.96969697
3. 3.03030303 3.06060606 3.09090909 3.12121212 3.15151515
3.18181818 3.21212121 3.24242424 3.27272727 3.3030303 3.33333333
3.36363636 3.39393939 3.42424242 3.45454545 3.48484848 3.51515152
3.54545455 3.57575758 3.60606061 3.63636364 3.66666667 3.6969697
3.72727273 3.75757576 3.78787879 3.81818182 3.84848485 3.87878788
3.90909091 3.93939394 3.96969697 4. ]
[[2. 0. 1. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[2. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 2. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0.]
[0. 1. 0. 0. 0. 0.]]
[1.01515152 1.04545455 1.07575758 1.10606061 1.13636364 1.16666667
1.1969697 1.22727273 1.25757576 1.28787879 1.31818182 1.34848485
1.37878788 1.40909091 1.43939394 1.46969697 1.5 1.53030303
1.56060606 1.59090909 1.62121212 1.65151515 1.68181818 1.71212121
1.74242424 1.77272727 1.8030303 1.83333333 1.86363636 1.89393939
1.92424242 1.95454545 1.98484848 2.01515152 2.04545455 2.07575758
2.10606061 2.13636364 2.16666667 2.1969697 2.22727273 2.25757576
2.28787879 2.31818182 2.34848485 2.37878788 2.40909091 2.43939394
2.46969697 2.5 2.53030303 2.56060606 2.59090909 2.62121212
2.65151515 2.68181818 2.71212121 2.74242424 2.77272727 2.8030303
2.83333333 2.86363636 2.89393939 2.92424242 2.95454545 2.98484848
3.01515152 3.04545455 3.07575758 3.10606061 3.13636364 3.16666667
3.1969697 3.22727273 3.25757576 3.28787879 3.31818182 3.34848485
3.37878788 3.40909091 3.43939394 3.46969697 3.5 3.53030303
3.56060606 3.59090909 3.62121212 3.65151515 3.68181818 3.71212121
3.74242424 3.77272727 3.8030303 3.83333333 3.86363636 3.89393939
3.92424242 3.95454545 3.98484848]
[1. 2. 3. 4. 5. 6.]
'\nimport sys\nsys.exit()\n# Compute bin statistic (count and median)\n\n# Plot\nplt.figure()\nsns.violinplot(data=data, x="timestep", y="shap_values", inner="box")\nplt.figure()\nplt.tight_layout()\nsns.violinplot(data=data, x="timestep", y="feature_values", inner="box")\nplt.figure()\nsns.histplot(data=data, x="timestep", shrink=.8)\n\n\n# Plot hist\nf1 = plt.hist2d(data.timestep, data.feature_values, bins=30, cmap=\'Reds\')\ncb = plt.colorbar()\ncb.set_label(\'counts in bin\')\nplt.title(\'Counts (square bin)\')\nplt.show()\n'
9 # Libraries
10 import seaborn as sns
11 import pandas as pd
12 import numpy as np
13 import matplotlib.pyplot as plt
14
15 from scipy import stats
16 from matplotlib.colors import LogNorm
17
18 def data_shap():
19 data = pd.read_csv('../../datasets/shap/shap.csv')
20 return data.timestep, \
21 data.shap_values, \
22 data.feature_values, \
23 data
24
25 def data_manual():
26 """"""
27 # Create random values
28 x = np.array([1, 1, 1, 1, 2, 2, 2, 3])
29 y = np.array([1, 1, 2, 2, 3, 3, 4, 1])
30 z = np.array([1, 1, 5, 6, 7, 8, 7, 7])
31 return x, y, z, None
32
33 # Create data
34 x, y, z, data = data_manual()
35
36 print(x)
37 print(y)
38 print(z)
39
40 """
41 # With pandas
42 v = z
43 vals, bins = np.histogram(v)
44 a = pd.Series(v).groupby(pd.cut(v, bins)).median()
45 print("\nPandas:")
46 print(bins)
47 print(vals)
48 print(a)
49 """
50
51
52
53
54
55
56
57 vmin = z.min()
58 vmax = z.max()
59
60
61
62 # Compute binned statistic (median)
63 binx = np.linspace(0, 3, 4) + 0.5
64 biny = np.linspace(0, 4, 5) + 0.5
65 binx = np.linspace(0, 6, 7) + 0.5
66 biny = np.linspace(y.min(), y.max(), 100)
67 r1 = stats.binned_statistic_2d(x=y, y=x, values=z,
68 statistic='count', bins=[biny, binx],
69 expand_binnumbers=False)
70
71 r2 = stats.binned_statistic_2d(x=y, y=x, values=z,
72 statistic='median', bins=[biny, binx],
73 expand_binnumbers=False)
74
75 # Compute centres
76 x_center = (r1.x_edge[:-1] + r1.x_edge[1:]) / 2
77 y_center = (r1.y_edge[:-1] + r1.y_edge[1:]) / 2
78
79 # Show
80 print("\nScipy:")
81 print(binx)
82 print(biny)
83 print(r1.statistic)
84 print(x_center)
85 print(y_center)
86
87 # Convert the computed matrix to an stacked dataframe?
88
89 flip1 = np.flip(r1.statistic, 0)
90 flip2 = np.flip(r2.statistic, 0)
91 #flip1 = r1.statistic
92 #flip2 = r2.statistic
93
94
95
96 # Display
97 fig, axs = plt.subplots(nrows=1, ncols=2,
98 sharey=False, sharex=False, figsize=(8, 7))
99
100 sns.heatmap(flip1, annot=False, linewidth=0.5,
101 xticklabels=y_center.astype(int),
102 yticklabels=x_center.round(2)[::-1], # Because of flip
103 cmap='Blues', ax=axs[0],
104 norm=LogNorm())
105
106 sns.heatmap(flip2, annot=False, linewidth=0.5,
107 xticklabels=y_center.astype(int),
108 yticklabels=x_center.round(2)[::-1], # Because of flip
109 cmap='coolwarm', ax=axs[1], zorder=1,
110 vmin=None, vmax=None, center=None, robust=True)
111
112 # If robust=True and vmin or vmax are absent, the colormap range
113 # is computed with robust quantiles instead of the extreme values.
114 """
115 sns.violinplot(x=x, y=y,
116 saturation=0.5, fliersize=0.1, linewidth=0.5,
117 color='green', ax=axs[2], zorder=3,
118 width=0.5)
119 """
120
121
122 # Configure ax0
123 axs[0].set_title('count')
124 axs[0].set_xlabel('timestep')
125 axs[0].set_ylabel('shap')
126 axs[0].locator_params(axis='y', nbins=10)
127 #axs[0].set_aspect('equal', 'box'
128
129 # Configure ax1
130 axs[1].set_title('median')
131 axs[1].set_xlabel('timestep')
132 axs[1].set_ylabel('shap')
133 axs[1].locator_params(axis='y', nbins=10)
134 #axs[1].set_aspect('equal', 'box')
135 #axs[1].invert_yaxis()
136
137 # Generic
138 plt.suptitle('C-Reactive Protein')
139
140 """
141 # Set axes manually
142 #plt.set_xticks()
143 #plt.setp(axs[1].get_yticklabels()[::1], visible=False)
144 #plt.setp(axs[1].get_yticklabels()[::5], visible=True)
145 from matplotlib import ticker
146 axs[1].xaxis.set_major_locator(ticker.MultipleLocator(1.00))
147 axs[1].xaxis.set_minor_locator(ticker.MultipleLocator(0.25))
148 """
149
150 plt.tight_layout()
151 plt.show()
152
153
154 """
155 import sys
156 sys.exit()
157 # Compute bin statistic (count and median)
158
159 # Plot
160 plt.figure()
161 sns.violinplot(data=data, x="timestep", y="shap_values", inner="box")
162 plt.figure()
163 plt.tight_layout()
164 sns.violinplot(data=data, x="timestep", y="feature_values", inner="box")
165 plt.figure()
166 sns.histplot(data=data, x="timestep", shrink=.8)
167
168
169 # Plot hist
170 f1 = plt.hist2d(data.timestep, data.feature_values, bins=30, cmap='Reds')
171 cb = plt.colorbar()
172 cb.set_label('counts in bin')
173 plt.title('Counts (square bin)')
174 plt.show()
175 """
Total running time of the script: ( 0 minutes 0.853 seconds)