Note
Click here to download the full example code
07.e sns.displot
Figure-level interface for drawing distribution plots onto a FacetGrid.
Out:
Unnamed: 0 sample timestep feature_values shap_values
count 7000.000000 7000.000000 7000.000000 7000.000000 7000.000000
mean 126003.000000 499.500000 3.000000 -0.570943 0.000091
std 72751.329885 288.695612 2.000143 0.410149 0.042353
min 21.000000 0.000000 0.000000 -1.000000 -0.204305
25% 63012.000000 249.750000 1.000000 -0.900000 -0.011614
50% 126003.000000 499.500000 3.000000 -0.800000 0.000515
75% 188994.000000 749.250000 5.000000 0.000000 0.012057
max 251985.000000 999.000000 6.000000 0.600000 0.789859
0. Computing... C-Reactive Protein
9 import pandas as pd
10 import seaborn as sns
11 import matplotlib as mpl
12 import matplotlib.pyplot as plt
13
14 from pathlib import Path
15
# Apply seaborn's "white" axes style.
sns.set_style("white")
17
18
def scalar_colormap(values, cmap, vmin, vmax):
    """Translate each value into a colour taken from a colormap.

    Parameters
    ----------
    values : array-like
        The values to convert; one colour is produced per element.

    cmap : str
        The colormap handed to the scalar mappable.

    vmin, vmax : float
        The bounds used to normalise the values. Clipping is
        enabled on the normalisation.

    Returns
    -------
    colormap
        The result of ``sns.color_palette`` applied to the list of
        RGBA colours computed for ``values``.

    norm : matplotlib.colors.Normalize
        The normalisation used to compute the colours.
    """
    # Normalisation + colormap bundled into a scalar mappable
    normalizer = mpl.colors.Normalize(vmin=vmin, vmax=vmax, clip=True)
    scalar_map = mpl.cm.ScalarMappable(norm=normalizer, cmap=cmap)
    # One RGBA tuple per input value
    rgba = list(map(scalar_map.to_rgba, values))
    return sns.color_palette(rgba), normalizer
44
def scalar_palette(values, cmap, vmin, vmax):
    """Build a value-to-colour lookup from a named colormap.

    Parameters
    ----------
    values : array-like
        The values to convert; each one becomes a key of the
        returned dictionary.

    cmap : str
        The colormap name, resolved through ``sns.color_palette``
        with ``as_cmap=True``.

    vmin, vmax : float
        The bounds used to normalise the values.

    Returns
    -------
    colors : dict
        Mapping from each value to the colour the colormap returns
        for its normalised position.

    norm : matplotlib.colors.Normalize
        The normalisation used to compute the colours.
    """
    # Resolve the name into a callable colormap
    colormap = sns.color_palette(cmap, as_cmap=True)
    # Scale values according to [vmin, vmax]
    normalizer = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
    # value -> colour (repeated values collapse onto a single key)
    lookup = {v: colormap(normalizer(v)) for v in values}
    return lookup, normalizer
75
76
# Load dataset
path = Path('../../datasets/shap')
data = pd.read_csv(path / 'shap.csv')

# Keep a single feature. Take an explicit copy so the column
# assignment below writes to an independent DataFrame instead of
# a filtered selection (which can raise SettingWithCopyWarning).
data = data[data.features.isin(['C-Reactive Protein'])].copy()

# The hue is drawn with one colour per distinct value, so the
# values are rounded to keep the number of hue levels small and
# therefore distinguishable. Would it be possible to define a
# continuous colormap instead?
data['feature_values'] = data['feature_values'].round(1)

# Show
print(data.describe())

# Configuration
cmap_name = 'coolwarm'  # colormap name

# .. note:: The function displot calls the histplot function. However,
#           the statistics allowed are count, frequency, probability
#           or proportion, percent and density. Thus, the median
#           cannot be computed.

# .. note:: The resulting colormap is discrete. Could it be continuous?

# Loop over features (one figure per feature)
for i, (name, df) in enumerate(data.groupby('features')):

    # Info
    print(f"{i:2d}. Computing... {name}")

    # Range of the hue variable, used to normalise the colours
    values = df.feature_values
    hue_limits = (values.min(), values.max())

    # Display displot. The palette comes from the configuration
    # above so that changing cmap_name is enough to restyle it.
    # The axes-level alternative is sns.histplot, called with the
    # same x, y, hue, palette and hue_norm arguments.
    sns.displot(data=df, x='timestep', y='shap_values',
                hue='feature_values', palette=cmap_name,
                hue_norm=hue_limits, rug=False)

    # Format figure
    plt.suptitle(name)
    plt.tight_layout()
    plt.legend([], [], frameon=False)

    # Show only the first few features
    if i > 2:
        break

# Show
plt.show()
Total running time of the script: ( 0 minutes 2.329 seconds)