07.e sns.displot

Figure-level interface for drawing distribution plots onto a FacetGrid.

C-Reactive Protein

Out:

          Unnamed: 0       sample     timestep  feature_values  shap_values
count    7000.000000  7000.000000  7000.000000     7000.000000  7000.000000
mean   126003.000000   499.500000     3.000000       -0.570943     0.000091
std     72751.329885   288.695612     2.000143        0.410149     0.042353
min        21.000000     0.000000     0.000000       -1.000000    -0.204305
25%     63012.000000   249.750000     1.000000       -0.900000    -0.011614
50%    126003.000000   499.500000     3.000000       -0.800000     0.000515
75%    188994.000000   749.250000     5.000000        0.000000     0.012057
max    251985.000000   999.000000     6.000000        0.600000     0.789859
 0. Computing... C-Reactive Protein

  9 import pandas as pd
 10 import seaborn as sns
 11 import matplotlib as mpl
 12 import matplotlib.pyplot as plt
 13
 14 from pathlib import Path
 15
 16 sns.set_style(style="white")
 17
 18
 19 def scalar_colormap(values, cmap, vmin, vmax):
 20     """This method creates a colormap based on values.
 21
 22     Parameters
 23     ----------
 24     values : array-like
 25     The values to create the corresponding colors
 26
 27     cmap : str
 28     The colormap
 29
 30     vmin, vmax : float
 31     The minimum and maximum possible values
 32
 33     Returns
 34     -------
 35     scalar colormap
 36     """
 37     # Create scalar mappable
 38     norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax, clip=True)
 39     mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
 40     # Get color map
 41     colormap = sns.color_palette([mapper.to_rgba(i) for i in values])
 42     # Return
 43     return colormap, norm
 44
 45 def scalar_palette(values, cmap, vmin, vmax):
 46     """This method creates a colorpalette based on values.
 47
 48     Parameters
 49     ----------
 50     values : array-like
 51     The values to create the corresponding colors
 52
 53     cmap : str
 54     The colormap
 55
 56     vmin, vmax : float
 57     The minimum and maximum possible values
 58
 59     Returns
 60     -------
 61     scalar colormap
 62
 63     """
 64     # Create a matplotlib colormap from name
 65     # cmap = sns.light_palette(cmap, reverse=False, as_cmap=True)
 66     cmap = sns.color_palette(cmap, as_cmap=True)
 67     # Normalize to the range of possible values from df["c"]
 68     norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax)
 69     # Create a color dictionary (value in c : color from colormap)
 70     colors = {}
 71     for cval in values:
 72         colors.update({cval: cmap(norm(cval))})
 73     # Return
 74     return colors, norm
 75
 76
 77 # Load dataset
 78 path = Path('../../datasets/shap')
 79 data = pd.read_csv(path / 'shap.csv')
 80 data = data[data.features.isin(['C-Reactive Protein'])]
 81
 82 # Since the colorbar is discrete, needs to round so that
 83 # the amount of bins is small and therefore visible. Would
 84 # it be possible to define a continuous colormap?
 85 data.feature_values = data.feature_values.round(1)
 86
 87 # Show
 88 print(data.describe())
 89
 90 # Configuration
 91 cmap_name = 'coolwarm' # colormap name
 92
 93
 94 # .. note:: The function displot calls the histplot function. However,
 95 #           the features allowed are count, frequency, probability or
 96 #           proportion, percent and density. Thus, the median cannot
 97 #           be computed.
 98
 99 # .. note:: The resulting colormap is discrete. Could it be continuous?
100
101 # Loop
102 for i, (name, df) in enumerate(data.groupby('features')):
103
104     # Info
105     print("%2d. Computing... %s" % (i, name))
106
107     # Get colormap
108     values = df.feature_values
109     cmap, norm = scalar_colormap(values=values,
110         cmap=cmap_name, vmin=values.min(),
111         vmax=values.max())
112
113     # Display displot
114     sns.displot(data=df, x='timestep', y='shap_values',
115         hue='feature_values', palette='coolwarm',
116         hue_norm=(values.min(), values.max()),
117         rug=False) # bins
118
119     """
120     # Display histplot
121     plt.figure()
122     sns.histplot(
123         data=df, x='timestep', y='shap_values',
124         discrete=(False, False),
125         hue='feature_values', palette=cmap_name,
126         hue_norm=(values.min(), values.max()),
127         cbar=False, cbar_kws=dict(shrink=.75),
128         #pthresh=.05, pmax=.9, bins=100
129     )
130     """
131
132     # Format figure
133     plt.suptitle(name)
134     plt.tight_layout()
135     plt.legend([], [], frameon=False)
136
137     # Show only first N
138     if int(i) > 2:
139         break
140
141 # Show
142 plt.show()

Total running time of the script: ( 0 minutes 2.329 seconds)

Gallery generated by Sphinx-Gallery