Note
Click here to download the full example code
05. Summary plot
6 # Libraries
7 import shap
8 import pandas as pd
9
10 import matplotlib.pyplot as plt
11
12
# Detect whether the module-level name ``__file__`` exists. Looking up an
# undefined name raises NameError, so that is the only exception handled
# here (a bare ``except`` would also swallow KeyboardInterrupt/SystemExit).
try:
    __file__
except NameError:
    TERMINAL = False
else:
    TERMINAL = True
18
19
20 # ------------------------
21 # Methods
22 # ------------------------
def load_shap_file(path='../../datasets/shap/'):
    """Load the SHAP table stored in ``shap.csv``.

    The first column of the file is discarded, the stored ``timestep``
    column is renamed to ``indice`` and a new ``timestep`` column is
    derived from it.

    .. note:: The ``timestep`` column stored in the file does not hold
              a time step but a matrix index. Since the time steps
              started at a negative value (t=-T) and ended at t=0,
              the index is shifted by the number of distinct indices
              minus one. This maps the largest index to t=0 only if
              the indices are consecutive and start at 0 -- TODO
              confirm against the dataset.

    Parameters
    ----------
    path: str or path-like, default '../../datasets/shap/'
        Directory that contains the ``shap.csv`` file.

    Returns
    -------
    pd.DataFrame
        The loaded table with the columns ``indice`` (original matrix
        index) and ``timestep`` (shifted index) in addition to the
        remaining columns of the file.
    """
    from pathlib import Path

    # Load data
    data = pd.read_csv(Path(path) / 'shap.csv')

    # Drop the first column.
    # NOTE(review): presumably the index written by ``to_csv`` -- confirm.
    data = data.iloc[:, 1:]

    # Keep the raw matrix index and derive the shifted time step from it.
    data = data.rename(columns={'timestep': 'indice'})
    data['timestep'] = data['indice'] - (data['indice'].nunique() - 1)
    return data
40
41
42 # -----------------------------------------------------
43 # Main
44 # -----------------------------------------------------
# Read the long-format table (one row per sample/timestep/feature).
# data = create_random_shap(10, 6, 4)
data = load_shap_file()
#data = data[data['sample'] < 100]

# Reshape to wide format: rows are (sample, timestep) pairs and there
# is one column per feature. First the shap values ...
shap_values = data.pivot_table(
    values='shap_values',
    index=['sample', 'timestep'],
    columns=['features'],
)

# ... and then, with the same layout, the feature values.
feature_values = data.pivot_table(
    values='feature_values',
    index=['sample', 'timestep'],
    columns=['features'],
)

# Dump the three tables when TERMINAL is set.
if TERMINAL:
    print("\nShow:")
    print(data, shap_values, feature_values, sep='\n')
Let’s see what data looks like
69 data.head(10)
Let’s see what shap_values looks like
73 shap_values.iloc[:10, :5]
Let’s see what feature_values looks like
77 feature_values.iloc[:10, :5]
Display using shap.summary_plot
The first option is to use the shap
library to plot the results.
# Constants derived from the tables and reused by the plots below.
N = 10  # upper bound on the number of features that are looped over
TIMESTEPS = shap_values.index.unique(level='timestep').size  # distinct timesteps
SAMPLES = shap_values.index.unique(level='sample').size  # distinct samples

# Overall range of the shap values (used as a common x-axis range).
shap_min = data['shap_values'].min()
shap_max = data['shap_values'].max()
Now, let’s display the shap values for all features in each timestep.
# For each timestep (visualise all features)
# The timestep label of every row is loop-invariant, so look it up once.
timesteps = shap_values.index.get_level_values('timestep')
steps = timesteps.unique()
for step in steps:
    # Boolean mask selecting the rows that belong to this timestep
    indice = timesteps == step

    # Create auxiliary matrices.
    # NOTE(review): the mask is computed from shap_values and applied
    # positionally to feature_values too; this assumes both tables
    # have identical row order -- confirm.
    shap_aux = shap_values.iloc[indice]
    feat_aux = feature_values.iloc[indice]

    # Display (show=False so that the title and limits can be set and
    # the figure is only rendered later on).
    plt.figure()
    plt.title("Timestep: %s" % step)
    shap.summary_plot(shap_aux.to_numpy(), feat_aux, show=False)
    # Same x-axis range in every figure.
    plt.xlim(shap_min, shap_max)
Now, let’s display the shap values for all timesteps of each feature.
# For each feature (visualise all time-steps)
# Column labels of the reshaped matrices. They do not depend on the
# feature, so they are built once.
timestep_labels = ['timestep %s' % j for j in range(-TIMESTEPS + 1, 1)]

for f in shap_values.columns[:N]:
    # Create auxiliary matrices (select feature and reshape to one row
    # per sample and one column per timestep).
    #
    # The feature is selected by label rather than by position: the two
    # tables are pivoted separately and pivot_table drops all-NaN columns
    # by default, so equal positions are not guaranteed to refer to the
    # same feature.
    # NOTE(review): the reshape assumes that every sample has exactly
    # TIMESTEPS rows -- confirm.
    shap_aux = shap_values[f].to_numpy().reshape(-1, TIMESTEPS)
    feat_aux = pd.DataFrame(
        feature_values[f].to_numpy().reshape(-1, TIMESTEPS),
        columns=timestep_labels,
    )

    # Show (sort=False keeps the timestep order of the columns)
    plt.figure()
    plt.title("Feature: %s" % f)
    shap.summary_plot(shap_aux, feat_aux, sort=False, show=False, plot_type='violin')
    plt.xlim(shap_min, shap_max)
    plt.gca().invert_yaxis()

# Show
plt.show()
Total running time of the script: ( 0 minutes 7.590 seconds)