30. Sample shap.csv boxplot

The aim is to visualise all the features across all the timesteps, to quickly see which SHAP values are larger in magnitude and therefore have more influence on the result.

Note

Using plotly, the resulting figures could be made interactive!

  • plot main30 sepsis shap boxplot
  • plot main30 sepsis shap boxplot
  • plot main30 sepsis shap boxplot

Out:

    Unnamed: 0  sample  timestep                 features  feature_values  shap_values
0            0       0         0             Ward Lactate             0.0     0.000652
4            4       0         0                Platelets             0.0    -0.001705
5            5       0         0              Haemoglobin             0.0    -0.000918
6            6       0         0  Mean cell volume, blood             0.0    -0.000654
7            7       0         0              Haematocrit             0.0    -0.000487
16          16       0         0              Neutrophils             0.0     0.002521
17          17       0         0                 Chloride             0.0    -0.000858
18          18       0         0              Lymphocytes             0.0    -0.002920
19          19       0         0                Monocytes             0.0    -0.002224
20          20       0         0              Eosinophils             0.0    -0.005246

 13 # Libraries
 14 import seaborn as sns
 15 import pandas as pd
 16 import numpy as np
 17 import matplotlib as mpl
 18 import matplotlib.pyplot as plt
 19
 20 from scipy import stats
 21 from matplotlib.colors import LogNorm
 22
 23 sns.set_theme(style="white")  # apply seaborn's "white" style globally
 24
 25 # Override matplotlib defaults (applied after the theme). See https://matplotlib.org/devdocs/users/explain/customizing.html
 26 mpl.rcParams['axes.titlesize'] = 8
 27 mpl.rcParams['axes.labelsize'] = 8
 28 mpl.rcParams['xtick.labelsize'] = 8
 29 mpl.rcParams['ytick.labelsize'] = 8
 30 mpl.rcParams['legend.fontsize'] = 7
 31 mpl.rcParams['legend.handlelength'] = 1
 32 mpl.rcParams['legend.handleheight'] = 1
 33 mpl.rcParams['legend.loc'] = 'upper left'  # default legend position
 34
 35 # Features kept for plotting (commented-out entries are excluded)
 36 features = [
 37     'Ward Lactate',
 38     #'Ward Glucose',
 39     #'Ward sO2',
 40     #'White blood cell count, blood',
 41     'Platelets',
 42     'Haemoglobin',
 43     'Mean cell volume, blood',
 44     'Haematocrit',
 45     #'Mean cell haemoglobin conc, blood',
 46     #'Mean cell haemoglobin level, blood',
 47     #'Red blood cell count, blood',
 48     #'Red blood cell distribution width',
 49     #'Creatinine',
 50     #'Urea level, blood',
 51     #'Potassium',
 52     #'Sodium',
 53     'Neutrophils',
 54     'Chloride',
 55     'Lymphocytes',
 56     'Monocytes',
 57     'Eosinophils',
 58     'C-Reactive Protein',
 59     'Albumin',
 60     #'Alkaline Phosphatase',
 61     #'Glucose POCT Strip Blood',
 62     'Total Protein',
 63     'Globulin',
 64     'Alanine Transaminase',
 65     'Bilirubin',
 66     'Prothrombin time',
 67     'Fibrinogen (clauss)',
 68     'Procalcitonin',
 69     'Ferritin',
 70     'D-Dimer',
 71     'sex',
 72     'age'
 73 ]
 74
 75 # Load the SHAP values table from CSV
 76 data = pd.read_csv('../../datasets/shap/shap.csv')
 77
 78 # Keep only the rows whose feature is in the selected list
 79 data = data[data.features.isin(features)]
 80
 81 # Preview the first 10 rows
 82 print(data.head(10))
 83
 84
 85 # .. todo:: Change flier size, cmap, ...
 86
 87
 88 def configure_ax(ax):
 89     sns.despine(ax=ax)
 90     lg = ax.legend(loc='upper center',
 91                    bbox_to_anchor=(0.05, 1.15, 0.9, 0.1),
 92                    borderaxespad=2, ncol=5, mode='expand')
 93     plt.tight_layout()
 94
 95 # Boxenplot of SHAP values per timestep, one hue per feature (fliers hidden)
 96 plt.figure(figsize=(12, 4))
 97 ax = sns.boxenplot(data, x='timestep', y='shap_values',
 98     hue='features', saturation=0.5, showfliers=False)
 99 configure_ax(ax)
100
101 # Violinplot
102 plt.figure(figsize=(12, 4))
103 ax = sns.violinplot(data, x='timestep', y='shap_values',
104     hue='features', saturation=0.5, showfliers=False,
105     whis=1.0)
106 configure_ax(ax)
107
108 # Boxplot of SHAP values per timestep, one hue per feature (fliers hidden)
109 plt.figure(figsize=(12, 4))
110 ax = sns.boxplot(data, x='timestep', y='shap_values',
111     hue='features', saturation=0.5, showfliers=False,
112     whis=1.0)
113 configure_ax(ax)
114
115
116 # Display all the figures created above
117 plt.show()

Total running time of the script: ( 0 minutes 4.962 seconds)

Gallery generated by Sphinx-Gallery