05. Format damien sepsis data

Example

  7 # Libraries
  8 import time
  9 import pandas as pd
 10 import seaborn as sns
 11 import matplotlib.pyplot as plt
 12
 13 # ---------------------------
 14 # Constants
 15 # ---------------------------
 16 # Path to biochemical markers
 17 path_bio = '.\\datasets\\damien-sepsis-biomarkers.csv'
 18
 19 # Path to nhs to hos mappings
 20 path_nth = '.\\datasets\\damien-sepsis-nhs_to_hos.csv'
 21
 22 # Path to data request megalist
 23 path_drm = '.\\datasets\\data-request-megalist.xlsx'
 24
 25 # Path to save output
 26 path_save = '.\\outputs\\{0}-damien-sepsis-biomarkers-pm{1}.csv'
 27
 28 # Save
 29 SAVE = True
 30
 31 # Days +- first micro sample
 32 WINDOW = 30
 33
 34 # ---------------------------
 35 # Main
 36 # ---------------------------
 37
 38 # -----------
 39 # Read data
 40 # -----------
 41 # Read biomarkers
 42 bio = pd.read_csv(path_bio,
 43     #nrows=10000,
 44     parse_dates=['date_collected',
 45                          'date_outcome'])
 46
 47 # Read nhs to hos
 48 nth = pd.read_csv(path_nth)
 49
 50 # Read data request megalist
 51 drm = pd.read_excel(path_drm,
 52     parse_dates=['Sampledate'])
 53
 54 # Rename drm
 55 drm = drm.rename(columns={
 56     'Sampledate': 'date_sample',
 57     'Hospital Number': 'hos_number'})
 58
 59 # Sort by date (important if keeping first)
 60 drm = drm.sort_values(by='date_sample')
 61
 62 # Keep first appearance only
 63 drm = drm.groupby(by='hos_number') \
 64              .first().reset_index()
 65
 66 # Show
 67 print("\nShow datasets:")
 68 print(bio)
 69 print(nth)
 70 print(drm)
 71
 72 # Show columns
 73 print("\nShow columns:")
 74 print(bio.columns)
 75 print(nth.columns)
 76 print(drm.columns)
 77
 78 # -----------
 79 # Merge
 80 # -----------
 81 # Merge by nhs_number
 82 bio = bio.merge(nth, how='left',
 83     left_on='patient_nhs_number',
 84     right_on='nhs_number')
 85
 86 # Merge with date (first)
 87 bio = bio.merge(drm, how='inner',
 88     left_on='hos_number',
 89     right_on='hos_number')
 90
 91 # .. note: There must be an issue with Sampledate, because it is not
 92 #          being converted to datetime64[ns] from parse_dates. Thus
 93 #          force conversion ourselves. Note that invalid parsing will
 94 #          be set to NaT (not a time)
 95 bio.date_sample = \
 96     pd.to_datetime(bio.date_sample, errors='coerce')
 97
 98 # Compute day difference
 99 bio['day'] = (bio.date_sample - bio.date_collected).dt.days
100
101 # -----------
102 # Plot
103 # -----------
104 # Count
105 count = bio.day.value_counts().sort_index()
106
107 # Configure sns
108 #sns.set_theme(style='whitegrid')
109 sns.set_color_codes("muted")
110 sns.despine(left=True, bottom=True)
111
112 # Plot bars
113 ax = plt.bar(count.index.values,
114     count.values, color='b', alpha=0.5)
115
116 # Fill aea selected
117 plt.fill_between(x=[-WINDOW, WINDOW],
118     y1=0, y2=count.max(), alpha=0.25,
119     color='orange')
120
121 # Draw vertical line at 30
122 plt.vlines([-WINDOW, WINDOW], ymin=0,
123     ymax=count.max(), color='k',
124     linestyle='dashed', linewidth=0.75)
125
126 # Configure
127 plt.grid(False)
128 plt.xlabel('Day from sample')
129 plt.ylabel('Count')
130 plt.title('Day from sample count')
131
132 # Layout
133 plt.tight_layout()
134
135 # Show
136 plt.show()
137
138 # ---------------
139 # Filter and save
140 # ---------------
141 # Filter out
142 bio = bio[bio.day.abs() <= WINDOW]
143
144 # Save
145 if SAVE:
146     # Get time
147     time = time.strftime('%Y%m%d-%H%M%S')
148     # Save with all info
149     bio.to_csv(path_save.format(time, str(WINDOW)))
150     # Save anonymised
151     bio = bio.drop(columns=['patient_nhs_number',
152                                                     'nhs_number',
153                                                     'hos_number'])
154     # Show columns
155     print(bio.columns)
156     bio.to_csv(path_save.format(time, str(WINDOW) + '-anonymised'))
157
158 # Show
159 plt.show()

Total running time of the script: ( 0 minutes 0.000 seconds)

Gallery generated by Sphinx-Gallery