Note
Click here to download the full example code
04. Format MIMIC therapy (one)
Description…
7 # Generic libraries
8 import pandas as pd
9
10 # Show in terminal
11 TERMINAL = False
First, let's load the data and apply some basic formatting.
# -----------------------------
# Constants
# -----------------------------
# Path to the condensed therapy records of the patient to format
# (swap in 'One_patient_condensed_10656173.csv' to use the other
# patient file referenced by this example).
path = './data/mimic-therapy/One_patient_condensed_11803145.csv'

# -----------------------------
# Load data
# -----------------------------
# Read data, parsing both therapy boundaries as datetimes
data = pd.read_csv(path,
                   dayfirst=True,
                   parse_dates=['starttime',
                                'stoptime'])

# Keep only useful columns. The explicit copy detaches the result
# from the raw frame so that the column assignments below do not
# raise chained-assignment warnings.
data = data[['subject_id',
             'antibiotic',
             'route',
             'starttime',
             'stoptime']].copy()

# Reformat (ignore time information)
data['starttime'] = data['starttime'].dt.date
data['stoptime'] = data['stoptime'].dt.date

# Show
if TERMINAL:
    print("\nData:")
    print(data)
data
Let's transform the data
# -----------------------------
# Transform data
# -----------------------------
# .. note: The inclusive parameter indicates whether to include
#          the first and/or last samples. 'both' keeps both,
#          'left' keeps the start date and drops the end date,
#          'right' drops the start date and keeps the end date,
#          and 'neither' drops both.
#
# .. note: pd.date_range replaced its former ``closed`` argument
#          with ``inclusive`` (pandas >= 1.4); ``closed`` is no
#          longer accepted from pandas 2.0 onwards.
#
# .. note: With inclusive='left', a therapy whose start and stop
#          fall on the same date produces an empty range and so
#          does not contribute to any day.

# Create column with date range
data['startdate'] = data.apply(
    lambda row: pd.date_range(start=row['starttime'],
                              end=row['stoptime'],
                              inclusive='left',  # ignoring right
                              freq='D'),
    axis=1)

# Explode such column (one row per therapy and day)
data = data.explode('startdate')

# The exploded column has object dtype; convert it back to
# datetimes so that the grouped result is indexed by dates,
# as required by asfreq below.
data['startdate'] = pd.to_datetime(data['startdate'])

# Create daily therapies (sorted, normalised antibiotic names)
aux = data.groupby('startdate') \
    .apply(lambda x: sorted(x.antibiotic
        .str.lower().str.strip().unique().tolist()))

# Include missing days
aux = aux.asfreq('1D')
Let's see the formatted data
# Display the daily therapies; echo them to the terminal
# as well when requested.
if TERMINAL:
    print("\nFormatted:", aux, sep="\n")
aux
Out:
startdate
2159-01-20 [piperacillin-tazobactam, vancomycin]
2159-01-21 [piperacillin-tazobactam, vancomycin]
2159-01-22 [piperacillin-tazobactam, vancomycin]
2159-01-23 [piperacillin-tazobactam, vancomycin]
2159-01-24 [piperacillin-tazobactam]
2159-01-25 [meropenem]
2159-01-26 [meropenem, vancomycin]
2159-01-27 [meropenem, vancomycin]
2159-01-28 [azithromycin, meropenem, tobramycin sulfate, ...
2159-01-29 [azithromycin, meropenem, metronidazole (flagyl)]
2159-01-30 [meropenem, metronidazole (flagyl), vancomycin]
2159-01-31 [meropenem, metronidazole (flagyl)]
2159-02-01 [meropenem, vancomycin]
2159-02-02 NaN
2159-02-03 [meropenem, vancomycin]
Freq: D, dtype: object
Let's count the number of days
# Display how many days each therapy combination was given;
# echo the counts to the terminal as well when requested.
if TERMINAL:
    print("\nTherapies (number of days)", aux.value_counts(), sep="\n")
aux.value_counts()
Out:
[piperacillin-tazobactam, vancomycin] 4
[meropenem, vancomycin] 4
[piperacillin-tazobactam] 1
[meropenem] 1
[azithromycin, meropenem, tobramycin sulfate, vancomycin] 1
[azithromycin, meropenem, metronidazole (flagyl)] 1
[meropenem, metronidazole (flagyl), vancomycin] 1
[meropenem, metronidazole (flagyl)] 1
dtype: int64
Total running time of the script: ( 0 minutes 0.039 seconds)