04. Format MIMIC therapy (one)

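This example loads the antibiotic prescriptions of a single MIMIC patient and reshapes them into a daily series listing which antibiotics were being given on each day.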

 7 # Generic libraries
 8 import pandas as pd
 9
# Show in terminal: when True, each result is also written with print()
# in addition to being left as the last expression of its code cell.
TERMINAL = False

First, let's load the data and apply some basic formatting.

# -----------------------------
# Constants
# -----------------------------
# Path to the condensed prescriptions of a single patient. To run the
# example on the other sample patient use instead:
#   './data/mimic-therapy/One_patient_condensed_10656173.csv'
path = './data/mimic-therapy/One_patient_condensed_11803145.csv'

# -----------------------------
# Load data
# -----------------------------
# Read data (both interval bounds are parsed as timestamps)
data = pd.read_csv(path,
    dayfirst=True,
    parse_dates=['starttime',
                 'stoptime'])

# Keep only useful columns. The explicit copy makes the result an
# independent frame, so the column assignments below do not act on
# a slice of the frame returned by read_csv.
data = data[['subject_id',
             'antibiotic',
             'route',
             'starttime',
             'stoptime']].copy()

# Reformat (ignore time information, keep only the calendar date)
data['starttime'] = data['starttime'].dt.date
data['stoptime'] = data['stoptime'].dt.date

# Show
if TERMINAL:
    print("\nData:")
    print(data)
data
subject_id antibiotic route starttime stoptime
0 11803145 Vancomycin IV 2159-01-20 2159-01-20
1 11803145 Piperacillin-Tazobactam IV 2159-01-20 2159-01-20
2 11803145 Piperacillin-Tazobactam IV 2159-01-20 2159-01-20
3 11803145 Piperacillin-Tazobactam IV 2159-01-20 2159-01-25
4 11803145 Vancomycin IV 2159-01-21 2159-01-22
5 11803145 Vancomycin IV 2159-01-22 2159-01-23
6 11803145 Vancomycin IV 2159-01-23 2159-01-23
7 11803145 Meropenem IV 2159-01-25 2159-01-26
8 11803145 Meropenem IV 2159-01-25 2159-01-26
9 11803145 Meropenem IV 2159-01-26 2159-02-02
10 11803145 Vancomycin IV 2159-01-26 2159-01-26
11 11803145 Vancomycin IV 2159-01-26 2159-01-27
12 11803145 Vancomycin IV 2159-01-26 2159-01-29
13 11803145 Vancomycin IV 2159-01-28 2159-01-29
14 11803145 Tobramycin Sulfate IV 2159-01-28 2159-01-29
15 11803145 Azithromycin IV 2159-01-28 2159-01-30
16 11803145 MetRONIDAZOLE (FLagyl) IV 2159-01-29 2159-02-01
17 11803145 Vancomycin IV 2159-01-30 2159-01-31
18 11803145 Vancomycin IV 2159-02-01 2159-02-02
19 11803145 Meropenem IV 2159-02-03 2159-02-04
20 11803145 Vancomycin IV 2159-02-03 2159-02-04
21 11803145 Vancomycin IV 2159-02-03 2159-02-04


Let's transform the data.

# -----------------------------
# Transform data
# -----------------------------
# .. note: The inclusive parameter of pd.date_range indicates which
#          bounds of the interval are kept: 'both' keeps the start
#          and the end date, 'left' keeps the start date and drops
#          the end date, 'right' drops the start date and keeps the
#          end date, and 'neither' drops both. It replaces the former
#          closed parameter (deprecated in pandas 1.4, removed in 2.0).
#
# .. note: With 'left', a prescription that starts and stops on the
#          same day yields an empty range; explode turns it into a
#          missing value, which the groupby below then leaves out.
# Create column with the days covered by each prescription
data['startdate'] = data.apply(lambda x:
    pd.date_range(start=x['starttime'],
                  end=x['stoptime'],
                  inclusive='left',      # ignoring right
                  freq='D'), axis=1)

# Explode such column (one row per prescription and day)
data = data.explode('startdate')

# Create daily therapies: for each day, the sorted list of distinct
# antibiotic names (lowercased and stripped) given on that day
aux = data.groupby('startdate') \
    .apply(lambda x: sorted(x.antibiotic \
        .str.lower().str.strip().unique().tolist()))

# Include missing days (days without any antibiotic become NaN)
aux = aux.asfreq('1D')

Let's see the formatted data.

# Show the daily therapies (one list of antibiotics per day)
if TERMINAL:
    print("\nFormatted:")
    print(aux)
aux

Out:

startdate
2159-01-20                [piperacillin-tazobactam, vancomycin]
2159-01-21                [piperacillin-tazobactam, vancomycin]
2159-01-22                [piperacillin-tazobactam, vancomycin]
2159-01-23                [piperacillin-tazobactam, vancomycin]
2159-01-24                            [piperacillin-tazobactam]
2159-01-25                                          [meropenem]
2159-01-26                              [meropenem, vancomycin]
2159-01-27                              [meropenem, vancomycin]
2159-01-28    [azithromycin, meropenem, tobramycin sulfate, ...
2159-01-29    [azithromycin, meropenem, metronidazole (flagyl)]
2159-01-30      [meropenem, metronidazole (flagyl), vancomycin]
2159-01-31                  [meropenem, metronidazole (flagyl)]
2159-02-01                              [meropenem, vancomycin]
2159-02-02                                                  NaN
2159-02-03                              [meropenem, vancomycin]
Freq: D, dtype: object

Let's count the number of days on which each combination of antibiotics was given.

# Show how many days each distinct therapy (combination of
# antibiotics) appears; days without therapy (NaN) are not counted.
if TERMINAL:
    print("\nTherapies (number of days)")
    print(aux.value_counts())
aux.value_counts()

Out:

[piperacillin-tazobactam, vancomycin]                        4
[meropenem, vancomycin]                                      4
[piperacillin-tazobactam]                                    1
[meropenem]                                                  1
[azithromycin, meropenem, tobramycin sulfate, vancomycin]    1
[azithromycin, meropenem, metronidazole (flagyl)]            1
[meropenem, metronidazole (flagyl), vancomycin]              1
[meropenem, metronidazole (flagyl)]                          1
dtype: int64

Total running time of the script: ( 0 minutes 0.039 seconds)

Gallery generated by Sphinx-Gallery