04. Format MIMIC therapy (one)

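This example loads the antibiotic therapy records of a single MIMIC patient and reshapes them into a daily series listing the antibiotics given on each day.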

 # Generic libraries
 import pandas as pd

 # Show in terminal: when True, the results below are also written
 # with print(); otherwise they are only left as the last expression
 # of each snippet.
 TERMINAL = False

First, let's load the data and apply some basic formatting.

 # -----------------------------
 # Constants
 # -----------------------------
 # Path to the single-patient therapy file that is loaded. An
 # alternative input file is available at:
 #   './data/mimic-therapy/One_patient_condensed_10656173.csv'
 path = './data/mimic-therapy/One_patient_condensed_11803145.csv'

 # -----------------------------
 # Load data
 # -----------------------------
 # Read data (start and stop columns are parsed as datetimes)
 data = pd.read_csv(path,
     dayfirst=True,
     parse_dates=['starttime',
                  'stoptime'])

 # Keep only useful columns
 data = data[['subject_id',
              'antibiotic',
              'route',
              'starttime',
              'stoptime']]

 # Reformat (ignore time information). Bracket access is used so
 # the assignment always targets the column of that name.
 data['starttime'] = data['starttime'].dt.date
 data['stoptime'] = data['stoptime'].dt.date

 # Show
 if TERMINAL:
     print("\nData:")
     print(data)
 data

	subject_id	antibiotic	route	starttime	stoptime
0	11803145	Vancomycin	IV	2159-01-20	2159-01-20
1	11803145	Piperacillin-Tazobactam	IV	2159-01-20	2159-01-20
2	11803145	Piperacillin-Tazobactam	IV	2159-01-20	2159-01-20
3	11803145	Piperacillin-Tazobactam	IV	2159-01-20	2159-01-25
4	11803145	Vancomycin	IV	2159-01-21	2159-01-22
5	11803145	Vancomycin	IV	2159-01-22	2159-01-23
6	11803145	Vancomycin	IV	2159-01-23	2159-01-23
7	11803145	Meropenem	IV	2159-01-25	2159-01-26
8	11803145	Meropenem	IV	2159-01-25	2159-01-26
9	11803145	Meropenem	IV	2159-01-26	2159-02-02
10	11803145	Vancomycin	IV	2159-01-26	2159-01-26
11	11803145	Vancomycin	IV	2159-01-26	2159-01-27
12	11803145	Vancomycin	IV	2159-01-26	2159-01-29
13	11803145	Vancomycin	IV	2159-01-28	2159-01-29
14	11803145	Tobramycin Sulfate	IV	2159-01-28	2159-01-29
15	11803145	Azithromycin	IV	2159-01-28	2159-01-30
16	11803145	MetRONIDAZOLE (FLagyl)	IV	2159-01-29	2159-02-01
17	11803145	Vancomycin	IV	2159-01-30	2159-01-31
18	11803145	Vancomycin	IV	2159-02-01	2159-02-02
19	11803145	Meropenem	IV	2159-02-03	2159-02-04
20	11803145	Vancomycin	IV	2159-02-03	2159-02-04
21	11803145	Vancomycin	IV	2159-02-03	2159-02-04

Let's transform the data so that there is one entry per day.

 # -----------------------------
 # Transform data
 # -----------------------------
 # .. note: The inclusive parameter indicates which boundaries of
 #          the range are kept: 'both' keeps the start and end
 #          dates, 'left' keeps the start but drops the end date,
 #          'right' keeps the end but drops the start date and
 #          'neither' drops both. It replaces the closed parameter,
 #          which was deprecated in pandas 1.4 and removed in 2.0.
 # Create column with the days covered by each prescription
 data['startdate'] = data.apply(lambda x:
     pd.date_range(start=x['starttime'],
                   end=x['stoptime'],
                   inclusive='left',      # ignoring right
                   freq='D'), axis=1)

 # Explode such column (one row per prescription and day)
 data = data.explode('startdate')

 # The exploded column has object dtype; convert it explicitly so
 # the grouped result is indexed by dates, which asfreq requires.
 data['startdate'] = pd.to_datetime(data['startdate'])

 # Create daily therapies (sorted list of the unique antibiotic
 # names, lower-cased and stripped, given on each day)
 aux = data.groupby('startdate') \
     .apply(lambda x: sorted(x.antibiotic \
         .str.lower().str.strip().unique().tolist()))

 # Include missing days
 aux = aux.asfreq('1D')

Let's see the formatted data.

 # Show the daily therapies (printed only when TERMINAL is set)
 if TERMINAL:
     print("\nFormatted:", aux, sep="\n")
 aux

Out:

startdate
2159-01-20                [piperacillin-tazobactam, vancomycin]
2159-01-21                [piperacillin-tazobactam, vancomycin]
2159-01-22                [piperacillin-tazobactam, vancomycin]
2159-01-23                [piperacillin-tazobactam, vancomycin]
2159-01-24                            [piperacillin-tazobactam]
2159-01-25                                          [meropenem]
2159-01-26                              [meropenem, vancomycin]
2159-01-27                              [meropenem, vancomycin]
2159-01-28    [azithromycin, meropenem, tobramycin sulfate, ...
2159-01-29    [azithromycin, meropenem, metronidazole (flagyl)]
2159-01-30      [meropenem, metronidazole (flagyl), vancomycin]
2159-01-31                  [meropenem, metronidazole (flagyl)]
2159-02-01                              [meropenem, vancomycin]
2159-02-02                                                  NaN
2159-02-03                              [meropenem, vancomycin]
Freq: D, dtype: object

Let's count the number of days each therapy was given.

 # Show how many days each therapy appears (printed only when
 # TERMINAL is set)
 if TERMINAL:
     print("\nTherapies (number of days)", aux.value_counts(), sep="\n")
 aux.value_counts()

Out:

[piperacillin-tazobactam, vancomycin]                        4
[meropenem, vancomycin]                                      4
[piperacillin-tazobactam]                                    1
[meropenem]                                                  1
[azithromycin, meropenem, tobramycin sulfate, vancomycin]    1
[azithromycin, meropenem, metronidazole (flagyl)]            1
[meropenem, metronidazole (flagyl), vancomycin]              1
[meropenem, metronidazole (flagyl)]                          1
dtype: int64

Total running time of the script: ( 0 minutes 0.039 seconds)

Gallery generated by Sphinx-Gallery