Note
Click here to download the full example code
99. Basic Example
# Library
import numpy as np
import pandas as pd

# Set to True to also print every result to the terminal
TERMINAL = False

# Create data: one row per (patient, date) with three measurements.
# Dates are day-first strings; None marks a missing measurement.
data = [
    ['p1', '1/5/2021', 1, 2, 3],
    ['p1', '2/5/2021', 3, 3, 3],
    ['p1', '3/5/2021', 4, 4, 4],
    ['p1', '5/5/2021', 5, 5, 5],

    ['p2', '11/5/2021', 5, 3, 3],
    ['p2', '12/5/2021', 4, 3, None],
    ['p2', '16/5/2021', None, 1, None],  # unordered
    ['p2', '15/5/2021', 5, 2, 4],
]

# Load DataFrame
data = pd.DataFrame(
    data,
    columns=['patient', 'date', 'plt', 'hct', 'bil'],
)

# Format datetime
# Date will be a datetime64 column instead of strings; dayfirst=True
# makes '1/5/2021' parse as 1 May 2021 rather than 5 January 2021.
data['date'] = pd.to_datetime(data['date'], dayfirst=True)
# Drop the time-of-day component (set it to midnight)
data['date'] = data['date'].dt.normalize()

# Show
if TERMINAL:
    print("\nData:")
    print(data)
data
Let's sort values
# Sort rows by plt and, for ties, by hct (ascending; rows with
# NaN in the sort key are placed last by default).
# NOTE: setting a column as index (e.g. the datetime) does not
# reorder the rows by itself; call sort_index() for that.
aux = data.sort_values(by=['plt', 'hct'])

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's select columns
# Select a subset of columns from the DataFrame (a list of
# labels inside the brackets returns a DataFrame).
aux = data[['patient', 'date', 'plt']]

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's do indexing (not NaN)
# Keep rows where plt is not NaN (boolean-mask indexing)
aux = data[data['plt'].notna()]

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's drop NaN (in subset)
# Keep rows without any NaN in the given subset of columns;
# NaN values in other columns do not cause a row to be dropped.
aux = data.dropna(how='any', subset=['plt', 'bil'])

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's drop NaN (all)
# Keep only rows without any NaN in any column
aux = data.dropna(how='any')

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's resample daily
# Resample to a daily frequency: the date column becomes the
# index and asfreq() inserts an all-NaN row for every day that
# has no record.
aux = data.set_index('date').resample('D').asfreq()

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's fill missing values (forward fill)
# Forward-fill: propagate the last valid observation to the
# following missing entries. ffill() replaces the deprecated
# pad() synonym, which newer pandas releases no longer provide.
aux = data.set_index('date').resample('D').asfreq().ffill()

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
Let's group by patient and sum
# Group by patient and sum each remaining column per patient
agg = aux.groupby('patient').sum()

# Show
if TERMINAL:
    print("\nOut:")
    print(agg)
agg
Let's group by patient per 2 days and compute mean and max.
# Group by patient and by 2-day bins of the datetime index, then
# compute both the mean and the max of every column.
# The functions must be passed as a list: agg('mean', 'max') would
# forward 'max' as an extra positional argument to the 'mean'
# aggregation instead of computing a second statistic.
# (agg also accepts a dict mapping column -> list of functions
# to apply different aggregations per column.)
agg = (
    aux.groupby(by=['patient', pd.Grouper(freq='2D')])
    .agg(['mean', 'max'])
)

# Show
if TERMINAL:
    print("\nOut:")
    print(agg)
agg
Total running time of the script: ( 0 minutes 0.046 seconds)