99. Basic Example

 6 # Library
 7 import numpy as np
 8 import pandas as pd
 9
# Set to True to also print every result to stdout. Each section
# ends with a bare expression so that the result is displayed in
# the rendered page.
TERMINAL = False

# Create data
# One row per observation: patient id, date written as
# day/month/year text and three measurements (plt, hct, bil).
# None marks a missing measurement.
data = pd.DataFrame(
    [
        ['p1', '1/5/2021', 1, 2, 3],
        ['p1', '2/5/2021', 3, 3, 3],
        ['p1', '3/5/2021', 4, 4, 4],
        ['p1', '5/5/2021', 5, 5, 5],

        ['p2', '11/5/2021', 5, 3, 3],
        ['p2', '12/5/2021', 4, 3, None],
        ['p2', '16/5/2021', None, 1, None],  # unordered
        ['p2', '15/5/2021', 5, 2, 4],
    ],
    columns=['patient', 'date', 'plt', 'hct', 'bil'],
)

# Format datetime
# The date column becomes a datetime64 column instead of strings.
# An explicit day/month/year format is used so that parsing does
# not depend on format inference (dayfirst=True is only a hint),
# and normalize() sets the time component to midnight.
data['date'] = pd.to_datetime(data['date'], format='%d/%m/%Y')
data['date'] = data['date'].dt.normalize()

# Show
if TERMINAL:
    print("\nData:")
    print(data)
data
patient date plt hct bil
0 p1 2021-05-01 1.0 2 3.0
1 p1 2021-05-02 3.0 3 3.0
2 p1 2021-05-03 4.0 4 4.0
3 p1 2021-05-05 5.0 5 5.0
4 p2 2021-05-11 5.0 3 3.0
5 p2 2021-05-12 4.0 3 NaN
6 p2 2021-05-16 NaN 1 NaN
7 p2 2021-05-15 5.0 2 4.0


Let's sort the values

# Order the rows by plt and use hct to break ties. The sorted
# copy is stored in aux.
# NOTE(review): columns set as indexes (e.g. the datetime) are
# said to be sorted by default - TODO confirm; set_index on its
# own keeps the existing row order.
aux = data.sort_values(['plt', 'hct'])

# Show
if TERMINAL:
    print("\nOut:", aux, sep="\n")
aux
patient date plt hct bil
0 p1 2021-05-01 1.0 2 3.0
1 p1 2021-05-02 3.0 3 3.0
5 p2 2021-05-12 4.0 3 NaN
2 p1 2021-05-03 4.0 4 4.0
7 p2 2021-05-15 5.0 2 4.0
4 p2 2021-05-11 5.0 3 3.0
3 p1 2021-05-05 5.0 5 5.0
6 p2 2021-05-16 NaN 1 NaN


Let's select columns

# Keep only the patient, date and plt columns (all rows).
aux = data.loc[:, ['patient', 'date', 'plt']]

# Show
if TERMINAL:
    print("\nOut:", aux, sep="\n")
aux
patient date plt
0 p1 2021-05-01 1.0
1 p1 2021-05-02 3.0
2 p1 2021-05-03 4.0
3 p1 2021-05-05 5.0
4 p2 2021-05-11 5.0
5 p2 2021-05-12 4.0
6 p2 2021-05-16 NaN
7 p2 2021-05-15 5.0


Let's do boolean indexing (keep rows that are not NaN)

# Build a boolean mask that is True where plt has a value and
# use it to keep only those rows.
aux = data.loc[data['plt'].notna()]

# Show
if TERMINAL:
    print("\nOut:", aux, sep="\n")
aux
patient date plt hct bil
0 p1 2021-05-01 1.0 2 3.0
1 p1 2021-05-02 3.0 3 3.0
2 p1 2021-05-03 4.0 4 4.0
3 p1 2021-05-05 5.0 5 5.0
4 p2 2021-05-11 5.0 3 3.0
5 p2 2021-05-12 4.0 3 NaN
7 p2 2021-05-15 5.0 2 4.0


Let's drop rows with NaN (in a subset of columns)

# Drop every row that has a nan in plt or in bil; nan values
# in the other columns are ignored.
aux = data.dropna(subset=['plt', 'bil'], how='any')

# Show
if TERMINAL:
    print("\nOut:", aux, sep="\n")
aux
patient date plt hct bil
0 p1 2021-05-01 1.0 2 3.0
1 p1 2021-05-02 3.0 3 3.0
2 p1 2021-05-03 4.0 4 4.0
3 p1 2021-05-05 5.0 5 5.0
4 p2 2021-05-11 5.0 3 3.0
7 p2 2021-05-15 5.0 2 4.0


Let's drop rows with NaN (in any column)

 98 # Keep rows without any nan at all
 99 aux = data.dropna(how='any')
100
101 # Show
102 if TERMINAL:
103     print("\nOut:")
104     print(aux)
105 aux
patient date plt hct bil
0 p1 2021-05-01 1.0 2 3.0
1 p1 2021-05-02 3.0 3 3.0
2 p1 2021-05-03 4.0 4 4.0
3 p1 2021-05-05 5.0 5 5.0
4 p2 2021-05-11 5.0 3 3.0
7 p2 2021-05-15 5.0 2 4.0


Let's resample daily

# Resample
# Index the rows by date and move to a daily frequency. Days
# without an observation appear as rows filled with nan. The
# patients are not handled separately, so the days between
# the last p1 date and the first p2 date are included too.
aux = (
    data
    .set_index('date')
    .resample('D')
    .asfreq()
)

# Show
if TERMINAL:
    print("\nOut:", aux, sep="\n")
aux
patient plt hct bil
date
2021-05-01 p1 1.0 2.0 3.0
2021-05-02 p1 3.0 3.0 3.0
2021-05-03 p1 4.0 4.0 4.0
2021-05-04 NaN NaN NaN NaN
2021-05-05 p1 5.0 5.0 5.0
2021-05-06 NaN NaN NaN NaN
2021-05-07 NaN NaN NaN NaN
2021-05-08 NaN NaN NaN NaN
2021-05-09 NaN NaN NaN NaN
2021-05-10 NaN NaN NaN NaN
2021-05-11 p2 5.0 3.0 3.0
2021-05-12 p2 4.0 3.0 NaN
2021-05-13 NaN NaN NaN NaN
2021-05-14 NaN NaN NaN NaN
2021-05-15 p2 5.0 2.0 4.0
2021-05-16 p2 NaN 1.0 NaN


Let's fill missing values (pad / forward fill)

# Forward fill: every nan is replaced with the last valid value
# seen above it in the same column. ffill() is used instead of
# pad() because DataFrame.pad() is deprecated and no longer
# available in pandas 2.0 and later.
# Note that the fill is not done per patient: the empty days
# between the last p1 date and the first p2 date take the p1
# values, and missing p2 values may be filled from earlier rows.
aux = data.set_index('date').resample('D').asfreq().ffill()

# Show
if TERMINAL:
    print("\nOut:")
    print(aux)
aux
patient plt hct bil
date
2021-05-01 p1 1.0 2.0 3.0
2021-05-02 p1 3.0 3.0 3.0
2021-05-03 p1 4.0 4.0 4.0
2021-05-04 p1 4.0 4.0 4.0
2021-05-05 p1 5.0 5.0 5.0
2021-05-06 p1 5.0 5.0 5.0
2021-05-07 p1 5.0 5.0 5.0
2021-05-08 p1 5.0 5.0 5.0
2021-05-09 p1 5.0 5.0 5.0
2021-05-10 p1 5.0 5.0 5.0
2021-05-11 p2 5.0 3.0 3.0
2021-05-12 p2 4.0 3.0 3.0
2021-05-13 p2 4.0 3.0 3.0
2021-05-14 p2 4.0 3.0 3.0
2021-05-15 p2 5.0 2.0 4.0
2021-05-16 p2 5.0 1.0 4.0


Let's group by patient and sum

# Add up every remaining column separately for each patient;
# the result has one row per patient.
agg = aux.groupby(by='patient').sum()

# Show
if TERMINAL:
    print("\nOut:", agg, sep="\n")
agg
plt hct bil
patient
p1 42.0 43.0 44.0
p2 27.0 15.0 20.0


Let's group by patient in 2-day windows and compute the mean and max.

# Group by patient and by 2-day windows of the date index, then
# compute the mean and the max of every remaining column.
# The statistics must be passed as a list: with agg('mean', 'max')
# the second string is forwarded to the function as an extra
# argument, so only the mean would be computed.
# The result has two column levels (e.g. plt/mean and plt/max).
# A dict can be used instead to choose the statistics per
# column, e.g. {'plt': ['mean', 'max'], 'hct': ['max']}.
agg = (
    aux
    .groupby(by=['patient', pd.Grouper(freq='2D')])
    .agg(['mean', 'max'])
)

# Show
if TERMINAL:
    print("\nOut:")
    print(agg)
agg
plt hct bil
patient date
p1 2021-05-01 2.0 2.5 3.0
2021-05-03 4.0 4.0 4.0
2021-05-05 5.0 5.0 5.0
2021-05-07 5.0 5.0 5.0
2021-05-09 5.0 5.0 5.0
p2 2021-05-11 4.5 3.0 3.0
2021-05-13 4.0 3.0 3.0
2021-05-15 5.0 1.5 4.0


Total running time of the script: ( 0 minutes 0.046 seconds)

Gallery generated by Sphinx-Gallery