Note
Click here to download the full example code
01. Sliding window
Out:
Data:
feature_0 feature_1 feature_2 feature_3 feature_4 feature_5 feature_6 feature_7 feature_8 feature_9 patient day
0 0.782839 0.579171 0.875666 0.896433 0.522666 0.424833 0.019499 0.843256 0.508497 0.023651 0 -15
1 0.938406 0.686556 0.716277 0.474455 0.839579 0.552715 0.597967 0.308887 0.613897 0.567229 0 -14
2 0.917493 0.842386 0.728984 0.522605 0.850795 0.407061 0.984394 0.082715 0.594869 0.059661 0 -13
3 0.722237 0.947399 0.549326 0.725588 0.976665 0.596684 0.882883 0.594011 0.760227 0.505716 0 -12
4 0.680577 0.052478 0.142805 0.977845 0.755283 0.091267 0.700849 0.692678 0.687108 0.046146 0 -11
... ... ... ... ... ... ... ... ... ... ... ... ...
1495 0.668256 0.162212 0.051996 0.768991 0.126166 0.594980 0.606882 0.973683 0.444446 0.438552 99 -5
1496 0.074663 0.669059 0.815666 0.425611 0.900212 0.069323 0.597965 0.300225 0.087881 0.892852 99 -4
1497 0.572242 0.856386 0.745518 0.044273 0.610881 0.320131 0.804878 0.652734 0.014684 0.986622 99 -3
1498 0.266470 0.857181 0.888199 0.421400 0.169874 0.498681 0.395948 0.240385 0.194971 0.757166 99 -2
1499 0.131035 0.197843 0.184269 0.605561 0.850481 0.687066 0.508127 0.683124 0.304230 0.779854 99 -1
[1500 rows x 12 columns]
C:\Users\kelda\Desktop\repositories\github\python-spare-code\main\examples\pandas\plot_format01.py:50: FutureWarning:
DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
Result:
feature_0 feature_1 feature_2 feature_3 feature_4 feature_5 feature_6 feature_7 feature_8 feature_9 patient day window
patient
0 0 0.782839 0.579171 0.875666 0.896433 0.522666 0.424833 0.019499 0.843256 0.508497 0.023651 0 -15 0
1 0.938406 0.686556 0.716277 0.474455 0.839579 0.552715 0.597967 0.308887 0.613897 0.567229 0 -14 0
2 0.917493 0.842386 0.728984 0.522605 0.850795 0.407061 0.984394 0.082715 0.594869 0.059661 0 -13 0
1 0.938406 0.686556 0.716277 0.474455 0.839579 0.552715 0.597967 0.308887 0.613897 0.567229 0 -14 1
2 0.917493 0.842386 0.728984 0.522605 0.850795 0.407061 0.984394 0.082715 0.594869 0.059661 0 -13 1
... ... ... ... ... ... ... ... ... ... ... ... ... ...
99 1497 0.572242 0.856386 0.745518 0.044273 0.610881 0.320131 0.804878 0.652734 0.014684 0.986622 99 -3 13
1498 0.266470 0.857181 0.888199 0.421400 0.169874 0.498681 0.395948 0.240385 0.194971 0.757166 99 -2 13
1497 0.572242 0.856386 0.745518 0.044273 0.610881 0.320131 0.804878 0.652734 0.014684 0.986622 99 -3 14
1498 0.266470 0.857181 0.888199 0.421400 0.169874 0.498681 0.395948 0.240385 0.194971 0.757166 99 -2 14
1499 0.131035 0.197843 0.184269 0.605561 0.850481 0.687066 0.508127 0.683124 0.304230 0.779854 99 -1 14
[3900 rows x 13 columns]
6 # Interesting code.
7 # np.lib.stride_tricks.sliding_window_view(df.index, 3)
8
9 # Libraries
10 import numpy as np
11 import pandas as pd
12
# Configuration
ROWS, COLS = 1500, 10
PATIENTS = 100

# Create random values. The patient ids are drawn as a flat (ROWS,)
# vector: a single DataFrame column is one-dimensional, so there is
# no need to build a (ROWS, 1) column matrix for it.
features = np.random.random_sample((ROWS, COLS))
patients = np.random.randint(PATIENTS, size=ROWS)

# Create DataFrame
df = pd.DataFrame(data=features)
df = df.add_prefix('feature_')
df['patient'] = patients
# Number each patient's rows -1, -2, ... in order of appearance so
# that, after the ascending sort below, every patient ends on day -1.
df['day'] = -(df.groupby('patient').cumcount() + 1)
df = df.sort_values(by=['patient', 'day'],
                    ascending=[True, True]).reset_index(drop=True)

# Show
print("\nData:")
print(df)
32
33 # ----------------------------------
34 # Method I: Own method
35 # ----------------------------------
def sliding_window_iter(series, size, include_id=True):
    """Yield every run of ``size`` consecutive rows of ``series``.

    Despite the parameter name, this script passes a DataFrame (the
    rows of one patient). Windows overlap: each one starts one row
    after the previous one, so an input of ``n`` rows produces
    ``n - size + 1`` windows, and none when ``n < size``.

    .. note:: The DataFrame should be pre-ordered to ensure
              that IDs remain consistent.

    :param series: pandas object to slice positionally by row.
    :param size: number of rows in each window; must be at least 1.
    :param include_id: when true, store the zero-based window number
        under the ``'window'`` key of each yielded piece (a new
        column when ``series`` is a DataFrame).
    :raises ValueError: if ``size`` is smaller than 1. Because this is
        a generator, the error surfaces when iteration starts.
    """
    if size < 1:
        raise ValueError("size must be a positive integer")

    for window_id in range(len(series) - size + 1):
        # Copy the slice before tagging it: assigning to a bare slice
        # of the caller's object is what triggers pandas'
        # SettingWithCopy behaviour, and the input must stay untouched.
        window = series.iloc[window_id:window_id + size].copy()
        if include_id:
            window['window'] = window_id
        yield window
47
48
# Group by patient and compute sliding window.
#
# The groups are iterated explicitly instead of going through
# ``DataFrameGroupBy.apply``: apply operating on the grouping column
# is deprecated in pandas (it emits a FutureWarning), whereas plain
# iteration hands over each patient's full rows, 'patient' column
# included. Concatenating a dict keyed by patient rebuilds the same
# (patient, original row) MultiIndex that apply produced.
#
# Patients with fewer rows than the window are skipped because they
# yield no windows and pd.concat refuses an empty sequence.
WINDOW_SIZE = 3
result = pd.concat(
    {
        patient: pd.concat(sliding_window_iter(group, WINDOW_SIZE))
        for patient, group in df.groupby('patient')
        if len(group) >= WINDOW_SIZE
    },
    names=['patient'],
)

# Show
print("\nResult:")
print(result)

# ----------------------------------
# Method II: Using rolling
# ----------------------------------
#a = df.groupby('patient').rolling(window=3)
#b = [win for win in a if win.shape[0] == 3]
#c = pd.concat(b)
#print(c)
Total running time of the script: ( 0 minutes 0.276 seconds)