Note
Click here to download the full example code
01. Sliding window
Out:
Data:
feature_0 feature_1 feature_2 feature_3 feature_4 feature_5 feature_6 feature_7 feature_8 feature_9 patient day
0 0.797303 0.486808 0.070104 0.971053 0.776341 0.045555 0.160014 0.276555 0.852008 0.377592 0 -22
1 0.704938 0.499511 0.814738 0.107659 0.560178 0.187534 0.326855 0.997646 0.089117 0.149500 0 -21
2 0.810045 0.687574 0.898904 0.362586 0.561158 0.053785 0.775372 0.518350 0.874378 0.198487 0 -20
3 0.328990 0.727673 0.770983 0.861185 0.714234 0.226629 0.467179 0.906795 0.928829 0.888581 0 -19
4 0.466125 0.609360 0.251939 0.953791 0.001671 0.897286 0.889254 0.114267 0.594083 0.663169 0 -18
... ... ... ... ... ... ... ... ... ... ... ... ...
1495 0.710897 0.730301 0.734666 0.053983 0.465668 0.821405 0.705441 0.804418 0.239905 0.157425 99 -5
1496 0.854815 0.512272 0.410890 0.467133 0.710533 0.935196 0.961018 0.557413 0.601999 0.643482 99 -4
1497 0.339058 0.771450 0.070506 0.661574 0.602983 0.228695 0.806739 0.714704 0.668646 0.745606 99 -3
1498 0.533159 0.611534 0.588047 0.262790 0.702912 0.800653 0.997572 0.772465 0.882509 0.878257 99 -2
1499 0.298069 0.396236 0.414331 0.599024 0.004090 0.181602 0.853995 0.138398 0.053779 0.525269 99 -1
[1500 rows x 12 columns]
Result:
feature_0 feature_1 feature_2 feature_3 feature_4 feature_5 feature_6 feature_7 feature_8 feature_9 patient day window
patient
0 0 0.797303 0.486808 0.070104 0.971053 0.776341 0.045555 0.160014 0.276555 0.852008 0.377592 0 -22 0
1 0.704938 0.499511 0.814738 0.107659 0.560178 0.187534 0.326855 0.997646 0.089117 0.149500 0 -21 0
2 0.810045 0.687574 0.898904 0.362586 0.561158 0.053785 0.775372 0.518350 0.874378 0.198487 0 -20 0
1 0.704938 0.499511 0.814738 0.107659 0.560178 0.187534 0.326855 0.997646 0.089117 0.149500 0 -21 1
2 0.810045 0.687574 0.898904 0.362586 0.561158 0.053785 0.775372 0.518350 0.874378 0.198487 0 -20 1
... ... ... ... ... ... ... ... ... ... ... ... ... ...
99 1497 0.339058 0.771450 0.070506 0.661574 0.602983 0.228695 0.806739 0.714704 0.668646 0.745606 99 -3 14
1498 0.533159 0.611534 0.588047 0.262790 0.702912 0.800653 0.997572 0.772465 0.882509 0.878257 99 -2 14
1497 0.339058 0.771450 0.070506 0.661574 0.602983 0.228695 0.806739 0.714704 0.668646 0.745606 99 -3 15
1498 0.533159 0.611534 0.588047 0.262790 0.702912 0.800653 0.997572 0.772465 0.882509 0.878257 99 -2 15
1499 0.298069 0.396236 0.414331 0.599024 0.004090 0.181602 0.853995 0.138398 0.053779 0.525269 99 -1 15
[3900 rows x 13 columns]
6 # Interesting code.
7 # np.lib.stride_tricks.sliding_window_view(df.index, 3)
8
9 # Libraries
10 import numpy as np
11 import pandas as pd
12
# Size of the synthetic dataset: number of rows, number of feature
# columns and number of distinct patient identifiers to draw from.
ROWS = 1500
COLS = 10
PATIENTS = 100

# Random feature matrix and a random patient identifier for every row.
features = np.random.random_sample((ROWS, COLS))
patients = np.random.randint(PATIENTS, size=(ROWS, 1))

# Assemble the DataFrame with columns feature_0 ... feature_<COLS-1>.
df = pd.DataFrame(data=features).add_prefix('feature_')
df['patient'] = patients

# Number the rows of each patient as -1, -2, ... in their current order,
# so that the ascending sort below ends every patient at day -1.
df['day'] = -(df.groupby('patient').cumcount() + 1)

# Order by patient and then by day, and rebuild a clean 0..ROWS-1 index.
df = df.sort_values(by=['patient', 'day'], ascending=[True, True])
df = df.reset_index(drop=True)

# Display the generated data.
print("\nData:")
print(df)
32
33 # ----------------------------------
34 # Method I: Own method
35 # ----------------------------------
def sliding_window_iter(series, size, include_id=True):
    """Yield every window of ``size`` consecutive rows of ``series``.

    Windows are selected by position, advance one row at a time and
    therefore overlap. An input with ``n`` rows produces
    ``n - size + 1`` windows, and none at all when ``n < size``.

    Parameters
    ----------
    series: pandas.DataFrame or pandas.Series
        The data to slide over (in this example, the rows of a
        single patient).
    size: int
        Number of rows in each window.
    include_id: bool
        Whether to tag each window with a ``window`` entry that holds
        the zero-based position of the window within ``series``.

    Yields
    ------
    pandas.DataFrame or pandas.Series
        The next window. It is an independent copy when
        ``include_id`` is true, otherwise a plain slice of ``series``.

    .. note:: The DataFrame should be pre-ordered to ensure
              that IDs remain consistent.
    """
    # The window identifier is simply the position of its first row, so
    # no separate counter is needed.
    for start_row in range(len(series) - size + 1):
        window = series.iloc[start_row:start_row + size]
        if include_id:
            # Tag a copy rather than the slice itself: assigning into a
            # slice of the caller's data is chained assignment, for
            # which pandas may emit SettingWithCopyWarning.
            window = window.copy()
            window['window'] = start_row
        yield window
47
48
# For each patient, generate all windows of three consecutive rows and
# stack them into a single frame; apply() then combines the per-patient
# frames into the final result.
grouped = df.groupby('patient')
result = grouped.apply(
    lambda group: pd.concat(sliding_window_iter(group, 3)))

# Display the windowed data.
print("\nResult:")
print(result)
56
57 # ----------------------------------
58 # Method II: Using rolling
59 # ----------------------------------
60 #a = df.groupby('patient').rolling(window=3)
61 #b = [win for win in a if win.shape[0] == 3]
62 #c = pd.concat(b)
63 #print(c)
Total running time of the script: ( 0 minutes 0.402 seconds)