01. Sliding window

Out:

Data:
      feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  feature_6  feature_7  feature_8  feature_9  patient  day
0      0.797303   0.486808   0.070104   0.971053   0.776341   0.045555   0.160014   0.276555   0.852008   0.377592        0  -22
1      0.704938   0.499511   0.814738   0.107659   0.560178   0.187534   0.326855   0.997646   0.089117   0.149500        0  -21
2      0.810045   0.687574   0.898904   0.362586   0.561158   0.053785   0.775372   0.518350   0.874378   0.198487        0  -20
3      0.328990   0.727673   0.770983   0.861185   0.714234   0.226629   0.467179   0.906795   0.928829   0.888581        0  -19
4      0.466125   0.609360   0.251939   0.953791   0.001671   0.897286   0.889254   0.114267   0.594083   0.663169        0  -18
...         ...        ...        ...        ...        ...        ...        ...        ...        ...        ...      ...  ...
1495   0.710897   0.730301   0.734666   0.053983   0.465668   0.821405   0.705441   0.804418   0.239905   0.157425       99   -5
1496   0.854815   0.512272   0.410890   0.467133   0.710533   0.935196   0.961018   0.557413   0.601999   0.643482       99   -4
1497   0.339058   0.771450   0.070506   0.661574   0.602983   0.228695   0.806739   0.714704   0.668646   0.745606       99   -3
1498   0.533159   0.611534   0.588047   0.262790   0.702912   0.800653   0.997572   0.772465   0.882509   0.878257       99   -2
1499   0.298069   0.396236   0.414331   0.599024   0.004090   0.181602   0.853995   0.138398   0.053779   0.525269       99   -1

[1500 rows x 12 columns]

Result:
              feature_0  feature_1  feature_2  feature_3  feature_4  feature_5  feature_6  feature_7  feature_8  feature_9  patient  day  window
patient
0       0      0.797303   0.486808   0.070104   0.971053   0.776341   0.045555   0.160014   0.276555   0.852008   0.377592        0  -22       0
        1      0.704938   0.499511   0.814738   0.107659   0.560178   0.187534   0.326855   0.997646   0.089117   0.149500        0  -21       0
        2      0.810045   0.687574   0.898904   0.362586   0.561158   0.053785   0.775372   0.518350   0.874378   0.198487        0  -20       0
        1      0.704938   0.499511   0.814738   0.107659   0.560178   0.187534   0.326855   0.997646   0.089117   0.149500        0  -21       1
        2      0.810045   0.687574   0.898904   0.362586   0.561158   0.053785   0.775372   0.518350   0.874378   0.198487        0  -20       1
...                 ...        ...        ...        ...        ...        ...        ...        ...        ...        ...      ...  ...     ...
99      1497   0.339058   0.771450   0.070506   0.661574   0.602983   0.228695   0.806739   0.714704   0.668646   0.745606       99   -3      14
        1498   0.533159   0.611534   0.588047   0.262790   0.702912   0.800653   0.997572   0.772465   0.882509   0.878257       99   -2      14
        1497   0.339058   0.771450   0.070506   0.661574   0.602983   0.228695   0.806739   0.714704   0.668646   0.745606       99   -3      15
        1498   0.533159   0.611534   0.588047   0.262790   0.702912   0.800653   0.997572   0.772465   0.882509   0.878257       99   -2      15
        1499   0.298069   0.396236   0.414331   0.599024   0.004090   0.181602   0.853995   0.138398   0.053779   0.525269       99   -1      15

[3900 rows x 13 columns]

 # Interesting code.
 # np.lib.stride_tricks.sliding_window_view(df.index, 3)

 # Libraries
 import numpy as np
 import pandas as pd

 # Configuration: size of the synthetic dataset
 ROWS, COLS = 1500, 10
 PATIENTS = 100

 # Random feature matrix and a random patient id for every row
 features = np.random.random_sample((ROWS, COLS))
 patients = np.random.randint(PATIENTS, size=(ROWS,1))

 # Build the DataFrame with columns feature_0 ... feature_<COLS-1>
 df = pd.DataFrame(data=features).add_prefix('feature_')
 df['patient'] = patients

 # Number each patient's rows -1, -2, ... in order of appearance, then
 # sort so that every patient's rows run from the most negative day
 # up to day -1.
 df['day'] = -1 - df.groupby('patient').cumcount()
 df = df.sort_values(by=['patient', 'day'])
 df = df.reset_index(drop=True)

 # Show
 print("\nData:")
 print(df)

 # ----------------------------------
 # Method I: Own method
 # ----------------------------------
 def sliding_window_iter(series, size, include_id=True):
     """Yield every run of ``size`` consecutive rows of ``series``.

     Windows overlap and advance one row at a time, so an input with
     ``n`` rows produces ``n - size + 1`` windows (none when
     ``n < size``).

     Parameters
     ----------
     series: pd.DataFrame or pd.Series
         Object to slice positionally. The caller in this file passes
         the DataFrame of a single patient.
     size: int
         Number of rows in each window; must be at least 1.
     include_id: bool
         When True, each window gets a ``'window'`` entry holding the
         zero-based position of the window (a new column for a
         DataFrame).

     Yields
     ------
     An independent copy of each window, so the input is never
     modified.

     Raises
     ------
     ValueError
         If ``size`` is smaller than 1.

     .. note:: The DataFrame should be pre-ordered to ensure
               that IDs remain consistent.
     """
     if size < 1:
         raise ValueError("size must be a positive integer")

     # The window id equals its starting row, so no separate counter
     # is needed.
     for start_row in range(len(series) - size + 1):
         # Explicit positional slice, copied so that tagging the window
         # never writes into a slice of the caller's data (which would
         # raise SettingWithCopyWarning).
         s = series.iloc[start_row:start_row + size].copy()
         if include_id:
             s['window'] = start_row
         yield s


 # Number of rows in each sliding window
 WINDOW_SIZE = 3

 # Keep only patients with at least one full window. A shorter group
 # would produce no windows at all and pd.concat raises ValueError
 # ("No objects to concatenate") on an empty sequence, which would
 # abort the whole computation.
 group_sizes = df.groupby('patient')['patient'].transform('size')
 eligible = df[group_sizes >= WINDOW_SIZE]

 # Group by patient and compute sliding window: every patient's
 # windows are stacked into one frame, tagged by the 'window' column.
 result = eligible.groupby('patient').apply(lambda x:
     pd.concat(sliding_window_iter(x, WINDOW_SIZE)))

 # Show
 print("\nResult:")
 print(result)

 # ----------------------------------
 # Method II: Using rolling
 # ----------------------------------
 #a = df.groupby('patient').rolling(window=3)
 #b = [win for win in a if win.shape[0] == 3]
 #c = pd.concat(b)
 #print(c)

Total running time of the script: ( 0 minutes 0.402 seconds)

Gallery generated by Sphinx-Gallery