02. Aggregation

Aggregate each patient’s data into a single record of summary statistics (the example below stores one patient per column of the result).

Out:

    1/  300. Patient '0'
   11/  300. Patient '10'
   21/  300. Patient '20'
   31/  300. Patient '30'
   41/  300. Patient '40'
   51/  300. Patient '50'
   61/  300. Patient '60'
   71/  300. Patient '70'
   81/  300. Patient '80'
   91/  300. Patient '90'
  101/  300. Patient '100'
              0         1         2         3         4         5         6    ...        94         95         96         97            98         99          100
max_f_0  0.999396  0.999916  0.998421  0.999023  0.999431  0.997913  0.997867  ...   0.995800   0.998766   0.998735   0.998977  9.976324e-01   0.991941    0.998891
max_f_1  0.999436  0.997938  0.999444  0.999926  0.998605  0.995591  0.999488  ...   0.998451   0.999851   0.995863   0.998557  9.971850e-01   0.992134    0.994609
max_f_2  0.996942  0.998603  0.998015  0.995936  0.993793  0.999142  0.998222  ...   0.999786   0.996589   0.994065   0.998225  9.991617e-01   0.999670    0.997319
max_f_3  0.999834  0.999682  0.994780  0.998556  0.999465  0.997353  0.998338  ...   0.998686   0.999914   0.999150   0.996743  9.990733e-01   0.998847    0.998722
max_f_4  0.998651  0.999780  0.999122  0.998361  0.998318  0.999930  0.999977  ...   0.998310   0.999673   0.993660   0.999548  9.964524e-01   0.989648    0.995319
max_f_5  0.997312  0.998754  0.998809  0.997283  0.999338  0.998211  0.999350  ...   0.994484   0.999991   0.998394   0.999013  9.987785e-01   0.999315    0.999718
max_f_6  0.999305  0.999753  0.999907  0.996438  0.999014  0.998860  0.999388  ...   0.993969   0.997109   0.999011   0.997794  9.979976e-01   0.999533    0.999551
max_f_7  0.998653  0.999945  0.996116  0.990794  0.999622  0.994723  0.998606  ...   0.997663   0.999109   0.999812   0.999841  9.911849e-01   0.999201    0.996596
max_f_8  0.996390  0.997969  0.997322  0.997818  0.997749  0.997650  0.999295  ...   0.999591   0.998346   0.991803   0.999475  9.987050e-01   0.998879    0.997566
max_f_9  0.999218  0.985888  0.999487  0.995975  0.993271  0.998376  0.996804  ...   0.996486   0.998782   0.998949   0.999446  9.992994e-01   0.999501    0.999945
max_id   0.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  ...  94.000000  95.000000  96.000000  97.000000  9.800000e+01  99.000000  100.000000
min_f_0  0.006612  0.005354  0.001194  0.001723  0.000572  0.001657  0.000188  ...   0.000205   0.003465   0.002174   0.000060  4.641318e-04   0.003149    0.012990
min_f_1  0.000657  0.009678  0.003852  0.005713  0.000374  0.001143  0.000361  ...   0.002627   0.001206   0.000043   0.000317  2.485503e-04   0.000097    0.001037
min_f_2  0.004106  0.002320  0.000009  0.001365  0.002018  0.003379  0.005193  ...   0.001478   0.000453   0.003230   0.000050  2.346579e-03   0.004809    0.000394
min_f_3  0.001637  0.000820  0.001093  0.000946  0.002665  0.000166  0.006021  ...   0.003038   0.002047   0.003247   0.000594  3.530193e-03   0.001577    0.000516
min_f_4  0.001236  0.000880  0.001519  0.002644  0.002253  0.004729  0.001125  ...   0.000415   0.004389   0.003957   0.001519  1.975811e-03   0.000841    0.000739
min_f_5  0.007147  0.001026  0.000837  0.000039  0.000605  0.002531  0.001165  ...   0.002278   0.000446   0.003155   0.002953  4.909229e-03   0.000199    0.001498
min_f_6  0.001654  0.000702  0.000005  0.002121  0.002108  0.003364  0.001350  ...   0.001140   0.001168   0.004485   0.001613  5.254541e-08   0.001442    0.000083
min_f_7  0.001018  0.000803  0.010409  0.002907  0.003485  0.002294  0.004204  ...   0.001379   0.001513   0.005337   0.003602  5.742431e-04   0.002944    0.001785
min_f_8  0.004807  0.000146  0.002599  0.000593  0.001437  0.000679  0.000507  ...   0.001697   0.001800   0.001131   0.002471  1.088023e-03   0.000636    0.001334
min_f_9  0.001023  0.000182  0.000290  0.000437  0.000455  0.008994  0.002949  ...   0.000819   0.003768   0.000417   0.003779  1.259451e-03   0.001748    0.001426
min_id   0.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  ...  94.000000  95.000000  96.000000  97.000000  9.800000e+01  99.000000  100.000000
id       0.000000  1.000000  2.000000  3.000000  4.000000  5.000000  6.000000  ...  94.000000  95.000000  96.000000  97.000000  9.800000e+01  99.000000  100.000000

[23 rows x 101 columns]

  8 # Generic
  9 import warnings
 10 import numpy as np
 11 import pandas as pd
 12
 13 # Ignore warnings
 14 warnings.simplefilter("ignore")
 15
 16 def load_data():
 17     return pd.read_csv('./laboratory.csv', parse_dates=['date'])
 18
 19 def create_data():
 20     """"""
 21     # Configuration
 22     ROWS, COLS = 150000, 10
 23     PATIENTS = 300
 24
 25     # Create random values
 26     features = np.random.random_sample((ROWS, COLS))
 27     patients = np.random.randint(PATIENTS, size=(ROWS, 1))
 28
 29     # Create DataFrame
 30     df = pd.DataFrame(data=features)
 31     df = df.add_prefix('f_')
 32     df['id'] = patients
 33
 34     # Return
 35     return df
 36
 37
 38 # -----------------------
 39 # Read data
 40 # -----------------------
 41 # Read data
 42 #data = pd.read_csv('./laboratory.csv', parse_dates=['date'])
 43
 44 data = create_data()
 45
 46
 47 # -----------------------
 48 # Format
 49 # -----------------------
 50 # Configuration
 51 show_progress_every = 10 # Number of patients
 52 break_loop_after = 100  # Number of patients or None
 53
 54 # Create empty outcome
 55 results = pd.DataFrame()
 56
 57 # Groups
 58 groups = data.groupby(by='id')
 59
 60 # Step by step (16270 groups!)
 61 for i, (k, g) in enumerate(groups):
 62     # Show information
 63     if (i % show_progress_every) == 0:
 64             print("%5s/%5s. Patient '%s'" % (i+1, len(groups), k))
 65
 66     # Show dataframe
 67     #print(g)
 68
 69     # Sort by dates (if needed)
 70
 71     # Fill empty values
 72     #g.fillna(method='backfill', inplace=True)
 73
 74     # Compute statistics
 75     # ------------------
 76     # .. note: Forward/backward filling does not affect
 77     #          the max/min but it affects the mean or
 78     #          median.
 79     #
 80     # .. note: You could also create a map with all the
 81     #          functions you want to apply instead of
 82     #          using describe. This is useful if you need
 83     #          specific stats
 84
 85     # Get the common stats
 86     #d = g.describe()
 87
 88     # Get specific stats for all columns
 89     d = g.agg({c: ['max', 'min'] for c in g.columns})
 90
 91     # Stack the describe outcome
 92     d = d.stack()
 93     d.index = ['_'.join(e) for e in d.index.tolist()]
 94     d['id'] = k                  # patient identifier
 95     #d['date'] = min(g['date'])   # admission date
 96
 97     # Append result
 98     results = pd.concat([results, d], ignore_index=True, axis=1)
 99
100     # Break clause for testing
101     #if break_loop_after is not None:
102     if i==break_loop_after:
103             break
104
105
106 # Show columns
107 #print(results.columns.values)
108
109 print(results)
110
111 # Show results (partially)
112 #print(results[['id', 'date', 'max_wcc']])
113
114 # .. note: Once it works as you want, you can try to do it
115 #          in one single line and compare the results to
116 #          verify that it is correct.

Total running time of the script: ( 0 minutes 0.868 seconds)

Gallery generated by Sphinx-Gallery