06. Clean OUCRU v1.0

 6 # Libraries
 7 import pandas as pd
 8
 9 # -----------------------------
10 # Load data
11 # -----------------------------
12 # Load data
13 data = pd.read_csv('./data/stacked-oucru/combined_stacked.csv',
14     parse_dates=['date'])
15
16 # Show
17 print(data)
18 print(data.columns)
19
20 # -----------------------------
21 # Format
22 # -----------------------------
23 # Drop
24 drop = [
25     'Unnamed: 0',
26     'Unnamed: 0.1',
27     'result_old',
28     'date_old',
29     'dsource'
30 ]
31
32 # Rename
33 rename = {
34     'study_no' : 'patient',
35     'date': 'date_collected',
36     'column': 'code',
37 }
38
39 # Replace
40 replace = {
41     'result': {
42         'True': 1,
43         'False': 0
44     }
45 }
46
47 # Format
48 data = data.drop(columns=drop)
49 data = data.rename(columns=rename)
50 data['id'] = data.index.values
51 data['uuid'] = data.index.values
52 data['date_outcome'] = data.date_collected
53 data = data.replace(replace)
54 data = data[sorted(data.columns.values)]
55
56 # Keep only those whose result can be cast to number
57 data.result = pd.to_numeric(data.result, errors='coerce')
58
59 # Remove nan
60 data = data[data.result.notna()]
61
62 # Show types
63 print(data.dtypes)
64
65 # Save
66 data.head(10000).to_csv('./outputs/combined_stack_head10000.csv')
67 data.to_csv('./outputs/combined_stacked.csv')

Total running time of the script: ( 0 minutes 0.000 seconds)

Gallery generated by Sphinx-Gallery