01. Basic example

A basic example showing how to use TPOT.

 8 # Import
 9 import numpy as np
10 import pandas as pd
11
12 # Specific
13 from tpot import TPOTClassifier
14
15 # Import own
16 from pySML2.preprocessing.splitters import cvs_hos_split
17 from pySML2.preprocessing.splitters import kfolds_split
18
19
20 # ---------------------------------------------
21 # Configuration
22 # ---------------------------------------------
23 # The input features and label for the algorithm
24 features = sorted(['alb', 'alp', 'alt', 'baso', 'bil', 'cl', 'cre', 'crp', 'egfr',
25                   'eos', 'k', 'ly',  'mcv', 'mono', 'mpv', 'nrbca', 'plt', 'rbc',
26                   'rdw',  'urea', 'wbc'])
27
28 # The labels
29 labels = sorted(['micro_confirmed'])
30
31 # The splits
32 n_splits = 10
33
34 # Dataset
35 # -------
36 # Dataset filepath
37 filepath = 'data/dataset.csv'
38
39 # ---------------------------------------------
40 # Load dataset and format it
41 # ---------------------------------------------
42 # Read data
43 data = pd.read_csv(filepath)
44 data.columns = [c.lower() for c in data.columns.values]
45 # data = data[features + labels]
46
47 # Missing values
48 data['missing'] = data[features].isnull().sum(axis=1)
49
50 # The indexes for complete profiles
51 cmp = (data.missing == 0)
52
53 # Split in CVS and HOS
54 data['cvs_hos_split'] = cvs_hos_split(data, selected_rows=cmp)
55
56 # ---------------------------------------------
57 # Train
58 # ---------------------------------------------
59 data[(data.missing == 0)].to_csv('outcomes/main01/tpot_data_cvs.csv')
60 data[(data.cvs_hos_split == 'hos')].to_csv('outcomes/main01/tpot_data_hos.csv')
61 data[(data.cvs_hos_split == 'cvs')].to_csv('outcomes/main01/tpot_data_cvs.csv')
62 data[(data.cvs_hos_split == 'hos')].to_csv('outcomes/main01/tpot_data_hos.csv')
63
64 # ---------------------------------------------
65 # Train
66 # ---------------------------------------------
67 # The indexes used for cross validation
68 cvs_idxs = (data.cvs_hos_split == 'cvs')
69 hos_idxs = (data.cvs_hos_split == 'hos')
70
71 # Create matrices train
72 X_train = data[cvs_idxs][features].to_numpy()
73 y_train = data[cvs_idxs][labels].to_numpy()
74
75 # Create matrices test
76 X_test = data[cvs_idxs][features].to_numpy()
77 y_test = data[cvs_idxs][labels].to_numpy()
78
79 # ---------------------------------------------
80 # Search
81 # ---------------------------------------------
82 # Create genetic search
83 tpot = TPOTClassifier(generations=5, verbosity=2,
84                       scoring='roc_auc', cv=2)
85
86 # Fit
87 tpot.fit(X_train, y_train)
88
89 # Score
90 score = tpot.score(X_test, y_test)
91
92 # Save
93 tpot.export('outcomes/main01/tpot_best_pipeline.py')

Total running time of the script: ( 0 minutes 0.000 seconds)

Gallery generated by Sphinx-Gallery