Note
Click here to download the full example code
07b. Dengue features (parallel)
This Python script uses the pandas and Plotly libraries to create an interactive parallel coordinates plot. This type of plot is excellent for visualizing and exploring relationships in datasets with many variables (high-dimensional data). Each vertical line represents a different variable (a column from the dataset), and each colored line that snakes across the plot represents a single data point (a row from the dataset).
Out:
C:\Users\kelda\Desktop\repositories\github\python-spare-code\main\examples\plotly\plot_main07_parallel_v2.py:63: DtypeWarning:
Columns (3,5,7,9,12,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,33,34,37,40,41,42,43,44,48,49,50,51,52,53,54,55,56,57,61,62,64,66,67,68,69,70,73,74,75,77,79,80,81,82,83,84,85,87,88,89,90,95,96,97,99,101,102,103,104,107,108,110,113,115,117,119,120,121,122,125,126,127,129,130,132,133,134,135,136,137,143,152,153,155,156,157,159,161,162,163,164,165,166,167,168,174,175,176,177,178,182,184,193,197,198,199,200,202,203,207,208,209,213,214,215,218,219,220,221,224,225,226,227,229,232,233,234,235,236,237,238,240,241,242,244,247,250,253,254,255,259,261,262,263,264,265,266,267,268,270,271,272,273,274,275,276,277,278,279,280,281,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,301,302,303,304,305,307,308,309,310,311,312,313,314,315,317,318,319,320,324,325,326,327,328,330,333,334,335,336,337,338,339,340,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,381,382,384,385,386,391,392,393,395,398,403,404,407,408,409,410,411,412,413,414,415,416,417,418,419,424) have mixed types. Specify dtype option on import or set low_memory=False.
14 import plotly.graph_objects as go
15
16 import numpy as np
17 import pandas as pd
18 from pandas.api.types import is_string_dtype
19 from pandas.api.types import is_numeric_dtype
20
21 from plotly.io import show
22
# Detect whether the module-level name ``__file__`` exists. Looking up an
# undefined name raises NameError, so only that exception is caught; the
# previous bare ``except`` would also have swallowed KeyboardInterrupt
# and SystemExit.
try:
    __file__
    TERMINAL = True
except NameError:
    TERMINAL = False
28
29
def load_gridsearch_sklearn_iris(
        filepath='./data/sklearn-gridsearch/ls2d-iris.csv'):
    """Load a grid-search results CSV and its parallel-plot settings.

    Parameters
    ----------
    filepath: str or path-like
        Location of the CSV file to read. Defaults to the path that was
        previously hard-coded, which is resolved relative to the current
        working directory.

    Returns
    -------
    df: pd.DataFrame
        The content of the CSV file.
    line: dict
        Line styling for ``go.Parcoords``; lines are coloured by the
        ``mean_train_calinski_target`` column, with the colour scale
        spanning that column's minimum and maximum.
    columns: list of (str, str)
        Pairs of (column name in ``df``, display label).
    """
    # Load data
    df = pd.read_csv(filepath)

    # Columns to display, as (column name, display label)
    columns = [
        ('mean_train_spearman', 'Spearman'),
        ('mean_train_pearson', 'Pearson'),
        ('param_sae__max_epochs', 'Max Epochs'),
        ('param_sae__lr', 'Learning Rate'),
        ('mean_train_procrustes', 'Procrustes'),
        ('mean_train_calinski_target', 'Calinski'),
        ('mean_train_davies_b_target', 'Davies'),
        #('param_sae__module__layers', 'Layers')
    ]

    # Line: colour each row by its Calinski score
    score = df['mean_train_calinski_target']
    line = dict(color=score,
                colorscale='Electric',
                showscale=True,
                cmin=score.min(),
                cmax=score.max())

    # Return
    return df, line, columns
55
56
57
def load_raw_dengue(
        filepath='../../datasets/dengue-htd-dataset/combined.csv'):
    """Load the dengue CSV and its parallel-plot settings.

    Parameters
    ----------
    filepath: str or path-like
        Location of the CSV file to read. Defaults to the path that was
        previously hard-coded, which is resolved relative to the current
        working directory.

    Returns
    -------
    df: pd.DataFrame
        The content of the CSV file.
    line: dict
        Line styling for ``go.Parcoords``; lines are coloured by the
        ``haematocrit_percent`` column, with the colour scale spanning
        that column's minimum and maximum.
    columns: list of (str, str)
        Pairs of (column name in ``df``, display label).
    """
    # Load data. low_memory=False makes pandas infer each column's dtype
    # from the whole file instead of chunk by chunk, which is what
    # triggers the "mixed types" DtypeWarning on this dataset.
    df = pd.read_csv(filepath, low_memory=False)

    # Columns to display, as (column name, display label)
    columns = [
        ('age', 'Age'),
        ('body_temperature', 'Body Temperature'),
        ('weight', 'Weight'),
        ('plt', 'Platelets'),
        ('haematocrit_percent', 'Haematocrit')
    ]

    # Line: colour each row by its haematocrit value
    haematocrit = df['haematocrit_percent']
    line = dict(color=haematocrit,
                colorscale='Electric',
                showscale=True,
                cmin=haematocrit.min(),
                cmax=haematocrit.max())

    # Return
    return df, line, columns
80
def create_dimension(s, label=None):
    """Create the ``go.Parcoords`` dimension for a series.

    Dimension: numeric
        dict(range = [32000,227900],
             constraintrange = [100000,150000],
             label = "Block Height",
             values = df['blockHeight'])

    Dimension: enumerated
        dict(tickvals = [0,0.5,1,2,3],
             ticktext = ['A','AB','B','Y','Z'],
             label = 'Cylinder Material',
             values = df['cycMaterial'],
             visible = True)

    Parameters
    ----------
    s: pd.Series
        The values to display on the axis.
    label: str, optional
        Axis label. When omitted, the name of the series is used.

    Returns
    -------
    dict
        For numeric series, a dimension whose range and constraint range
        span the minimum and maximum of the series. For any other
        series, an enumerated dimension with one tick per distinct
        value (compared as strings), where ``values`` holds the integer
        tick position of each row.
    """
    if label is None:
        label = s.name

    if is_numeric_dtype(s):
        lowest, highest = s.min(), s.max()
        return dict(
            range=[lowest, highest],
            constraintrange=[lowest, highest],
            label=label, values=s)

    # Everything else is treated as categorical. Elements are converted
    # to str first so that columns with mixed element types can still be
    # sorted and enumerated instead of silently producing no dimension.
    text = s.apply(str)
    categories = sorted(text.unique())
    # The axis is numeric, so each row must carry the position of its
    # category (matching tickvals) rather than the raw string.
    positions = {category: i for i, category in enumerate(categories)}
    return dict(tickvals=np.arange(len(categories)),
                ticktext=categories,
                label=label, values=text.map(positions))
107
108
# Load data
df, line, columns = load_raw_dengue()
#df, line, columns = load_gridsearch_sklearn_iris()

# Build one axis per configured column. Each series is renamed to its
# display label because create_dimension labels the axis with the name
# of the series it receives; without this the labels declared in
# ``columns`` would never reach the figure.
dimensions = [
    create_dimension(df[name].rename(label))
    for name, label in columns
]

# Create figure
fig = go.Figure(data=go.Parcoords(line=line, dimensions=dimensions))

# Show
show(fig)
Total running time of the script: ( 0 minutes 6.099 seconds)