07b. Dengue features (parallel)

This Python script uses the pandas and Plotly libraries to create an interactive parallel coordinates plot. This type of plot is well suited to visualizing and exploring relationships in datasets with many variables (high-dimensional data). Each vertical axis represents a different variable (a column from the dataset), and each colored line that snakes across the axes represents a single data point (a row from the dataset).

Out:

C:\Users\kelda\Desktop\repositories\github\python-spare-code\main\examples\plotly\plot_main07_parallel_v2.py:63: DtypeWarning:

Columns (3,5,7,9,12,14,15,16,17,18,19,20,21,23,24,25,26,27,28,29,30,33,34,37,40,41,42,43,44,48,49,50,51,52,53,54,55,56,57,61,62,64,66,67,68,69,70,73,74,75,77,79,80,81,82,83,84,85,87,88,89,90,95,96,97,99,101,102,103,104,107,108,110,113,115,117,119,120,121,122,125,126,127,129,130,132,133,134,135,136,137,143,152,153,155,156,157,159,161,162,163,164,165,166,167,168,174,175,176,177,178,182,184,193,197,198,199,200,202,203,207,208,209,213,214,215,218,219,220,221,224,225,226,227,229,232,233,234,235,236,237,238,240,241,242,244,247,250,253,254,255,259,261,262,263,264,265,266,267,268,270,271,272,273,274,275,276,277,278,279,280,281,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,301,302,303,304,305,307,308,309,310,311,312,313,314,315,317,318,319,320,324,325,326,327,328,330,333,334,335,336,337,338,339,340,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,381,382,384,385,386,391,392,393,395,398,403,404,407,408,409,410,411,412,413,414,415,416,417,418,419,424) have mixed types. Specify dtype option on import or set low_memory=False.

 14 import plotly.graph_objects as go
 15
 16 import numpy as np
 17 import pandas as pd
 18 from pandas.api.types import is_string_dtype
 19 from pandas.api.types import is_numeric_dtype
 20
 21 from plotly.io import show
 22
 23 try:
 24     __file__
 25     TERMINAL = True
 26 except:
 27     TERMINAL = False
 28
 29
 30 def load_gridsearch_sklearn_iris():
 31     """This method..."""
 32     # Define datapath
 33     FILEPATH = './data/sklearn-gridsearch/ls2d-iris.csv'
 34     # Load data
 35     df = pd.read_csv(FILEPATH)
 36     # Columns
 37     columns = [
 38         ('mean_train_spearman', 'Spearman'),
 39         ('mean_train_pearson', 'Pearson'),
 40         ('param_sae__max_epochs', 'Max Epochs'),
 41         ('param_sae__lr', 'Learning Rate'),
 42         ('mean_train_procrustes', 'Procrustes'),
 43         ('mean_train_calinski_target', 'Calinski'),
 44         ('mean_train_davies_b_target', 'Davies'),
 45         #('param_sae__module__layers', 'Layers')
 46     ]
 47     # Line
 48     line = dict(color=df.mean_train_calinski_target,
 49        colorscale='Electric',
 50        showscale=True,
 51        cmin=df.mean_train_calinski_target.min(),
 52        cmax=df.mean_train_calinski_target.max())
 53     # Return
 54     return df, line, columns
 55
 56
 57
 58 def load_raw_dengue():
 59     """This method..."""
 60     # Define datapath
 61     FILEPATH = '../../datasets/dengue-htd-dataset/combined.csv'
 62     # Load data
 63     df = pd.read_csv(FILEPATH)
 64     # Columns
 65     columns = [
 66         ('age', 'Age'),
 67         ('body_temperature', 'Body Temperature'),
 68         ('weight', 'Weight'),
 69         ('plt', 'Platelets'),
 70         ('haematocrit_percent', 'Haematocrit')
 71     ]
 72     # Line
 73     line = dict(color=df.haematocrit_percent,
 74         colorscale='Electric',
 75         showscale=True,
 76         cmin=df.haematocrit_percent.min(),
 77         cmax=df.haematocrit_percent.max())
 78     # Return
 79     return df, line, columns
 80
 81 def create_dimension(s):
 82     """This method creates the dimesions.
 83
 84     Dimension: numeric
 85     dict(range = [32000,227900],
 86          constraintrange = [100000,150000],
 87          label = "Block Height",
 88          values = df['blockHeight'])
 89
 90     Dimension: enumerated
 91     dict(tickvals = [0,0.5,1,2,3],
 92          ticktext = ['A','AB','B','Y','Z'],
 93          label = 'Cyclinder Material',
 94          values = df['cycMaterial'],
 95          visible = True)
 96     """
 97     if is_numeric_dtype(s):
 98         return dict(
 99             range=[s.min(), s.max()],
100             constraintrange=[s.min(), s.max()],
101             label=s.name, values=s)
102     if is_string_dtype(s):
103         s = s.apply(str)
104         return dict(tickvals=np.arange(s.nunique()),
105             ticktext=sorted(s.unique()),
106             label=s.name, values=s)
107
108
# Load data
df, line, columns = load_raw_dengue()
# df, line, columns = load_gridsearch_sklearn_iris()

# Build one axis per selected column. The series is renamed so that the
# axis shows the display label instead of the raw column name, because
# create_dimension uses the series name as the axis label.
dimensions = [
    create_dimension(df[name].rename(label))
    for name, label in columns
]

# Create figure
fig = go.Figure(data=go.Parcoords(line=line, dimensions=dimensions))

# Show
show(fig)

Total running time of the script: ( 0 minutes 6.099 seconds)

Gallery generated by Sphinx-Gallery