.. DO NOT EDIT. .. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY. .. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE: .. "_examples/scikits/plot_threshold_moving.py" .. LINE NUMBERS ARE GIVEN BELOW. .. only:: html .. note:: :class: sphx-glr-download-link-note Click :ref:`here ` to download the full example code .. rst-class:: sphx-glr-example-title .. _sphx_glr__examples_scikits_plot_threshold_moving.py: 06. Threshold moving approaches ------------------------------- .. note:: https://en.wikipedia.org/wiki/Sensitivity_and_specificity .. GENERATED FROM PYTHON SOURCE LINES 7-296 .. rst-class:: sphx-glr-horizontal * .. image-sg:: /_examples/scikits/images/sphx_glr_plot_threshold_moving_001.png :alt: plot threshold moving :srcset: /_examples/scikits/images/sphx_glr_plot_threshold_moving_001.png :class: sphx-glr-multi-img * .. image-sg:: /_examples/scikits/images/sphx_glr_plot_threshold_moving_002.png :alt: plot threshold moving :srcset: /_examples/scikits/images/sphx_glr_plot_threshold_moving_002.png :class: sphx-glr-multi-img * .. image-sg:: /_examples/scikits/images/sphx_glr_plot_threshold_moving_003.png :alt: From 'roc_curve', th=0.003, npv=0.077, ppv=0.961 :srcset: /_examples/scikits/images/sphx_glr_plot_threshold_moving_003.png :class: sphx-glr-multi-img * .. image-sg:: /_examples/scikits/images/sphx_glr_plot_threshold_moving_004.png :alt: From 'manual thresholds', th=0.02, npv=0.8, ppv=0.259 :srcset: /_examples/scikits/images/sphx_glr_plot_threshold_moving_004.png :class: sphx-glr-multi-img .. rst-class:: sphx-glr-script-out Out: .. code-block:: none Results from 'roc_curve' th ppv npv sens spec gmean 64 0.000000 0.951872 NaN 1.000000 0.000000 0.000000 63 0.003333 0.960870 0.077500 0.772727 0.377622 0.147559 62 0.005000 0.959732 0.070954 0.750000 0.377622 0.147020 61 0.010000 0.960166 0.072173 0.750000 0.384615 0.207917 60 0.013333 0.960315 0.069936 0.727273 0.405594 0.253707 .. ... ... ... ... ... ... 4 0.910000 0.960166 0.048730 0.068182 0.944056 0.543119 3 0.970000 0.948361 0.047961 0.045455 0.951049 0.537086 2 0.980000 0.901793 0.046897 0.022727 0.951049 0.532181 1 1.000000 0.914625 0.047226 0.022727 0.958042 0.540184 0 2.000000 NaN 0.048128 0.000000 1.000000 0.000000 [65 rows x 6 columns] Results from manual th ppv npv sens spec 0 0.000000 0.276423 0.843750 0.772727 0.377622 1 0.010101 0.273504 0.828571 0.727273 0.405594 2 0.020202 0.258929 0.800000 0.659091 0.419580 3 0.030303 0.259259 0.797468 0.636364 0.440559 4 0.040404 0.254717 0.790123 0.613636 0.447552 .. ... ... ... ... ... 95 0.959596 0.222222 0.764045 0.045455 0.951049 96 0.969697 0.222222 0.764045 0.045455 0.951049 97 0.979798 0.125000 0.759777 0.022727 0.951049 98 0.989899 0.142857 0.761111 0.022727 0.958042 99 1.000000 NaN 0.764706 0.000000 1.000000 [100 rows x 5 columns] | .. code-block:: default :lineno-start: 8 # Libraries import numpy as np import pandas as pd import matplotlib.pyplot as plt # Libraries scikits from sklearn.datasets import fetch_openml from sklearn.datasets import load_iris from sklearn.datasets import load_breast_cancer from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline from sklearn.linear_model import LogisticRegression from sklearn.ensemble import ExtraTreesClassifier from sklearn.model_selection import train_test_split def display_npv_ppv_curve(ppv, npv, ths, idx): """This method plots the curve Parameters ---------- ppv: array-like npv: array-like ths: array-like idx: integer """ # Display f, axes = plt.subplots(1, 1) axes.plot(ths, npv, marker='o', label='npv') axes.plot(ths, ppv, marker='o', label='ppv') axes.set(aspect='equal', xlim=[0,1], ylim=[0,1], xlabel='threshold', title='th={0}, npv={1}, ppv={2}' \ .format(round(ths[idx], 3), round(npv[idx], 3), round(ppv[idx], 3))) plt.legend() def npv_ppv_from_sens_spec(sens, spec, prev): """Compute npv and ppv. Parameters ---------- sens: array-like spec: array-like prev: float """ npv = (spec * (1 - prev)) / ((spec * (1 - prev)) + ((1 - sens) * prev)) ppv = (sens * prev) / ((sens * prev) + ((1 - spec) * (1 - prev))) return npv, ppv # ---------------------- # Load data # ---------------------- # Fetch data X, y = fetch_openml(data_id=1464, return_X_y=True, as_frame=True) #parser='auto') # Format y to binary (0,1) y = y.cat.rename_categories({'1':0, '2':1}) # Split X_train, X_test, y_train, y_test = \ train_test_split(X, y, stratify=y) # ---------------------- # Create pipeline # ---------------------- # Create pipeline clf = make_pipeline( StandardScaler(), #LogisticRegression(random_state=0) ExtraTreesClassifier(n_estimators=100) ) # Train clf.fit(X_train, y_train) # Predictions y_pred = clf.predict(X_test) y_prob = clf.predict_proba(X_test) # .. note: Some classifiers do not have the decision # function method but all implement the # predict_proba. #y_score = clf.decision_function(X_test) # ----------------------- # Show confusion matrix # ----------------------- # .. note: We are using Display objects to plot # the graphs, they could also be displayed # using the functions or matplotlib # directly. # # plot_roc_curve(clf, X_test, y_test, ax=ax_roc, name=name) # plot_det_curve(clf, X_test, y_test, ax=ax_roc, name=name) # Libraries from sklearn.metrics import confusion_matrix from sklearn.metrics import ConfusionMatrixDisplay from sklearn.metrics import roc_curve from sklearn.metrics import RocCurveDisplay from sklearn.metrics import precision_recall_curve from sklearn.metrics import PrecisionRecallDisplay # Value counts value_counts = y.value_counts() # Prevalence prev = value_counts[1] / len(y_test) # Confusion matrix cm = confusion_matrix(y_test, y_pred) # .. note: It is possible to use either y_score # or y_prob in the roc_curve function # .. note: sens=tpr, spec=1-fpr # Compute ROC curve fpr, tpr, ths1 = roc_curve( y_test, y_prob[:, 1], drop_intermediate=False) # .. note: ppv=prec, sens=recall # Compute PR curve prec, recall, ths2 = \ precision_recall_curve(y_test, y_prob[:, 1]) # Create plot objects cm_display = ConfusionMatrixDisplay(cm) roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr) pr_display = PrecisionRecallDisplay(precision=prec, recall=recall) # Create figure f, axes = plt.subplots(1, 2, figsize=(12, 4)) axes = axes.flatten() # Display cm_display.plot() roc_display.plot(ax=axes[0]) pr_display.plot(ax=axes[1]) # Configure for ax in axes: ax.set(aspect='equal', xlim=[0,1], ylim=[0,1]) plt.tight_layout() # --------- # Option I # --------- # Compute the npv and ppv from the sensitivity # and specificity values obtained from the # 'roc_curve' function. # Compute ROC curve fpr, tpr, ths1 = roc_curve( y_test, y_prob[:, 1], drop_intermediate=False) # Compute npv and ppv npv, ppv = npv_ppv_from_sens_spec( \ sens=tpr, spec=1-fpr, prev=prev) # Create DataFrame results = pd.DataFrame( data=np.array([ths1, ppv, npv, tpr, 1-fpr]).T, columns=['th', 'ppv', 'npv', 'sens', 'spec'] ).sort_values(by='th') # Add gmean results['gmean'] = np.sqrt(tpr * (1-fpr)) # Find closest to 0.8 idx = np.nanargmin(np.abs(npv - 0.8)) # Find best gmean idx2 = np.argmax(results.gmean) # Display display_npv_ppv_curve(ppv, npv, ths1, idx) # Title plt.suptitle("From 'roc_curve'") # Show print("\n\nResults from 'roc_curve'") print(results) """ # --------- # Option II # --------- # NOT WORKING! # # Compute the npv by knowing that it is the inverse # of the precision, thus calling the function # 'precision_recall_curve' with opposite labels and # probabilities. # .. note: invprec=npv # .. note: invrec=fnr # Computed inverted PR curve invprec, invrec, invths2 = \ precision_recall_curve(y_test, y_prob[:, 0], pos_label=clf.classes_[0]) # Create DataFrame results = pd.DataFrame() results['th'] = invths2[::-1] results['npv'] = invprec[1:] results['ppv'] = 0.0 results = results.sort_values(by='th') # Find closest to 0.8 idx = np.nanargmin(np.abs(invprec - 0.8)) # Show print("\n\nResults from 'precision_recall_curve'") print(results) print("\nIndex: {0} | Threshold: {1} | NPV: {2}" \ .format(idx, invths2[idx-1], npv[idx])) # Display graph display_npv_ppv_curve( results.ppv, results.npv, results.th, idx) # Title plt.suptitle("From 'precision_recall_curve'") """ # ---------- # Option II # ---------- # Perform the computation of metrics and the threshold # search based on a condition (e.g. npv closest to an # specific value) manually. # Thresholds thresholds = np.linspace(0,1,100) # Metrics def metrics(y_test, y_prob, th, **kwargs): # Libraries from sklearn.metrics import confusion_matrix # Compute confusion matrix cm = confusion_matrix(y_test, y_prob>th) tn, fp, fn, tp = cm.ravel() # Compute metrics return {'th': th, 'ppv': tp/(tp+fp), 'npv': tn/(tn+fn), 'sens': tp/(tp+fn), 'spec': tn/(tn+fp)} # Compute scores scores = [metrics(y_test, y_prob[:,1], t) \ for t in thresholds] # Create DataFrame results = pd.DataFrame(scores) # Find idx where npv is closest to 0.8 idx = np.nanargmin(np.abs(results.npv - 0.8)) # Show print("\n\nResults from manual") print(results) # Display graph display_npv_ppv_curve( results.ppv, results.npv, results.th, idx) # Title plt.suptitle("From 'manual thresholds'") # Show plt.show() .. rst-class:: sphx-glr-timing **Total running time of the script:** ( 0 minutes 0.682 seconds) .. _sphx_glr_download__examples_scikits_plot_threshold_moving.py: .. only :: html .. container:: sphx-glr-footer :class: sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: plot_threshold_moving.py ` .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: plot_threshold_moving.ipynb ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_