.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "_examples/utils/plot_collateral_sensitivity.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download__examples_utils_plot_collateral_sensitivity.py>`
        to download the full example code

.. rst-class:: sphx-glr-example-title

.. _sphx_glr__examples_utils_plot_collateral_sensitivity.py:


Collateral Sensitivity Index
-----------------------------

To profile the time spent in each function and statement, run the
script with the following command::

    $ python -m cProfile -s cumtime plot_collateral_sensitivity.py > outcome.csv

.. GENERATED FROM PYTHON SOURCE LINES 11-224

.. code-block:: default
   :lineno-start: 11

    # Libraries
    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    #
    from pathlib import Path
    from datetime import datetime
    from itertools import combinations
    from mic import mutual_info_matrix_v3

    # Use the same size (8) for axes titles, axes labels and tick labels.
    # See https://matplotlib.org/devdocs/users/explain/customizing.html
    mpl.rcParams.update({
        'axes.titlesize': 8,
        'axes.labelsize': 8,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
    })

    def collateral_resistance_index(m):
        """Compute the collateral resistance index from a 2x2 matrix.

        The index is derived from the mutual information matrix and
        assumes exactly two classes, resistant (R) and sensitive (S).
        It is the sum of the diagonal entries minus the sum of the
        off-diagonal entries.

        Parameters
        ----------
        m: np.array
            The mutual information matrix; only the entries at
            positions [0, 0], [0, 1], [1, 0] and [1, 1] are read.

        Returns
        -------
        The scalar (m[0, 0] + m[1, 1]) - (m[0, 1] + m[1, 0]).
        """
        diagonal = m[0, 0] + m[1, 1]
        off_diagonal = m[0, 1] + m[1, 0]
        return diagonal - off_diagonal

    def CRI(x, func):
        """Compute the collateral resistance index for one contingency row.

        Parameters
        ----------
        x: object with attributes SS, SR, RS and RR
            The pair counts for one antimicrobial pair (e.g. a row of
            the contingency DataFrame when used with ``apply``).
        func: callable
            Called as ``func(ct=...)`` with the 2x2 contingency array;
            its result is passed to ``collateral_resistance_index``.

        Returns
        -------
        The value returned by ``collateral_resistance_index``.
        """
        # Contingency layout: first row S*, second row R*.
        sensitive_row = [x.SS, x.SR]
        resistant_row = [x.RS, x.RR]
        contingency = np.array([sensitive_row, resistant_row])
        return collateral_resistance_index(func(ct=contingency))


    def combo_v1():
        """Build pairwise antimicrobial combinations from the global ``df``.

        Rows of the module-level DataFrame ``df`` are grouped by the
        columns 'o' and 's'. For every pair of rows within a group one
        output row is produced with the columns o, s, ax, ay, rx, ry.

        Returns
        -------
        pd.DataFrame
            One row per pair; an empty DataFrame if no pair exists.
        """
        # Grow the result one single-row frame at a time.
        result = pd.DataFrame()
        for _, group in df.groupby(['o', 's']):
            for first, second in combinations(group.index, 2):
                record = pd.Series({
                    'o': df.loc[first, 'o'],
                    's': df.loc[first, 's'],
                    'ax': df.loc[first, 'a'],
                    'ay': df.loc[second, 'a'],
                    'rx': df.loc[first, 'r'],
                    'ry': df.loc[second, 'r']
                })
                result = pd.concat([result, record.to_frame().T])

        # The string below is an inert statement holding a disabled
        # variant of the loop above; it is never executed.
        """
        # Build combination
        c = pd.DataFrame()
        for i, g in data.groupby(['specimen_code',
                                  'microorganism_code',
                                  'laboratory_number']):
            for index in list(combinations(g.index, 2)):
                i, j = index
                s = pd.Series({
                    'o': data.loc[i, 'microorganism_code'],
                    's': data.loc[i, 'laboratory_number'],
                    'ax': data.loc[i, 'antimicrobial_code'],
                    'ay': data.loc[j, 'antimicrobial_code'],
                    'rx': data.loc[i, 'sensitivity'],
                    'ry': data.loc[j, 'sensitivity']
                })
                c = pd.concat([c, s.to_frame().T])


        # Add class
        c['class'] = c.rx + c.ry
        """
        return result


    def create_df_combo_v1(d, col_o='o', # organism
                              col_s='s', # sample
                              col_a='a', # antimicrobial
                              col_r='r'): # outcome / result
        """Creates the dataframe with all pairwise combinations.

        Records are grouped by (sample, organism). Within each group the
        rows are sorted by antimicrobial and every pair of rows (x, y)
        produces one output row.

        .. note:: There might be an issue if there are two different outcomes
                  for the same record. For example, a susceptibility test
                  record for penicillin (APEN) with R and another one with
                  S. Warn of this issue if it appears!

        Parameters
        ----------
        d: pd.DataFrame
            The susceptibility test records.
        col_o: str
            Name of the organism column.
        col_s: str
            Name of the sample column.
        col_a: str
            Name of the antimicrobial column.
        col_r: str
            Name of the outcome / result column.

        Returns
        -------
        pd.DataFrame
            One row per pair with the columns o, s, ax, ay, rx, ry and
            class (rx + ry). If no group has at least two rows, an
            empty DataFrame with those columns is returned.
        """
        # Fixed column list so that an empty result still has a schema.
        columns = ['o', 's', 'ax', 'ay', 'rx', 'ry']

        # Build combinations as plain dicts (cheaper than one Series
        # per row followed by a wide concat and a transpose).
        rows = []
        for _, g in d.groupby([col_s, col_o]):
            for x, y in combinations(g.sort_values(by=col_a).index, 2):
                rows.append({
                    'o': g.loc[x, col_o],
                    's': g.loc[x, col_s],
                    'ax': g.loc[x, col_a],
                    'ay': g.loc[y, col_a],
                    'rx': g.loc[x, col_r],
                    'ry': g.loc[y, col_r]
                })

        # Create DataFrame (also valid when rows is empty)
        c = pd.DataFrame(rows, columns=columns)

        # Add class
        c['class'] = c.rx + c.ry

        # Return
        return c


    def create_combinations_v1(d, col_specimen='s',
                                  col_lab_id='l',
                                  col_microorganism='o',
                                  col_antimicrobial='a',
                                  col_result='r'):
        """Creates the dataframe with all combinations.

        Records are grouped by (specimen, microorganism, lab id). Within
        each group the rows are sorted by antimicrobial and every pair
        of rows (x, y) produces one output row.

        Parameters
        ----------
        d: pd.DataFrame
            The susceptibility test records.
        col_specimen: str
            Name of the specimen column.
        col_lab_id: str
            Name of the laboratory sample column.
        col_microorganism: str
            Name of the organism column.
        col_antimicrobial: str
            Name of the antimicrobial column.
        col_result: str
            Name of the result column.

        Returns
        --------
        pd.DataFrame
            One row per pair with the columns specimen, lab_id, o, ax,
            ay, rx, ry and class (rx + ry). If no group has at least
            two rows, an empty DataFrame with those columns is returned.
        """
        # Fixed column list so that an empty result still has a schema
        # (otherwise ``c.rx`` below fails when there are no pairs).
        columns = ['specimen', 'lab_id', 'o', 'ax', 'ay', 'rx', 'ry']

        # Initialize
        rows = []

        # Loop
        for _, g in d.groupby([col_specimen,
                               col_microorganism,
                               col_lab_id]):
            for x, y in combinations(g.sort_values(by=col_antimicrobial).index, 2):
                rows.append({
                    'specimen': g.loc[x, col_specimen],
                    'lab_id': g.loc[x, col_lab_id],
                    'o': g.loc[x, col_microorganism],
                    'ax': g.loc[x, col_antimicrobial],
                    'ay': g.loc[y, col_antimicrobial],
                    'rx': g.loc[x, col_result],
                    'ry': g.loc[y, col_result]
                })

        # Create DataFrame
        c = pd.DataFrame(rows, columns=columns)
        # Add class
        c['class'] = c.rx + c.ry
        # Return
        return c


    def create_combinations_v2(d, col_o='o',
          col_s='s', col_a='a', col_r='r'):
        """Creates the dataframe with all combinations.

        Records are grouped by (sample, organism). Within each group the
        rows are sorted by antimicrobial and every pair of rows (x, y)
        produces one output row.

        .. note:: There might be an issue if there are two different outcomes
                  for the same record. For example, a susceptibility test
                  record for penicillin (APEN) with R and another one with
                  S. Warn of this issue if it appears!

        Parameters
        ----------
        d: pd.DataFrame
            The susceptibility test records.
        col_o: str
            Name of the organism column.
        col_s: str
            Name of the sample column.
        col_a: str
            Name of the antimicrobial column.
        col_r: str
            Name of the result column.

        Returns
        --------
        pd.DataFrame
            One row per pair with the columns ax, ay, rx, ry, s, o and
            class (rx + ry). The index restarts at 0 for every group.
            If no group has at least two rows, an empty DataFrame with
            those columns is returned.
        """
        # Fixed column list so the output schema does not depend on
        # which groups happen to contain pairs.
        pair_columns = ['ax', 'ay', 'rx', 'ry']

        # Initialize
        frames = []

        # Loop
        for key, g in d.groupby([col_s, col_o]):
            pairs = [
                {
                    'ax': g.loc[x, col_a],
                    'ay': g.loc[y, col_a],
                    'rx': g.loc[x, col_r],
                    'ry': g.loc[y, col_r]
                }
                for x, y in combinations(g.sort_values(by=col_a).index, 2)
            ]

            # Groups with fewer than two rows contribute nothing.
            if not pairs:
                continue

            aux = pd.DataFrame(pairs, columns=pair_columns)
            aux['s'] = key[0]
            aux['o'] = key[1]
            frames.append(aux)

        # Concatenate once instead of on every iteration.
        if frames:
            c = pd.concat(frames, axis=0)
        else:
            c = pd.DataFrame(columns=pair_columns + ['s', 'o'])

        # Add class
        c['class'] = c.rx + c.ry

        # Return
        return c


.. GENERATED FROM PYTHON SOURCE LINES 225-230

a) A basic example

Note that the column names are the initials of the following
full names: s=specimen, l=laboratory sample, o=organism,
a=antimicrobial and r=result.

.. GENERATED FROM PYTHON SOURCE LINES 230-425

.. code-block:: default
   :lineno-start: 231


    # Toy susceptibility records, one row per test. The column order
    # matches the DataFrame built below: s, l, o, a, r.
    data = [
        ['s1', 'l1', 'o1', 'a1', 'S'],
        ['s1', 'l1', 'o1', 'a2', 'S'],
        ['s1', 'l1', 'o1', 'a3', 'R'],
        ['s1', 'l2', 'o1', 'a1', 'S'],
        ['s1', 'l2', 'o1', 'a2', 'S'],
        ['s1', 'l2', 'o1', 'a3', 'R'],
        ['s1', 'l2', 'o1', 'a4', 'R'],
        ['s1', 'l3', 'o1', 'a1', 'R'],
        ['s1', 'l3', 'o1', 'a2', 'S'],
        ['s1', 'l4', 'o1', 'a2', 'R'],
        ['s1', 'l4', 'o1', 'a1', 'S'],
        ['s1', 'l5', 'o1', 'a5', 'S'],
        ['s1', 'l6', 'o1', 'a4', 'S'],
        ['s1', 'l5', 'o1', 'a2', 'S'],
    ]

    # Create DataFrame
    df = pd.DataFrame(data,
        columns=['s', 'l', 'o', 'a', 'r'])

    # Show the raw records
    print("\nData:")
    print(df)

    # Create all pairwise antimicrobial combinations per
    # (specimen, organism, laboratory sample) group.
    c = create_combinations_v1(df)

    # Show the combinations
    print("\nCombinations (within isolates):")
    print(c)

    # Build contingency: count rows per (ax, ay, class) and move the
    # class level into columns. Classes never observed for a pair
    # become NaN.
    r = c.groupby(['ax', 'ay', 'class']).size().unstack()

    # Show the contingency table
    print("\nContingency:")
    print(r)

    # Compute CRI row by row and store it in the 'MIS' column.
    # NOTE(review): the counts still contain NaN at this point, whereas
    # the disabled example further down calls fillna(0) before applying
    # CRI -- confirm which one is intended.
    r['MIS'] = r.apply(CRI, args=(mutual_info_matrix_v3,), axis=1)

    # Show the result
    print("\n" + "="*80 + "\nExample 1\n" + "="*80)
    print("\nResult")
    print(r)

    # Create index with all (antimicrobial, antimicrobial) pairs,
    # including those that never appear in the contingency table.
    index = pd.MultiIndex.from_product(
        [df.a.unique(), df.a.unique()]
    )

    # Reformat into a square matrix (missing pairs are NaN)
    aux = r['MIS'] \
        .reindex(index, fill_value=np.nan)\
        .unstack()

    # Display the matrix, scaled by 100, as an annotated heatmap
    sns.heatmap(data=aux*100, annot=True, linewidth=.5,
        cmap='coolwarm', vmin=-70, vmax=70, center=0,
        square=True)

    # Adjust the layout (the explicit show call is left disabled)
    plt.tight_layout()
    #plt.show()


    def load_susceptibility_nhs(*, path=None, **kwargs):
        """Load and format NHS susceptibility data.

        All files matching ``susceptibility-*.csv`` in the data folder
        are read and concatenated. The sensitivity labels are shortened
        (sensitive=S, resistant=R, intermediate=I, highly resistant=HR),
        rows with any other sensitivity value are dropped, and records
        that are identical in every column except ``sensitivity`` are
        de-duplicated keeping the last one.

        Parameters
        ----------
        path: str or Path, optional (keyword-only)
            Folder containing the csv files. Defaults to
            ``../../datasets/susceptibility-nhs/susceptibility-v0.0.1``.
        **kwargs: dict-like
            The arguments as used in pandas read_csv function

        Returns
        --------
        pd.DataFrame
            The formatted susceptibility records.
        """
        # Load data
        if path is None:
            path = Path('../../datasets/susceptibility-nhs/')
            path = path / 'susceptibility-v0.0.1'

        # Sort the files so that the concatenation order (and therefore
        # which duplicate counts as the "last") is deterministic.
        files = sorted(Path(path).glob('susceptibility-*.csv'))

        data = pd.concat([pd.read_csv(f, **kwargs) for f in files])

        # Format data
        data['sensitivity'] = data['sensitivity'] \
            .replace({
                'sensitive': 'S',
                'resistant': 'R',
                'intermediate': 'I',
                'highly resistant': 'HR'
        })

        # Select specimen
        # data = data[data.specimen_code.isin(['URICUL'])]
        # data = data[data.microorganism_code.isin(['SAUR', 'ECOL', 'PAER'])]
        # data = data[data.sensitivity.isin(['R', 'S'])]
        # data = data[data.laboratory_number.isin(['H1954180', 'M1596362'])]

        data = data[data.sensitivity.isin(['R', 'S', 'I', 'HR'])]

        # .. note:: For some reason, for the same specimen and antimicrobial
        #           there are sometimes contradictory outcomes (e.g. R and S)
        #           so we are removing this by keeping the last.

        # Keep only last/first specimen (sometimes repeated). Duplicates
        # are detected on every column except the outcome itself.
        # list.remove() returns None, so the subset is built with a
        # comprehension; otherwise all columns would be compared and
        # contradictory outcomes would never be dropped.
        subset = [col for col in data.columns if col != 'sensitivity']
        data = data.drop_duplicates(subset=subset, keep='last')

        # Further cleaning

        # Return
        return data


    def load_susceptibility_mimic(**kwargs):
        """Read the MIMIC microbiology events file and rename its columns.

        Parameters
        ----------
        **kwargs: dict-like
            The arguments as used in pandas read_csv function

        Returns
        --------
        pd.DataFrame
            The records with the columns renamed to laboratory_number,
            specimen_code, microorganism_code, antimicrobial_code and
            sensitivity. No further cleaning is applied.
        """
        # Location of the events file
        source = Path('../../datasets/susceptibility-mimic/') \
            / 'microbiologyevents.csv'

        # Original column name -> name used in the rest of the script
        renames = {
            'micro_specimen_id': 'laboratory_number',
            'spec_type_desc': 'specimen_code',
            'org_name': 'microorganism_code',
            'ab_name': 'antimicrobial_code',
            'interpretation': 'sensitivity'
        }

        # Load and format
        frame = pd.read_csv(source, **kwargs)
        frame = frame.rename(columns=renames)

        # Not implemented yet:
        #   - keep only the last/first specimen.
        #   - remove inconsistent records, for example two rows for the
        #     same antimicrobial within a specimen, possibly with
        #     contradictory outcomes (e.g. R and S).
        #   - other cleaning.

        return frame


    # Disabled second example: the code below is wrapped in a bare
    # string so that it is not executed. It loads the NHS data, builds
    # the combinations and the contingency table, computes the index
    # and writes both tables to a timestamped folder under
    # ./outputs/cri/. Remove the surrounding quotes to run it.
    """
    # Load data
    #data = load_susceptibility_mimic()
    data = load_susceptibility_nhs()

    # Create combo
    c = create_combinations_v1(data,
        col_specimen='specimen_code',
        col_lab_id='laboratory_number',
        col_microorganism='microorganism_code',
        col_antimicrobial='antimicrobial_code',
        col_result='sensitivity')

    # Create folder if it does not exist.
    today = datetime.now().strftime("%Y%m%d-%H%M%S")
    path = Path('./outputs/cri/') / today
    Path(path).mkdir(parents=True, exist_ok=True)

    # Save combinations file.
    c.to_csv(path / 'combinations.csv')

    # Build contingency
    r = c.groupby(['specimen', 'o', 'ax', 'ay', 'class']).size().unstack()

    # Compute CRI
    r['MIS'] = r.fillna(0) \
        .apply(CRI, args=(mutual_info_matrix_v3,), axis=1)

    # Show
    print("\n" + "="*80 + "\nExample 2\n" + "="*80)
    print("\nResult")
    print(r)

    # Save collateral sensitivity index file.
    r.to_csv(path / 'contingency.csv')
    """


.. image-sg:: /_examples/utils/images/sphx_glr_plot_collateral_sensitivity_001.png
   :alt: plot collateral sensitivity
   :srcset: /_examples/utils/images/sphx_glr_plot_collateral_sensitivity_001.png
   :class: sphx-glr-single-img


.. rst-class:: sphx-glr-script-out

 Out:

 .. code-block:: none


    Data:
         s   l   o   a  r
    0   s1  l1  o1  a1  S
    1   s1  l1  o1  a2  S
    2   s1  l1  o1  a3  R
    3   s1  l2  o1  a1  S
    4   s1  l2  o1  a2  S
    5   s1  l2  o1  a3  R
    6   s1  l2  o1  a4  R
    7   s1  l3  o1  a1  R
    8   s1  l3  o1  a2  S
    9   s1  l4  o1  a2  R
    10  s1  l4  o1  a1  S
    11  s1  l5  o1  a5  S
    12  s1  l6  o1  a4  S
    13  s1  l5  o1  a2  S

    Combinations (within isolates):
       specimen lab_id   o  ax  ay rx ry class
    0        s1     l1  o1  a1  a2  S  S    SS
    1        s1     l1  o1  a1  a3  S  R    SR
    2        s1     l1  o1  a2  a3  S  R    SR
    3        s1     l2  o1  a1  a2  S  S    SS
    4        s1     l2  o1  a1  a3  S  R    SR
    5        s1     l2  o1  a1  a4  S  R    SR
    6        s1     l2  o1  a2  a3  S  R    SR
    7        s1     l2  o1  a2  a4  S  R    SR
    8        s1     l2  o1  a3  a4  R  R    RR
    9        s1     l3  o1  a1  a2  R  S    RS
    10       s1     l4  o1  a1  a2  S  R    SR
    11       s1     l5  o1  a2  a5  S  S    SS

    Contingency:
    class   RR   RS   SR   SS
    ax ay                    
    a1 a2  NaN  1.0  1.0  2.0
       a3  NaN  NaN  2.0  NaN
       a4  NaN  NaN  1.0  NaN
    a2 a3  NaN  NaN  2.0  NaN
       a4  NaN  NaN  1.0  NaN
       a5  NaN  NaN  NaN  1.0
    a3 a4  1.0  NaN  NaN  NaN

    ================================================================================
    Example 1
    ================================================================================

    Result
    class   RR   RS   SR   SS  MIS
    ax ay                         
    a1 a2  NaN  1.0  1.0  2.0  0.0
       a3  NaN  NaN  2.0  NaN  0.0
       a4  NaN  NaN  1.0  NaN  0.0
    a2 a3  NaN  NaN  2.0  NaN  0.0
       a4  NaN  NaN  1.0  NaN  0.0
       a5  NaN  NaN  NaN  1.0  0.0
    a3 a4  1.0  NaN  NaN  NaN  0.0

    '\n# Load data\n#data = load_susceptibility_mimic()\ndata = load_susceptibility_nhs()\n\n# Create combo\nc = create_combinations_v1(data,\n    col_specimen=\'specimen_code\',\n    col_lab_id=\'laboratory_number\',\n    col_microorganism=\'microorganism_code\',\n    col_antimicrobial=\'antimicrobial_code\',\n    col_result=\'sensitivity\')\n\n# Create folder if it does not exist.\ntoday = datetime.now().strftime("%Y%m%d-%H%M%S")\npath = Path(\'./outputs/cri/\') / today\nPath(path).mkdir(parents=True, exist_ok=True)\n\n# Save combinations file.\nc.to_csv(path / \'combinations.csv\')\n\n# Build contingency\nr = c.groupby([\'specimen\', \'o\', \'ax\', \'ay\', \'class\']).size().unstack()\n\n# Compute CRI\nr[\'MIS\'] = r.fillna(0)     .apply(CRI, args=(mutual_info_matrix_v3,), axis=1)\n\n# Show\nprint("\n" + "="*80 + "\nExample 2\n" + "="*80)\nprint("\nResult")\nprint(r)\n\n# Save collateral sensitivity index file.\nr.to_csv(path / \'contingency.csv\')\n'


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes  0.194 seconds)


.. _sphx_glr_download__examples_utils_plot_collateral_sensitivity.py:


.. only :: html

 .. container:: sphx-glr-footer
    :class: sphx-glr-footer-example


  .. container:: sphx-glr-download sphx-glr-download-python

     :download:`Download Python source code: plot_collateral_sensitivity.py <plot_collateral_sensitivity.py>`


  .. container:: sphx-glr-download sphx-glr-download-jupyter

     :download:`Download Jupyter notebook: plot_collateral_sensitivity.ipynb <plot_collateral_sensitivity.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_