# Source code for pyamr.core.dri

# Libraries
import warnings
import pandas as pd

def dri(*args, **kwargs):
    """Compute the Drug Resistance Index (short alias).

    Thin convenience wrapper: every positional and keyword argument is
    forwarded unchanged to ``drug_resistance_index`` and its result is
    returned as is.

    Parameters
    ----------
    *args, **kwargs
        Passed through to ``drug_resistance_index``.

    Returns
    -------
    Whatever ``drug_resistance_index`` returns for the given arguments.
    """
    return drug_resistance_index(*args, **kwargs)

def drug_resistance_index_v2(smmry, cu='use', cr='sari',
                             return_all=False,
                             reference_time=None,
                             **kwargs):
    """Compute the Drug Resistance Index (DRI) per group of a summary table.

    Within every group defined by ``**kwargs`` the index is the
    use-weighted mean of the resistance rates::

        dri = sum(resistance_i * use_i / sum(use))

    A possible summary table would look like this...

    DATE     MICROORGANISM ANTIMICROBIAL     sari   use
    2011 Q2  E. Coli       Aminopenicillins  0.422  300
    2011 Q2  E. Coli       Quinolones        0.130  250
    2011 Q2  E. Coli       Cephalosporins    0.010  100
    2011 Q3  E. Coli       Aminopenicillins  0.437  250
    2011 Q3  E. Coli       Quinolones        0.132  300
    2011 Q3  E. Coli       Cephalosporins    0.014  1500

    Parameters
    ----------
    smmry: pd.DataFrame
        The summary DataFrame with the data required to compute the
        drug resistance index. It must contain the resistance column
        (``cr``), the drug use column (``cu``) and whatever columns the
        grouping in ``**kwargs`` refers to (e.g. DATE). It is not modified.

    cu: str, default='use'
        Column name with use.

    cr: str, default='sari'
        Column name with resistance.

    return_all: bool, default=False
        If True, return the whole table with the intermediate columns
        instead of one row per group.

    reference_time: default=None
        Accepted for backward compatibility; currently not used.

    **kwargs
        Arguments to pass to ``DataFrame.groupby`` (e.g. ``by=['DATE']``).
        A grouping must be supplied, otherwise ``groupby`` raises.

    Returns
    -------
    pd.DataFrame
        If ``return_all`` is True, a copy of ``smmry`` with the extra
        columns ``use_period`` (total use of the group), ``u_weight``
        (share of the group's use), ``w_rate`` (resistance times share)
        and ``dri``.
        Otherwise one row per group (first non-null value of every
        column), where column ``cu`` holds the total use of the group,
        ``dri`` holds the index and the intermediate columns are dropped.
    """
    # Work on a copy so the caller's table is never modified.
    m = smmry.copy(deep=True)

    # Total use of each group, broadcast back to every row of the group.
    m['use_period'] = m.groupby(**kwargs)[cu].transform('sum')

    # Share of the group's use and resistance weighted by that share.
    m['u_weight'] = m[cu] / m['use_period']
    m['w_rate'] = m[cr] * m['u_weight']

    # The index is the sum of the weighted rates within the group.
    m['dri'] = m.groupby(**kwargs)['w_rate'].transform('sum')

    if return_all:
        return m

    # Report the total use of the group in the usage column. The column
    # named by ``cu`` is used (not a hard-coded 'use') so that custom
    # usage column names work too.
    m[cu] = m['use_period']

    return m \
        .drop(columns=['use_period', 'u_weight', 'w_rate']) \
        .groupby(**kwargs).first()


def drug_resistance_index(dataframe,
                          return_complete=False,
                          return_usage=False):
    """Computes the Drug Resistance Index.

    The index is the use-weighted mean of the resistance rates over all
    the rows of the DataFrame::

        dri = sum(RESISTANCE_i * USE_i / sum(USE))

    Parameters
    ----------
    dataframe: pd.DataFrame
        Table with (at least) the columns 'USE' and 'RESISTANCE'. It is
        not modified.

    return_complete: bool, default=False
        Returns the whole set of results. Takes precedence over
        ``return_usage`` when both are True.

    return_usage: bool, default=False
        Returns only 'use_period' and 'dri'.

    Returns
    -------
    pd.DataFrame
        If ``return_complete``: a copy of the input with the additional
        columns 'use_period', 'u_weight', 'w_rate' and 'dri'.
    pd.Series
        If ``return_usage``: the entries 'use_period' and 'dri'.
    scalar
        Otherwise, the drug resistance index.

    Raises
    ------
    ValueError
        If 'USE' or 'RESISTANCE' is not a column of the DataFrame.
    """
    # Required columns
    required = ['USE', 'RESISTANCE']

    # Check columns
    missing = set(required).difference(dataframe.columns)
    if missing:
        raise ValueError("The following columns are missing: {0} "
                         .format(missing))

    # Compute (none of these operations modifies the input)
    use_period = dataframe['USE'].sum()
    u_weight = dataframe['USE'] / use_period
    w_rate = dataframe['RESISTANCE'] * u_weight
    index = w_rate.sum()

    # Return
    if return_complete:
        # Copy only here: this is the only output that carries the table,
        # so the scalar/Series paths avoid a deep copy on every call.
        m = dataframe.copy(deep=True)
        m['use_period'] = use_period
        m['u_weight'] = u_weight
        m['w_rate'] = w_rate
        m['dri'] = index
        return m
    if return_usage:
        return pd.Series({
            'use_period': use_period,
            'dri': index
        })
    return index



class DRI:
    """Drug Resistance Index.

    Helper that maps user-defined column names to the canonical ones
    ('RESISTANCE' and 'USE') and computes the index, either for the whole
    table or for each group of a ``groupby``.
    """
    # Canonical column names the columns are renamed to before computing.
    c_res = 'RESISTANCE'
    c_use = 'USE'

    def __init__(self, column_resistance=c_res,
                       column_usage=c_use):
        """Store the mapping from the user's column names.

        Parameters
        ----------
        column_resistance: str, default='RESISTANCE'
            Name of the column with the resistance values.

        column_usage: str, default='USE'
            Name of the column with the usage values.
        """
        # Create dictionary to rename columns
        self.rename = {
            column_resistance: self.c_res,
            column_usage: self.c_use
        }

    def compute(self, dataframe, groupby=None,
                column_usage=None, **kwargs):
        """Computes the Drug Resistance Index.

        .. note:: Needs to include checks.

        Parameters
        ----------
        dataframe: pd.DataFrame
            The pandas dataframe with the information. The following columns
            are always required [RESISTANCE and USE] (under the names given
            at creation). The RESISTANCE column indicates the proportion of
            resistance isolates and the USE the amount of antimicrobial
            doses applied. The DataFrame is not modified.

        groupby: str or list, default=None
            The elements to groupby (pd.groupby). A single string is
            treated as a one-element list. If None or empty, the index is
            computed over the whole DataFrame.

        column_usage: str
            The column with the usage values. This value overwrites the
            column_usage value passed during the instance creation. If
            the value is None, the default value passed during the instance
            creation will be used.

        **kwargs
            Passed to ``dri`` (e.g. return_usage, return_complete).

        Returns
        -------
        The output of ``dri`` when no groupby is given; otherwise the
        output of ``groupby(...).apply(dri, **kwargs)``, named 'dri' when
        it is a pd.Series.

        Raises
        ------
        TypeError
            If ``dataframe`` is not a pandas DataFrame.
        """
        # Bad input type
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError("""
                The instance passed as argument needs to be a pandas
                DataFrame. Instead, a <%s> was found. Please convert 
                the input accordingly.""" % type(dataframe))

        if isinstance(groupby, str):
            groupby = [groupby]

        # Temporal update of usage column (the instance is left untouched)
        rename = self.rename.copy()
        if column_usage is not None:
            rename = {k: v for k, v in rename.items()
                      if v != self.c_use}
            rename[column_usage] = self.c_use

        # A column already named like a rename target (e.g. an existing
        # 'USE' while the usage is read from another column) would end up
        # duplicated after renaming and corrupt the computation. Drop it,
        # but only when the source column is really there to replace it.
        stale = [target for source, target in rename.items()
                 if source != target
                 and source in dataframe.columns
                 and target in dataframe.columns]

        # Rename columns (drop and rename both return new objects)
        aux = dataframe.drop(columns=stale).rename(columns=rename)

        # Compute overall
        if not groupby:
            return dri(aux, **kwargs)

        # Compute
        scores = aux.groupby(by=groupby) \
            .apply(dri, **kwargs)

        # Format
        if isinstance(scores, pd.Series):
            scores = scores.rename('dri')

        # Return
        return scores








if __name__ == '__main__':

    # Example script: builds a small synthetic data set, derives the
    # summary table (resistance from the susceptibility tests and use
    # from the prescriptions) and computes the DRI for several groupings.
    # pandas is already imported as ``pd`` at the top of the module.

    # ----------------------------------
    # Create data
    # ----------------------------------
    # Define susceptibility test records
    # [date, specimen, microorganism, antimicrobial, outcome]
    susceptibility_records = [
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],
        ['2021-01-04', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],

        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-04', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],

        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],

        ['2021-01-12', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-12', 'URICUL', 'SAUR', 'ACIP', 'intermediate'],
        ['2021-01-13', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-13', 'URICUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-14', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-14', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-15', 'URICUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-15', 'URICUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-16', 'URICUL', 'SAUR', 'ACIP', 'intermediate'],
        ['2021-01-16', 'URICUL', 'SAUR', 'ACIP', 'intermediate'],
    ]

    # Define prescription test records
    # [date, patient, drug, dose]
    prescription_records = [
        ['2021-01-01', 'PATIENT_1', 'AAUG', 1500],
        ['2021-01-02', 'PATIENT_1', 'AAUG', 2],
        ['2021-01-03', 'PATIENT_1', 'AAUG', 500],

        ['2021-01-01', 'PATIENT_2', 'AAUG', 2000],
        ['2021-01-02', 'PATIENT_2', 'AAUG', 320],
        ['2021-01-03', 'PATIENT_2', 'AAUG', 350],

        ['2021-01-01', 'PATIENT_3', 'ACIP', 2],
        ['2021-01-02', 'PATIENT_3', 'ACIP', 505],
        ['2021-01-03', 'PATIENT_3', 'ACIP', 1124],

        ['2021-01-01', 'PATIENT_4', 'ACIP', 5],
        ['2021-01-02', 'PATIENT_4', 'ACIP', 643],
        ['2021-01-03', 'PATIENT_4', 'ACIP', 2330],
    ]

    # Create DataFrames
    susceptibility = pd.DataFrame(susceptibility_records,
        columns=['DATE',
                 'SPECIMEN',
                 'MICROORGANISM',
                 'ANTIMICROBIAL',
                 'SENSITIVITY'])

    prescriptions = pd.DataFrame(prescription_records,
        columns=['DATE',
                 'PATIENT',
                 'DRUG',
                 'DOSE'])

    # Format dates
    susceptibility.DATE = pd.to_datetime(susceptibility.DATE)
    prescriptions.DATE = pd.to_datetime(prescriptions.DATE)

    # Show
    print("\nSusceptibility records")
    print(susceptibility.head(5))
    print("\nPrescription records")
    print(prescriptions.head(5))


    # .. note:: Uncomment to load the CDDEP example data instead

    # Load default CDDEP sample
    #from pathlib import Path
    #path = Path('../datasets/cddep')
    #susceptibility = pd.read_csv(path / 'susceptibility.csv')
    #prescriptions = pd.read_csv(path / 'prescriptions.csv')


    # ------------------------
    # Compute summary table
    # ------------------------
    # Libraries
    from pyamr.core.sari import SARI

    # Create sari instance
    sari = SARI(groupby=['DATE',
                         'SPECIMEN',
                         'MICROORGANISM',
                         'ANTIMICROBIAL',
                         'SENSITIVITY'])

    # Compute susceptibility summary table
    smmry1 = sari.compute(susceptibility,
        return_frequencies=False)

    # Compute prescriptions summary table (total dose per date and drug).
    smmry2 = prescriptions \
        .groupby(by=['DATE', 'DRUG']) \
        .DOSE.sum().rename('use')

    # Combine both summary tables (only date/drug pairs present in both)
    smmry = smmry1.reset_index().merge(
        smmry2.reset_index(), how='inner',
        left_on=['DATE', 'ANTIMICROBIAL'],
        right_on=['DATE', 'DRUG']
    )

    # Show
    print("\nSummary")
    print(smmry)


    # -------------------------
    # Compute DRI
    # -------------------------
    obj = DRI(
        column_resistance='sari',
        column_usage='use'
    )

    # Compute DRI overall
    dri1 = obj.compute(smmry)

    # Compute DRI by specimen
    dri2 = obj.compute(smmry,
        groupby=['SPECIMEN'])

    # Compute DRI by microorganism
    dri3 = obj.compute(smmry,
        groupby=['MICROORGANISM'])

    # Compute DRI by microorganism and antimicrobial
    dri4 = obj.compute(smmry,
        groupby=['MICROORGANISM', 'ANTIMICROBIAL'])

    # Compute DRI by date (including the use of the period)
    dri5 = obj.compute(smmry,
        groupby=['DATE'],
        return_usage=True)

    # Compute DRI by date and microorganism (including use)
    dri6 = obj.compute(smmry,
        groupby=['DATE', 'MICROORGANISM'],
        return_usage=True)

    # Compute DRI with both flags set (return_complete takes precedence)
    dri7 = obj.compute(smmry,
        groupby=['DATE', 'MICROORGANISM', 'ANTIMICROBIAL'],
        return_usage=True,
        return_complete=True)

    # Compute DRI (return all elements of summary table).
    dri8 = obj.compute(smmry,
        groupby=['MICROORGANISM'],
        return_complete=True)

    # Show
    results = [dri1, dri2, dri3, dri4, dri5, dri6, dri7, dri8]
    for number, result in enumerate(results, start=1):
        print("\nDRI (%s):" % number)
        print(result)


    # --------------------------------------------
    # Compute DRI fixed
    # --------------------------------------------
    # Compute prescriptions on t0 (first total per drug in date order).
    use_t0 = prescriptions \
        .groupby(by=['DATE', 'DRUG']) \
        .DOSE.sum().rename('use') \
        .to_frame().reset_index() \
        .groupby('DRUG').use.first()

    # Add to summary table
    smmry = smmry.assign(use_t0=smmry.DRUG.map(use_t0))

    # Define groupby
    groupby = [
        'DATE',
        'MICROORGANISM'
    ]

    # Compute DRI
    dri9a = obj.compute(smmry,
        groupby=groupby,
        return_usage=True)

    # Compute DRI using new USE
    dri9b = obj.compute(smmry,
        groupby=groupby,
        return_usage=True,
        column_usage='use_t0')

    # Place both results side by side (fixed-use columns get a suffix)
    aux = dri9a.merge(dri9b,
        left_index=True, right_index=True,
        suffixes=('', '_fixed'))

    # Show
    print("\n\n")
    print("\nSummary (variable):")
    print(smmry)
    print("\nDRI (9):")
    print(aux)


    # -------------------------------------------------------------------------
    # Testing
    # -------------------------------------------------------------------------

    # ---------------------------------------------------------------------
    # Errors
    # ---------------------------------------------------------------------
    # .. note: Placeholder. Examples in which the computation is meant to
    #          raise an error (e.g. a required column is missing) are
    #          still to be added; only the header is printed.
    print("\n\nHandling errors:")

    # ---------------------------------------------------------------------
    # Warnings
    # ---------------------------------------------------------------------
    # .. note: Placeholder. Examples in which the computation is meant to
    #          show a warning are still to be added; only the header is
    #          printed.
    print("\n\nShow warnings:")