# Libraries
import warnings
import pandas as pd
def dri(*args, **kwargs):
    """Alias of ``drug_resistance_index``.

    All positional and keyword arguments are forwarded unchanged and
    the value produced by ``drug_resistance_index`` is returned as is.
    """
    index = drug_resistance_index(*args, **kwargs)
    return index
def drug_resistance_index_v2(smmry, cu='use', cr='sari',
                             return_all=False,
                             reference_time=None,
                             **kwargs):
    """Computes the Drug Resistance Index for each group of a summary table.

    Within each group (as defined by the ``groupby`` arguments passed
    through ``**kwargs``) the index is the sum of the resistance values
    weighted by the share of the group's total use.

    A possible summary table would look like this::

        DATE     MICROORGANISM  ANTIMICROBIAL     sari   use
        2011 Q2  E. Coli        Aminopenicillins  0.422  300
        2011 Q2  E. Coli        Quinolones        0.130  250
        2011 Q2  E. Coli        Cephalosporins    0.010  100
        2011 Q3  E. Coli        Aminopenicillins  0.437  250
        2011 Q3  E. Coli        Quinolones        0.132  300
        2011 Q3  E. Coli        Cephalosporins    0.014  1500

    Parameters
    ----------
    smmry: pd.DataFrame
        The summary DataFrame with the data required to compute the
        drug resistance index. The following information needs to be
        present in the DataFrame:
            (i) the grouping columns (e.g. DATE)
            (ii) the resistance (e.g. sari)
            (iii) the drug use (e.g. use)
    cu: str, default='use'
        Column name with use
    cr: str, default='sari'
        Column name with resistance
    return_all: bool, default=False
        Whether to return every row of the input together with the
        intermediate columns ('use_period', 'u_weight', 'w_rate')
        and 'dri', instead of one row per group.
    reference_time: object, default=None
        Currently ignored; kept in the signature so that existing
        callers passing it keep working.
    **kwargs
        Arguments to pass to groupby (e.g. ``by=['DATE']``)

    Returns
    -------
    pd.DataFrame
        If ``return_all`` is True, a copy of ``smmry`` with the columns
        'use_period', 'u_weight', 'w_rate' and 'dri' added. Otherwise
        one row per group (first row of each group) where the ``cu``
        column holds the total use of the group and 'dri' the index.
    """
    # Work on a copy so that the caller's DataFrame is never modified.
    m = smmry.copy(deep=True)

    # Total use of each group, broadcast back to every row.
    m['use_period'] = m.groupby(**kwargs)[cu].transform('sum')
    # Share of the group's use attributed to each row.
    m['u_weight'] = m[cu] / m['use_period']
    # Resistance weighted by that share.
    m['w_rate'] = m[cr] * m['u_weight']
    # The index is the sum of the weighted rates within the group.
    m['dri'] = m.groupby(**kwargs)['w_rate'].transform('sum')

    # NOTE: values of dri larger than one indicate that the resistance
    #       column is not a proportion in [0, 1]; this is not checked.

    if return_all:
        return m

    # Report the total use of the group in the use column. The column
    # name given in ``cu`` is honoured (it is not assumed to be 'use').
    m[cu] = m['use_period']

    return m.drop(columns=['use_period', 'u_weight', 'w_rate']) \
        .groupby(**kwargs).first()
def drug_resistance_index(dataframe,
                          return_complete=False,
                          return_usage=False):
    """Computes the Drug Resistance Index.

    The index is the sum of the RESISTANCE values weighted by the share
    of the total USE that corresponds to each row.

    Parameters
    ----------
    dataframe: pd.DataFrame
        The data with (at least) the columns 'USE' and 'RESISTANCE'.
    return_complete: bool, default=False
        Returns the whole set of results. It takes precedence over
        ``return_usage`` when both are True.
    return_usage: bool, default=False
        Returns only 'use_period' and 'dri'.

    Returns
    -------
    float, pd.Series or pd.DataFrame
        By default the index as a scalar. If ``return_usage`` is True,
        a Series with 'use_period' and 'dri'. If ``return_complete``
        is True, a copy of the input with the columns 'use_period',
        'u_weight', 'w_rate' and 'dri' added. The index is NaN when no
        weighted rate can be computed (e.g. the total use is zero).

    Raises
    ------
    ValueError
        If any of the required columns is missing.
    """
    # Required columns
    required = ['USE', 'RESISTANCE']

    # Check columns
    missing = set(required).difference(dataframe.columns)
    if missing:
        raise ValueError(
            "The following columns are missing: {0} ".format(missing))

    # Compute
    use, resistance = dataframe['USE'], dataframe['RESISTANCE']
    use_period = use.sum()
    u_weight = use / use_period
    w_rate = resistance * u_weight

    # With min_count=1 the result is NaN (instead of a misleading 0.0)
    # when every weighted rate is missing, which is what happens when
    # the total use is zero and all the weights are therefore NaN.
    index = w_rate.sum(min_count=1)

    # Return
    if return_complete:
        # Copy only here so that the input is never modified.
        m = dataframe.copy(deep=True)
        m['use_period'] = use_period
        m['u_weight'] = u_weight
        m['w_rate'] = w_rate
        m['dri'] = index
        return m

    if return_usage:
        return pd.Series({
            'use_period': use_period,
            'dri': index
        })

    return index
class DRI:
    """Drug Resistance Index.

    Thin object-oriented wrapper around the function ``dri``. It maps
    the user's column names onto the canonical names ('RESISTANCE' and
    'USE') and optionally computes the index per group.
    """
    # Canonical column names expected by the function computing the index.
    c_res = 'RESISTANCE'
    c_use = 'USE'

    def __init__(self, column_resistance=c_res,
                 column_usage=c_use):
        """Constructor.

        Parameters
        ----------
        column_resistance: str, default='RESISTANCE'
            The column with the resistance values.
        column_usage: str, default='USE'
            The column with the usage values.
        """
        # Create dictionary to rename columns
        self.rename = {
            column_resistance: self.c_res,
            column_usage: self.c_use
        }

    def _standardise_columns(self, dataframe, column_usage=None):
        """Returns a renamed copy of dataframe with the canonical columns.

        Parameters
        ----------
        dataframe: pd.DataFrame
            The input data (it is not modified).
        column_usage: str, default=None
            If not None, the column to use as 'USE' instead of the one
            configured in the constructor.

        Returns
        -------
        pd.DataFrame
        """
        # Temporal update of usage column
        rename = self.rename.copy()
        if column_usage is not None:
            rename = {k: v for k, v in rename.items()
                      if v != self.c_use}
            rename[column_usage] = self.c_use

        # If the data already contains a column with a canonical name
        # and a different column is going to be renamed onto it, the
        # result would have duplicated labels. The explicitly selected
        # column wins, thus the pre-existing one is dropped.
        clashes = [target for source, target in rename.items()
                   if source != target
                   and source in dataframe.columns
                   and target in dataframe.columns]
        if clashes:
            dataframe = dataframe.drop(columns=clashes)

        # Rename columns (rename returns a new DataFrame).
        return dataframe.rename(columns=rename)

    def compute(self, dataframe, groupby=None,
                column_usage=None, **kwargs):
        """Computes the Drug Resistance Index.

        .. note:: Needs to include checks.

        Parameters
        ----------
        dataframe: pd.DataFrame
            The pandas dataframe with the information. The following columns
            are always required [RESISTANCE and USE]. The RESISTANCE column
            indicates the proportion of resistance isolates and the USE the
            amount of antimicrobial doses applied.
        groupby: str or list, default=None
            The elements to groupby (pd.groupby). If None or empty, the
            index is computed for the whole dataframe.
        column_usage: str
            The column with the usage values. This value overwrites the
            column_usage value passed during the instance creation. If
            the value is None, the default value passed during the instance
            creation will be used.
        **kwargs
            Arguments passed to ``dri`` (e.g. return_usage,
            return_complete).

        Returns
        -------
        The value returned by ``dri`` when no groupby is given; otherwise
        the result of applying ``dri`` to each group (a Series named
        'dri' or a DataFrame, depending on what ``dri`` returns).

        Raises
        ------
        TypeError
            If dataframe is not a pandas DataFrame.
        """
        # Bad input type
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError(
                "\nThe instance passed as argument needs to be a pandas\n"
                "DataFrame. Instead, a <%s> was found. Please convert\n"
                "the input accordingly." % type(dataframe))

        if isinstance(groupby, str):
            groupby = [groupby]

        # Rename columns
        aux = self._standardise_columns(dataframe, column_usage)

        # Compute overall
        if not groupby:
            return dri(aux, **kwargs)

        # Compute per group
        scores = aux.groupby(by=groupby).apply(dri, **kwargs)

        # Format
        if isinstance(scores, pd.Series):
            scores = scores.rename('dri')

        # Return
        return scores
if __name__ == '__main__':

    # ----------------------------------
    # Create data
    # ----------------------------------
    # Define susceptibility test records
    # (date, specimen, microorganism, antimicrobial, outcome)
    susceptibility_records = [
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],
        ['2021-01-04', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'],

        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],
        ['2021-01-03', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'],
        ['2021-01-04', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'],

        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-01', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-02', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-02', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],

        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-08', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-09', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'],

        ['2021-01-12', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-12', 'URICUL', 'SAUR', 'ACIP', 'intermediate'],
        ['2021-01-13', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-13', 'URICUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-14', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-14', 'URICUL', 'SAUR', 'ACIP', 'resistant'],
        ['2021-01-15', 'URICUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-15', 'URICUL', 'SAUR', 'ACIP', 'sensitive'],
        ['2021-01-16', 'URICUL', 'SAUR', 'ACIP', 'intermediate'],
        ['2021-01-16', 'URICUL', 'SAUR', 'ACIP', 'intermediate'],
    ]

    # Define prescription records (date, patient, drug, dose)
    prescription_records = [
        ['2021-01-01', 'PATIENT_1', 'AAUG', 1500],
        ['2021-01-02', 'PATIENT_1', 'AAUG', 2],
        ['2021-01-03', 'PATIENT_1', 'AAUG', 500],
        ['2021-01-01', 'PATIENT_2', 'AAUG', 2000],
        ['2021-01-02', 'PATIENT_2', 'AAUG', 320],
        ['2021-01-03', 'PATIENT_2', 'AAUG', 350],
        ['2021-01-01', 'PATIENT_3', 'ACIP', 2],
        ['2021-01-02', 'PATIENT_3', 'ACIP', 505],
        ['2021-01-03', 'PATIENT_3', 'ACIP', 1124],
        ['2021-01-01', 'PATIENT_4', 'ACIP', 5],
        ['2021-01-02', 'PATIENT_4', 'ACIP', 643],
        ['2021-01-03', 'PATIENT_4', 'ACIP', 2330],
    ]

    # Create DataFrames
    susceptibility = pd.DataFrame(susceptibility_records,
                                  columns=['DATE',
                                           'SPECIMEN',
                                           'MICROORGANISM',
                                           'ANTIMICROBIAL',
                                           'SENSITIVITY'])

    prescriptions = pd.DataFrame(prescription_records,
                                 columns=['DATE',
                                          'PATIENT',
                                          'DRUG',
                                          'DOSE'])

    # Format dates
    susceptibility.DATE = pd.to_datetime(susceptibility.DATE)
    prescriptions.DATE = pd.to_datetime(prescriptions.DATE)

    # Show
    print("\nSusceptibility records")
    print(susceptibility.head(5))
    print("\nPrescription records")
    print(prescriptions.head(5))

    # .. note:: Uncomment to load the CDDEP example data instead
    # Load default CDDEP sample
    #from pathlib import Path
    #path = Path('../datasets/cddep')
    #susceptibility = pd.read_csv(path / 'susceptibility.csv')
    #prescriptions = pd.read_csv(path / 'prescriptions.csv')

    # ------------------------
    # Compute summary table
    # ------------------------
    # Libraries
    from pyamr.core.sari import SARI

    # Create sari instance
    sari = SARI(groupby=['DATE',
                         'SPECIMEN',
                         'MICROORGANISM',
                         'ANTIMICROBIAL',
                         'SENSITIVITY'])

    # Compute susceptibility summary table
    smmry1 = sari.compute(susceptibility,
                          return_frequencies=False)

    # Compute prescriptions summary table (total dose per date and drug).
    smmry2 = prescriptions \
        .groupby(by=['DATE', 'DRUG']) \
        .DOSE.sum().rename('use')

    # Combine both summary tables
    smmry = smmry1.reset_index().merge(
        smmry2.reset_index(), how='inner',
        left_on=['DATE', 'ANTIMICROBIAL'],
        right_on=['DATE', 'DRUG']
    )

    # Show
    print("\nSummary")
    print(smmry)

    # -------------------------
    # Compute DRI
    # -------------------------
    obj = DRI(
        column_resistance='sari',
        column_usage='use'
    )

    # Compute DRI overall
    dri1 = obj.compute(smmry)

    # Compute DRI by specimen
    dri2 = obj.compute(smmry,
                       groupby=['SPECIMEN'])

    # Compute DRI by microorganism
    dri3 = obj.compute(smmry,
                       groupby=['MICROORGANISM'])

    # Compute DRI by microorganism and antimicrobial
    dri4 = obj.compute(smmry,
                       groupby=['MICROORGANISM', 'ANTIMICROBIAL'])

    # Compute DRI by date (including the use of the period)
    dri5 = obj.compute(smmry,
                       groupby=['DATE'],
                       return_usage=True)

    # Compute DRI by date and microorganism (including use)
    dri6 = obj.compute(smmry,
                       groupby=['DATE', 'MICROORGANISM'],
                       return_usage=True)

    # Compute DRI with both flags enabled (return_complete prevails)
    dri7 = obj.compute(smmry,
                       groupby=['DATE', 'MICROORGANISM', 'ANTIMICROBIAL'],
                       return_usage=True,
                       return_complete=True)

    # Compute DRI (return all elements of summary table).
    dri8 = obj.compute(smmry,
                       groupby=['MICROORGANISM'],
                       return_complete=True)

    # Show
    results = (dri1, dri2, dri3, dri4, dri5, dri6, dri7, dri8)
    for number, result in enumerate(results, start=1):
        print("\nDRI (%s):" % number)
        print(result)

    # --------------------------------------------
    # Compute DRI fixed
    # --------------------------------------------
    # Compute prescriptions on t0 (first use recorded for each drug).
    use_t0 = prescriptions \
        .groupby(by=['DATE', 'DRUG']) \
        .DOSE.sum().rename('use') \
        .to_frame().reset_index() \
        .groupby('DRUG').use.first()

    # Add to summary table
    smmry = smmry.assign(use_t0=smmry.DRUG.map(use_t0))

    # Define groupby
    groupby = [
        'DATE',
        'MICROORGANISM'
    ]

    # Compute DRI
    dri9a = obj.compute(smmry,
                        groupby=groupby,
                        return_usage=True)

    # Compute DRI using new USE
    dri9b = obj.compute(smmry,
                        groupby=groupby,
                        return_usage=True,
                        column_usage='use_t0')

    # Combine (columns computed with the fixed use get a suffix)
    aux = dri9a.merge(dri9b,
                      left_index=True, right_index=True,
                      suffixes=('', '_fixed'))

    # Show
    print("\n\n")
    print("\nSummary (variable):")
    print(smmry)
    print("\nDRI (9):")
    print(aux)

    # -------------------------------------------------------------------------
    # Testing
    # -------------------------------------------------------------------------
    # ---------------------------------------------------------------------
    # Errors
    # ---------------------------------------------------------------------
    # .. todo: Add examples in which the computation is meant to raise
    #          an error (e.g. a required column is missing or the input
    #          is not a DataFrame).
    print("\n\nHandling errors:")

    # ---------------------------------------------------------------------
    # Warnings
    # ---------------------------------------------------------------------
    # .. todo: Add examples in which the computation is meant to show
    #          a warning message.
    print("\n\nShow warnings:")