Source code for pyamr.core.acsi

# Libraries
import numpy as np
import pandas as pd


[docs]def create_combinations_v1(d, groupby, col_spe='SPECIMEN', col_lab='LAB_NUMBER', col_org='MICROORGANISM', col_abx='ANTIMICROBIAL', col_sns='SENSITIVITY'): """Creates the DataFrame with all combinations. .. note:: There might be an issue if there are two different outcomes for the same record. For example, a susceptibility test record for penicillin (APEN) with R and another one with S. Warn of this issue if it appears! .. note:: If the data is right and the laboratory numbers are unique per isolate then the date is not necessary. However, what if we want to keep it? Groupby should at least contain: specimen, microorganism and lab_id .. note:: How to add all data in addition to the columns manually. Parameters ---------- Returns -------- """ # Libraries from itertools import combinations # Initialize c = [] # Loop for i, g in d.groupby(groupby): for x, y in combinations(g.sort_values(by=col_abx).index, 2): aux = dict(zip(groupby, i)) aux.update({ '%s_x' % col_abx: g.loc[x, col_abx], '%s_y' % col_abx: g.loc[y, col_abx], '%s_x' % col_sns: g.loc[x, col_sns], '%s_y' % col_sns: g.loc[y, col_sns] }) c.append(aux) # Create DataFrame c = pd.DataFrame(c) # Add class c['class'] = c['%s_x' % col_sns] + \ c['%s_y' % col_sns] # Return return c
[docs]def mutual_info_matrix_v3(x=None, y=None, ct=None): """Compute the component information score. .. note: Might be inefficient but good for testing. .. note: In order to be able to compute the mutual information score it is necessary to have variation within the variable. Thus, if there is only one class, should we return a result or a warning? Parameters ---------- x: list List with the classes y: list List with the classes Returns ------- """ # Libraries from scipy.stats.contingency import crosstab def _check_nparray(obj, param_name): if obj is not None: if isinstance(obj, np.ndarray): return obj elif isinstance(obj, pd.Series): return obj.to_numpy() elif isinstance(obj, pd.DataFrame): return obj.to_numpy() elif isinstance(obj, list): return np.array(obj) else: raise ValueError(""" The input parameter '{0}' is of type '{1} which is not supported. Please ensure it is a np.ndarray.""" .format(param_name, type(obj))) # Ensure they are all np arrays x = _check_nparray(x, 'x') y = _check_nparray(y, 'y') ct = _check_nparray(ct, 'ct') # Compute contingency if ct is None: c = crosstab(x,y) if isinstance(c, tuple): ct = c[-1] # older scipy else: ct = c.count # newer scipy # Variables n = ct.sum() pi = np.ravel(ct.sum(axis=1)) / n pj = np.ravel(ct.sum(axis=0)) / n # Create empty matrix m = np.empty(ct.shape) m[:] = np.nan # Fill with component information score with np.errstate(all='ignore'): for i in range(m.shape[0]): for j in range(m.shape[1]): pxy = ct[i,j] / n m[i,j] = pxy * np.log(pxy / (pi[i] * pj[j])) # Fill with na (lim x->0 => 0) m[np.isnan(m)] = 0 # Return return m
[docs]def collateral_resistance_index(m): """Collateral Resistance Index The collateral resistance index is based on the mutual information matrix. This implementation assumes there are only two classes resistant (R) and sensitive (S). .. warning:: Only works for a 2x2 contingency matrix Parameters ---------- m: np.array A numpy array with the mutual information matrix. Also called the contingency matrix. Returns ------- """ return (m[0, 0] + m[1, 1]) - (m[0, 1] + m[1, 0])
[docs]def CRI(x, func_mis=mutual_info_matrix_v3): """Collateral resistance index Parameters ---------- x: pd.Series Contains the combinations classes (e.g. SS, SR, RS, RR) func_mis: function The function to use to compute the contingency matrix from the mutual information score. By default it uses the function mutual_info_matrix_v3. """ ct = np.array([[x.SS, x.SR], [x.RS, x.RR]]) m = func_mis(ct=ct) return collateral_resistance_index(m)
[docs]class ACSI: """Antimicrobial Collateral Sensitivity Index. Other possible names for this index... a) Antimicrobial Disjoint Resistance Index b) Antimicrobial Collateral Resistance Index c) Antimicrobial Collateral Sensitivity Index d) Antimicrobial Collateral Resistance Index """ # Attributes c_spe = 'SPECIMEN' c_org = 'MICROORGANISM' c_abx = 'ANTIMICROBIAL' c_sns = 'SENSITIVITY' c_lab = 'LAB_NUMBER' c_dat = 'DATE' def __init__(self, column_specimen=c_spe, column_microorganism=c_org, column_antimicrobial=c_abx, column_sensitivity=c_sns, column_laboratory=c_lab): """The constructor. Parameters ---------- column_specimen: string The column name with the specimen column_antimicrobial: string The column name with the antimicrobial column_microorganism: string The column name with the microorganism column_sensitivity: string The column name with the sensitivity column_laboratory: string The column name with the laboratory id Returns ------- none """ # Create dictionary to rename columns self.rename_columns = {column_specimen: self.c_spe, column_antimicrobial: self.c_abx, column_microorganism: self.c_org, column_sensitivity: self.c_sns, column_laboratory: self.c_lab}
[docs] def combinations(self, dataframe, **kwargs): """Creates the combinations. .. note:: In theory the combinations only need to be grouped by laboratory number, however, in order to maintain the rest of the information (like date) we need to pass all of them. """ return create_combinations_v1(dataframe, **kwargs)
[docs] def compute_from_contingency(self): pass
[docs] def compute_from_combinations(self): pass
[docs] def compute(self, dataframe, flag_combinations=False, flag_contingency=False, groupby=None, func_mis=None, return_combinations=False): """Computes the Antimicrobial Collateral Sensitivity. .. note:: Enable to pass a combinations dataframe. .. note:: The lab number should be used only to compute the combinations. It should not be used when computing the ACSI. Parameters ---------- dataframe: pd.DataFrame A dataframe with the susceptibility test interpretations as columns. combinations: boolean Indicates whether the variable DataFrame contains susceptibility test records (combinations=False) or the antimicrobial combinations and the class (combination=True) func_mis: function The function to use to compute the contingency matrix from the mutual information score. By default it uses the function mutual_info_matrix_v3. Returns ------- pd.Series or pd.DataFrame """ # Do checks # Set default groupby if groupby is None: groupby = [ self.c_dat, self.c_spe, self.c_org, ] # Set default function if func_mis is None: func_mis = mutual_info_matrix_v3 # Rename columns aux = dataframe.copy(deep=True) \ .rename(columns=self.rename_columns) # Create combinations if flag_combinations: combinations = aux else: combinations = self.combinations(aux, groupby=groupby + [self.c_lab]) # Create contingency DataFrame contingency = combinations.groupby( by=groupby + [ self.c_abx + '_x', self.c_abx + '_y', 'class'])\ .size().unstack() # Ensure that variables in CRI needed exist for s in ['SS', 'RS', 'SR', 'RR']: if not s in contingency: contingency[s] = 0 # Compute CRI contingency['acsi'] = contingency.fillna(0) \ .apply(CRI, args=(func_mis,), axis=1) # Return if return_combinations: return contingency, combinations return contingency
if __name__ == '__main__': # Libraries import pandas as pd # ---------------------------------- # Create data # ---------------------------------- # Define susceptibility test records susceptibility_records = [ ['2021-01-01', 'LAB_1', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'], ['2021-01-01', 'LAB_1', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'], ['2021-01-01', 'LAB_2', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'], ['2021-01-01', 'LAB_2', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'], ['2021-01-01', 'LAB_3', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'], ['2021-01-01', 'LAB_3', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'], ['2021-01-01', 'LAB_4', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'], ['2021-01-01', 'LAB_4', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'], ['2021-01-02', 'LAB_5', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'], ['2021-01-02', 'LAB_5', 'BLDCUL', 'ECOL', 'ACIP', 'sensitive'], ['2021-01-02', 'LAB_6', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'], ['2021-01-02', 'LAB_6', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'], ['2021-01-02', 'LAB_7', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'], ['2021-01-02', 'LAB_7', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'], ['2021-01-03', 'LAB_8', 'BLDCUL', 'ECOL', 'AAUG', 'sensitive'], ['2021-01-03', 'LAB_8', 'BLDCUL', 'ECOL', 'ACIP', 'intermediate'], ['2021-01-03', 'LAB_9', 'BLDCUL', 'ECOL', 'AAUG', 'resistant'], ['2021-01-03', 'LAB_9', 'BLDCUL', 'ECOL', 'ACIP', 'resistant'], ['2021-01-03', 'LAB_9', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'], ['2021-01-03', 'LAB_9', 'BLDCUL', 'SAUR', 'ACIP', 'resistant'], ['2021-01-04', 'LAB_10', 'URICUL', 'ECOL', 'AAUG', 'resistant'], ['2021-01-04', 'LAB_10', 'URICUL', 'ECOL', 'ACIP', 'sensitive'], ['2021-01-04', 'LAB_10', 'URICUL', 'SAUR', 'AAUG', 'resistant'], ['2021-01-04', 'LAB_10', 'URICUL', 'SAUR', 'APEN', 'resistant'], ] # Create DataFrames susceptibility = pd.DataFrame(susceptibility_records, columns=['DATE', 'LAB_NUMBER', 'SPECIMEN', 'MICROORGANISM', 'ANTIMICROBIAL', 'SENSITIVITY']) # Format DataFrame susceptibility.SENSITIVITY = \ susceptibility.SENSITIVITY.replace({ 'resistant': 'R', 'intermediate': 'I', 'sensitive': 'S' }) # Show print("\nSusceptibility:") print(susceptibility) # .. note: It is important to ensure that there are not susceptibility # test records with contradicting results. For example being # resistant and sensitive at the same time. Integrate this # check in the computation! # --------------------------- # Create combinations # --------------------------- # Create combinations c = create_combinations_v1(susceptibility, groupby=[ 'DATE', 'LAB_NUMBER', 'SPECIMEN', 'MICROORGANISM' ]) print("\nCombinations:") print(c) # Build contingency r = c.groupby([ 'DATE', 'SPECIMEN', 'MICROORGANISM', 'ANTIMICROBIAL_x', 'ANTIMICROBIAL_y', 'class']).size().unstack() print("Contingency:") print(r) # Compute CRI r['MIS'] = r.fillna(0) \ .apply(CRI, args=(mutual_info_matrix_v3,), axis=1) # Show print("\nResult") print(r) # ------------------------------------------ # Computes ACSI using class # ------------------------------------------ def show(combinations, contingency, title=None, n=100): """Helper function to display outcomes.""" # Variables n_comb = combinations.shape[0] n_cont = np.nansum(contingency.to_numpy()[:, :-1]) if title is None: title = 'Grouped By: %s' % str(contingency.index.names[:-2]) # Display print("\n" + "="*n + '\n%s\n'%title + "="*n) print("Total combinations: %s" % int(n_comb)) print("Total contingency: %s" % int(n_cont)) print("\nCombinations:") print(combinations) print("\nContingency:") print(contingency) # Create ACSI instance acsi = ACSI() # --------------- # Compute overall # --------------- # .. note:: Why removing LAB_NUMBER returns only # the first letter...? # Compute index contingency, combinations = \ acsi.compute(susceptibility, groupby=[], return_combinations=True) # Show show(combinations, contingency, title='Overall') # --------------- # Compute by # --------------- # Compute index contingency, combinations = \ acsi.compute(susceptibility, groupby=['DATE'], return_combinations=True) # Show show(combinations, contingency, title='By <DATE>') # ---------------- # Compute by pairs # ---------------- # Compute index contingency, combinations = \ acsi.compute(susceptibility, groupby=[ 'SPECIMEN', 'MICROORGANISM' ], return_combinations=True) # Show show(combinations, contingency, title='By <SPECIMEN, MICROORGANISM>') # ------------------------- # Compute by date and pairs # ------------------------- # .. note:: It seems that it is important to include all the # parameters when computing the combinations. Otherwise # it might create ill defined combinations. Think this # through... # Compute index contingency, combinations = \ acsi.compute(susceptibility, groupby=[ 'DATE', 'SPECIMEN', 'MICROORGANISM' ], return_combinations=True) # Show show(combinations, contingency, title=None) # Compute contingency reusing combinations. contingency = acsi.compute(combinations.reset_index(), groupby=['SPECIMEN'], flag_combinations=True, return_combinations=False) show(combinations, contingency, title=None) # Compute contingency reusing combinations. contingency = acsi.compute(combinations.reset_index(), groupby=['MICROORGANISM'], flag_combinations=True, return_combinations=False) show(combinations, contingency, title=None) # ------------------------------------------------------------------------- # Testing # ------------------------------------------------------------------------- # --------------------------------------------------------------------- # Success # --------------------------------------------------------------------- # .. note: All this examples should succeed. At the moment the code # breaks if gram is not included. This is because the data # we have created has duplicated values for each gram. # Should we consider this within the ASAI? # --------------------------------------------------------------------- # Errors # --------------------------------------------------------------------- # .. note: In the examples below, the method acsi is meant to raise # an error either because any of the required missing columns # is missing or because the configuration is not correct. print("\n\nHandling errors:") # --------------------------------------------------------------------- # Warnings # --------------------------------------------------------------------- # .. note: In the examples below, the method acsi is meant to show a # warning message either no threshold has been specified or # because thresholds have been specified twice. print("\n\nShow warnings:")