Source code for pyamr.datasets.microbiology.create_quickimport



# -------------------------------------------
# Methods
# -------------------------------------------
def create_registry(data, keyword=None, keep=None):
    """Creates registry from data.

    A registry is the set of unique rows found in the selected columns,
    re-indexed from zero. When a keyword is given, an additional column
    named ``<keyword>_id`` holding that new index is appended.

    Parameters
    ----------
    data: pd.DataFrame
        The data
    keyword: string
        The keyword for the columns. All columns starting with
        such keyword will be kept and used for the registry. It
        is also used to name the ``<keyword>_id`` column.
    keep: list
        The list of columns to keep for the registry. When None,
        the columns are selected using ``keyword``.

    Returns
    -------
    pd.DataFrame
        The unique rows of the selected columns, plus the
        ``<keyword>_id`` column when ``keyword`` is not None.

    Raises
    ------
    TypeError
        If both ``keyword`` and ``keep`` are None, since there is
        then no way to select the registry columns.
    """
    # Columns to keep
    if keep is None:
        if keyword is None:
            # Fail early with an explicit message instead of the opaque
            # error raised by str.startswith(None) further down.
            raise TypeError("create_registry() requires either "
                            "'keyword' or 'keep' to select the columns.")
        keep = [c for c in data.columns if c.startswith(keyword)]

    # Copy data
    reg = data[keep].copy(deep=True)
    reg = reg.drop_duplicates()
    reg = reg.reset_index(drop=True)

    # Add id
    if keyword is not None:
        reg['%s_id' % keyword] = reg.index.values

    # Return
    return reg
def drop_y(df):
    """Drops every column whose name ends with '_y'.

    Parameters
    ----------
    df: pd.DataFrame
        The data, usually the result of a merge.

    Returns
    -------
    pd.DataFrame
        The data without the '_y' columns.
    """
    to_drop = [x for x in df if x.endswith('_y')]
    return df.drop(to_drop, axis=1)


def rename_x(df):
    """Removes the trailing '_x' from the column names.

    Parameters
    ----------
    df: pd.DataFrame
        The data, usually the result of a merge.

    Returns
    -------
    pd.DataFrame
        The data with the '_x' suffix removed from column names.
    """
    # Slice the suffix off explicitly: str.rstrip('_x') strips any
    # trailing run of the characters '_' and 'x', so it would turn
    # 'patient_sex_x' into 'patient_se' rather than 'patient_sex'.
    suffix = '_x'
    to_rename = {c: c[:-len(suffix)]
                 for c in df.columns if c.endswith(suffix)}
    return df.rename(columns=to_rename)


if __name__ == '__main__':

    # Libraries generic
    import time
    import pandas as pd

    # Libraries specific
    from pyamr.datasets.load import make_susceptibility
    from pyamr.datasets.load import load_data_nhs

    # -------------------------------------------
    # Load data
    # -------------------------------------------
    # Load data
    # NOTE(review): the result of make_susceptibility() is overwritten
    # by the load_data_nhs() call right below; confirm it is needed.
    data = make_susceptibility()
    data, abxs, orgs = \
        load_data_nhs(folder='susceptibility-v0.0.5')

    # Show
    print("\nData:")
    print(data)
    print("\nColumns:")
    print(data.columns)

    # --------------------------------------------
    # Create registries
    # --------------------------------------------
    # Create registries (one per column prefix)
    reg_patient = create_registry(data, keyword='patient')
    reg_specimen = create_registry(data, keyword='specimen')
    reg_sensitivity = create_registry(data, keyword='sensitivity')
    reg_method = create_registry(data, keyword='method')
    reg_microorganism = create_registry(data, keyword='microorganism')
    reg_antimicrobial = create_registry(data, keyword='antimicrobial')

    # Show
    print("\nMethod:")
    print(reg_method)
    print("\nSensitivity:")
    print(reg_sensitivity)
    print("\nSpecimen:")
    print(reg_specimen)
    print("\nMicroorganism:")
    print(reg_microorganism)
    print("\nAntimicrobial:")
    print(reg_antimicrobial)
    print("\nPatients:")
    print(reg_patient)

    # -----------------------------------------------
    # Include microorganism/antimicrobial information
    # -----------------------------------------------
    # Libraries
    from pyamr.datasets.registries import MicroorganismRegistry
    from pyamr.datasets.registries import AntimicrobialRegistry

    # Load registry
    mreg = MicroorganismRegistry()
    areg = AntimicrobialRegistry()

    # Create genus and species (split the name on the first whitespace)
    reg_microorganism[['genus', 'species']] = \
        reg_microorganism.microorganism_name \
            .str.capitalize() \
            .str.split(expand=True, n=1)

    # Combine with registry information
    reg_microorganism = mreg.combine(reg_microorganism)
    reg_antimicrobial = areg.combine(reg_antimicrobial)

    # This fix with others in registries
    reg_antimicrobial.antimicrobial_name = \
        reg_antimicrobial.antimicrobial_name.str.lower()

    # Fill missing gram stain
    reg_microorganism.gram_stain = reg_microorganism.gram_stain.fillna('u')

    # -------------------------------------------------
    # Complete susceptibility records
    # -------------------------------------------------
    # Merge (attaches the <keyword>_id columns to the records)
    data = data.merge(reg_sensitivity, how='left',
                      on='sensitivity_name')
    data = data.merge(reg_method, how='left',
                      on='method_code')
    data = data.merge(reg_specimen, how='left',
                      on=['specimen_code', 'specimen_description'])
    data = data.merge(reg_patient, how='left',
                      on='patient_hos_number')
    data = data.merge(
        reg_microorganism[['microorganism_name', 'microorganism_id']],
        how='left', on='microorganism_name')
    data = data.merge(
        reg_antimicrobial[['antimicrobial_name', 'antimicrobial_id']],
        how='left', on='antimicrobial_name')

    # Drop and rename (discard the '_y' duplicates, restore '_x' names)
    data = drop_y(data)
    data = rename_x(data)

    print(data)
    print(data.count())

    # -----------------------------------
    # Save to MySQL
    # ----------------------------------
    # Libraries
    from sqlalchemy import create_engine

    # Constants
    # NOTE(review): credentials are hard-coded here; consider reading
    # them from the environment or a configuration file.
    user = 'root'
    pwd = 'toor'
    host = 'localhost'
    schema = 'epicimpoc-test'
    fmt = 'mysql+pymysql://{user}:{pwd}@{host}/{schema}'

    # Create connection
    db_connection = create_engine(fmt.format(user=user,
        pwd=pwd, host=host, schema=schema))

    # Columns to keep
    keep = ['date_created',
            'date_updated',
            'date_received',
            'date_outcome',
            'laboratory_number',
            'mic',
            'reported',
            'antimicrobial_id',
            'method_id',
            'microorganism_id',
            'patient_id',
            'sensitivity_id',
            'specimen_id']

    # Susceptibility tests
    aux = data.copy(deep=True)
    aux['date_created'] = pd.to_datetime('today')
    aux['date_updated'] = pd.to_datetime('today')
    aux = aux[keep]

    # Save (if_exists='replace' overwrites an existing table)
    aux.to_sql(name='microbiology_susceptibilitytest',
               con=db_connection,
               if_exists='replace',
               index=False)

    print(aux)

    # NOTE(review): the script stops here; everything below this call
    # is unreachable while the exit is in place.
    import sys
    sys.exit()

    def sql_prepare_lookup(df):
        """Helper method to prepare lookup tables"""
        aux = df.copy(deep=True)
        # Remove prefixes (everything up to the first underscore)
        aux.columns = [c.split('_', 1)[-1] for c in aux.columns]
        # Add missing columns
        aux['date_created'] = pd.to_datetime('today')
        aux['date_updated'] = pd.to_datetime('today')
        if 'description' not in aux:
            aux['description'] = ''
        if 'is_visible' not in aux:
            aux['is_visible'] = True
        # Fill na with '' so null=False.
        # NOTE(review): presumably meant to fill only object columns
        # with ''; confirm this call does what is intended.
        aux = aux.fillna(aux.dtypes.replace({'O': ''}))
        # Keep
        aux = aux[['id', 'name', 'code', 'description',
                   'date_created', 'date_updated', 'is_visible']]
        aux = aux.drop_duplicates()
        # Return
        return aux

    LOOP = [#(reg_sensitivity, 'microbiology_sensitivity'),
            #(reg_specimen, 'microbiology_specimen'),
            (reg_method, 'microbiology_method'),
            #(reg_patient, 'microbiology_patient'),
            (reg_antimicrobial, 'microbiology_antimicrobial'),
            (reg_microorganism, 'microbiology_microorganism')]

    for df, name in LOOP:
        try:
            print("Importing.... %s" % name)
            sql_prepare_lookup(df) \
                .to_sql(name=name,
                        con=db_connection,
                        if_exists='append',
                        index=False)
        except Exception as e:
            # Best effort: report the failure and continue with the
            # next lookup table.
            print(e)

    import sys
    sys.exit()

    # -------------------------------------------------
    #
    # -------------------------------------------------
    # Specific
    from pathlib import Path

    # Time
    timestr = time.strftime("%Y%m%d-%H%M%S")

    # Define path
    path = Path('./%s' % timestr)

    # Create path if it does not exist
    path.mkdir(parents=True, exist_ok=True)

    # Save registries
    reg_patient.to_csv(path / 'patients.csv', index=False)
    reg_specimen.to_csv(path / 'specimens.csv', index=False)
    reg_sensitivity.to_csv(path / 'sensitivities.csv', index=False)
    reg_method.to_csv(path / 'methods.csv', index=False)
    reg_microorganism.to_csv(path / 'microorganisms.csv', index=False)
    reg_antimicrobial.to_csv(path / 'antimicrobial.csv', index=False)

    import sys
    sys.exit()

    # -------------------------------------------------
    # Create susceptibility test record from registries
    # -------------------------------------------------
    data = data.merge(reg_specimen['specimen_code'], how='left',
                      left_on='specimen_code',
                      right_on='specimen_code')
    data = data.merge(reg_sensitivity, how='left',
                      left_on='sensitivity_name',
                      right_on='sensitivity_name')
    data = data.merge(reg_method['method_code'], how='left',
                      left_on='method_code',
                      right_on='method_code')

    keep = ['date_received',
            'date_outcome',
            'laboratory_number',
            'specimen_id',
            'sensitivity_id',
            'method_id']

    # Show
    print(data[keep])