################################################################################
# Author:
# Date:
# Description:
#
#
#
# Copyright:
#
#
################################################################################
from __future__ import division
# Libraries
import sys
import warnings
import numpy as np
import pandas as pd
# ------------------------------------------------------------------------------
# methods
# ------------------------------------------------------------------------------
def _check_asai_weights_genus(dataframe): # pragma: no cover
"""Checks that the weights for each genus add up to one.
.. deprecated:: 0.0.1
Parameters
----------
dataframe : dtaframe-like
The dataframe whose columns must be checked
Returns
-------
raise error
"""
# Compute weights per genus
weights = dataframe.groupby(by='GENUS')['W_SPECIE'].sum()
weights = np.round(weights, decimals=10)
# Check that all add up to one
if not (weights==1).all():
raise TypeError("The weights (W_SPECIE) for each genus should add up to "
"one. Please review these weights since they are not "
"valid. \n%s" % weights)
def _check_asai_weights_specie(dataframe): # pragma: no cover
"""Check that the weights for all the species add up to one.
.. deprecated:: 0.0.1
Parameters
----------
dataframe : dtaframe-like
The dataframe whose columns must be checked
Returns
-------
raise error
"""
# Compute weights per species
unique = dataframe.groupby(by='GENUS')['W_GENUS'].nunique()
weights = dataframe.groupby(by='GENUS')['W_GENUS'].mean()
merged = pd.concat([unique, weights], axis=1)
merged.columns = ['UNIQUE', 'W_GENUS']
# Check only one weight is given for each genus
if not (unique==1).all():
raise TypeError("The weights (W_GENUS) should be equal for all the rows "
"with a same genus value. Please ensure that the number "
"of unique elements is always 1. \n%s" % merged)
# Check that weights add up to one
if not (np.round(np.sum(weights), decimals=10)==1):
return TypeError("The weights (W_GENUS) should add up to one. Please "
"review these weights since they are not valid. "
"\n%s" % weights)
def _check_asai_dataframe_columns(dataframe, required_columns): # pragma: no cover
"""This method checks that the dataframe has all attributes.
.. deprecated:: 0.0.1
Parameters
-----------
dataframe : pandas DataFrame
The dataframe containing the information.
attributes :
The required columns
Returns
-------
exit the program
"""
# Find missing columns
missing = list(set(required_columns) - set(dataframe.columns))
# There are missing columns
if not missing: return
# Raise an error
raise TypeError("The dataframe passed as argument is missing the "
"following columns: %s. Please correct this issue."
% missing)
def _asai(dataframe, threshold=None, weights='uniform'): # pragma: no cover
"""Computes the antimicrobial spectrum of activity.
.. deprecated:: 0.0.1
.. todo: There is an error when W_GENUS = 1 / GENUS.nunique()
Parameters
----------
dataframe : dataframe-like
The dataframe containing the information to compute the asai index. The
following columns are required [SPECIE, GENUS, RESISTANCE]. In addition,
the effective threshold, genus weight and specie weight can be specied
using the following columns [THRESHOLD, W_GENUS, W_SPECIE]. Note that
the weights must add up to one.
threshold : number
The number to set a common threshold.
weights : string
Method used to compute the weights. The possible values are uniform and
proportional. In order to use proportional a column with the frequency
for each specie must be included in the dataframe.
Returns
-------
dataframe
"""
# Check that the input is a dataframe
if not isinstance(dataframe, pd.DataFrame):
raise TypeError("The instance passed as argument needs to be a pandas "
"DataFrame. Instead, a <%s> was found. Please convert "
"the input accordingly." % type(dataframe))
# Add fixed threshold
if threshold is not None:
dataframe['THRESHOLD'] = threshold
# Add weights
if weights == 'uniform':
# Set uniform weights
dataframe = dataframe.set_index(keys=['GENUS'], drop=False)
dataframe['W_GENUS'] = 1. / dataframe.GENUS.nunique()
dataframe['W_SPECIE'] = 1. / dataframe.SPECIE.groupby(level=0).count()
dataframe = dataframe.reset_index(drop=True)
# Required columns
required = ['RESISTANCE', 'THRESHOLD', 'W_GENUS', 'W_SPECIE']
# Check that the weights add up to one.
_check_asai_dataframe_columns(dataframe, required_columns=required)
_check_asai_weights_genus(dataframe)
_check_asai_weights_specie(dataframe)
# Select data
resistance = dataframe.RESISTANCE
threshold = dataframe.THRESHOLD
weights_genus = dataframe.W_GENUS
weights_specie = dataframe.W_SPECIE
# Create results
d = {'N_GENUS': dataframe.GENUS.nunique(),
'N_SPECIE': dataframe.SPECIE.nunique(),
'ASAI_SCORE': _asai_score(weights_genus,
weights_specie,
resistance,
threshold)}
# Compute ASAI.
return pd.Series(d)
def _asai_score(weights_genus, weights_specie, resistance, threshold): # pragma: no cover
"""Computes the asai score.
.. deprecated:: 0.0.1
Parameters
----------
weights_genus : array-like
The weight associated to each of the genus
weights_specie : array-like
The weight associated to each of the species
resistance : array-like
The resistances
threshold : array-like
The thresholds
Returns
-------
asai score
"""
return np.sum(weights_genus*weights_specie*(resistance<=threshold))
[docs]def asai(*args, **kwargs):
"""Redirects to ``antimicrobial_spectrum_activity_index``."""
return antimicrobial_spectrum_activity_index(*args, **kwargs)
[docs]def antimicrobial_spectrum_activity_index(dataframe, weights='uniform',
threshold=0.5, tol=1e-6, verbose=0):
"""Computes the Antimicrobial Spectrum of Activity.
.. note:: Since threshold and weights have a default value, the
warnings below will not be displayed. However, the code
is there in case the behaviour needs to be changed in
the future.
.. note:: Another way to check that the weights are correct is just
by computing ASAI with th=0 and th=1. These should result
in asai=1 and asai=0 respectively.
| # Compute score
| score_1 = np.sum(wgn * wsp * (sari <= 0))
| score_2 = np.sum(wgn * wsp * (sari <= 1))
.. warning:: Should the duplicated check only for the columns
GENUS and SPECIE? What if we do not group by
antibiotic? It has to be unique for the antibiotic
also. It is up to the user to make the right use
of this?
Parameters
----------
dataframe: pd.DataFrame
The pandas dataframe with the information. The following columns
are always required [RESISTANCE, GENUS and SPECIE]. In addition,
[W_GENUS and W_SPECIE] are required if weights is None. Also,
if weights = 'frequency' the column FREQUENCY must be present.
weights: string, default='uniform'
The method to compute the weights. The methods supported are:
- 'specified': weights must be in [W_GENUS and W_SPECIE]
- 'uniform': uniform weights for genus and species within genus.
- 'frequency': weights are proportional to the frequencies.
The following rules must be fulfilled by the weight columns:
- consistent weight for a given genus
- all genus weights must add up to one.
- all specie weights within a genus must add up to one.
threshold: float, default=0.5
The threshold resistance value above which the antimicrobial is
considered non-effective to treat the microorganism. For instance,
for a resistance threshold of 0.5, if a pair <o,a> has a resistance
value of 0.4, the microorganism will be considered sensitive. In
order to use specific thresholds keep threshold to None and include
a column 'THRESHOLD'.ss
tol: float, default=1e-6
The tolerance in order to check that all conditions (uniqueness
and sums) are satisfied. Note that that float precision varies
and therefore not always adds up to exactly one.
verbose: int, default=0
The level of verbosity.
Returns
-------
pd.DataFrame
The dataframe with the ASAI information and counts.
"""
# Required columns
required = ['RESISTANCE', 'GENUS', 'SPECIE']
# Add weight-related required columns
if weights == 'specified':
required += ['W_GENUS', 'W_SPECIE']
if weights == 'frequency':
required += ['FREQUENCY']
# Check weights
if weights not in ['uniform', 'frequency', 'specified']:
raise ValueError("""
The weights '{0}' is not supported. Please
use one of the following: uniform, frequency
or specified""".format(weights))
# Bad input type
if not isinstance(dataframe, pd.DataFrame):
raise TypeError("""\n
The instance passed as argument needs to be a pandas
"DataFrame. Instead, a <%s> was found. Please convert
the input accordingly.""" % type(dataframe))
# Check columns
if set(required).difference(dataframe.columns):
raise ValueError("The following columns are missing: {0} " \
.format(set(required).difference(dataframe.columns)))
# Check duplicates
if dataframe.duplicated().any():
raise ValueError("There are duplicated rows in the DataFrame.")
# Get NaN idxs
idxs = dataframe[required].isna().any(axis=1)
# Show warning and correct
if idxs.any():
raise ValueError("""\n
There are NULL values in columns that are required.
Please correct this issue and try again. See below
for more information:\n\n\t\t{0}""".format(
dataframe.loc[idxs, required] \
.to_string().replace("\n", "\n\t\t")
))
# Copy DataFrame
aux = dataframe.copy(deep=True)
# Check threshold
if 'THRESHOLD' in aux.columns:
if threshold is not None:
warnings.warn("""\n
The threshold has been defined both as an
input parameter (threshold={0}) and a DataFrame
column 'THRESHOLD'. The latter will be used."""
.format(threshold))
else:
if threshold is None:
warnings.warn("""\n
The threshold has not been defined using either
an input parameter (threshold={0}) or a column in the
dataframe named 'THRESHOLD'. Thus a default threshold
value of '0.5' will be used.""".format(threshold))
threshold = 0.5
aux['THRESHOLD'] = threshold
# Set uniform weights
if weights == 'uniform':
aux['W_GENUS'] = 1. / aux.GENUS.nunique()
aux['W_SPECIE'] = 1. / aux.GENUS.map(
aux.groupby(['GENUS']).SPECIE.count())
# Set frequency weights
if weights == 'frequency':
# Set frequency weights
fgn = aux.groupby(['GENUS']).FREQUENCY.sum()
aux['S_GENUS'] = aux.GENUS.map(fgn)
aux['W_GENUS'] = aux.GENUS.map(fgn / fgn.sum())
aux['W_SPECIE'] = aux.FREQUENCY / aux.S_GENUS
# Check sums
#report = pd.DataFrame()
#report['W_GENUS_UNIQUE_OK'] = aux.groupby('GENUS').W_GENUS.nunique()
#report['W_GENUS_SUM_OK'] = aux.groupby('GENUS').head(1).W_GENUS.sum()
#report['W_SPECIE_SUM_OK'] = aux.groupby(['GENUS']).W_SPECIE.sum()
#if verbose > 5:
# # Explain each error individually.
# pass
# Condition
#condition = (1 - report).abs() < tol
# Report
#if not condition.all().all():
# raise ValueError("""
# The weights imputed do not fulfill all the requirements. Please
# check the report below and correct the weights accordingly. Note
# a given genus must have a consistent weight and the sum of weights
# must add up to 1.\n\n\t\t{0}""" \
# .format(condition.to_string().replace("\n", "\n\t\t")))
# Show
if verbose > 5:
print("\nweights={0} | threshold={1}".format(weights, threshold))
print(aux)
# Extract vectors
wgn = aux.W_GENUS
wsp = aux.W_SPECIE
sari = aux.RESISTANCE
th = aux.THRESHOLD
# Check range using extreme thresholds
s1 = np.sum(wgn * wsp * (sari < 0))
s2 = np.sum(wgn * wsp * (sari <= 1))
if abs(s1-0) > tol or abs(s2-1) > tol:
raise ValueError("""
The weights argument do not fulfill all the requirements. Note
that the correct weights would produce a SARI value within the
range [0, 1]. However, the weights received did not fulfill
such constraint.""")
# Compute score
score = np.sum(wgn * wsp * (sari < th))
# Create results
d = {
'N_GENUS': aux.GENUS.nunique(),
'N_SPECIE': aux.SPECIE.nunique(),
'ASAI_SCORE': score
}
# Default weights
return pd.Series(d)
[docs]class ASAI:
"""Antimicrobial Spectrum of Activity Index.
"""
# Attributes
c_gen = 'GENUS'
c_spe = 'SPECIE'
c_res = 'RESISTANCE'
c_thr = 'THRESHOLD'
c_fre = 'FREQUENCY'
c_wgen = 'W_GENUS'
c_wspe = 'W_SPECIE'
def __init__(self, column_genus=c_gen,
column_specie=c_spe,
column_resistance=c_res,
column_threshold=c_thr,
column_frequency=c_fre,
column_wgenus=c_wgen,
column_wspecie=c_wspe):
"""The constructor.
Parameters
----------
column_genus: string
The column name with the genus values
column_specie: string
The column name with the specie values
column_resistance: string
The column name with the resistance values
column_threshold: string
The column name with the threshold values
column_frequency: string
The column name with the frequency values
Returns
-------
none
"""
# Create dictionary to rename columns
self.rename = {column_genus: self.c_gen,
column_specie: self.c_spe,
column_resistance: self.c_res,
column_threshold: self.c_thr,
column_frequency: self.c_fre,
column_wgenus: self.c_wgen,
column_wspecie: self.c_wspe}
# Columns that are required
self.required = [self.c_gen, self.c_spe, self.c_res]
[docs] def compute(self, dataframe, groupby=None, min_freq=None, **kwargs):
"""Computes the ASAI index (safely).
.. note: Review first NaN and then duplicated?
.. note: Review extreme values in resistance?
Parameters
----------
dataframe: pd.DataFrame
The pandas dataframe with the information. The following columns
are always required [RESISTANCE, GENUS and SPECIE]. In addition,
[W_GENUS and W_SPECIE] are required if weights is None. Also,
if weights = 'frequency' the column FREQUENCY must be present.
groupby: list, default=None
The elements to groupby (pd.groupby)
min_freq: int, default=None
The minimum number of susceptibility tests required in order to
include the species to compute ASAI. Note that to work the dataframe
must include a column indicating the frequencies.
weights: string, default=None
The method to compute the weights. The methods supported are:
- None: weights must be specified in [W_GENUS and W_SPECIE]
- 'uniform': uniform weights for genus and species within genus.
- 'frequency: weights are proportional to the frequencies.
The following rules must be fulfilled by the weight columns:
- consistent weight for a given genus
- all genus weights must add up to one.
- all specie weights within a genus must add up to one.
threshold: float, default=None
The threshold resistance value above which the antimicrobial is
considered non-effective to treat the microorganism. For instance,
for a resistance threshold of 0.5, if a pair <o,a> has a resistance
value of 0.4, the microorganism will be considered sensitive. In
order to use specific thresholds keep threshold to None and include
a column 'THRESHOLD'.ss
tol: float, default=1e-6
The tolerance in order to check that all conditions (uniqueness
and sums) are satisfied. Note that that float precision varies
and therefore not always adds up to exactly one.
verbose: int, default=0
The level of verbosity.
Returns
-------
pd.DataFrame
The dataframe with the ASAI information and counts.
"""
# Bad input type
if not isinstance(dataframe, pd.DataFrame):
raise TypeError("""
The instance passed as argument needs to be a pandas
DataFrame. Instead, a <%s> was found. Please convert
the input accordingly.""" % type(dataframe))
if isinstance(groupby, str):
groupby = [groupby]
# Create auxiliary variable
required = groupby + self.required
# Rename columns
aux = dataframe.rename(columns=self.rename, copy=True)
# Filter by freq
if min_freq is not None:
if not self.c_fre in aux:
warnings.warn("""
The min_freq={0} cannot be applied because the frequency
columns 'FREQUENCY' does not exist in the DataFrame.\n"""
.format(min_freq))
else:
aux = aux[aux[self.c_fre] >= min_freq]
# Check duplicates
if aux.duplicated(subset=required).any():
warnings.warn("""
There are duplicated rows in the DataFrame. This is
usually not expected. Please review the DataFrame and
address this inconsistencies. Maybe you should include
more columns in the groupby (e.g. specimen_code). The
columns used to compute duplicated are:
{0}.\n""".format(required))
#aux = aux.drop_duplicates(required)
# Check extreme resistance values
if aux.RESISTANCE.isin([0.0, 1.0]).any():
warnings.warn("""
Extreme resistances [0, 1] were found in the DataFrame. These
rows should be reviewed since these resistances might correspond
to pairs with low number of records.\n""")
#aux = aux[aux[self.c_res] != 1.0]
# Get NaN indexes
idxs = aux[required].isna().any(axis=1)
# Show warning and correct
if idxs.any():
warnings.warn("""
There are NULL values in columns that are required. These
rows will be ignored to safely compute ASAI. Please review
the DataFrame and address this inconsistencies. See below
for more information: \n\n\t\t\t{0}\n""".format( \
aux[required].isna().sum(axis=0) \
.to_string().replace("\n", "\n\t\t\t")))
aux = aux.dropna(subset=required)
# Check all genus weights add up to one?
# Compute
scores = aux.groupby(groupby) \
.apply(asai, **kwargs)
# Return
return scores
[docs]class ASAI_old(): # pragma: no cover
"""This class computes the antimicrobial spectrum of activity.
.. deprecated:: 0.0.1
"""
# Attributes
c_abx = 'ANTIBIOTIC'
c_gen = 'GENUS'
c_spe = 'SPECIE'
c_res = 'RESISTANCE'
c_thr = 'THRESHOLD'
c_fre = 'FREQUENCY'
c_wgen = 'W_GENUS'
c_wspe = 'W_SPECIE'
def __init__(self, weights='uniform', threshold=0.5,
column_genus=c_gen,
column_specie=c_spe,
column_antibiotic=c_abx,
column_resistance=c_res,
column_threshold=c_thr,
column_frequency=c_fre,
column_wgenus=c_wgen,
column_wspecie=c_wspe):
"""The constructor.
Parameters
----------
threshold : number
The threshold under which the drug is considered effective.
weights : string
The method to compute the weights
column_genus : string
The column name with the genus values
column_specie : string
The column name with the specie values
column_antibiotic : string
The column name with the antibiotic values
column_resistance : string
The column name with the resistance values
column_threshold : string
The column name with the threshold values
column_frequency : string
The column name with the frequency values
Returns
-------
none
"""
# Set parameters
self.weights = weights
self.threshold = threshold
# Create dictionary to rename columns
self.rename_columns = {column_genus: self.c_gen,
column_specie: self.c_spe,
column_antibiotic: self.c_abx,
column_resistance: self.c_res,
column_threshold: self.c_thr,
column_frequency: self.c_fre,
column_wgenus: self.c_wgen,
column_wspecie: self.c_wspe}
# Columns that are required
self.required_columns = [self.c_gen, self.c_spe, self.c_abx, self.c_res]
[docs] def compute(self, dataframe, by_category):
"""This function computes the asai index by category.
Parameters
----------
dataframe : pandas DataFrame
The pandas DataFrame containing the data. In particular it needs to
contain the following columns: genus, specie, antibiotic and the
resistance outcome within the range [0,1].
by_category : string
The name of the column that will be used to group ASAI.
Returns
-------
pandas dataframe
the dataframe...
"""
# Check that it is a dataframe
if not isinstance(dataframe, pd.DataFrame):
raise TypeError("The instance passed as argument needs to be a pandas "
"DataFrame. Instead, a <%s> was found. Please convert "
"the input accordingly." % type(dataframe))
# Rename columns
dataframe = dataframe.rename(columns=self.rename_columns, copy=True)
# Check dataframe columns
_check_asai_dataframe_columns(dataframe, self.required_columns)
# Check that there are no duplicates
dataframe = dataframe.drop_duplicates(subset=[self.c_gen,
self.c_spe,
self.c_abx,
by_category])
# Check that no intrinsic resistance is considered
dataframe = dataframe[dataframe[self.c_res]!=1.0]
# Check that the by parameter has all value different than none
dataframe = dataframe.dropna(subset=[by_category])
# Compute asai and return
return dataframe.groupby(by=[self.c_abx, by_category]) \
.apply(_asai, threshold=self.threshold) \
.unstack()
if __name__ == '__main__': # pragma: no cover
# Import libraries
import sys
import numpy as np
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
# Import specific libraries
from pyamr.datasets import load
# Configure seaborn style (context=talk)
sns.set(style="white")
# Set matplotlib
mpl.rcParams['xtick.labelsize'] = 9
mpl.rcParams['ytick.labelsize'] = 9
mpl.rcParams['axes.titlesize'] = 11
mpl.rcParams['legend.fontsize'] = 9
# Pandas configuration
pd.set_option('display.max_colwidth', 40)
pd.set_option('display.width', 300)
pd.set_option('display.precision', 4)
# Numpy configuration
np.set_printoptions(precision=2)
# ---------------------
# helper method
# ---------------------
def scalar_colormap(values, cmap, vmin, vmax):
"""This method creates a colormap based on values.
Parameters
----------
values : array-like
The values to create the corresponding colors
cmap : str
The colormap
vmin, vmax : float
The minimum and maximum possible values
Returns
-------
scalar colormap
"""
# Create scalar mappable
norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax, clip=True)
mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
# Gete color map
colormap = sns.color_palette([mapper.to_rgba(i) for i in values])
# Return
return colormap
# ---------------------
# Create data
# ---------------------
# Create data
data = [['GENUS_1', 'SPECIE_1', 'ANTIBIOTIC_1', 'N', 1, 0.6000, 0.05],
['GENUS_2', 'SPECIE_2', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
['GENUS_2', 'SPECIE_3', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
['GENUS_2', 'SPECIE_4', 'ANTIBIOTIC_1', 'N', 1, 0.0064, 0.05],
['GENUS_2', 'SPECIE_5', 'ANTIBIOTIC_1', 'N', 1, 0.0073, 0.05],
['GENUS_2', 'SPECIE_6', 'ANTIBIOTIC_1', 'N', 1, 0.0056, 0.05],
['GENUS_3', 'SPECIE_7', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
['GENUS_4', 'SPECIE_8', 'ANTIBIOTIC_1', 'N', 1, 0.0518, 0.05],
['GENUS_4', 'SPECIE_9', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
['GENUS_4', 'SPECIE_10', 'ANTIBIOTIC_1', 'N', 1, 0.0595, 0.05],
['GENUS_1', 'SPECIE_1', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_2', 'SPECIE_2', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_2', 'SPECIE_3', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_2', 'SPECIE_4', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_2', 'SPECIE_5', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_2', 'SPECIE_6', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_3', 'SPECIE_7', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_4', 'SPECIE_8', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_4', 'SPECIE_9', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
['GENUS_5', 'SPECIE_10', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05]]
# Create dataframe
dataframe = pd.DataFrame(data, columns=['GENUS',
'SPECIE',
'ANTIBIOTIC',
'GRAM',
'FREQUENCY',
'RESISTANCE',
'THRESHOLD'])
print(dataframe)
# -------------------------------
# Create antimicrobial spectrum
# -------------------------------
# Create antimicrobial spectrum of activity instance
obj = ASAI(column_genus='GENUS',
column_specie='SPECIE',
column_resistance='RESISTANCE',
column_frequency='FREQUENCY',
column_threshold='THRESHOLD',
column_wgenus='W_GENUS',
column_wspecie='W_SPECIE')
# Compute
scores = obj.compute(dataframe,
groupby=['ANTIBIOTIC', 'GRAM'],
weights='frequency',
threshold=0.5,
min_freq=0)
# Unstack
scores = scores.unstack()
# Show
print("\nResults:")
print(scores)
# -----------------------------
# Plot
# -----------------------------
# Variables to plot.
x = scores.index.values
y_n = scores['ASAI_SCORE']['N'].values
y_p = scores['ASAI_SCORE']['P'].values
# Constants
colormap_p = scalar_colormap(y_p, cmap='Blues', vmin=-0.1, vmax=1.1)
colormap_n = scalar_colormap(y_n, cmap='Reds', vmin=-0.1, vmax=1.1)
# Create figure
f, ax = plt.subplots(1, 1, figsize=(8, 0.5))
# Plot
sns.barplot(x=y_p, y=x, palette=colormap_p, ax=ax, orient='h',
saturation=0.5, label='Gram-positive')
sns.barplot(x=-y_n, y=x, palette=colormap_n, ax=ax, orient='h',
saturation=0.5, label='Gram-negative')
# Configure
sns.despine(bottom=True)
# Configure
ax.set_xlim([-1,1])
# Legend
plt.legend()
# Display
#plt.show()
# -------------------------------------------------------------------------
# Testing
# -------------------------------------------------------------------------
# Create data
data = [['STAPH', 'COAGU', 'ANTIBIOTIC_1', 'P', 0.88, 1, 0.20, 1 / 10, 1 / 3],
['STAPH', 'EPIDE', 'ANTIBIOTIC_1', 'P', 0.11, 1, 0.20, 1 / 10, 1 / 3],
['STAPH', 'HAEMO', 'ANTIBIOTIC_1', 'P', 0.32, 1, 0.20, 1 / 10, 1 / 3],
['STAPH', 'LUGDU', 'ANTIBIOTIC_1', 'P', 0.45, 1, 0.20, 1 / 10, 1 / 3],
['STAPH', 'SAPRO', 'ANTIBIOTIC_1', 'P', 0.18, 1, 0.20, 1 / 10, 1 / 3],
['STAPH', 'AUREU', 'ANTIBIOTIC_1', 'P', 0.13, 5, 0.20, 5 / 10, 1 / 3],
['ENTER', 'DURAN', 'ANTIBIOTIC_1', 'N', 0.64, 1, 0.20, 1 / 4, 1 / 3],
['ENTER', 'FAECI', 'ANTIBIOTIC_1', 'N', 0.48, 1, 0.20, 1 / 4, 1 / 3],
['ENTER', 'GALLI', 'ANTIBIOTIC_1', 'N', 0.10, 1, 0.20, 1 / 4, 1 / 3],
['ENTER', 'FAECA', 'ANTIBIOTIC_1', 'N', 0.09, 1, 0.20, 1 / 4, 1 / 3],
['STREP', 'VIRID', 'ANTIBIOTIC_1', 'P', 0.08, 1, 0.20, 1 / 3, 1 / 3],
['STREP', 'PNEUM', 'ANTIBIOTIC_1', 'P', 0.89, 2, 0.20, 2 / 3, 1 / 3]]
# Create dataframe
dataframe = pd.DataFrame(data, columns=['GENUS',
'SPECIE',
'ANTIBIOTIC',
'GRAM',
'RESISTANCE',
'FREQUENCY',
'THRESHOLD',
'W_SPECIE',
'W_GENUS'])
# ---------------------------------------------------------------------
# Success
# ---------------------------------------------------------------------
# .. note: All this examples should succeed. At the moment the code
# breaks if gram is not included. This is because the data
# we have created has duplicated values for each gram.
# Should we consider this within the ASAI?
cols = ['GENUS',
'SPECIE',
'ANTIBIOTIC',
'RESISTANCE',
'GRAM']
def show_i(i, df):
print("\n\n%s:" % i)
print(df)
# Using minimum number of columns
r = dataframe[cols]\
.groupby(['ANTIBIOTIC', 'GRAM']) \
.apply(asai, weights='uniform',
threshold=0.5)
show_i("Using minimum number of columns", r)
# User defined constant threshold
r = dataframe[cols]\
.groupby(['ANTIBIOTIC', 'GRAM']) \
.apply(asai, weights='uniform',
threshold=0.05)
show_i("User defined constant threshold", r)
# Use frequency to compute weights
r = dataframe[cols + ['FREQUENCY']] \
.groupby(['ANTIBIOTIC']) \
.apply(asai, weights='frequency',
threshold=0.05)
show_i("Use frequency to compute weights", r)
# Use weights previously specified.
r = dataframe[cols + ['W_GENUS', 'W_SPECIE']] \
.groupby(['ANTIBIOTIC']) \
.apply(asai, weights='specified',
threshold=0.05)
show_i("Use weights specified manually", r)
# ---------------------------------------------------------------------
# ASAI - Errors
# ---------------------------------------------------------------------
# .. note: In the examples below, the method asai is meant to raise
# an error either because any of the required missing columns
# is missing or because the weight configuration is not
# correct.
print("\n\nHandling errors:")
try:
# Error: resistance column is missing
r = dataframe.drop(columns=['RESISTANCE']) \
.groupby(['ANTIBIOTIC']) \
.apply(asai)
except Exception as e:
print(e)
try:
# Error: genus column is missing
r = dataframe.drop(columns=['GENUS']) \
.groupby(['ANTIBIOTIC']) \
.apply(asai)
except Exception as e:
print(e)
try:
# Error: specie column is missing
r = dataframe.drop(columns=['SPECIE']) \
.groupby(['ANTIBIOTIC']) \
.apply(asai)
except Exception as e:
print(e)
try:
# Error: w_genus and/or w_specie columns are missing
r = dataframe.drop(columns=['W_GENUS', 'W_SPECIE']) \
.groupby(['ANTIBIOTIC']) \
.apply(asai, weights='specified')
except Exception as e:
print(e)
try:
# Error: weights is not a valid value
r = dataframe \
.groupby(['ANTIBIOTIC']) \
.apply(asai, weights=None)
except Exception as e:
print(e)
try:
# Error: weights not valid (W_GENUS)
aux = dataframe.copy(deep=True)
aux.loc[0, 'W_GENUS'] = 1
r = aux \
.groupby(['ANTIBIOTIC']) \
.apply(asai, weights='specified')
except Exception as e:
print(e)
try:
# Error: weights not valid (W_SPECIE)
aux = dataframe.copy(deep=True)
aux.loc[0, 'W_SPECIE'] = 1
r = aux \
.groupby(['ANTIBIOTIC']) \
.apply(asai, weights='specified')
except Exception as e:
print(e)
try:
# Error: null values in required column
aux = dataframe.copy(deep=True)
aux.loc[0, 'RESISTANCE'] = np.NaN
r = aux \
.groupby(['ANTIBIOTIC']) \
.apply(asai)
except Exception as e:
print(e)
# ---------------------------------------------------------------------
# ASAI - Warnings
# ---------------------------------------------------------------------
# .. note: In the examples below, the method asai is meant to show a
# warning message either no threshold has been specified or
# because thresholds have been specified twice.
print("\n\nShow warnings:")
# Warning: default threshold=0.5 and THRESHOLD column passed.
r = dataframe \
.groupby(['ANTIBIOTIC', 'GRAM']) \
.apply(asai)
# Warning: threshold is None and no column THRESHOLD
r = dataframe.drop(columns=['THRESHOLD']) \
.groupby(['ANTIBIOTIC']) \
.apply(asai, threshold=None)