# Source code for pyamr.core.asai

################################################################################
# Author:
# Date:
# Description:
#
#
#
# Copyright:
#
# 
################################################################################
from __future__ import division 

# Libraries
import sys
import warnings
import numpy as np 
import pandas as pd 

# ------------------------------------------------------------------------------
#                                 methods
# ------------------------------------------------------------------------------
def _check_asai_weights_genus(dataframe): # pragma: no cover
  """Verifies that the specie weights within every genus add up to one.

  .. deprecated:: 0.0.1

  Parameters
  ----------
  dataframe : dataframe-like
    The data to validate. It is grouped by the column GENUS and the
    column W_SPECIE is summed within each group.

  Returns
  -------
  None

  Raises
  ------
  TypeError
    If the sum for any genus, rounded to 10 decimals, is not one.
  """
  # Total specie weight per genus (rounded to absorb float noise)
  per_genus = np.round(
    dataframe.groupby(by='GENUS')['W_SPECIE'].sum(), decimals=10)

  # Every genus adds up to one: nothing to report
  if (per_genus == 1).all():
    return

  # At least one genus has an invalid total
  raise TypeError("The weights (W_SPECIE) for each genus should add up to "
                  "one. Please review these weights since they are not "
                  "valid. \n%s" % per_genus)


def _check_asai_weights_specie(dataframe): # pragma: no cover
  """Checks that the genus weights are consistent and add up to one.

  Despite its name, this method validates the column W_GENUS: every
  genus must have a single weight value and the weights of all the
  genus together must add up to one.

  .. deprecated:: 0.0.1

  Parameters
  ----------
  dataframe : dataframe-like
    The data to validate. It must contain the columns GENUS and W_GENUS.

  Returns
  -------
  None

  Raises
  ------
  TypeError
    If a genus has more than one distinct W_GENUS value, or if the
    per-genus weights (rounded to 10 decimals) do not add up to one.
  """
  # Compute number of distinct weights and mean weight per genus
  grouped = dataframe.groupby(by='GENUS')['W_GENUS']
  unique = grouped.nunique()
  weights = grouped.mean()
  merged = pd.concat([unique, weights], axis=1)
  merged.columns = ['UNIQUE', 'W_GENUS']

  # Check only one weight is given for each genus
  if not (unique==1).all():
    raise TypeError("The weights (W_GENUS) should be equal for all the rows "
                    "with a same genus value. Please ensure that the number "
                    "of unique elements is always 1. \n%s" % merged)

  # Check that weights add up to one. The error must be raised (it was
  # previously returned, which made this check a silent no-op).
  if not (np.round(np.sum(weights), decimals=10)==1):
    raise TypeError("The weights (W_GENUS) should add up to one. Please "
                    "review these weights since they are not valid. "
                    "\n%s" % weights)


def _check_asai_dataframe_columns(dataframe, required_columns): # pragma: no cover
  """Ensures that the dataframe contains all the required columns.

  .. deprecated:: 0.0.1

  Parameters
  -----------
  dataframe : pandas DataFrame
    The dataframe containing the information.

  required_columns : iterable
    The names of the columns that must be present.

  Returns
  -------
  None

  Raises
  ------
  TypeError
    If one or more of the required columns are not in the dataframe.
  """
  # Required names that do not appear among the dataframe columns
  absent = list(set(required_columns) - set(dataframe.columns))

  # Complain only when something is missing
  if absent:
    raise TypeError("The dataframe passed as argument is missing the "
                    "following columns: %s. Please correct this issue."
                    % absent)


def _asai(dataframe, threshold=None, weights='uniform'): # pragma: no cover
  """Computes the antimicrobial spectrum of activity.

  .. deprecated:: 0.0.1

  .. todo: There is an error when W_GENUS = 1 / GENUS.nunique()

  Parameters
  ----------
  dataframe : pandas DataFrame
    The dataframe containing the information to compute the asai index.
    The columns [SPECIE, GENUS, RESISTANCE] are used. The columns
    [THRESHOLD, W_GENUS, W_SPECIE] are required too unless they are
    created here (see ``threshold`` and ``weights``). Note that the
    weights must add up to one. The dataframe received is not modified.

  threshold : number, default=None
    If not None, the value is written in the column THRESHOLD for all
    the rows (replacing the column if it exists).

  weights : string, default='uniform'
    If 'uniform', the columns W_GENUS and W_SPECIE are computed so that
    all genus share the same weight and all rows within a genus share
    the same weight. For any other value the columns W_GENUS and
    W_SPECIE must already exist in the dataframe.

  Returns
  -------
  pandas Series
    The series with the entries N_GENUS, N_SPECIE and ASAI_SCORE.

  Raises
  ------
  TypeError
    If the input is not a DataFrame, if a required column is missing
    or if the weights do not pass the validation helpers.
  """
  # Check that the input is a dataframe
  if not isinstance(dataframe, pd.DataFrame):
    raise TypeError("The instance passed as argument needs to be a pandas "
                    "DataFrame. Instead, a <%s> was found. Please convert "
                    "the input accordingly." % type(dataframe))

  # Work on a copy so that the columns added below (THRESHOLD, W_GENUS,
  # W_SPECIE) never leak into the dataframe owned by the caller.
  dataframe = dataframe.copy()

  # Add fixed threshold
  if threshold is not None:
    dataframe['THRESHOLD'] = threshold

  # Add weights
  if weights == 'uniform':
    # Index by genus so that the per-genus counts align with the rows
    dataframe = dataframe.set_index(keys=['GENUS'], drop=False)
    dataframe['W_GENUS'] = 1. / dataframe.GENUS.nunique()
    dataframe['W_SPECIE'] = 1. / dataframe.SPECIE.groupby(level=0).count()
    dataframe = dataframe.reset_index(drop=True)

  # Required columns
  required = ['RESISTANCE', 'THRESHOLD', 'W_GENUS', 'W_SPECIE']

  # Check the columns exist and that the weights add up to one.
  _check_asai_dataframe_columns(dataframe, required_columns=required)
  _check_asai_weights_genus(dataframe)
  _check_asai_weights_specie(dataframe)

  # Create results
  d = {'N_GENUS': dataframe.GENUS.nunique(),
       'N_SPECIE': dataframe.SPECIE.nunique(),
       'ASAI_SCORE': _asai_score(dataframe.W_GENUS,
                                 dataframe.W_SPECIE,
                                 dataframe.RESISTANCE,
                                 dataframe.THRESHOLD)}

  # Return
  return pd.Series(d)


def _asai_score(weights_genus, weights_specie, resistance, threshold): # pragma: no cover
  """Computes the asai score.

  The score is the sum of ``weights_genus * weights_specie`` over the
  elements whose resistance is lower than or equal to the threshold.

  .. deprecated:: 0.0.1

  Parameters
  ----------
  weights_genus : array-like
    The weight associated to each of the genus

  weights_specie : array-like
    The weight associated to each of the species

  resistance : array-like
    The resistances

  threshold : array-like
    The thresholds

  Returns
  -------
  asai score
  """
  # Flag the elements for which the resistance does not exceed the threshold
  effective = resistance <= threshold
  # Combined weight of each element, kept only where flagged
  contribution = weights_genus * weights_specie * effective
  # Return
  return np.sum(contribution)


def asai(*args, **kwargs):
    """Redirects to ``antimicrobial_spectrum_activity_index``.

    All positional and keyword arguments are forwarded unchanged and
    the value returned by that function is returned as is.
    """
    return antimicrobial_spectrum_activity_index(*args, **kwargs)


def antimicrobial_spectrum_activity_index(dataframe, weights='uniform',
                                          threshold=0.5, tol=1e-6, verbose=0):
    """Computes the Antimicrobial Spectrum of Activity Index (ASAI).

    The score is the sum of ``W_GENUS * W_SPECIE`` over the rows whose
    RESISTANCE is strictly lower than their THRESHOLD.

    .. note:: Since threshold has a default value, the warning about a
              missing threshold is only displayed when threshold=None
              is passed explicitly and there is no THRESHOLD column.

    .. note:: The weights are validated by computing the score with
              extreme conditions. These should result in 0 and 1
              respectively (within the tolerance).
              | # Compute score
              | score_1 = np.sum(wgn * wsp * (sari < 0))
              | score_2 = np.sum(wgn * wsp * (sari <= 1))
              Note that this check also fails if RESISTANCE has values
              outside the range [0, 1].

    .. warning:: Should the duplicated check only for the columns
                 GENUS and SPECIE? What if we do not group by
                 antibiotic? It has to be unique for the antibiotic
                 also. It is up to the user to make the right use
                 of this?

    Parameters
    ----------
    dataframe: pd.DataFrame
        The pandas dataframe with the information. The following columns
        are always required [RESISTANCE, GENUS and SPECIE]. In addition,
        [W_GENUS and W_SPECIE] are required if weights = 'specified'.
        Also, if weights = 'frequency' the column FREQUENCY must be
        present. The dataframe is not modified.

    weights: string, default='uniform'
        The method to compute the weights. The methods supported are:

            - 'specified': weights must be in [W_GENUS and W_SPECIE]
            - 'uniform': uniform weights for genus and species within genus.
            - 'frequency': weights are proportional to the frequencies.

        The following rules must be fulfilled by the weight columns:

            - consistent weight for a given genus
            - all genus weights must add up to one.
            - all specie weights within a genus must add up to one.

    threshold: float, default=0.5
        The threshold resistance value above which the antimicrobial is
        considered non-effective to treat the microorganism. For instance,
        for a resistance threshold of 0.5, if a pair <o,a> has a resistance
        value of 0.4, the microorganism will be considered sensitive. In
        order to use specific thresholds include a column 'THRESHOLD';
        when the column exists it takes precedence over this parameter
        (set threshold to None to avoid the warning).

    tol: float, default=1e-6
        The tolerance used to check that the weights are valid. Note
        that float precision varies and therefore the sums do not
        always add up to exactly one.

    verbose: int, default=0
        The level of verbosity. Values over 5 print the weights, the
        threshold and the auxiliary DataFrame.

    Returns
    -------
    pd.Series
        The series with the entries N_GENUS, N_SPECIE and ASAI_SCORE.

    Raises
    ------
    ValueError
        If weights is not supported, required columns are missing, there
        are duplicated rows, there are NULL values in required columns
        or the weights do not pass the extreme thresholds check.
    TypeError
        If dataframe is not a pandas DataFrame.
    """
    # Required columns
    required = ['RESISTANCE', 'GENUS', 'SPECIE']

    # Add weight-related required columns
    if weights == 'specified':
        required += ['W_GENUS', 'W_SPECIE']
    if weights == 'frequency':
        required += ['FREQUENCY']

    # Check weights
    if weights not in ['uniform', 'frequency', 'specified']:
        raise ValueError("""
              The weights '{0}' is not supported. Please
              use one of the following: uniform, frequency
              or specified""".format(weights))

    # Bad input type
    if not isinstance(dataframe, pd.DataFrame):
        raise TypeError("""\n
            The instance passed as argument needs to be a pandas 
            DataFrame. Instead, a <%s> was found. Please convert 
            the input accordingly.""" % type(dataframe))

    # Check columns
    missing = set(required).difference(dataframe.columns)
    if missing:
        raise ValueError("The following columns are missing: {0} " \
                .format(missing))

    # Check duplicates (all the columns are considered)
    if dataframe.duplicated().any():
        raise ValueError("There are duplicated rows in the DataFrame.")

    # Get rows with NaN in any of the required columns
    idxs = dataframe[required].isna().any(axis=1)

    # Raise error listing the offending rows
    if idxs.any():
        raise ValueError("""\n
              There are NULL values in columns that are required.
              Please correct this issue and try again. See below 
              for more information:\n\n\t\t{0}""".format(
                dataframe.loc[idxs, required] \
                    .to_string().replace("\n", "\n\t\t")
        ))

    # Copy DataFrame (the helper columns are added to the copy)
    aux = dataframe.copy(deep=True)

    # Check threshold
    if 'THRESHOLD' in aux.columns:
        if threshold is not None:
            warnings.warn("""\n
                  The threshold has been defined both as an 
                  input parameter (threshold={0}) and a DataFrame 
                  column 'THRESHOLD'. The latter will be used."""
                  .format(threshold))
    else:
        if threshold is None:
            warnings.warn("""\n
                  The threshold has not been defined using either 
                  an input parameter (threshold={0}) or a column in the 
                  dataframe named 'THRESHOLD'. Thus a default threshold 
                  value of '0.5' will be used.""".format(threshold))
            threshold = 0.5
        aux['THRESHOLD'] = threshold

    # Set uniform weights
    if weights == 'uniform':
        aux['W_GENUS'] = 1. / aux.GENUS.nunique()
        aux['W_SPECIE'] = 1. / aux.GENUS.map(
            aux.groupby(['GENUS']).SPECIE.count())

    # Set frequency weights
    if weights == 'frequency':
        # Total frequency per genus
        fgn = aux.groupby(['GENUS']).FREQUENCY.sum()
        aux['S_GENUS'] = aux.GENUS.map(fgn)
        aux['W_GENUS'] = aux.GENUS.map(fgn / fgn.sum())
        aux['W_SPECIE'] = aux.FREQUENCY / aux.S_GENUS

    # .. note: The weights are not validated column by column; they
    #          are validated below using the extreme thresholds.

    # Show
    if verbose > 5:
        print("\nweights={0} | threshold={1}".format(weights, threshold))
        print(aux)

    # Extract vectors
    wgn = aux.W_GENUS
    wsp = aux.W_SPECIE
    sari = aux.RESISTANCE
    th = aux.THRESHOLD

    # Check range using extreme thresholds
    s1 = np.sum(wgn * wsp * (sari < 0))
    s2 = np.sum(wgn * wsp * (sari <= 1))
    if abs(s1-0) > tol or abs(s2-1) > tol:
        raise ValueError("""
            The weights argument do not fulfill all the requirements. Note
            that the correct weights would produce a SARI value within the
            range [0, 1]. However, the weights received did not fulfill 
            such constraint.""")

    # Compute score
    score = np.sum(wgn * wsp * (sari < th))

    # Create results
    d = {
        'N_GENUS': aux.GENUS.nunique(),
        'N_SPECIE': aux.SPECIE.nunique(),
        'ASAI_SCORE': score
    }

    # Return
    return pd.Series(d)







class ASAI:
    """Antimicrobial Spectrum of Activity Index.

    The class renames the user columns to the names expected by the
    method ``asai``, warns about suspicious data (duplicates, extreme
    resistances, NULL values) and computes the index for each group.
    """
    # Attributes (column names used internally)
    c_gen = 'GENUS'
    c_spe = 'SPECIE'
    c_res = 'RESISTANCE'
    c_thr = 'THRESHOLD'
    c_fre = 'FREQUENCY'
    c_wgen = 'W_GENUS'
    c_wspe = 'W_SPECIE'

    def __init__(self, column_genus=c_gen,
                       column_specie=c_spe,
                       column_resistance=c_res,
                       column_threshold=c_thr,
                       column_frequency=c_fre,
                       column_wgenus=c_wgen,
                       column_wspecie=c_wspe):
        """The constructor.

        Parameters
        ----------
        column_genus: string
            The column name with the genus values

        column_specie: string
            The column name with the specie values

        column_resistance: string
            The column name with the resistance values

        column_threshold: string
            The column name with the threshold values

        column_frequency: string
            The column name with the frequency values

        column_wgenus: string
            The column name with the genus weights

        column_wspecie: string
            The column name with the specie weights

        Returns
        -------
        none
        """
        # Create dictionary to rename columns
        self.rename = {column_genus: self.c_gen,
                       column_specie: self.c_spe,
                       column_resistance: self.c_res,
                       column_threshold: self.c_thr,
                       column_frequency: self.c_fre,
                       column_wgenus: self.c_wgen,
                       column_wspecie: self.c_wspe}

        # Columns that are required
        self.required = [self.c_gen, self.c_spe, self.c_res]


    def compute(self, dataframe, groupby=None, min_freq=None, **kwargs):
        """Computes the ASAI index (safely).

        .. note: Review first NaN and then duplicated?
        .. note: Review extreme values in resistance?

        Parameters
        ----------
        dataframe: pd.DataFrame
            The pandas dataframe with the information. The columns
            with the genus, specie and resistance are always required.
            The remaining requirements depend on the keyword arguments
            forwarded to ``asai`` (e.g. weights). The dataframe is
            not modified.

        groupby: string or list, default=None
            The elements to groupby (pd.groupby). If None (or empty)
            the index is computed once using the whole DataFrame.

        min_freq: int, default=None
            The minimum number of susceptibility tests required in order to
            include the species to compute ASAI. Note that to work the dataframe
            must include a column indicating the frequencies; otherwise a
            warning is shown and no rows are filtered.

        **kwargs:
            The keyword arguments forwarded to ``asai``, this is, to
            ``antimicrobial_spectrum_activity_index``: weights
            (default='uniform'), threshold (default=0.5), tol
            (default=1e-6) and verbose (default=0).

        Returns
        -------
        pd.DataFrame or pd.Series
            The result of applying ``asai`` to each group or, if no
            groupby is given, the result of ``asai`` for the whole
            DataFrame.

        Raises
        ------
        TypeError
            If dataframe is not a pandas DataFrame.
        """
        # Bad input type
        if not isinstance(dataframe, pd.DataFrame):
            raise TypeError("""
                The instance passed as argument needs to be a pandas
                DataFrame. Instead, a <%s> was found. Please convert 
                the input accordingly.""" % type(dataframe))

        # Format groupby as a list. The default (None) used to break
        # when concatenating with the required columns.
        if groupby is None:
            groupby = []
        elif isinstance(groupby, str):
            groupby = [groupby]
        else:
            groupby = list(groupby)

        # Create auxiliary variable
        required = groupby + self.required

        # Rename columns
        aux = dataframe.rename(columns=self.rename, copy=True)

        # Filter by freq
        if min_freq is not None:
            if self.c_fre not in aux:
                warnings.warn("""
                The min_freq={0} cannot be applied because the frequency
                columns 'FREQUENCY' does not exist in the DataFrame.\n"""
                    .format(min_freq))
            else:
                aux = aux[aux[self.c_fre] >= min_freq]

        # Check duplicates (only a warning, rows are kept)
        if aux.duplicated(subset=required).any():
            warnings.warn("""
                 There are duplicated rows in the DataFrame. This is
                 usually not expected. Please review the DataFrame and 
                 address this inconsistencies. Maybe you should include
                 more columns in the groupby (e.g. specimen_code). The 
                 columns used to compute duplicated are: 
                 {0}.\n""".format(required))

        # Check extreme resistance values (only a warning, rows are kept)
        if aux[self.c_res].isin([0.0, 1.0]).any():
            warnings.warn("""
                 Extreme resistances [0, 1] were found in the DataFrame. These 
                 rows should be reviewed since these resistances might correspond
                 to pairs with low number of records.\n""")

        # Get NaN indexes
        idxs = aux[required].isna().any(axis=1)

        # Show warning and correct (rows with NULL values are dropped)
        if idxs.any():
            warnings.warn("""
                 There are NULL values in columns that are required. These
                 rows will be ignored to safely compute ASAI. Please review
                 the DataFrame and address this inconsistencies. See below
                 for more information: \n\n\t\t\t{0}\n""".format( \
                    aux[required].isna().sum(axis=0) \
                        .to_string().replace("\n", "\n\t\t\t")))
            aux = aux.dropna(subset=required)

        # Check all genus weights add up to one?

        # Compute (whole DataFrame)
        if not groupby:
            return asai(aux, **kwargs)

        # Compute (by group)
        scores = aux.groupby(groupby) \
                    .apply(asai, **kwargs)

        # Return
        return scores








class ASAI_old(): # pragma: no cover
  """This class computes the antimicrobial spectrum of activity. 

  .. deprecated:: 0.0.1

  """
  # Attributes (column names used internally)
  c_abx = 'ANTIBIOTIC'
  c_gen = 'GENUS'
  c_spe = 'SPECIE'
  c_res = 'RESISTANCE'
  c_thr = 'THRESHOLD'
  c_fre = 'FREQUENCY'
  c_wgen = 'W_GENUS'
  c_wspe = 'W_SPECIE'


  def __init__(self, weights='uniform', threshold=0.5,
                                        column_genus=c_gen, 
                                        column_specie=c_spe, 
                                        column_antibiotic=c_abx,
                                        column_resistance=c_res,
                                        column_threshold=c_thr,
                                        column_frequency=c_fre,
                                        column_wgenus=c_wgen,
                                        column_wspecie=c_wspe):
    """The constructor.

    Parameters
    ----------
    weights : string
      The method to compute the weights. It is forwarded to ``_asai``.

    threshold : number
      The threshold under which the drug is considered effective. It
      is forwarded to ``_asai``.

    column_genus : string
      The column name with the genus values

    column_specie : string
      The column name with the specie values
    
    column_antibiotic : string 
      The column name with the antibiotic values
    
    column_resistance : string
      The column name with the resistance values
    
    column_threshold : string
      The column name with the threshold values
    
    column_frequency : string
      The column name with the frequency values

    column_wgenus : string
      The column name with the genus weights

    column_wspecie : string
      The column name with the specie weights
    
    Returns
    -------
    none
    """
    # Set parameters
    self.weights = weights
    self.threshold = threshold

    # Create dictionary to rename columns
    self.rename_columns = {column_genus: self.c_gen,
                           column_specie: self.c_spe,
                           column_antibiotic: self.c_abx,
                           column_resistance: self.c_res,
                           column_threshold: self.c_thr,
                           column_frequency: self.c_fre,
                           column_wgenus: self.c_wgen,
                           column_wspecie: self.c_wspe}

    # Columns that are required
    self.required_columns = [self.c_gen, self.c_spe, self.c_abx, self.c_res]


  def compute(self, dataframe, by_category):
    """This function computes the asai index by category.

    Parameters
    ----------
    dataframe : pandas DataFrame
      The pandas DataFrame containing the data. In particular it needs to
      contain the following columns: genus, specie, antibiotic and the
      resistance outcome within the range [0,1].

    by_category : string
      The name of the column that will be used to group ASAI.

    Returns
    -------
    pandas dataframe
      The result of applying ``_asai`` to each (antibiotic, category)
      group, with the innermost index level unstacked.

    Raises
    ------
    TypeError
      If the input is not a DataFrame or a required column is missing.
    """
    # Check that it is a dataframe
    if not isinstance(dataframe, pd.DataFrame):
      raise TypeError("The instance passed as argument needs to be a pandas "
                      "DataFrame. Instead, a <%s> was found. Please convert "
                      "the input accordingly." % type(dataframe))

    # Rename columns
    dataframe = dataframe.rename(columns=self.rename_columns, copy=True)

    # Check dataframe columns
    _check_asai_dataframe_columns(dataframe, self.required_columns)

    # Keep a single row for each genus, specie, antibiotic and category
    dataframe = dataframe.drop_duplicates(subset=[self.c_gen, 
                                                  self.c_spe, 
                                                  self.c_abx, 
                                                  by_category])

    # Check that no intrinsic resistance is considered
    dataframe = dataframe[dataframe[self.c_res]!=1.0]

    # Check that the by parameter has all value different than none
    dataframe = dataframe.dropna(subset=[by_category])

    # Compute asai and return. The weights method configured in the
    # constructor is forwarded too (it was previously ignored).
    return dataframe.groupby(by=[self.c_abx, by_category]) \
                    .apply(_asai, threshold=self.threshold,
                                  weights=self.weights) \
                    .unstack()





if __name__ == '__main__': # pragma: no cover
  # Demo script. It computes ASAI for a small synthetic dataset, draws
  # the scores and then exercises the method asai with examples that
  # are expected to succeed, to raise errors and to show warnings.

  # Import libraries (numpy and pandas are imported at module level)
  import seaborn as sns
  import matplotlib as mpl
  import matplotlib.pyplot as plt

  # Import specific libraries
  from pyamr.datasets import load

  # Configure seaborn style (context=talk)
  sns.set(style="white")

  # Set matplotlib
  mpl.rcParams['xtick.labelsize'] = 9
  mpl.rcParams['ytick.labelsize'] = 9
  mpl.rcParams['axes.titlesize'] = 11
  mpl.rcParams['legend.fontsize'] = 9

  # Pandas configuration
  pd.set_option('display.max_colwidth', 40)
  pd.set_option('display.width', 300)
  pd.set_option('display.precision', 4)

  # Numpy configuration
  np.set_printoptions(precision=2)


  # ---------------------
  # helper method
  # ---------------------
  def scalar_colormap(values, cmap, vmin, vmax):
    """This method creates a colormap based on values.

    Parameters
    ----------
    values : array-like
      The values to create the corresponding colors

    cmap : str
      The colormap

    vmin, vmax : float
      The minimum and maximum possible values

    Returns
    -------
    scalar colormap
    """
    # Create scalar mappable
    norm = mpl.colors.Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
    # Get color map (one color for each value)
    colormap = sns.color_palette([mapper.to_rgba(i) for i in values])
    # Return
    return colormap



  # ---------------------
  # Create data
  # ---------------------
  # Create data
  # .. note: The last row uses GENUS_5 with SPECIE_10 while the first
  #          group uses GENUS_4 with SPECIE_10. Review if intended.
  data = [['GENUS_1', 'SPECIE_1', 'ANTIBIOTIC_1', 'N', 1, 0.6000, 0.05],
          ['GENUS_2', 'SPECIE_2', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
          ['GENUS_2', 'SPECIE_3', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
          ['GENUS_2', 'SPECIE_4', 'ANTIBIOTIC_1', 'N', 1, 0.0064, 0.05],
          ['GENUS_2', 'SPECIE_5', 'ANTIBIOTIC_1', 'N', 1, 0.0073, 0.05],
          ['GENUS_2', 'SPECIE_6', 'ANTIBIOTIC_1', 'N', 1, 0.0056, 0.05],
          ['GENUS_3', 'SPECIE_7', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
          ['GENUS_4', 'SPECIE_8', 'ANTIBIOTIC_1', 'N', 1, 0.0518, 0.05],
          ['GENUS_4', 'SPECIE_9', 'ANTIBIOTIC_1', 'N', 1, 0.0000, 0.05],
          ['GENUS_4', 'SPECIE_10', 'ANTIBIOTIC_1', 'N', 1, 0.0595, 0.05],
          
          ['GENUS_1', 'SPECIE_1', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_2', 'SPECIE_2', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_2', 'SPECIE_3', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_2', 'SPECIE_4', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_2', 'SPECIE_5', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_2', 'SPECIE_6', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_3', 'SPECIE_7', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_4', 'SPECIE_8', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_4', 'SPECIE_9', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05],
          ['GENUS_5', 'SPECIE_10', 'ANTIBIOTIC_1', 'P', 1, 0.0, 0.05]]

  # Create dataframe
  dataframe = pd.DataFrame(data, columns=['GENUS', 
                                          'SPECIE', 
                                          'ANTIBIOTIC',
                                          'GRAM',
                                          'FREQUENCY',
                                          'RESISTANCE',
                                          'THRESHOLD'])

  print(dataframe)

  # -------------------------------
  # Create antimicrobial spectrum
  # -------------------------------
  # Create antimicrobial spectrum of activity instance
  obj = ASAI(column_genus='GENUS',
             column_specie='SPECIE',
             column_resistance='RESISTANCE',
             column_frequency='FREQUENCY',
             column_threshold='THRESHOLD',
             column_wgenus='W_GENUS',
             column_wspecie='W_SPECIE')

  # Compute
  scores = obj.compute(dataframe,
    groupby=['ANTIBIOTIC', 'GRAM'],
    weights='frequency',
    threshold=0.5,
    min_freq=0)

  # Unstack
  scores = scores.unstack()

  # Show
  print("\nResults:")
  print(scores)

  # -----------------------------
  # Plot
  # ----------------------------- 
  # Variables to plot.
  x = scores.index.values
  y_n = scores['ASAI_SCORE']['N'].values
  y_p = scores['ASAI_SCORE']['P'].values

  # Constants
  colormap_p = scalar_colormap(y_p, cmap='Blues', vmin=-0.1, vmax=1.1)
  colormap_n = scalar_colormap(y_n, cmap='Reds', vmin=-0.1, vmax=1.1)

  # Create figure
  f, ax = plt.subplots(1, 1, figsize=(8, 0.5))

  # Plot (gram-negative scores are drawn towards the left)
  sns.barplot(x=y_p, y=x, palette=colormap_p, ax=ax, orient='h', 
    saturation=0.5, label='Gram-positive')
  sns.barplot(x=-y_n, y=x, palette=colormap_n, ax=ax, orient='h', 
    saturation=0.5, label='Gram-negative')

  # Configure
  sns.despine(bottom=True)

  # Configure
  ax.set_xlim([-1,1])

  # Legend
  plt.legend()

  # Display
  #plt.show()


  # -------------------------------------------------------------------------
  # Testing
  # -------------------------------------------------------------------------
  # Create data
  data = [['STAPH', 'COAGU', 'ANTIBIOTIC_1', 'P', 0.88, 1, 0.20, 1 / 10, 1 / 3],
          ['STAPH', 'EPIDE', 'ANTIBIOTIC_1', 'P', 0.11, 1, 0.20, 1 / 10, 1 / 3],
          ['STAPH', 'HAEMO', 'ANTIBIOTIC_1', 'P', 0.32, 1, 0.20, 1 / 10, 1 / 3],
          ['STAPH', 'LUGDU', 'ANTIBIOTIC_1', 'P', 0.45, 1, 0.20, 1 / 10, 1 / 3],
          ['STAPH', 'SAPRO', 'ANTIBIOTIC_1', 'P', 0.18, 1, 0.20, 1 / 10, 1 / 3],
          ['STAPH', 'AUREU', 'ANTIBIOTIC_1', 'P', 0.13, 5, 0.20, 5 / 10, 1 / 3],

          ['ENTER', 'DURAN', 'ANTIBIOTIC_1', 'N', 0.64, 1, 0.20, 1 / 4, 1 / 3],
          ['ENTER', 'FAECI', 'ANTIBIOTIC_1', 'N', 0.48, 1, 0.20, 1 / 4, 1 / 3],
          ['ENTER', 'GALLI', 'ANTIBIOTIC_1', 'N', 0.10, 1, 0.20, 1 / 4, 1 / 3],
          ['ENTER', 'FAECA', 'ANTIBIOTIC_1', 'N', 0.09, 1, 0.20, 1 / 4, 1 / 3],

          ['STREP', 'VIRID', 'ANTIBIOTIC_1', 'P', 0.08, 1, 0.20, 1 / 3, 1 / 3],
          ['STREP', 'PNEUM', 'ANTIBIOTIC_1', 'P', 0.89, 2, 0.20, 2 / 3, 1 / 3]]

  # Create dataframe
  dataframe = pd.DataFrame(data, columns=['GENUS',
                                          'SPECIE',
                                          'ANTIBIOTIC',
                                          'GRAM',
                                          'RESISTANCE',
                                          'FREQUENCY',
                                          'THRESHOLD',
                                          'W_SPECIE',
                                          'W_GENUS'])


  # ---------------------------------------------------------------------
  # Success
  # ---------------------------------------------------------------------
  # .. note: All this examples should succeed. At the moment the code
  #          breaks if gram is not included. This is because the data
  #          we have created has duplicated values for each gram.
  #          Should we consider this within the ASAI?
  cols = ['GENUS',
          'SPECIE',
          'ANTIBIOTIC',
          'RESISTANCE',
          'GRAM']

  def show_i(i, df):
      """Prints the title i followed by the result df."""
      print("\n\n%s:" % i)
      print(df)

  # Using minimum number of columns
  r = dataframe[cols]\
      .groupby(['ANTIBIOTIC', 'GRAM']) \
      .apply(asai, weights='uniform',
                   threshold=0.5)
  show_i("Using minimum number of columns", r)

  # User defined constant threshold
  r = dataframe[cols]\
      .groupby(['ANTIBIOTIC', 'GRAM']) \
      .apply(asai, weights='uniform',
                   threshold=0.05)
  show_i("User defined constant threshold", r)

  # Use frequency to compute weights
  r = dataframe[cols + ['FREQUENCY']] \
      .groupby(['ANTIBIOTIC']) \
      .apply(asai, weights='frequency',
                   threshold=0.05)
  show_i("Use frequency to compute weights", r)

  # Use weights previously specified.
  r = dataframe[cols + ['W_GENUS', 'W_SPECIE']] \
      .groupby(['ANTIBIOTIC']) \
      .apply(asai, weights='specified',
                   threshold=0.05)
  show_i("Use weights specified manually", r)


  # ---------------------------------------------------------------------
  # ASAI - Errors
  # ---------------------------------------------------------------------
  # .. note: In the examples below, the method asai is meant to raise
  #          an error either because any of the required columns
  #          is missing or because the weight configuration is not
  #          correct. The errors are caught and printed on purpose.
  print("\n\nHandling errors:")

  try:
      # Error: resistance column is missing
      r = dataframe.drop(columns=['RESISTANCE']) \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai)
  except Exception as e:
      print(e)

  try:
      # Error: genus column is missing
      r = dataframe.drop(columns=['GENUS']) \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai)
  except Exception as e:
      print(e)

  try:
      # Error: specie column is missing
      r = dataframe.drop(columns=['SPECIE']) \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai)
  except Exception as e:
      print(e)

  try:
      # Error: w_genus and/or w_specie columns are missing
      r = dataframe.drop(columns=['W_GENUS', 'W_SPECIE'])  \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai, weights='specified')
  except Exception as e:
      print(e)

  try:
      # Error: weights is not a valid value
      r = dataframe  \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai, weights=None)
  except Exception as e:
      print(e)

  try:
      # Error: weights not valid (W_GENUS)
      aux = dataframe.copy(deep=True)
      aux.loc[0, 'W_GENUS'] = 1
      r = aux \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai, weights='specified')
  except Exception as e:
      print(e)

  try:
      # Error: weights not valid (W_SPECIE)
      aux = dataframe.copy(deep=True)
      aux.loc[0, 'W_SPECIE'] = 1
      r = aux \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai, weights='specified')
  except Exception as e:
      print(e)

  try:
      # Error: null values in required column. Note that np.nan is used
      # because the alias np.NaN does not exist in NumPy 2.0; with the
      # alias, the error printed would be an AttributeError instead.
      aux = dataframe.copy(deep=True)
      aux.loc[0, 'RESISTANCE'] = np.nan
      r = aux \
          .groupby(['ANTIBIOTIC']) \
          .apply(asai)
  except Exception as e:
      print(e)





  # ---------------------------------------------------------------------
  # ASAI - Warnings
  # ---------------------------------------------------------------------
  # .. note: In the examples below, the method asai is meant to show a
  #          warning message either because no threshold has been
  #          specified or because thresholds have been specified twice.
  print("\n\nShow warnings:")

  # Warning: default threshold=0.5 and THRESHOLD column passed.
  r = dataframe \
      .groupby(['ANTIBIOTIC', 'GRAM']) \
      .apply(asai)

  # Warning: threshold is None and no column THRESHOLD
  r = dataframe.drop(columns=['THRESHOLD']) \
      .groupby(['ANTIBIOTIC']) \
      .apply(asai, threshold=None)