Source code for pyamr.core.stats.kendall

##############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
# Description : This file contains differnent statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
# TODO: Move it to a module.
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division 

# Libraries
import sys
import numpy as np
import pandas as pd

# Import base wrapper
from pyamr.core.stats.wbase import BaseWrapper


# ----------------------------------------------------------------------------
#
# ----------------------------------------------------------------------------

[docs]def kendall(x):  
  """Computes the kendall statistical test

  Parameters
  ----------
  x     : a vector of data
  alpha : significance level (0.05 default)

  Returns
  -------
  trend : tells the trend (increasing, decreasing or no trend)
  h     : True (if trend is present) or False (if trend is absence)
  p     : p value of the significance test
  z     : normalized test statistics 
  """
  # Libraries.
  from scipy.stats import norm

  # Compute n.
  n = len(x)

  # calculate S 
  s = 0
  for k in range(n-1):
      for j in range(k+1,n):
          s += np.sign(x[j] - x[k])

  # calculate the unique data
  unique_x = np.unique(x)
  g = len(unique_x)

  # calculate the var(s)
  if n == g: # there is no tie
      var_s = (n*(n-1)*(2*n+5))/18
  else: # there are some ties in data
      tp = np.zeros(unique_x.shape)
      for i in range(len(unique_x)):
          tp[i] = sum(unique_x[i] == x)
      var_s = (n*(n-1)*(2*n+5) + np.sum(tp*(tp-1)*(2*tp+5)))/18

  # calculate z
  if s>0:      z = (s - 1)/np.sqrt(var_s)
  elif s == 0: z = 0
  elif s<0:    z = (s + 1)/np.sqrt(var_s)

  # Check
  if np.isnan(s): return [None, None]

  # calculate the p_value
  p = 2*(1-norm.cdf(abs(z))) # two tail test

  # Return
  return [p, z]


[docs]class KendallWrapper(BaseWrapper):
  """

  The Kendall statistical test, also known as Kendall's rank correlation test or
  Kendall's tau test, is a nonparametric statistical test used to assess the strength
  and direction of association between two variables. It is particularly suited for
  analyzing ranked or ordinal data, where the values of the variables are ranked or
  ordered but not necessarily quantitatively measurable. The coefficient ranges from
  -1 to +1, where a value of +1 indicates a perfect positive rank correlation, -1
  indicates a perfect negative rank correlation, and 0 indicates no rank correlation.

  """

  # --------------------------------------------------------------------------
  #                             new methods
  # --------------------------------------------------------------------------
[docs]  def trend_exists(self, alpha):
    """This method returns a boolean with the stationarity outocme.

    Parameters
    ----------
    alpha : float
      The significance level

    Returns
    -------
    """
    # Libraries.
    from scipy.stats import norm
    # Check.
    if self._raw[1] is None: return None
    # Return
    return abs(self._raw[1]) > norm.ppf(1-alpha/2) # abs(z)

[docs]  def trend_direction(self, alpha):
    """This method returns the trend direction.

    Parameters
    ----------
    alpha : float
      The significance level

    Returns
    -------
    """
    # Libraries.
    from scipy.stats import norm
    # Check
    if self._raw is None: return "failed"
    # Compute h
    h = abs(self._raw[1]) > norm.ppf(1-alpha/2) 
    # Trends
    z = self._raw[1]
    if (z<0) and h:   return 'decreasing'
    elif (z>0) and h: return 'increasing'
    else:             return 'no trend'

  # --------------------------------------------------------------------------
  #                           override methods
  # --------------------------------------------------------------------------
[docs]  def evaluate(self, alpha=0.05, **kwargs):
    """Evaluates the model for the specified alpha.
    """
    # Create series
    d = {}

    # Add results
    d['m_pvalue'] = self._raw[0]
    d['m_z'] = self._raw[1]
    d['m_trend_existence'] = self.trend_exists(alpha)
    d['m_trend_direction'] = self.trend_direction(alpha)

    # Return
    return d
    
[docs]  def as_summary(self, alpha=0.05):
    """This method displays the summary.
    """
    # Create summary base
    summary = '     kendall test (monotonic)  \n'
    summary+= "==================================\n"
    summary+= "statistic (z):   %#17.3f\n" % self.m_z
    summary+= "pvalue (manual): %#17.5f\n" % self.m_pvalue
    summary+= "trend exists:    %17s\n" % self.trend_exists(alpha)
    summary+= "trend direction: %17s\n" % self.trend_direction(alpha)
    summary+= "=================================="
    # Return
    return summary








if __name__ == '__main__':
 
 
  # Libraries
  import pandas as pd

  # ----------------------------
  # set basic configuration
  # ----------------------------
  # Set pandas configuration.
  pd.set_option('display.max_colwidth', 14)
  pd.set_option('display.width', 150)
  pd.set_option('display.precision', 4)

  # ----------------------------
  # create data
  # ----------------------------
  # Constants
  length = 100
  offset = 100
  slope = 10

  # Create timeseries.
  x = np.arange(length)
  y = np.random.rand(length) * slope + offset

  # ---------------------
  # Create kendall object
  # ---------------------
  # Create object
  kendall = KendallWrapper(estimator=kendall).fit(x=y)

  # Print series.
  print("\n")
  print(kendall.as_series())

  # Print summary.
  print("\n")
  print(kendall.as_summary())

  # Print identifier
  print("\n")
  print(kendall._identifier())
 
  # -----------------
  # Save and load
  # -----------------
  # File location
  #fname = '../examples/saved/kendall-sample.pickle'

  # Save
  #kendall.save(fname=fname)

  # Load
  #kendall = KendallWrapper().load(fname=fname)