Source code for pyamr.core.stats.kendall

##############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
# Description : This file contains different statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
# TODO: Move it to a module.
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division 

# Libraries
import sys
import numpy as np
import pandas as pd

# Import base wrapper
from pyamr.core.stats.wbase import BaseWrapper


# ----------------------------------------------------------------------------
#
# ----------------------------------------------------------------------------

def kendall(x):
    """Computes the Mann-Kendall test for a monotonic trend.

    The statistic S is the sum of the signs of all pairwise differences
    ``x[j] - x[k]`` with ``j > k``. It is normalised (with continuity
    correction) using the variance of S corrected for tied values, and
    the two-sided p-value is obtained from the standard normal
    distribution.

    Parameters
    ----------
    x : array-like
        One dimensional vector of observations in temporal order. It is
        converted to a float array, so positional order is used
        regardless of any index attached to the input.

    Returns
    -------
    list
        ``[p, z]`` where ``p`` is the two-sided p-value and ``z`` is the
        normalised test statistic. If the statistic cannot be computed
        because the data contain NaN, ``[None, None]`` is returned.
    """
    # Libraries.
    from scipy.stats import norm

    # Positional float array (a pandas Series would otherwise be indexed
    # by label and non-float dtypes could misbehave on subtraction).
    values = np.asarray(x, dtype=float)

    # Compute n.
    n = len(values)

    # Calculate S. Each iteration compares one observation against all
    # later ones at once instead of looping over every pair in Python.
    s = 0.0
    for k in range(n - 1):
        s += np.sign(values[k + 1:] - values[k]).sum()

    # Check. NaN observations propagate into S; bail out before the
    # statistic is used any further.
    if np.isnan(s):
        return [None, None]

    # Sizes of the groups of tied values (groups of size one contribute
    # nothing to the correction, so they are dropped).
    _, counts = np.unique(values, return_counts=True)
    ties = counts[counts > 1]

    # Calculate var(S). Ties reduce the variance, hence the correction
    # term is subtracted.
    var_s = (n * (n - 1) * (2 * n + 5)
             - np.sum(ties * (ties - 1) * (2 * ties + 5))) / 18

    # Calculate z (continuity corrected). var_s can only be zero when
    # every observation is tied (or n < 2), in which case S is zero too,
    # so no division by zero happens here.
    if s > 0:
        z = (s - 1) / np.sqrt(var_s)
    elif s < 0:
        z = (s + 1) / np.sqrt(var_s)
    else:
        z = 0.0

    # Calculate the p_value (two tail test). The survival function is
    # used because 1 - cdf loses precision for large |z|.
    p = 2 * norm.sf(abs(z))

    # Return
    return [float(p), float(z)]
class KendallWrapper(BaseWrapper):
    """Wrapper that formats the outcome of a Kendall trend test.

    The Kendall statistical test, also known as Kendall's rank
    correlation test or Kendall's tau test, is a nonparametric
    statistical test used to assess the strength and direction of
    association between two variables. It is particularly suited for
    analyzing ranked or ordinal data, where the values of the variables
    are ranked or ordered but not necessarily quantitatively measurable.

    The coefficient ranges from -1 to +1, where a value of +1 indicates a
    perfect positive rank correlation, -1 indicates a perfect negative
    rank correlation, and 0 indicates no rank correlation.

    Notes
    -----
    The methods below read the raw estimator result from ``self._raw``
    as a two element sequence: ``self._raw[0]`` is the p-value and
    ``self._raw[1]`` is the normalised statistic z. Both may be ``None``
    when the underlying test could not be computed.
    """

    # --------------------------------------------------------------------------
    #                              new methods
    # --------------------------------------------------------------------------
    def trend_exists(self, alpha):
        """This method returns a boolean with the trend existence outcome.

        A trend is considered present when the absolute value of the
        normalised statistic z exceeds the two-sided critical value of
        the standard normal distribution for the given alpha.

        Parameters
        ----------
        alpha : float
            The significance level

        Returns
        -------
        bool or None
            True if a trend is present, False otherwise, and None when
            no statistic is available.
        """
        # Libraries.
        from scipy.stats import norm

        # Check. Nothing to compare if the test produced no statistic.
        if self._raw is None or self._raw[1] is None:
            return None

        # Return
        return abs(self._raw[1]) > norm.ppf(1 - alpha / 2)  # abs(z)

    def trend_direction(self, alpha):
        """This method returns the trend direction.

        Parameters
        ----------
        alpha : float
            The significance level

        Returns
        -------
        str
            'increasing' or 'decreasing' when a significant trend is
            present, 'no trend' when it is not, and 'failed' when no
            statistic is available.
        """
        # Compute h (same criterion and same guards as trend_exists, so
        # a missing statistic is reported instead of raising on abs(None)).
        h = self.trend_exists(alpha)

        # Check
        if h is None:
            return "failed"

        # Trends
        z = self._raw[1]
        if h and (z < 0):
            return 'decreasing'
        if h and (z > 0):
            return 'increasing'
        return 'no trend'

    # --------------------------------------------------------------------------
    #                            override methods
    # --------------------------------------------------------------------------
    def evaluate(self, alpha=0.05, **kwargs):
        """Evaluates the model for the specified alpha.

        Parameters
        ----------
        alpha : float
            The significance level (0.05 default)
        **kwargs :
            Accepted for interface compatibility; not used here.

        Returns
        -------
        dict
            The p-value, the statistic z, the trend existence and the
            trend direction under the ``m_`` prefixed keys.
        """
        # Create series
        d = {}

        # Add results
        d['m_pvalue'] = self._raw[0]
        d['m_z'] = self._raw[1]
        d['m_trend_existence'] = self.trend_exists(alpha)
        d['m_trend_direction'] = self.trend_direction(alpha)

        # Return
        return d

    def as_summary(self, alpha=0.05):
        """This method displays the summary.

        Parameters
        ----------
        alpha : float
            The significance level (0.05 default)

        Returns
        -------
        str
            The formatted, multi-line summary.
        """
        # NOTE(review): self.m_z and self.m_pvalue are not set in this
        # class; presumably the base wrapper exposes the keys returned by
        # evaluate() as attributes - confirm. The float formatting below
        # requires them to be numbers (it raises if they are None).

        # Create summary base
        summary = ' kendall test (monotonic) \n'
        summary += "==================================\n"
        summary += "statistic (z): %#17.3f\n" % self.m_z
        summary += "pvalue (manual): %#17.5f\n" % self.m_pvalue
        summary += "trend exists: %17s\n" % self.trend_exists(alpha)
        summary += "trend direction: %17s\n" % self.trend_direction(alpha)
        summary += "=================================="

        # Return
        return summary
if __name__ == '__main__':
    # Small demonstration: runs the kendall test on a synthetic series
    # and prints the wrapper outputs.

    # ----------------------------
    # set basic configuration
    # ----------------------------
    # Set pandas configuration (pandas is imported at the top of the file).
    pd.set_option('display.max_colwidth', 14)
    pd.set_option('display.width', 150)
    pd.set_option('display.precision', 4)

    # ----------------------------
    # create data
    # ----------------------------
    # Constants
    length = 100
    offset = 100
    slope = 10

    # Create timeseries. Note that this is uniform noise scaled by
    # ``slope`` and shifted by ``offset``; no trend term is added.
    y = np.random.rand(length) * slope + offset

    # ---------------------
    # Create kendall object
    # ---------------------
    # Create object. A distinct name is used so that the module level
    # ``kendall`` function is not rebound to the wrapper instance.
    wrapper = KendallWrapper(estimator=kendall).fit(x=y)

    # Print series.
    print("\n")
    print(wrapper.as_series())

    # Print summary.
    print("\n")
    print(wrapper.as_summary())

    # Print identifier
    print("\n")
    print(wrapper._identifier())

    # -----------------
    # Save and load
    # -----------------
    # File location
    #fname = '../examples/saved/kendall-sample.pickle'

    # Save
    #wrapper.save(fname=fname)

    # Load
    #wrapper = KendallWrapper().load(fname=fname)