# Source code for pyamr.core.regression.wregression

###############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
# Description : This file contains different statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
# TODO: Move it to a module.
###############################################################################
# Forces decimals on divisions.
from __future__ import division 

# Libraries
import sys
import math
import inspect
import numpy as np
import pandas as pd
import statsmodels.api as sm


# External libraries
from scipy.stats import norm
from sklearn.model_selection import ParameterGrid

# Add module wrappers to sys path dynamically.
sys.path.append("../..")

# Import base wrapper
from pyamr.core.regression.wbase import BaseWrapper


class BaseRegressionWrapper(BaseWrapper):
  """Base class for the regression wrappers.

  It provides helpers to inspect the configuration of the wrapped model,
  to compute statistics on the residuals, to build a basic in-sample
  confidence interval and to generate the time (exogenous) variable.
  """

  # This is the name of the class.
  _name = 'REGRESSION'

  # Main attributes of the class.
  _resid = None

  # ---------------------------------------------------------------------------
  #                             HELPER METHODS
  # ---------------------------------------------------------------------------
  def _init_config(self):
    """This method builds the configuration of the wrapped model.

    The values are looked up on ``self._raw.model`` for every argument
    accepted by its ``__init__`` and ``fit`` methods.

    Returns
    -------
    config : dictionary with argument name and value (None if the model
             has no attribute with that name).
    """
    model = self._raw.model
    config = {}
    # Find attributes values for interesting methods.
    config.update(self._getargspecdict(model, '__init__'))
    config.update(self._getargspecdict(model, 'fit'))
    # Return
    return config

  def _getargspecdict(self, instance, funcname):
    """This method creates a dictionary with pairs name and value.

    Parameters
    ----------
    instance : object with values
    funcname : name of the function whose parameter names will be
               looked for.

    Returns
    -------
    tpls : dictionary with argument name and value. The value is None
           when the instance has no attribute with that name. An empty
           dictionary is returned (and the error printed) if the
           function cannot be inspected.
    """
    try:
      # Get argument parameters.
      func = getattr(instance, funcname, None)
      # inspect.getargspec was removed in Python 3.11; prefer its
      # replacement and fall back only on interpreters that lack it.
      getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
      prms = getspec(func)
      # Create and fill dictionary ('self' is not a configuration value).
      return {name: getattr(instance, name, None)
              for name in prms.args if name != 'self'}
    except Exception as e:
      # Best effort: report the problem and continue without config.
      print("[Exception at _getargspecdict : %s" % e)
      # Return
      return {}

  # ---------------------------------------------------------------------------
  #                              BASIC METHODS
  # ---------------------------------------------------------------------------
  def _params_from_summary(self):
    """This method returns params from summary.

    Returns
    -------
    an empty dictionary in this base implementation.
    """
    return {}

  # ---------------------------------------------------------------------------
  #              STATISTIC METHODS FOR REGRESSION ANALYSIS
  # ---------------------------------------------------------------------------
  def _resid_stats(self, resid=None, alpha=0.05):
    """This method computes basic stats on the residuals.

    Parameters
    ----------
    resid : array-like
      The residuals to perform the stats on. If None, self._resid
      is used.

    alpha : int-like
      The alpha selected. Note that it is currently not used by any of
      the computations.

    Returns
    -------
    dictionary with the stats for the residuals (empty if there are no
    residuals to work with).
    """
    # Check if resid is passed.
    if resid is None:
      resid = self._resid

    # No resid to work with.
    if resid is None:
      return {}

    # Create series.
    d = {}

    # Compute autocorrelation (durbin-watson)
    from statsmodels.stats.stattools import durbin_watson
    d['m_dw'] = durbin_watson(resid)

    # Compute normality (jarque bera).
    from statsmodels.stats.stattools import jarque_bera
    jb_value, jb_prob, skew, kurtosis = jarque_bera(resid)
    d['m_jb_value'] = jb_value
    d['m_jb_prob'] = jb_prob
    d['m_skew'] = skew
    d['m_kurtosis'] = kurtosis

    # Compute normal test (normal test)
    from scipy.stats import normaltest
    nm_value, nm_prob = normaltest(resid)
    d['m_nm_value'] = nm_value
    d['m_nm_prob'] = nm_prob

    # Compute the kolmogorov-smirnov test.
    # NOTE(review): 'norm' without extra arguments refers to the standard
    # normal distribution and the residuals are not standardized here;
    # confirm this is the intended comparison.
    from scipy.stats import kstest
    ks_value, ks_prob = kstest(resid, 'norm')
    d['m_ks_value'] = ks_value
    d['m_ks_prob'] = ks_prob

    # Compute the shapiro-wilk test.
    from scipy.stats import shapiro
    sh_value, sh_prob = shapiro(resid)
    d['m_shp_value'] = sh_value
    d['m_shp_prob'] = sh_prob

    # Compute anderson-darling.
    # The null hypothesis (sample data is drawn from a population that
    # follows a particular distribution; in this case normal) can be rejected
    # if the statistic is larger than the critical value for a specified
    # significance level. The third critical value is used (scipy documents
    # it as the 5% level for the normal distribution), hence the flag is
    # True when the null hypothesis is not rejected.
    from scipy.stats import anderson
    ad_value, ad_cv, ad_sl = anderson(resid)
    d['m_ad_value'] = ad_value
    d['m_ad_nnorm'] = ad_value<ad_cv[2]

    # Return
    return d


  def conf_int_insample(self, forecast, resid=None, alpha=0.05):
    """This function computes a basic confidence interval.

    The interval is forecast -/+ z*(std/sqrt(n)) where z is the normal
    quantile for 1-alpha/2, std is the standard deviation of the residuals
    and n is the number of residuals.

    Note: It might not be the adequate way of computing it.

    Parameters
    ----------
    forecast : array-like
      The forecasted values.

    resid : array-like (optional)
      The residuals. If None, self._resid is used.

    alpha : float
      The alpha value selected.

    Returns
    -------
    array with two columns, the lower bound (cilo) and the upper
    bound (ciup), and one row per forecasted value.
    """
    # Check if resid is passed.
    if resid is None:
      resid = self._resid

    # No resid to work with.
    # NOTE: this empty result has shape (2, 0) whereas the regular result
    # has shape (n, 2); it is kept as it was for backward compatibility.
    if resid is None:
      return np.array([[],[]])

    # Accept any array-like (lists, pandas series, ...) and not only arrays.
    resid = np.asarray(resid)
    forecast = np.asarray(forecast)

    # Compute variables.
    const = norm.ppf(1.0-alpha/2.0)
    std = np.std(resid)
    c = const*(std/math.sqrt(resid.shape[0]))

    # Compute confidence interval.
    cilo = (forecast - c).reshape(-1,1)
    ciup = (forecast + c).reshape(-1,1)

    # Return
    return np.concatenate((cilo, ciup), axis=1)


  def _exog(self, start=None, end=None):
    """This method generates the exogenous variable time.

    Note: it is only used for those regression methods that do not support
    the parameters start and end in the prediction (wls, ols, rlm, ...).
    On the other side, this method is not necessary for ARIMA since they
    already support this notation.

    .. note:: end is included (see _time()).

    Parameters
    ----------
    start : int (optional)
      The time t to start the prediction

    end : int (optional)
      The time t to end the prediction

    Returns
    -------
    the exogenous variable; the time variable alone when the attribute
    trend is missing, None or 'ct', and the time variable with a constant
    column otherwise.
    """
    # Default start and end.
    exog = self._time(start=start, end=end)
    # Add constant if required.
    trend = getattr(self, 'trend', None)
    # Return exog without constant
    # NOTE(review): 'ct' skips the constant here; confirm this matches the
    # way the wrappers build the exogenous variable when fitting.
    if trend is None or trend=='ct':
      return exog
    # Add constant. The default has_constant='skip' would silently omit the
    # constant column when exog holds a single non-zero time step (that is,
    # start equals end) because such a column already looks constant.
    exog = sm.add_constant(exog, has_constant='add')
    # Return
    return exog


  def _time(self, start=None, end=None):
    """This method generates the time variable.

    Note: The value indicated by 'end' is included. This is that way
    to match with the implementation of ARIMA from statsmodels.

    Parameters
    ----------
    start : int (optional)
      The time t to start the prediction. If None, 0 is used.

    end : int (optional)
      The time t to finish the prediction. If None, the time variable
      finishes at len(self.endog)-1.

    Returns
    -------
    array with the consecutive integers from start to end (both included).
    """
    start = 0 if start is None else start
    end = len(self.endog) if end is None else end+1
    # Generate time variable.
    return np.arange(start, end, 1)

    
  # ---------------------------------------------------------------------------
  #                           BASIC PLOT METHODS
  # ---------------------------------------------------------------------------
  # TODO: Might be useful to create some basic plot methods so it is easy
  # to visualize the results. All the regression wrappers have an example
  # of such basic plotting in their __main__.




if __name__ == '__main__':

  # Settings of the synthetic linear series.
  n_samples = 100
  intercept = 100
  gradient = 10

  # Noise-free line over the time axis and gaussian noise to perturb it.
  t = np.arange(n_samples)
  noise = np.random.randn(n_samples)*10
  expected = gradient*t + intercept
  observed = expected + noise

  # Base wrapper whose residuals are filled by hand (nothing is fitted).
  wrapper = BaseRegressionWrapper()
  wrapper._resid = observed - expected

  # Show the statistics computed on the residuals.
  stats = pd.Series(wrapper._resid_stats())
  print(stats)

  # Show the in-sample confidence intervals around the noise-free line.
  intervals = wrapper.conf_int_insample(forecast=expected)
  print(intervals)