Source code for pyamr.core.regression.wregression

###############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
# Description : This file contains differnent statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
# TODO: Move it to a module.
###############################################################################
# Forces decimals on divisions.
from __future__ import division 

# Libraries
import sys
import math
import inspect
import numpy as np
import pandas as pd
import statsmodels.api as sm


# External libraries
from scipy.stats import norm
from sklearn.model_selection import ParameterGrid

# Add module wrappers to sys path dynamically.
sys.path.append("../..")

# Import base wrapper
from pyamr.core.regression.wbase import BaseWrapper


[docs]class BaseRegressionWrapper(BaseWrapper): # This is the name of the class. _name = 'REGRESSION' # Main attributes of the class. _resid = None # --------------------------------------------------------------------------- # HELPER METHODS # --------------------------------------------------------------------------- def _init_config(self): """This method fills self._config with the configuration.""" # Create dir. d = {} # Find attributes values for interesting methods. d.update(self._getargspecdict(self._raw.model, '__init__')) d.update(self._getargspecdict(self._raw.model, 'fit')) # Return return d def _getargspecdict(self, instance, funcname): """This method creates a dictionary with pairs name and value. Parameters ---------- instance : object with values funcname : function which parameters name will be looked for. Returns ------- tpls : dictionary with argument name and value. """ try: # Get argument parameters. func = getattr(instance, funcname, None) prms = inspect.getargspec(func) tpls = {} # Create and fill dictionary for name in prms.args: if name=='self': continue tpls[name] = getattr(instance, name, None) # Return return tpls except Exception as e: # Print print("[Exception at _getargspecdict : %s" % e) # Return return {} # --------------------------------------------------------------------------- # BASIC METHODS # --------------------------------------------------------------------------- def _params_from_summary(self): """This method returns params from summary. """ return {} # --------------------------------------------------------------------------- # STATISTIC METHODS FOR REGRESSION ANALYSIS # --------------------------------------------------------------------------- def _resid_stats(self, resid=None, alpha=0.05): """This method computes basic stats on the residuals Parameters ---------- resid : array-like The residuals to perform the stats on. alpha : int-like The alpha selected. Returns ------- dictionary with the stats for the residuals """ # Check if resid is passed. if resid is None: resid = self._resid # No resid to work with. if resid is None: return {} # Create series. d = {} # Compute autocorrelation (durbin-watson) from statsmodels.stats.stattools import durbin_watson d['m_dw'] = durbin_watson(resid) # Compute normalility (jarque bera). from statsmodels.stats.stattools import jarque_bera jb_value, jb_prob, skew, kurtosis = jarque_bera(resid) d['m_jb_value'] = jb_value d['m_jb_prob'] = jb_prob d['m_skew'] = skew d['m_kurtosis'] = kurtosis # Compute normal test (normal test) from scipy.stats import normaltest nm_value, nm_prob = normaltest(resid) d['m_nm_value'] = nm_value d['m_nm_prob'] = nm_prob # Compute the kolmogorov-smirnov test. from scipy.stats import kstest ks_value, ks_prob = kstest(resid, 'norm') d['m_ks_value'] = ks_value d['m_ks_prob'] = ks_prob # Compute the shapiro-wilkinson test. from scipy.stats import shapiro sh_value, sh_prob = shapiro(resid) d['m_shp_value'] = sh_value d['m_shp_prob'] = sh_prob # Compute anderson-darling. # The null hypothesis (sample data is drawn from a population that # follows a particular distribution; in this case normal) can be rejected # if the statistic es larger than the critical values for an specified # significante level. from scipy.stats import anderson ad_value, ad_cv, ad_sl = anderson(resid) d['m_ad_value'] = ad_value d['m_ad_nnorm'] = ad_value<ad_cv[2] # Return return d
[docs] def conf_int_insample(self, forecast, resid=None, alpha=0.05): """This function computes a basic confidence interval. Note: It might not be the adecuate way of computing it. Parameters ---------- forecast : the forecasted values. alpha : the alpha value selected. Returns ------- cilo : ciup : """ # Check if resid is passed. if resid is None: resid = self._resid # No resid to work with. if resid is None: return np.array([[],[]]) # Compute variables. const = norm.ppf(1.0-alpha/2.0) mu = np.mean(resid) std = np.std(resid) c = const*(std/math.sqrt(resid.shape[0])) # Compute confidence interval. cilo = (forecast - c).reshape(-1,1) ciup = (forecast + c).reshape(-1,1) # Return return np.concatenate((cilo, ciup), axis=1)
def _exog(self, start=None, end=None): """This method generates the exogenous variable time. Note: it is only used for those regression methods that do not support the parameters stard and end in the prediction (wls, ols, rlm, ...). On the other side, this method is not necessary for ARIMA since they already support this notation. .. note:: end is included (see _time()). Parameters ---------- start : int (optional) The time t to start the prediction end : int (optional) The time t to end the prediction Returns ------- the exogenous variable """ # Default start and end. exog = self._time(start=start, end=end) # Add constant if required. trend = getattr(self, 'trend', None) # Return exog without constant if trend is None or trend=='ct': return exog # Add constant. exog = sm.add_constant(exog) # Return return exog def _time(self, start=None, end=None): """This method generates the time variable. Note: The value indicated by 'end' is included. This is that way to match with the implementation of ARIMA from statsmodels. Parameters ---------- start : int (optional) The time t to start the prediction end : int (optional) The time t to finish the prediction. Returns ------- """ start = 0 if start is None else start end = len(self.endog) if end is None else end+1 # Generate time variable. return np.arange(start, end, 1)
# --------------------------------------------------------------------------- # BASIC PLOT METHODS # --------------------------------------------------------------------------- # TODO: Might be useful to create some basic plot methods so it is easy # to visualize the results. All the regression wrappers have an example # of such basic plotting in their __main__. if __name__ == '__main__': # Constants length = 100 offset = 100 slope = 10 # Create time-series. x = np.arange(length) n = np.random.randn(length)*10 y_orig = slope*x + offset + n y_pred = slope*x + offset # Create and fill a base statistic wrapper. w = BaseRegressionWrapper() w._resid = y_orig - y_pred # Print resid stats print(pd.Series(w._resid_stats())) # Print resid confidence intervals print(w.conf_int_insample(forecast=y_pred))