###############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
# Description : This file contains differnent statistics used in time-series.
# What it mainly does is to format the output of tests provided
# by external libraries and return them in a dataframe.
#
# TODO: Move it to a module.
###############################################################################
# Forces decimals on divisions.
from __future__ import division
# Libraries
import sys
import math
import inspect
import numpy as np
import pandas as pd
import statsmodels.api as sm
# External libraries
from scipy.stats import norm
from sklearn.model_selection import ParameterGrid
# Import base wrapper
from pyamr.core.stats.wbase import fargs
from pyamr.core.stats.wbase import BaseWrapper
# ---------------------------------------------------------------------------
# helper methods
# ---------------------------------------------------------------------------
def _forecast_error(sigma2, arparams, maparams, steps):
"""This method forecasts the error.
Used in ARIMAWrapper.
.. note::
These functions have been extracted from the ``_forecast_conf_int()``
and ``_forecast_error()`` implemented in the the class ARIMAResults
within statsmodels.ts.arima_model. As such, it produces the
same confidence intervals as those printed using the method
``plot_predict()``.
Parameters
----------
Returns
-------
"""
# Libraries.
from statsmodels.tsa.arima_process import arma2ma
# Compute error.
ma_rep = arma2ma(np.r_[1, -arparams],
np.r_[1, maparams], lags=steps) # nobs removed!
fcasterr = np.sqrt(sigma2 * np.cumsum(ma_rep ** 2))
# Return
return fcasterr
def _forecast_conf_int(forecast, fcasterr, alpha=0.05):
"""This method forecasts the confidence interval.
Parameters
----------
Returns
-------
"""
# Libraries.
from scipy.stats import t, norm
# Compute confidence intervals
const = norm.ppf(1 - alpha / 2.)
conf_int = np.c_[forecast - const * fcasterr,
forecast + const * fcasterr]
# Return
return conf_int
def _insample_conf_int(forecast, resid, alpha=0.05):
"""This function computes a basic confidence interval.
Note: It might not be the adecuate way of computing it.
Parameters
----------
forecast : the forecasted values.
alpha : the alpha value selected.
Returns
-------
cilo :
ciup :
"""
# Compute variables.
const = norm.ppf(1.0 - alpha / 2.0)
mu = np.mean(resid)
std = np.std(resid)
c = const * (std / math.sqrt(resid.shape[0]))
# Compute confidence interval.
conf_int = np.c_[forecast - c, forecast + c]
# Return
return conf_int
[docs]class RegressionWrapper(BaseWrapper):
"""Description...
"""
# Attribute
_resid = None
# ---------------------------------------------------------------------------
# HELPER METHODS
# ---------------------------------------------------------------------------
def _init_config(self):
"""This method fills self._config with the configuration."""
# Create dir.
d = {}
# Find attributes values for interesting methods.
d.update(self._getargspecdict(self._raw.model, '__init__'))
d.update(self._getargspecdict(self._raw.model, 'fit'))
# Return
return d
def _getargspecdict(self, instance, funcname):
"""This method creates a dictionary with pairs name and value.
Parameters
----------
instance : object with values
funcname : function which parameters name will be looked for.
Returns
-------
tpls : dictionary with argument name and value.
"""
try:
# Get argument parameters.
func = getattr(instance, funcname, None)
prms = inspect.getargspec(func)
tpls = {}
# Create and fill dictionary
for name in prms.args:
if name == 'self': continue
tpls[name] = getattr(instance, name, None)
# Return
return tpls
except Exception as e:
# Print
print("[Exception at _getargspecdict : %s" % e)
# Return
return {}
[docs] def conf_int_insample(self, predictions, alpha=0.05):
"""Computes the in-sample confidence interval.
Parameters
----------
predictions : array-like
The predictions done by the model.
alpha : int
The alpha to configure the confidence interval.
Returns
-------
array-like
"""
return _insample_conf_int(forecast=predictions,
resid=self._resid,
alpha=alpha)
# ---------------------------------------------------------------------------
# STATISTIC METHODS FOR REGRESSION ANALYSIS
# ---------------------------------------------------------------------------
def _resid_stats(self, resid=None, alpha=0.05):
"""This method computes basic stats on the residuals
Parameters
----------
resid : array-like
The residuals to perform the stats on.
alpha : int-like
The alpha selected.
Returns
-------
dictionary with the stats for the residuals
"""
# Check if resid is passed.
if resid is None:
resid = self._resid
# No resid to work with.
if resid is None:
return {}
# Create series.
d = {}
# Compute autoc-orrelation (durbin-watson)
from statsmodels.stats.stattools import durbin_watson
d['m_dw'] = durbin_watson(resid)
# Compute normalility (jarque bera).
from statsmodels.stats.stattools import jarque_bera
jb_value, jb_prob, skew, kurtosis = jarque_bera(resid)
d['m_jb_value'] = jb_value
d['m_jb_prob'] = jb_prob
d['m_skew'] = skew
d['m_kurtosis'] = kurtosis
try:
# Compute normal test (normal test)
from scipy.stats import normaltest
nm_value, nm_prob = normaltest(resid)
d['m_nm_value'] = nm_value
d['m_nm_prob'] = nm_prob
except ValueError as e:
print(e)
d['m_nm_value'] = np.inf
d['m_nm_prob'] = np.inf
# Compute the kolmogorov-smirnov test.
from scipy.stats import kstest
ks_value, ks_prob = kstest(resid, 'norm')
d['m_ks_value'] = ks_value
d['m_ks_prob'] = ks_prob
try:
# Compute the shapiro-wilkinson test.
from scipy.stats import shapiro
sh_value, sh_prob = shapiro(resid)
d['m_shp_value'] = sh_value
d['m_shp_prob'] = sh_prob
except ValueError as e:
print(e)
d['m_shp_value'] = np.inf
d['m_shp_prob'] = np.inf
# Compute anderson-darling.
# The null hypothesis (sample data is drawn from a population that
# follows a particular distribution; in this case normal) can be rejected
# if the statistic es larger than the critical values for an specified
# significance level.
from scipy.stats import anderson
ad_value, ad_cv, ad_sl = anderson(resid)
d['m_ad_value'] = ad_value
d['m_ad_nnorm'] = ad_value < ad_cv[2]
# Return
return d
def _exog(self, start=None, end=None):
"""This method generates the exogenous variable time.
Note: it is only used for those regression methods that do not support
the parameters start and end in the prediction (wls, ols, rlm, ...).
On the other side, this method is not necessary for ARIMA since they
already support this notation.
.. note:: end is included (see _time()).
Parameters
----------
start : int (optional)
The time t to start the prediction
end : int (optional)
The time t to end the prediction
Returns
-------
the exogenous variable
"""
# Default start and end.
exog = self._time(start=start, end=end)
# Add constant if required.
trend = getattr(self, 'trend', None)
# Return exog without constant
if trend is None or trend == 'ct':
return exog
# Add constant.
exog = sm.add_constant(exog)
# Return
return exog
def _time(self, start=None, end=None):
"""This method generates the time variable.
.. note::
The value indicated by 'end' is included. This is that way
to match with the implementation of ARIMA from statsmodels.
Parameters
----------
start : int (optional)
The time t to start the prediction
end : int (optional)
The time t to finish the prediction.
Returns
-------
"""
start = 0 if start is None else start
end = len(self.endog) if end is None else end + 1
# Generate time variable.
return np.arange(start, end, 1)
[docs] def resid(self):
"""Get the residual"""
if hasattr(self._raw, 'resid'):
return self._raw.resid
[docs] def fit(self, **kwargs):
"""This method performs the fit.
Parameters
----------
kwargs : dict-like
The arguments that will be passed to the method.
Returns
-------
"""
# Empty the class
self._empty()
# Update the configuration
self._config.update(kwargs)
# Fit the model
if inspect.isfunction(self.estimator):
self._fit_funct(**kwargs)
elif inspect.isclass(self.estimator):
self._fit_class(**kwargs)
# Set the residual
self._resid = self.resid()
# Evaluate the model
if self.evaluate:
self._result = self.evaluate()
# Return
return self
if __name__ == '__main__':
# Constants
length = 100
offset = 100
slope = 10
# Create time-series.
x = np.arange(length)
n = np.random.randn(length) * 10
y_orig = slope * x + offset + n
y_pred = slope * x + offset
# Create and fill a base statistic wrapper.
w = RegressionWrapper()
w._resid = y_orig - y_pred
# Print resid stats
print(pd.Series(w._resid_stats()))
# Print resid confidence intervals
print(w.conf_int_insample(forecast=y_pred))