##############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
#
# Description : This file contains differnent statistics used in time-series.
# What it mainly does is to format the output of tests provided
# by external libraries and return them in a dataframe.
#
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division
# Libraries
import sys
import math
import inspect
import warnings
import itertools
import numpy as np
import pandas as pd
# Libraries.
from scipy.stats import norm
from operator import attrgetter
# Libraries wrapper.
from pyamr.core.regression.wreg import RegressionWrapper
from pyamr.core.stats.wbase import getargspecdict
[docs]class PyramidWrapper(RegressionWrapper):
def _identifier(self):
"""This method creates a name that describes de model."""
try: exogenous = self.exogenous is not None
except: exogenous = False
return "%s%sx%s [%s,%s]" % (self._name,
self.order,
self.seasonal_order,
self.trend,
exogenous)
# --------------------------------------------------------------------------
# SET VARIABLES
# --------------------------------------------------------------------------
def _params_from_summary(self):
"""Gets parameters from the summary result of the raw object.
"""
# Format summary
summary = self._raw.summary().as_csv()
summary = summary.split("\n",1)[1] # Remove first line.
summary = summary.replace("\n",",") # Replace \n by comma.
# Split in elements.
elements = summary.split(",")
elements = [self._cast_float(e.strip()) for e in elements]
# Create series.
d = {}
# Add parameters.
d['s_jb_value'] = elements[-13]
d['s_jb_prob'] = elements[-9]
d['s_skew'] = elements[-5]
d['s_Q_value'] = elements[-15]
d['s_Q_prob'] = elements[-11]
d['s_H_value'] = elements[-7]
d['s_H_prob'] = elements[-3]
d['s_kurtosis'] = elements[-1]
d['s_heteroskedasticity'] = elements[-7]
d['s_omnibus_value'] = None
d['s_omnibus_prob'] = None
# Return
return d
[docs] def evaluate(self, alpha=0.05):
"""This method set all the variables into this class.
@see: statsmodels.Arima
@see: statsmodels.ArimaResults
Parameters
----------
alpha :
Returns
-------
series :
"""
# Create series.
d = {}
# Add generic metrics.
d['aic'] = self._raw.aic()
d['aicc'] = self._raw.aicc()
d['bic'] = self._raw.bic()
d['hqic'] = self._raw.hqic()
d['llf'] = self._raw.arima_res_.llf
# Check if it is arima or sarimax and get corresponding values
if self._raw.seasonal_order is not None:
statistic_values = self._raw.arima_res_.zvalues
else:
statistic_values = self._raw.arima_res_.tvalues,
# Create params information.
params_data = zip(self._raw.arima_res_.data.param_names,
self._raw.arima_res_.params,
self._raw.arima_res_.bse,
statistic_values,
self._raw.arima_res_.pvalues,
self._raw.arima_res_.conf_int(alpha))
# Add coefficients statistics to series.
for name, coef, std, tvalue, pvalue, (cil, ciu) in params_data:
d['%s_%s'%(name, 'coef')] = coef
d['%s_%s'%(name, 'std')] = std
d['%s_%s'%(name, 'tvalue')] = tvalue
d['%s_%s'%(name, 'tprob')] = pvalue
d['%s_%s'%(name, 'cil')] = cil
d['%s_%s'%(name, 'ciu')] = ciu
# Further statistics
d.update(self._resid_stats())
# We cannot use the params_from_summary because this wrappers stores
# different models with different summaries. The right way to solve this
# is by performing the statistics related with the residuals in the
# regression_wrapper._resid_stats.
#d.update(self._params_from_summary())
# Return
return d
def _init_config(self):
"""This method initialises the configuration.
For some reason the interestin data is in the method __init__ for the
object self._raw (ARIMA) and the method fir for the object
self._raw.arima_res_.model.
TODO: Handle if the instances passed to getargspecdict do not exist.
"""
# Create dictionary.
d = {}
# Fill it.
d.update(self._getargspecdict(self._raw.arima_res_.model, 'fit'))
d.update(self._getargspecdict(self._raw, '__init__'))
# Return
return d
# --------------------------------------------------------------------------
# HELPER METHODS
# --------------------------------------------------------------------------
[docs] def as_summary(self, **kwargs):
"""This method displays the summary.
"""
# Elements to split by.
find = "="*78
# Split and fill.
smry = find.join(self._raw.summary(**kwargs).as_text().split(find)[:-1])
smry = smry.split("\n")
smry[-6] = smry[-6].replace('=','',5)
smry[-5:] = [v.replace(' ', '', 5) for v in smry[-5:]]
smry = "\n".join(smry[:-1])
# Variables.
om, omp, dw = 0.0, 0.0, self.m_dw
jb, jbp = self.m_jb_value, self.m_jb_prob
nm, nmp = self.m_nm_value, self.m_nm_prob
skew, kurt = self.m_skew, self.m_kurtosis
# Add in new lines.
smry+= "\n%s\n%s\n%s\n" % ("="*78, "Manual".center(78, ' '), "-"*78)
smry+= "Omnibus: %#25.3f Durbin-Watson: %#23.3f\n" % (om, dw)
smry+= "Normal (N): %#25.3f Prob(N): %#23.3f\n" % (nm, nmp)
smry+= "="*78 + "\n"
smry+= "Note that JB, P(JB), skew and kurtosis have different values.\n"
smry+= "Note that Prob(Q) tests no correlation of residuals."
# Return
return smry
# --------------------------------------------------------------------------
# FIT
# --------------------------------------------------------------------------
[docs] def resid(self):
"""Get the residual"""
return self._raw.resid()
# ---------------------------------------------------------------------------
# PREDICTION
# ---------------------------------------------------------------------------
[docs] def get_prediction(self, start=None, end=None, alpha=0.05, **kwargs):
"""
"""
# Compute time
time = self._time(start=start, end=end)
# Compute prediction
mean = self._raw.predict_in_sample(start=start, end=end, **kwargs)
# Confidence interval
cito = self.conf_int_insample(mean, alpha=0.05)
# For some reason the predict_in_sample function implemented in the
# module pyramid-arima returns more samples than it should. Moreover
# although there is another method call predict for forecasting, the
# predict in sample functions seems to do both. Thus such function
# is used and the first end-start elemnts are kept. A
mean = mean[:time.size]
cito = cito[:time.size]
# Return
return np.column_stack((time, mean, cito)).T
# ---------------------------------------------------------------------------
# FIND AUTO
# ---------------------------------------------------------------------------
[docs] def from_instance(self, arima, **kwargs):
"""This method constructs a PyramidWrapper object from pyramid.ARIMA
"""
# Create object.
instance = PyramidWrapper()
# Set model.
instance._raw = arima
# Set residuals as attribute.
instance._resid = arima.resid()
# Set series with interesting params.
instance._result = instance.evaluate(alpha=0.05)
# Find arima configuration parameters
d = {}
d.update(getargspecdict(arima.arima_res_.model, 'fit'))
d.update(getargspecdict(arima, '__init__'))
# Set configuration parameters
instance._config = instance._init_config()
# Return
return instance
[docs] def auto(self, **kwargs):
"""This method finds the best arima.
@see pyrmid.arima.auto_arima
Parameters
----------
Returns
-------
"""
# Library.
from pyramid.arima import auto_arima
from pyramid.arima.arima import ARIMA
# Compute auto_arima.
results = auto_arima(**kwargs)
# Return a single PyramidWrapper object.
if isinstance(results, ARIMA):
return [self.from_instance(results)]
# Return an array of PyramidWrapper objects.
if isinstance(results, list):
return [PyramidWrapper().from_instance(a) for a in results]
if __name__ == '__main__': # pragma: no cover
# Import pyramid arima
from pyramid.arima import ARIMA as PYARIMA
# Libraries.
import matplotlib.pyplot as plt
# Set pandas configuration.
pd.set_option('display.max_colwidth', 14)
pd.set_option('display.width', 80)
pd.set_option('display.precision', 4)
# Constants
length = 100
offset = 100
slope = 10
# Create time-series.
x = np.arange(length)
n = np.random.randn(length)*150
y = slope*x + offset + n
# Format exog to be used in pyramid-arima.
exog = x.reshape(-1,1)
# Create object
# This is a wrapper for the methods arima and sarimax within statstools. Take
# to account the following things when using this wrapper:
# - seasonal_order = None calls arima, otherwise it calls sarimax.
# - transparams = True enforces stationarity (not sure).
# - the exogenous variable needs to be reshaped.
# - the trend is passed in the constructor (insted of the fit method).
pyramid = PyramidWrapper(estimator=PYARIMA).fit(y=y[:80],
exogenous=None, order=(2,1,2), seasonal_order=(0,0,0,0),
trend='c', disp=0)
# Print series.
print(pyramid.as_series())
# Print summary.
print(pyramid.as_summary())
# -----------------
# Save & Load
# -----------------
# TODO: There is a TypeError: can't pickle zStatespace objects when the
# seasonal order is included in the PyramidWrapper. However,
# apparently we are able to save sarimax with seasons using the
# SARIMAXWrapper.
# File location
#fname = '../../examples/saved/pyramid-sample.pickle'
# Save
#pyramid.save(fname=fname)
# Load
#pyramid = PyramidWrapper().load(fname=fname)
# -----------------
# Predictions
# -----------------
# Variables.
s, e, d = 50, 120, False
# Compute predictions (exogenous?).
preds = pyramid.get_prediction(start=s, end=e, dynamic=d, exogenous=None)
# Plot truth values.
plt.plot(y, color='#A6CEE3', alpha=0.5, marker='o',
markeredgecolor='k', markeredgewidth=0.5,
markersize=5, linewidth=0.75, label='Observed')
# Plot forecasted values.
plt.plot(preds[0,:], preds[1,:], color='#FF0000', alpha=1.00,
linewidth=2.0, label=pyramid._identifier())
# Plot the confidence intervals.
plt.fill_between(preds[0,:], preds[2,:],
preds[3,:],
color='r',
alpha=0.1)
# Legend
plt.legend()
plt.show()
# -------------------
# Grid search
# -------------------
# Grid parameters.
fit_params = {'exogenous': [None, exog],
'y': [y], 'disp': [0],
'order': [(0,1,1)],
'seasonal_order': [None],
'trend': ['n','c','t','ct']}
# Get summary.
models = pyramid.grid_search(grid_params=fit_params)
# Summary
summary = pyramid.from_list_dataframe(models).T
# Plot result.
print("\nSummary")
print(summary)
# -------------------
# autoarima
# -------------------
# Example 1: Retrieving just the best fit.
# ----------------------------------------
# Find best arima fit.
stepwise_fit = PyramidWrapper().auto(y=y, trend='c',
exogenous=None,
start_p=0, start_q=0, max_p=0, max_q=5, d=0,
seasonal=False, start_P=0, D=1,
trace=True,
error_action='ignore', # ignore if an order does not work.
suppress_warnings=True, # ignore convergence warnings.
information_criterion='bic', # score to select best model.
return_valid_fits=False, # Return all valid fits.
stepwise=True) # set to stepwise
# Get summary.
summary = PyramidWrapper().from_list_dataframe(stepwise_fit, flabel=False).T
# -----------------
# Predictions
# -----------------
# Variables.
s, e, d = 50, 120, False
# Compute predictions (exogenous?).
preds = stepwise_fit[0].get_prediction(start=s, end=e, dynamic=d, exogenous=None)
# Plot truth values.
plt.plot(y, color='#A6CEE3', alpha=0.5, marker='o',
markeredgecolor='k', markeredgewidth=0.5,
markersize=5, linewidth=0.75, label='Observed')
# Plot forecasted values.
plt.plot(preds[0,:], preds[1,:], color='#FF0000', alpha=1.00,
linewidth=2.0, label=pyramid._identifier())
# Plot the confidence intervals.
plt.fill_between(preds[0,:], preds[2,:],
preds[3,:],
color='r',
alpha=0.1)
# Legend
plt.legend()
plt.show()
# Print result.
print("\nAuto Stepwise Summary")
print(summary)
# Retrieving all fits
# -------------------
# Do a grid search using auto_arima.
brute_fit = PyramidWrapper().auto(y=y,
exogenous=None, trend='c',
start_p=0, d=1, start_q=0, max_p=0, max_d=1, max_q=5,
start_P=1, D=None, start_Q=1, max_P=2, max_D=1, max_Q=2,
max_order=20, m=1, seasonal=False, stationary=False,
information_criterion='bic', alpha=0.05, stepwise=False,
return_valid_fits=True,
test='kpss', seasonal_test='ch', n_jobs=1, start_params=None, method=None,
transparams=True, solver='lbfgs', maxiter=50, disp=0, callback=None,
offset_test_args=None, seasonal_test_args=None, suppress_warnings=False,
error_action='ignore', trace=False, random=False, random_state=None,
n_fits=None, out_of_sample_size=0, scoring='mse', scoring_args=None)
print(len(brute_fit))
# Create summary dataframe
summary = PyramidWrapper().from_list_dataframe(brute_fit, flabel=False).T
# Plot result.
print("\nAuto Brute Force Summary")
print(summary)
# Show.
plt.show()