##############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
#
# Description : This file contains differnent statistics used in time-series.
# What it mainly does is to format the output of tests provided
# by external libraries and return them in a dataframe.
#
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division
# Libraries
import sys
import math
import inspect
import warnings
import itertools
import numpy as np
import pandas as pd
# Libraries.
from scipy.stats import norm
from operator import attrgetter
# Import ARIMA from statsmodels.
from statsmodels.tsa.arima_model import ARIMA
# Add module wrappers to sys path dynamically.
sys.path.append("../../../")
# Libraries wrapper.
from pyamr.core.regression.wregression import BaseRegressionWrapper
[docs]class PyramidWrapper(BaseRegressionWrapper):
# Attributes.
_name = 'PYRAMID' # Label to add to the attributes when saving.
def __init__(self, **kwargs):
"""
"""
# Library.
from pyramid.arima import ARIMA as PYARIMA
# Save config parameters.
super(PyramidWrapper, self).__init__(**kwargs)
# Create model
if len(kwargs):
self._model = PYARIMA(**kwargs)
def _identifier(self):
"""This method creates a name that describes de model."""
try: exogenous = self.exogenous is not None
except: exogenous = False
return "name to do"
#return "%s%sx%s [%s,%s]" % (self._name,
# self.order,
# self.seasonal_order,
# self.trend,
# exogenous)
# --------------------------------------------------------------------------
# SET VARIABLES
# --------------------------------------------------------------------------
def _params_from_summary(self):
"""Gets parameters from the summary result of the raw object.
"""
# Format summary
summary = self._raw.summary().as_csv()
summary = summary.split("\n",1)[1] # Remove first line.
summary = summary.replace("\n",",") # Replace \n by comma.
# Split in elements.
elements = summary.split(",")
elements = [self._cast_float(e.strip()) for e in elements]
# Create series.
d = {}
# Add parameters.
d['s_jb_value'] = elements[-13]
d['s_jb_prob'] = elements[-9]
d['s_skew'] = elements[-5]
d['s_Q_value'] = elements[-15]
d['s_Q_prob'] = elements[-11]
d['s_H_value'] = elements[-7]
d['s_H_prob'] = elements[-3]
d['s_kurtosis'] = elements[-1]
d['s_heteroskedasticity'] = elements[-7]
d['s_omnibus_value'] = None
d['s_omnibus_prob'] = None
# Return
return d
def _init_result(self, alpha=0.05):
"""This method set all the variables into this class.
@see: statsmodels.Arima
@see: statsmodels.ArimaResults
Parameters
----------
alpha :
Returns
-------
series :
"""
# Create series.
d = {}
# Add generic metrics.
d['aic'] = self._raw.aic()
d['aicc'] = self._raw.aicc()
d['bic'] = self._raw.bic()
d['hqic'] = self._raw.hqic()
d['llf'] = self._raw.arima_res_.llf
# Check if it is arima or sarimax and get corresponding values
if self._raw.seasonal_order is not None:
statistic_values = self._raw.arima_res_.zvalues
else:
statistic_values = self._raw.arima_res_.tvalues,
# Create params information.
params_data = zip(self._raw.arima_res_.data.param_names,
self._raw.arima_res_.params,
self._raw.arima_res_.bse,
statistic_values,
self._raw.arima_res_.pvalues,
self._raw.arima_res_.conf_int(alpha))
# Add coefficients statistics to series.
for name, coef, std, tvalue, pvalue, (cil, ciu) in params_data:
d['%s_%s'%(name, 'coef')] = coef
d['%s_%s'%(name, 'std')] = std
d['%s_%s'%(name, 'tvalue')] = tvalue
d['%s_%s'%(name, 'tprob')] = pvalue
d['%s_%s'%(name, 'cil')] = cil
d['%s_%s'%(name, 'ciu')] = ciu
# Further statistics
d.update(self._resid_stats())
# We cannot use the params_from_summary because this wrappers stores
# different models with different summaries. The right way to solve this
# is by performing the statistics related with the residuals in the
# regression_wrapper._resid_stats.
#d.update(self._params_from_summary())
# Return
return d
def _init_config(self):
"""This method initialises the configuration.
For some reason the interestin data is in the method __init__ for the
object self._raw (ARIMA) and the method fir for the object
self._raw.arima_res_.model.
TODO: Handle if the instances passed to getargspecdict do not exist.
"""
# Create dictionary.
d = {}
# Fill it.
d.update(self._getargspecdict(self._raw.arima_res_.model, 'fit'))
d.update(self._getargspecdict(self._raw, '__init__'))
# Return
return d
# --------------------------------------------------------------------------
# HELPER METHODS
# --------------------------------------------------------------------------
[docs] def as_summary(self, **kwargs):
"""This method displays the summary.
"""
# Elements to split by.
find = "="*78
# Split and fill.
smry = find.join(self._raw.summary(**kwargs).as_text().split(find)[:-1])
smry = smry.split("\n")
smry[-6] = smry[-6].replace('=','',5)
smry[-5:] = [v.replace(' ', '', 5) for v in smry[-5:]]
smry = "\n".join(smry[:-1])
# Variables.
om, omp, dw = 0.0, 0.0, self.m_dw
jb, jbp = self.m_jb_value, self.m_jb_prob
nm, nmp = self.m_nm_value, self.m_nm_prob
skew, kurt = self.m_skew, self.m_kurtosis
# Add in new lines.
smry+= "\n%s\n%s\n%s\n" % ("="*78, "Manual".center(78, ' '), "-"*78)
smry+= "Omnibus: %#25.3f Durbin-Watson: %#23.3f\n" % (om, dw)
smry+= "Normal (N): %#25.3f Prob(N): %#23.3f\n" % (nm, nmp)
smry+= "="*78 + "\n"
smry+= "Note that JB, P(JB), skew and kurtosis have different values.\n"
smry+= "Note that Prob(Q) tests no correlation of residuals."
# Return
return smry
# --------------------------------------------------------------------------
# FIT
# --------------------------------------------------------------------------
[docs] def fit(self, **kwargs):
"""This method fits the specified arima model.
Parameters
----------
endog :
exog :
missing :
hasconst :
Returns
-------
object : A PyramidWrapper object.
"""
# Fill config.
self._config.update(kwargs)
# Set model
self._raw = self._model.fit(**kwargs)
# Set residuals as attribute.
self._resid = self._raw.resid()
# Set series with interesting params.
self._result = self._init_result(alpha=0.05)
# return object.
return self
# ---------------------------------------------------------------------------
# PREDICTION
# ---------------------------------------------------------------------------
[docs] def get_prediction(self, **kwargs):
"""
"""
# Compute prediction
forecast = self._raw.predict_in_sample(**kwargs)
# Get plotting values.
mean = forecast.reshape(1,-1)
cilo = self.conf_int_insample(mean, alpha=0.05)[:,0].reshape(1,-1)
ciup = self.conf_int_insample(mean, alpha=0.05)[:,1].reshape(1,-1)
# Time.
time = self._time(forecast=mean, **kwargs).reshape(1,-1)
# Get plotting values.
return np.concatenate((time, mean, cilo, ciup), axis=0)
def _time(self, forecast, start=None, **kwargs):
"""This method....
"""
# Get default start.
if start is None:
start = getattr(self._raw.arima_res_, 'k_diff', 0)
# Return
return np.arange(forecast.shape[1]) + start
# ---------------------------------------------------------------------------
# FIND AUTO
# ---------------------------------------------------------------------------
[docs] def from_instance(self, arima, **kwargs):
"""This method constructs a PyramidWrapper object from pyramid.ARIMA
"""
# Create object.
instance = PyramidWrapper()
# Set model.
instance._raw = arima
# Set residuals as attribute.
instance._resid = arima.resid()
# Set series with interesting params.
instance._result = instance._init_result(alpha=0.05)
# Set configuration parameters.
instance._config = instance._init_config()
# Return
return instance
[docs] def auto(self, **kwargs):
"""This method finds the best arima.
@see pyrmid.arima.auto_arima
Parameters
----------
Returns
-------
"""
# Library.
from pyramid.arima import auto_arima
from pyramid.arima.arima import ARIMA
# Compute auto_arima.
results = auto_arima(**kwargs)
# Return a single PyramidWrapper object.
if isinstance(results, ARIMA):
return [self.from_instance(results)]
# Return an array of PyramidWrapper objects.
if isinstance(results, list):
return [PyramidWrapper().from_instance(a) for a in results]
if __name__ == '__main__': # pragma: no cover
# Libraries.
import matplotlib.pyplot as plt
# Set pandas configuration.
pd.set_option('display.max_colwidth', 14)
pd.set_option('display.width', 80)
pd.set_option('display.precision', 4)
# Constants
length = 100
offset = 100
slope = 10
# Create time-series.
x = np.arange(length)
n = np.random.randn(length)*150
y = slope*x + offset + n
# Format exog to be used in pyramid-arima.
exog = x.reshape(-1,1)
# Create object
# This is a wrapper for the methods arima and sarimax within statstools. Take
# to account the following things when using this wrapper:
# - seasonal_order = None calls arima, otherwise it calls sarimax.
# - transparams = True enforces stationarity (not sure).
# - the exogenous variable needs to be reshaped.
# - the trend is passed in the constructor (insted of the fit method).
pyramid = PyramidWrapper(order=(0,1,1),
seasonal_order=(0,0,0,0),
trend='c',
disp=0).fit(y=y, exogenous=None, fit_args={})
# Print series.
print(pyramid.as_series())
# Print summary.
print(pyramid.as_summary())
# -----------------
# Save & Load
# -----------------
# TODO: There is a TypeError: can't pickle zStatespace objects when the
# seasonal order is included in the PyramidWrapper. However,
# apparently we are able to save sarimax with seasons using the
# SARIMAXWrapper.
# File location
#fname = '../../examples/saved/pyramid-sample.pickle'
# Save
#pyramid.save(fname=fname)
# Load
#pyramid = PyramidWrapper().load(fname=fname)
# -----------------
# Predictions
# -----------------
# Variables.
s, e, d = 10, 110, False
# Compute predictions (exogenous?).
preds = pyramid.get_prediction(start=s, end=e, dynamic=d, exogenous=None)
# Plot truth values.
plt.plot(y, color='#A6CEE3', alpha=0.5, marker='o',
markeredgecolor='k', markeredgewidth=0.5,
markersize=5, linewidth=0.75, label='Observed')
# Plot forecasted values.
plt.plot(preds[0,:], preds[1,:], color='#FF0000', alpha=1.00,
linewidth=2.0, label=pyramid._identifier())
# Plot the confidence intervals.
plt.fill_between(preds[0,:], preds[2,:],
preds[3,:],
color='r',
alpha=0.1)
# Legend
plt.legend()
plt.show()
# -------------------
# GRID SEARCH
# -------------------
# Grid parameters.
fit_params = {'exogenous': [None, exog],
'y': [y]}
con_params = {'disp': [0],
'order': [(0,1,1)],
'seasonal_order': [None],
'trend': ['n','c','t','ct']}
# Get summary.
summary = pyramid.grid_search_dataframe(con_kwargs=con_params,
fit_kwargs=fit_params,
flabel=False)
# Plot result.
print("\n\n")
print("===================")
print("Grid Search Summary")
print("===================")
print(summary)
# -------------------
# AUTO ARIMA
# -------------------
# Example 1: Retrieving just the best fit.
# ----------------------------------------
# Find best arima fit.
stepwise_fit = PyramidWrapper().auto(y=y, trend='c',
exogenous=None,
start_p=0, start_q=1, max_p=0, max_q=5, d=1,
seasonal=False, start_P=0, D=1,
trace=True,
error_action='ignore', # ignore if an order does not work.
suppress_warnings=True, # ignore convergence warnings.
information_criterion='bic', # score to select best model.
return_valid_fits=False, # Return all valid fits.
stepwise=True) # set to stepwise
# Get summary.
summary = PyramidWrapper().from_list_dataframe(stepwise_fit, flabel=False)
# Print result.
print("\n\n")
print("=====================")
print("Auto Stepwise Summary")
print("=====================")
print(summary)
# Retrieving all fits
# -------------------
# Do a grid search using auto_arima.
brute_fit = PyramidWrapper().auto(y=y,
exogenous=None, trend='c',
start_p=0, d=1, start_q=0, max_p=0, max_d=1, max_q=5,
start_P=1, D=None, start_Q=1, max_P=2, max_D=1, max_Q=2,
max_order=20, m=1, seasonal=False, stationary=False,
information_criterion='bic', alpha=0.05, stepwise=False,
return_valid_fits=True,
test='kpss', seasonal_test='ch', n_jobs=1, start_params=None, method=None,
transparams=True, solver='lbfgs', maxiter=50, disp=0, callback=None,
offset_test_args=None, seasonal_test_args=None, suppress_warnings=False,
error_action='ignore', trace=False, random=False, random_state=None,
n_fits=None, out_of_sample_size=0, scoring='mse', scoring_args=None)
# Show length
print(len(brute_fit))
# Create summary dataframe
summary = PyramidWrapper().from_list_dataframe(brute_fit, flabel=False)
# Plot result.
print("\n\n")
print("========================")
print("Auto Brute Force Summary")
print("========================")
print(summary)
# Show.
plt.show()