Source code for pyamr.core.regression.pyarima

##############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
#
# Description : This file contains differnent statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division 

# Libraries
import sys
import math
import inspect
import warnings
import itertools
import numpy as np
import pandas as pd

# Libraries.
from scipy.stats import norm
from operator import attrgetter

# Import ARIMA from statsmodels.
from statsmodels.tsa.arima_model import ARIMA

# Add module wrappers to sys path dynamically.
sys.path.append("../../../")

# Libraries wrapper.
from pyamr.core.regression.wregression import BaseRegressionWrapper

[docs]class PyramidWrapper(BaseRegressionWrapper): # Attributes. _name = 'PYRAMID' # Label to add to the attributes when saving. def __init__(self, **kwargs): """ """ # Library. from pyramid.arima import ARIMA as PYARIMA # Save config parameters. super(PyramidWrapper, self).__init__(**kwargs) # Create model if len(kwargs): self._model = PYARIMA(**kwargs) def _identifier(self): """This method creates a name that describes de model.""" try: exogenous = self.exogenous is not None except: exogenous = False return "name to do" #return "%s%sx%s [%s,%s]" % (self._name, # self.order, # self.seasonal_order, # self.trend, # exogenous) # -------------------------------------------------------------------------- # SET VARIABLES # -------------------------------------------------------------------------- def _params_from_summary(self): """Gets parameters from the summary result of the raw object. """ # Format summary summary = self._raw.summary().as_csv() summary = summary.split("\n",1)[1] # Remove first line. summary = summary.replace("\n",",") # Replace \n by comma. # Split in elements. elements = summary.split(",") elements = [self._cast_float(e.strip()) for e in elements] # Create series. d = {} # Add parameters. d['s_jb_value'] = elements[-13] d['s_jb_prob'] = elements[-9] d['s_skew'] = elements[-5] d['s_Q_value'] = elements[-15] d['s_Q_prob'] = elements[-11] d['s_H_value'] = elements[-7] d['s_H_prob'] = elements[-3] d['s_kurtosis'] = elements[-1] d['s_heteroskedasticity'] = elements[-7] d['s_omnibus_value'] = None d['s_omnibus_prob'] = None # Return return d def _init_result(self, alpha=0.05): """This method set all the variables into this class. @see: statsmodels.Arima @see: statsmodels.ArimaResults Parameters ---------- alpha : Returns ------- series : """ # Create series. d = {} # Add generic metrics. d['aic'] = self._raw.aic() d['aicc'] = self._raw.aicc() d['bic'] = self._raw.bic() d['hqic'] = self._raw.hqic() d['llf'] = self._raw.arima_res_.llf # Check if it is arima or sarimax and get corresponding values if self._raw.seasonal_order is not None: statistic_values = self._raw.arima_res_.zvalues else: statistic_values = self._raw.arima_res_.tvalues, # Create params information. params_data = zip(self._raw.arima_res_.data.param_names, self._raw.arima_res_.params, self._raw.arima_res_.bse, statistic_values, self._raw.arima_res_.pvalues, self._raw.arima_res_.conf_int(alpha)) # Add coefficients statistics to series. for name, coef, std, tvalue, pvalue, (cil, ciu) in params_data: d['%s_%s'%(name, 'coef')] = coef d['%s_%s'%(name, 'std')] = std d['%s_%s'%(name, 'tvalue')] = tvalue d['%s_%s'%(name, 'tprob')] = pvalue d['%s_%s'%(name, 'cil')] = cil d['%s_%s'%(name, 'ciu')] = ciu # Further statistics d.update(self._resid_stats()) # We cannot use the params_from_summary because this wrappers stores # different models with different summaries. The right way to solve this # is by performing the statistics related with the residuals in the # regression_wrapper._resid_stats. #d.update(self._params_from_summary()) # Return return d def _init_config(self): """This method initialises the configuration. For some reason the interestin data is in the method __init__ for the object self._raw (ARIMA) and the method fir for the object self._raw.arima_res_.model. TODO: Handle if the instances passed to getargspecdict do not exist. """ # Create dictionary. d = {} # Fill it. d.update(self._getargspecdict(self._raw.arima_res_.model, 'fit')) d.update(self._getargspecdict(self._raw, '__init__')) # Return return d # -------------------------------------------------------------------------- # HELPER METHODS # --------------------------------------------------------------------------
[docs] def as_summary(self, **kwargs): """This method displays the summary. """ # Elements to split by. find = "="*78 # Split and fill. smry = find.join(self._raw.summary(**kwargs).as_text().split(find)[:-1]) smry = smry.split("\n") smry[-6] = smry[-6].replace('=','',5) smry[-5:] = [v.replace(' ', '', 5) for v in smry[-5:]] smry = "\n".join(smry[:-1]) # Variables. om, omp, dw = 0.0, 0.0, self.m_dw jb, jbp = self.m_jb_value, self.m_jb_prob nm, nmp = self.m_nm_value, self.m_nm_prob skew, kurt = self.m_skew, self.m_kurtosis # Add in new lines. smry+= "\n%s\n%s\n%s\n" % ("="*78, "Manual".center(78, ' '), "-"*78) smry+= "Omnibus: %#25.3f Durbin-Watson: %#23.3f\n" % (om, dw) smry+= "Normal (N): %#25.3f Prob(N): %#23.3f\n" % (nm, nmp) smry+= "="*78 + "\n" smry+= "Note that JB, P(JB), skew and kurtosis have different values.\n" smry+= "Note that Prob(Q) tests no correlation of residuals." # Return return smry
# -------------------------------------------------------------------------- # FIT # --------------------------------------------------------------------------
[docs] def fit(self, **kwargs): """This method fits the specified arima model. Parameters ---------- endog : exog : missing : hasconst : Returns ------- object : A PyramidWrapper object. """ # Fill config. self._config.update(kwargs) # Set model self._raw = self._model.fit(**kwargs) # Set residuals as attribute. self._resid = self._raw.resid() # Set series with interesting params. self._result = self._init_result(alpha=0.05) # return object. return self
# --------------------------------------------------------------------------- # PREDICTION # ---------------------------------------------------------------------------
[docs] def get_prediction(self, **kwargs): """ """ # Compute prediction forecast = self._raw.predict_in_sample(**kwargs) # Get plotting values. mean = forecast.reshape(1,-1) cilo = self.conf_int_insample(mean, alpha=0.05)[:,0].reshape(1,-1) ciup = self.conf_int_insample(mean, alpha=0.05)[:,1].reshape(1,-1) # Time. time = self._time(forecast=mean, **kwargs).reshape(1,-1) # Get plotting values. return np.concatenate((time, mean, cilo, ciup), axis=0)
def _time(self, forecast, start=None, **kwargs): """This method.... """ # Get default start. if start is None: start = getattr(self._raw.arima_res_, 'k_diff', 0) # Return return np.arange(forecast.shape[1]) + start # --------------------------------------------------------------------------- # FIND AUTO # ---------------------------------------------------------------------------
[docs] def from_instance(self, arima, **kwargs): """This method constructs a PyramidWrapper object from pyramid.ARIMA """ # Create object. instance = PyramidWrapper() # Set model. instance._raw = arima # Set residuals as attribute. instance._resid = arima.resid() # Set series with interesting params. instance._result = instance._init_result(alpha=0.05) # Set configuration parameters. instance._config = instance._init_config() # Return return instance
[docs] def auto(self, **kwargs): """This method finds the best arima. @see pyrmid.arima.auto_arima Parameters ---------- Returns ------- """ # Library. from pyramid.arima import auto_arima from pyramid.arima.arima import ARIMA # Compute auto_arima. results = auto_arima(**kwargs) # Return a single PyramidWrapper object. if isinstance(results, ARIMA): return [self.from_instance(results)] # Return an array of PyramidWrapper objects. if isinstance(results, list): return [PyramidWrapper().from_instance(a) for a in results]
if __name__ == '__main__': # pragma: no cover # Libraries. import matplotlib.pyplot as plt # Set pandas configuration. pd.set_option('display.max_colwidth', 14) pd.set_option('display.width', 80) pd.set_option('display.precision', 4) # Constants length = 100 offset = 100 slope = 10 # Create time-series. x = np.arange(length) n = np.random.randn(length)*150 y = slope*x + offset + n # Format exog to be used in pyramid-arima. exog = x.reshape(-1,1) # Create object # This is a wrapper for the methods arima and sarimax within statstools. Take # to account the following things when using this wrapper: # - seasonal_order = None calls arima, otherwise it calls sarimax. # - transparams = True enforces stationarity (not sure). # - the exogenous variable needs to be reshaped. # - the trend is passed in the constructor (insted of the fit method). pyramid = PyramidWrapper(order=(0,1,1), seasonal_order=(0,0,0,0), trend='c', disp=0).fit(y=y, exogenous=None, fit_args={}) # Print series. print(pyramid.as_series()) # Print summary. print(pyramid.as_summary()) # ----------------- # Save & Load # ----------------- # TODO: There is a TypeError: can't pickle zStatespace objects when the # seasonal order is included in the PyramidWrapper. However, # apparently we are able to save sarimax with seasons using the # SARIMAXWrapper. # File location #fname = '../../examples/saved/pyramid-sample.pickle' # Save #pyramid.save(fname=fname) # Load #pyramid = PyramidWrapper().load(fname=fname) # ----------------- # Predictions # ----------------- # Variables. s, e, d = 10, 110, False # Compute predictions (exogenous?). preds = pyramid.get_prediction(start=s, end=e, dynamic=d, exogenous=None) # Plot truth values. plt.plot(y, color='#A6CEE3', alpha=0.5, marker='o', markeredgecolor='k', markeredgewidth=0.5, markersize=5, linewidth=0.75, label='Observed') # Plot forecasted values. plt.plot(preds[0,:], preds[1,:], color='#FF0000', alpha=1.00, linewidth=2.0, label=pyramid._identifier()) # Plot the confidence intervals. plt.fill_between(preds[0,:], preds[2,:], preds[3,:], color='r', alpha=0.1) # Legend plt.legend() plt.show() # ------------------- # GRID SEARCH # ------------------- # Grid parameters. fit_params = {'exogenous': [None, exog], 'y': [y]} con_params = {'disp': [0], 'order': [(0,1,1)], 'seasonal_order': [None], 'trend': ['n','c','t','ct']} # Get summary. summary = pyramid.grid_search_dataframe(con_kwargs=con_params, fit_kwargs=fit_params, flabel=False) # Plot result. print("\n\n") print("===================") print("Grid Search Summary") print("===================") print(summary) # ------------------- # AUTO ARIMA # ------------------- # Example 1: Retrieving just the best fit. # ---------------------------------------- # Find best arima fit. stepwise_fit = PyramidWrapper().auto(y=y, trend='c', exogenous=None, start_p=0, start_q=1, max_p=0, max_q=5, d=1, seasonal=False, start_P=0, D=1, trace=True, error_action='ignore', # ignore if an order does not work. suppress_warnings=True, # ignore convergence warnings. information_criterion='bic', # score to select best model. return_valid_fits=False, # Return all valid fits. stepwise=True) # set to stepwise # Get summary. summary = PyramidWrapper().from_list_dataframe(stepwise_fit, flabel=False) # Print result. print("\n\n") print("=====================") print("Auto Stepwise Summary") print("=====================") print(summary) # Retrieving all fits # ------------------- # Do a grid search using auto_arima. brute_fit = PyramidWrapper().auto(y=y, exogenous=None, trend='c', start_p=0, d=1, start_q=0, max_p=0, max_d=1, max_q=5, start_P=1, D=None, start_Q=1, max_P=2, max_D=1, max_Q=2, max_order=20, m=1, seasonal=False, stationary=False, information_criterion='bic', alpha=0.05, stepwise=False, return_valid_fits=True, test='kpss', seasonal_test='ch', n_jobs=1, start_params=None, method=None, transparams=True, solver='lbfgs', maxiter=50, disp=0, callback=None, offset_test_args=None, seasonal_test_args=None, suppress_warnings=False, error_action='ignore', trace=False, random=False, random_state=None, n_fits=None, out_of_sample_size=0, scoring='mse', scoring_args=None) # Show length print(len(brute_fit)) # Create summary dataframe summary = PyramidWrapper().from_list_dataframe(brute_fit, flabel=False) # Plot result. print("\n\n") print("========================") print("Auto Brute Force Summary") print("========================") print(summary) # Show. plt.show()