Source code for pyamr.core.regression.pyarima2

##############################################################################
# Author: Bernard Hernandez
# Filename: 03-main-create-sari-idxs.py
#
# Description : This file contains differnent statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division 

# Libraries
import sys
import math
import inspect
import warnings
import itertools
import numpy as np
import pandas as pd

# Libraries.
from scipy.stats import norm
from operator import attrgetter



# Libraries wrapper.
from pyamr.core.regression.wreg import RegressionWrapper
from pyamr.core.stats.wbase import getargspecdict

[docs]class PyramidWrapper(RegressionWrapper): def _identifier(self): """This method creates a name that describes de model.""" try: exogenous = self.exogenous is not None except: exogenous = False return "%s%sx%s [%s,%s]" % (self._name, self.order, self.seasonal_order, self.trend, exogenous) # -------------------------------------------------------------------------- # SET VARIABLES # -------------------------------------------------------------------------- def _params_from_summary(self): """Gets parameters from the summary result of the raw object. """ # Format summary summary = self._raw.summary().as_csv() summary = summary.split("\n",1)[1] # Remove first line. summary = summary.replace("\n",",") # Replace \n by comma. # Split in elements. elements = summary.split(",") elements = [self._cast_float(e.strip()) for e in elements] # Create series. d = {} # Add parameters. d['s_jb_value'] = elements[-13] d['s_jb_prob'] = elements[-9] d['s_skew'] = elements[-5] d['s_Q_value'] = elements[-15] d['s_Q_prob'] = elements[-11] d['s_H_value'] = elements[-7] d['s_H_prob'] = elements[-3] d['s_kurtosis'] = elements[-1] d['s_heteroskedasticity'] = elements[-7] d['s_omnibus_value'] = None d['s_omnibus_prob'] = None # Return return d
[docs] def evaluate(self, alpha=0.05): """This method set all the variables into this class. @see: statsmodels.Arima @see: statsmodels.ArimaResults Parameters ---------- alpha : Returns ------- series : """ # Create series. d = {} # Add generic metrics. d['aic'] = self._raw.aic() d['aicc'] = self._raw.aicc() d['bic'] = self._raw.bic() d['hqic'] = self._raw.hqic() d['llf'] = self._raw.arima_res_.llf # Check if it is arima or sarimax and get corresponding values if self._raw.seasonal_order is not None: statistic_values = self._raw.arima_res_.zvalues else: statistic_values = self._raw.arima_res_.tvalues, # Create params information. params_data = zip(self._raw.arima_res_.data.param_names, self._raw.arima_res_.params, self._raw.arima_res_.bse, statistic_values, self._raw.arima_res_.pvalues, self._raw.arima_res_.conf_int(alpha)) # Add coefficients statistics to series. for name, coef, std, tvalue, pvalue, (cil, ciu) in params_data: d['%s_%s'%(name, 'coef')] = coef d['%s_%s'%(name, 'std')] = std d['%s_%s'%(name, 'tvalue')] = tvalue d['%s_%s'%(name, 'tprob')] = pvalue d['%s_%s'%(name, 'cil')] = cil d['%s_%s'%(name, 'ciu')] = ciu # Further statistics d.update(self._resid_stats()) # We cannot use the params_from_summary because this wrappers stores # different models with different summaries. The right way to solve this # is by performing the statistics related with the residuals in the # regression_wrapper._resid_stats. #d.update(self._params_from_summary()) # Return return d
def _init_config(self): """This method initialises the configuration. For some reason the interestin data is in the method __init__ for the object self._raw (ARIMA) and the method fir for the object self._raw.arima_res_.model. TODO: Handle if the instances passed to getargspecdict do not exist. """ # Create dictionary. d = {} # Fill it. d.update(self._getargspecdict(self._raw.arima_res_.model, 'fit')) d.update(self._getargspecdict(self._raw, '__init__')) # Return return d # -------------------------------------------------------------------------- # HELPER METHODS # --------------------------------------------------------------------------
[docs] def as_summary(self, **kwargs): """This method displays the summary. """ # Elements to split by. find = "="*78 # Split and fill. smry = find.join(self._raw.summary(**kwargs).as_text().split(find)[:-1]) smry = smry.split("\n") smry[-6] = smry[-6].replace('=','',5) smry[-5:] = [v.replace(' ', '', 5) for v in smry[-5:]] smry = "\n".join(smry[:-1]) # Variables. om, omp, dw = 0.0, 0.0, self.m_dw jb, jbp = self.m_jb_value, self.m_jb_prob nm, nmp = self.m_nm_value, self.m_nm_prob skew, kurt = self.m_skew, self.m_kurtosis # Add in new lines. smry+= "\n%s\n%s\n%s\n" % ("="*78, "Manual".center(78, ' '), "-"*78) smry+= "Omnibus: %#25.3f Durbin-Watson: %#23.3f\n" % (om, dw) smry+= "Normal (N): %#25.3f Prob(N): %#23.3f\n" % (nm, nmp) smry+= "="*78 + "\n" smry+= "Note that JB, P(JB), skew and kurtosis have different values.\n" smry+= "Note that Prob(Q) tests no correlation of residuals." # Return return smry
# -------------------------------------------------------------------------- # FIT # --------------------------------------------------------------------------
[docs] def resid(self): """Get the residual""" return self._raw.resid()
# --------------------------------------------------------------------------- # PREDICTION # ---------------------------------------------------------------------------
[docs] def get_prediction(self, start=None, end=None, alpha=0.05, **kwargs): """ """ # Compute time time = self._time(start=start, end=end) # Compute prediction mean = self._raw.predict_in_sample(start=start, end=end, **kwargs) # Confidence interval cito = self.conf_int_insample(mean, alpha=0.05) # For some reason the predict_in_sample function implemented in the # module pyramid-arima returns more samples than it should. Moreover # although there is another method call predict for forecasting, the # predict in sample functions seems to do both. Thus such function # is used and the first end-start elemnts are kept. A mean = mean[:time.size] cito = cito[:time.size] # Return return np.column_stack((time, mean, cito)).T
# --------------------------------------------------------------------------- # FIND AUTO # ---------------------------------------------------------------------------
[docs] def from_instance(self, arima, **kwargs): """This method constructs a PyramidWrapper object from pyramid.ARIMA """ # Create object. instance = PyramidWrapper() # Set model. instance._raw = arima # Set residuals as attribute. instance._resid = arima.resid() # Set series with interesting params. instance._result = instance.evaluate(alpha=0.05) # Find arima configuration parameters d = {} d.update(getargspecdict(arima.arima_res_.model, 'fit')) d.update(getargspecdict(arima, '__init__')) # Set configuration parameters instance._config = instance._init_config() # Return return instance
[docs] def auto(self, **kwargs): """This method finds the best arima. @see pyrmid.arima.auto_arima Parameters ---------- Returns ------- """ # Library. from pyramid.arima import auto_arima from pyramid.arima.arima import ARIMA # Compute auto_arima. results = auto_arima(**kwargs) # Return a single PyramidWrapper object. if isinstance(results, ARIMA): return [self.from_instance(results)] # Return an array of PyramidWrapper objects. if isinstance(results, list): return [PyramidWrapper().from_instance(a) for a in results]
if __name__ == '__main__': # pragma: no cover # Import pyramid arima from pyramid.arima import ARIMA as PYARIMA # Libraries. import matplotlib.pyplot as plt # Set pandas configuration. pd.set_option('display.max_colwidth', 14) pd.set_option('display.width', 80) pd.set_option('display.precision', 4) # Constants length = 100 offset = 100 slope = 10 # Create time-series. x = np.arange(length) n = np.random.randn(length)*150 y = slope*x + offset + n # Format exog to be used in pyramid-arima. exog = x.reshape(-1,1) # Create object # This is a wrapper for the methods arima and sarimax within statstools. Take # to account the following things when using this wrapper: # - seasonal_order = None calls arima, otherwise it calls sarimax. # - transparams = True enforces stationarity (not sure). # - the exogenous variable needs to be reshaped. # - the trend is passed in the constructor (insted of the fit method). pyramid = PyramidWrapper(estimator=PYARIMA).fit(y=y[:80], exogenous=None, order=(2,1,2), seasonal_order=(0,0,0,0), trend='c', disp=0) # Print series. print(pyramid.as_series()) # Print summary. print(pyramid.as_summary()) # ----------------- # Save & Load # ----------------- # TODO: There is a TypeError: can't pickle zStatespace objects when the # seasonal order is included in the PyramidWrapper. However, # apparently we are able to save sarimax with seasons using the # SARIMAXWrapper. # File location #fname = '../../examples/saved/pyramid-sample.pickle' # Save #pyramid.save(fname=fname) # Load #pyramid = PyramidWrapper().load(fname=fname) # ----------------- # Predictions # ----------------- # Variables. s, e, d = 50, 120, False # Compute predictions (exogenous?). preds = pyramid.get_prediction(start=s, end=e, dynamic=d, exogenous=None) # Plot truth values. plt.plot(y, color='#A6CEE3', alpha=0.5, marker='o', markeredgecolor='k', markeredgewidth=0.5, markersize=5, linewidth=0.75, label='Observed') # Plot forecasted values. plt.plot(preds[0,:], preds[1,:], color='#FF0000', alpha=1.00, linewidth=2.0, label=pyramid._identifier()) # Plot the confidence intervals. plt.fill_between(preds[0,:], preds[2,:], preds[3,:], color='r', alpha=0.1) # Legend plt.legend() plt.show() # ------------------- # Grid search # ------------------- # Grid parameters. fit_params = {'exogenous': [None, exog], 'y': [y], 'disp': [0], 'order': [(0,1,1)], 'seasonal_order': [None], 'trend': ['n','c','t','ct']} # Get summary. models = pyramid.grid_search(grid_params=fit_params) # Summary summary = pyramid.from_list_dataframe(models).T # Plot result. print("\nSummary") print(summary) # ------------------- # autoarima # ------------------- # Example 1: Retrieving just the best fit. # ---------------------------------------- # Find best arima fit. stepwise_fit = PyramidWrapper().auto(y=y, trend='c', exogenous=None, start_p=0, start_q=0, max_p=0, max_q=5, d=0, seasonal=False, start_P=0, D=1, trace=True, error_action='ignore', # ignore if an order does not work. suppress_warnings=True, # ignore convergence warnings. information_criterion='bic', # score to select best model. return_valid_fits=False, # Return all valid fits. stepwise=True) # set to stepwise # Get summary. summary = PyramidWrapper().from_list_dataframe(stepwise_fit, flabel=False).T # ----------------- # Predictions # ----------------- # Variables. s, e, d = 50, 120, False # Compute predictions (exogenous?). preds = stepwise_fit[0].get_prediction(start=s, end=e, dynamic=d, exogenous=None) # Plot truth values. plt.plot(y, color='#A6CEE3', alpha=0.5, marker='o', markeredgecolor='k', markeredgewidth=0.5, markersize=5, linewidth=0.75, label='Observed') # Plot forecasted values. plt.plot(preds[0,:], preds[1,:], color='#FF0000', alpha=1.00, linewidth=2.0, label=pyramid._identifier()) # Plot the confidence intervals. plt.fill_between(preds[0,:], preds[2,:], preds[3,:], color='r', alpha=0.1) # Legend plt.legend() plt.show() # Print result. print("\nAuto Stepwise Summary") print(summary) # Retrieving all fits # ------------------- # Do a grid search using auto_arima. brute_fit = PyramidWrapper().auto(y=y, exogenous=None, trend='c', start_p=0, d=1, start_q=0, max_p=0, max_d=1, max_q=5, start_P=1, D=None, start_Q=1, max_P=2, max_D=1, max_Q=2, max_order=20, m=1, seasonal=False, stationary=False, information_criterion='bic', alpha=0.05, stepwise=False, return_valid_fits=True, test='kpss', seasonal_test='ch', n_jobs=1, start_params=None, method=None, transparams=True, solver='lbfgs', maxiter=50, disp=0, callback=None, offset_test_args=None, seasonal_test_args=None, suppress_warnings=False, error_action='ignore', trace=False, random=False, random_state=None, n_fits=None, out_of_sample_size=0, scoring='mse', scoring_args=None) print(len(brute_fit)) # Create summary dataframe summary = PyramidWrapper().from_list_dataframe(brute_fit, flabel=False).T # Plot result. print("\nAuto Brute Force Summary") print(summary) # Show. plt.show()