# Source code for pyamr.core.stats.stationarity

##############################################################################
# Author: Bernard Hernandez
# Filename: stationarity.py
# Description : This file contains different statistics used in time-series.
#               What it mainly does is to format the output of tests provided
#               by external libraries and return them in a dataframe.
#
# TODO: Move it to a module.
#
###############################################################################
# Forces decimals on divisions.
from __future__ import division

# Libraries
import sys
import numpy as np
import pandas as pd

# Import base wrapper
from pyamr.core.stats.wbase import BaseWrapper
from pyamr.core.stats.wbase import fargs


class StationarityWrapper(BaseWrapper):
    """Wrapper that runs several stationarity tests on a time series.

    In time series analysis, "stationarity" refers to a key assumption about the behavior
    of a time series over time. A stationary time series is one in which statistical properties,
    such as mean, variance, and autocorrelation, remain constant over time. Stationarity is an
    important concept because many time series analysis techniques rely on this assumption for
    their validity. There are different types of stationarity that can be observed in time series
    data.

    The augmented Dickey–Fuller test or ``ADF`` can be used to determine the presence of a unit root.
    When the other roots of the characteristic function lie inside the unit circle the first
    difference of the process is stationary. Due to this property, these are also called
    difference-stationary processes. Since the absence of unit root is not a proof of stationarity,
    the Kwiatkowski–Phillips–Schmidt–Shin or ``KPSS`` test can be used to identify the existence of an
    underlying trend which can also be removed to obtain a stationary process. These are called
    trend-stationary processes. In both, unit-root and trend-stationary processes, the mean can be
    increasing or decreasing over time; however, in the presence of a shock, trend-stationary
    processes (blue) revert to this mean tendency in the long run (deterministic trend) while unit-root
    processes (green) have a permanent impact (stochastic trend). The significance level of the tests
    is usually set to 0.05.

    ================== ================== ========================= ============================
     ADF                KPSS               Outcome                   Note
    ================== ================== ========================= ============================
    ``Non-Stationary`` ``Non-Stationary`` ``Non-Stationary``
    ``Stationary``     ``Stationary``     ``Stationary``
    ``Non-Stationary`` ``Stationary``     ``Trend-Stationary``      Check the de-trended series
    ``Stationary``     ``Non-Stationary`` ``Difference-Stationary`` Check the differenced series
    ================== ================== ========================= ============================

    Each of ADF and KPSS is run twice by :meth:`fit`: once with a constant
    only (``c``) and once with constant and trend (``ct``). The range unit
    root test (``rur``) is run once.
    """

    # --------------------------------------------------------------------------
    #                          overridden methods
    # --------------------------------------------------------------------------
    def evaluate(self, alpha=0.05, **kwargs):
        """Flatten the raw test outputs into a dictionary of named values.

        Reads the tuples stored in ``self._raw`` by :meth:`fit` and gives
        every element a descriptive key (``adf_ct_statistic``,
        ``kpss_c_pvalue``, ``rur_criticalvalue_<level>``, ...).

        Parameters
        ----------
        alpha : float
          Significance level used to derive the boolean ``*_stationary``
          flags from the p-values.

        **kwargs
          Accepted for interface compatibility; not used.

        Returns
        -------
        dict
          Statistics, p-values, lags, number of observations (ADF only),
          critical values and the four ``*_stationary`` flags.
        """
        # Create dictionary.
        d = {}

        def unpack(prefix, result, names):
            """Store result[i] under '<prefix>_<names[i]>' and expand the
            critical-value mapping that follows the named entries."""
            for position, name in enumerate(names):
                d['%s_%s' % (prefix, name)] = result[position]
            for level, value in result[len(names)].items():
                d['%s_criticalvalue_%s' % (prefix, level)] = value

        # Leading scalar entries of each result tuple, in positional order.
        adf_fields = ('statistic', 'pvalue', 'nlags', 'nobs')
        kpss_fields = ('statistic', 'pvalue', 'nlags')

        # Basic statistics (adfuller and kpss). The insertion order is kept
        # stable because it defines the order of the returned dictionary.
        unpack('adf_ct', self._raw['adfuller-ct'], adf_fields)
        unpack('adf_c', self._raw['adfuller-c'], adf_fields)
        unpack('kpss_ct', self._raw['kpss-ct'], kpss_fields)
        unpack('kpss_c', self._raw['kpss-c'], kpss_fields)

        # Extra parameters. The two tests have opposite null hypotheses:
        # ADF assumes a unit root (a small p-value flags stationarity) while
        # KPSS assumes stationarity (a small p-value flags non-stationarity).
        d['root_ct_stationary'] = d['adf_ct_pvalue'] <= alpha
        d['root_c_stationary'] = d['adf_c_pvalue'] <= alpha
        d['trend_ct_stationary'] = d['kpss_ct_pvalue'] > alpha
        d['trend_c_stationary'] = d['kpss_c_pvalue'] > alpha

        # Unit root (Range unit root test)
        unpack('rur', self._raw['rur'], ('statistic', 'pvalue'))

        # Return
        return d

    def as_summary(self, alpha=0.05):
        """Create the text summary to display.

        The table shows, for the ``c`` and ``ct`` regressions, whether the
        series is flagged as stationary by the unit-root test (ADF) and by
        the trend test (KPSS), followed by the p-value in brackets.

        Parameters
        ----------
        alpha : float
          Significance level shown in the header and used to derive the
          flags from the stored p-values.

        Returns
        -------
        str
          The formatted summary.
        """
        # Create summary.
        summary = '      stationarity (alpha={alpha})   \n'
        summary += '==================================\n'
        summary += '          root           trend    \n'
        summary += '----------------------------------\n'
        summary += 'c   {0!s:>5s} ({1:.3f})   {2!s:>5s} ({3:.3f})\n'
        summary += 'ct  {4!s:>5s} ({5:.3f})   {6!s:>5s} ({7:.3f})\n'
        summary += '=================================='

        # The p-values are read as attributes, as the original code did
        # (presumably resolved by the base class from the evaluated results).
        adf_c, kpss_c = self.adf_c_pvalue, self.kpss_c_pvalue
        adf_ct, kpss_ct = self.adf_ct_pvalue, self.kpss_ct_pvalue

        # Format. The flags are derived here with the same rules used in
        # evaluate so that they always agree with the alpha in the header.
        return summary.format(adf_c <= alpha, adf_c,
                              kpss_c > alpha, kpss_c,
                              adf_ct <= alpha, adf_ct,
                              kpss_ct > alpha, kpss_ct,
                              alpha=alpha)

    def fit(self, x, adf_kwargs=None, kpss_kwargs=None, **kwargs):
        """This method studies the stationarity of a given time-series.

        The parameters which can be passed to the adfuller and kpss methods
        are listed below:

        - adf_kwargs = {maxlag, autolag, ...}
        - kpss_kwargs = {nlags, ...}

        The ``regression`` entry is discarded from both dictionaries because
        every test is run for ``regression='c'`` and ``regression='ct'``.

        .. note:: :meth:`evaluate` indexes the results positionally, so
                  options that change the layout of the returned tuples
                  (e.g. ``store`` or ``regresults``) are presumably not
                  supported.

        @see statsmodels.tsa.stattools.adfuller
        @see statsmodels.tsa.stattools.kpss
        @see statsmodels.tsa.stattools.range_unit_root_test

        Parameters
        ----------
        x : array-like
          The time series

        adf_kwargs : dict-like, optional
          The parameters to pass to the adfuller function

        kpss_kwargs : dict-like, optional
          The parameters to pass to the kpss function

        **kwargs
          Accepted for interface compatibility; not used.

        Returns
        -------
        object : The fitted StationarityWrapper (``self``).
        """
        # Library.
        from statsmodels.tsa.stattools import adfuller
        from statsmodels.tsa.stattools import kpss
        from statsmodels.tsa.stattools import range_unit_root_test

        # Empty the class
        self._empty()

        # Work on copies so that neither the caller's dictionaries nor a
        # shared default are modified by the pops below.
        adf_kwargs = dict(adf_kwargs or {})
        kpss_kwargs = dict(kpss_kwargs or {})

        # In this fit a number of scenarios are going to be tested. The
        # term that varies within scenarios is regression, as such, if
        # it is passed it will be deleted.
        adf_kwargs.pop('regression', None)
        kpss_kwargs.pop('regression', None)

        # Update the configuration
        self._config.update({'adf_%s' % k: v for k, v in adf_kwargs.items()})
        self._config.update({'kpss_%s' % k: v for k, v in kpss_kwargs.items()})

        # Initialize raw data.
        self._raw = {'x': x}

        # Compute adfuller, kpss and the range unit root test.
        self._raw['adfuller-ct'] = adfuller(x=x, regression='ct', **adf_kwargs)
        self._raw['adfuller-c'] = adfuller(x=x, regression='c', **adf_kwargs)
        self._raw['kpss-ct'] = kpss(x=x, regression='ct', **kpss_kwargs)
        self._raw['kpss-c'] = kpss(x=x, regression='c', **kpss_kwargs)
        self._raw['rur'] = range_unit_root_test(x=x)

        # Evaluate the model
        # NOTE(review): evaluate is a method of this class, so this guard is
        # truthy unless an instance attribute shadows it - confirm against
        # BaseWrapper before simplifying.
        if self.evaluate:
            self._result = self.evaluate()

        # Return the fitted wrapper.
        return self


if __name__ == '__main__':

    # Libraries (only needed when the module is run as a script).
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    # ----------------------------
    # basic configuration
    # ----------------------------
    # Pandas display options.
    for option, value in (('display.max_colwidth', 14),
                          ('display.width', 150),
                          ('display.precision', 4)):
        pd.set_option(option, value)

    # Matplotlib defaults. The groups in the loop are listed without any
    # override.
    mpl.rc('lines', linewidth=0.35)
    mpl.rc('xtick', labelsize=6)
    mpl.rc('ytick', labelsize=6)
    mpl.rc('legend', fontsize=6)
    for group in ('grid', 'figure', 'axes'):
        mpl.rc(group)
    mpl.rc('font', size=7)

    # Font used for the legends.
    font = dict(family='monospace', weight='normal', size=6)

    # ----------------------------
    # synthetic data
    # ----------------------------
    # Constants
    length, offset, slope = 100, 100, 4

    # Time index and uniform noise.
    steps = np.arange(length)
    noise = np.random.rand(length)

    # Series: noise only, constant, trend, constant plus trend, and the
    # latter with a level shift of -offset from sample 50 onwards.
    y_n = noise
    y_c = np.ones(length) * offset
    y_t = steps * slope + noise
    y_ct = steps * slope + offset + noise * 20
    y_r = np.concatenate((y_ct[:50], y_ct[50:] - offset))

    # ----------------------------
    # stationarity objects
    # ----------------------------
    # .. note:: The constant series (y_c) is not fitted. Including it
    #           produces the following error: ValueError: cannot convert
    #           float NaN to integer.
    stationarity_n = StationarityWrapper().fit(x=y_n)
    stationarity_t = StationarityWrapper().fit(x=y_t)
    stationarity_r = StationarityWrapper().fit(x=y_r)
    stationarity_ct = StationarityWrapper().fit(
        x=y_ct, adf_kwargs={'maxlag': 12, 'autolag': 'BIC'})

    # Print the series, the summary and the identifier of one wrapper.
    for report in ('as_series', 'as_summary', '_identifier'):
        print("\n")
        print(getattr(stationarity_ct, report)())

    # ----------------
    # plot
    # ----------------
    # Create figure
    fig, axes = plt.subplots(3, 2, figsize=(10, 4))
    axes = axes.flatten()

    # Line and marker style shared by every panel.
    line_style = dict(color='#A6CEE3', alpha=0.5, marker='o',
                      markeredgecolor='k', markeredgewidth=0.5,
                      markersize=2, linewidth=0.75)

    # (axes position, values, fitted wrapper). Position 1 was reserved for
    # the constant series, which is not fitted (see note above).
    panels = (
        (0, y_n, stationarity_n),
        (2, y_t, stationarity_t),
        (3, y_ct, stationarity_ct),
        (4, y_r, stationarity_r),
    )

    # Plot each series using its stationarity summary as the legend label.
    for position, values, wrapper in panels:
        axes[position].plot(values, label=wrapper.as_summary(), **line_style)

    # Add grid
    for ax in axes:
        ax.grid(color='gray', linestyle='--', linewidth=0.2, alpha=0.5)

    # Add legend
    for ax in axes:
        ax.legend(prop=font, loc=2)

    # Figure title
    plt.suptitle("Study of Stationarity")

    # -----------------
    # Save and load
    # -----------------
    # File location
    # fname = '../examples/saved/stationarity-sample.pickle'

    # Save
    # stationarity_ct.save(fname=fname)

    # Load
    # stationarity_ct = StationarityWrapper().load(fname=fname)

    # Show
    # plt.show()