Stationarity

Isai Guizar

Disclaimer: This document is intended for educational purposes only. It is part of my Econometrics courses at Tec de Monterrey.


Stationarity implies that the statistical properties of a time series are constant over time, meaning that the past is informative for predicting the future. It is a key requirement for ensuring the validity of forecasts based on time series models. In this document, we study standard tests used to assess whether a time series is stationary.


import pandas as pd
import numpy  as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import datetime
import yfinance as yf
from pandas_datareader import data as pdr
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.stats.diagnostic import acorr_ljungbox
import plotly.graph_objects as go


1 Stationarity

This is a stationary AR(1) process, $y_t = \rho\, y_{t-1} + u_t$ with $\rho = 0.8$, so it exhibits positive autocorrelation.

# Simulate a stationary AR(1) process: y[t] = rho * y[t-1] + u[t], with |rho| < 1.
np.random.seed(42)  # fix the seed so the simulated path is reproducible
T = 100             # number of observations

y   = np.zeros(T)   # y[0] stays at 0 and serves as the starting value
rho = 0.8           # autoregressive coefficient
u   = np.random.normal(0, 1, T)  # standard-normal shocks (u[0] is never used)
for t in range(1, T):
    y[t] = rho * y[t-1] + u[t]

# Keep the series next to a 'Date' column of T dates generated at frequency
# 'QE', starting from 2000-01-01; the frame uses the default integer index.
data = pd.DataFrame({
    'Date': pd.date_range(start='2000-01-01', periods=T, freq='QE'),
    'y': y,
})

plt.figure(figsize=(7, 5))
plt.plot(data['Date'], data['y'])
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Stationary Time Series (AR1)')
plt.tight_layout()
plt.show()

1.1 White noise

This is a stationary process with no autocorrelation: each observation is an independent draw from a standard normal distribution.

# White noise: T independent standard-normal draws, stored as a new column
# alongside the dates already held in `data`.
y1 = np.random.normal(0, 1, T)
data['y1'] = y1


plt.figure(figsize=(7, 5))
plt.plot(data['Date'], data['y1'])
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Stationary Time Series: White Noise')
plt.tight_layout()
plt.show()

2 Non-stationarity

# Simulate a random walk: the AR(1) recursion y2[t] = rho1 * y2[t-1] + u2[t]
# with a unit coefficient, so every shock is carried forward in full.
np.random.seed(42)  # same seed and same first draw as the AR(1) cell, so the
T = 100             # shocks are identical and only the coefficient differs

y2   = np.zeros(T)  # y2[0] stays at 0 and serves as the starting value
rho1 = 1            # unit root
u2   = np.random.normal(0, 1, T)  # standard-normal shocks (u2[0] is never used)
for t in range(1, T):
    y2[t] = rho1 * y2[t-1] + u2[t]

data['y2'] = y2


plt.figure(figsize=(7, 5))
plt.plot(data['Date'], data['y2'])
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Non-Stationary Time Series: Random Walk')
plt.tight_layout()
plt.show()

3 Statistical tests

# p-values for the simulated AR(1) series `y`, computed in the same order in
# which they are reported below.
adf_p  = adfuller(y)[1]
kpss_p = kpss(y, regression='c', nlags='auto')[1]

# The Ljung-Box table has a single row because only lag 5 is requested.
lb_table = acorr_ljungbox(y, lags=[5], return_df=True)
ljung_p  = lb_table['lb_pvalue'].iat[0]

print(f"ADF p-value: {adf_p:.4f}")
print(f"KPSS p-value: {kpss_p:.4f}")
print(f"Ljung-Box p-value: {ljung_p:.4f}")
ADF p-value: 0.0082
KPSS p-value: 0.1000
Ljung-Box p-value: 0.0000
/var/folders/wl/12fdw3c55777609gp0_kvdrh0000gn/T/ipykernel_76994/551625575.py:3: InterpolationWarning:

The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

def run_tests(series, lags=5, regression='c'):
    """Collect the ADF, KPSS and Ljung-Box p-values for one series.

    Parameters
    ----------
    series : array-like
        One-dimensional time series to test.
    lags : int, default 5
        Single lag order at which the Ljung-Box p-value is read.
    regression : str, default 'c'
        Deterministic component passed to ``kpss``.

    Returns
    -------
    pandas.Series
        The three p-values, labelled 'ADF p-value', 'KPSS p-value' and
        'Ljung-Box p-value', in that order.

    Notes
    -----
    The tests have different null hypotheses: ADF tests for a unit root,
    KPSS tests for stationarity, and Ljung-Box tests for the absence of
    autocorrelation. The KPSS p-value comes from a look-up table; when the
    statistic falls outside the table, statsmodels issues an
    ``InterpolationWarning`` and returns the nearest tabulated p-value.
    """
    adf_p = adfuller(series)[1]
    kpss_p = kpss(series, regression=regression, nlags='auto')[1]
    # Passing a one-element list requests only that lag, so the result has
    # exactly one row.
    ljung_p = acorr_ljungbox(
        series, lags=[lags], return_df=True
    )['lb_pvalue'].iloc[0]
    return pd.Series({
        'ADF p-value': adf_p,
        'KPSS p-value': kpss_p,
        'Ljung-Box p-value': ljung_p,
    })
# White-noise series: read each p-value by its label rather than by position.
test_y1 = run_tests(data['y1'])

print(f"ADF p-value: {test_y1['ADF p-value']:.4f}")
print(f"KPSS p-value: {test_y1['KPSS p-value']:.4f}")
print(f"Ljung-Box p-value: {test_y1['Ljung-Box p-value']:.4f}")
ADF p-value: 0.0000
KPSS p-value: 0.1000
Ljung-Box p-value: 0.4135
/var/folders/wl/12fdw3c55777609gp0_kvdrh0000gn/T/ipykernel_76994/2372992191.py:3: InterpolationWarning:

The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.

# Random-walk series: read each p-value by its label rather than by position.
test_y2 = run_tests(data['y2'])

print(f"ADF p-value: {test_y2['ADF p-value']:.4f}")
print(f"KPSS p-value: {test_y2['KPSS p-value']:.4f}")
print(f"Ljung-Box p-value: {test_y2['Ljung-Box p-value']:.4f}")
ADF p-value: 0.6021
KPSS p-value: 0.0100
Ljung-Box p-value: 0.0000
/var/folders/wl/12fdw3c55777609gp0_kvdrh0000gn/T/ipykernel_76994/2372992191.py:3: InterpolationWarning:

The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.


4 Application to Stock Prices

In practice

Download the stock prices of The Boeing Company (BA) from May 2020 to the most recent available observation. Is the time series stationary? Visually inspect the plot and apply the appropriate statistical tests to assess stationarity.


Download the time series

# Download BA quotes starting 2020-05-01; `end=None` leaves the end date open.
ba_all = yf.download("BA", start="2020-05-01", end=None)
# Keep only the 'Close' data and move the index into a regular column.
# NOTE(review): later cells read ba_pr['BA'], which assumes 'Close' comes back
# as a frame with one column per ticker - confirm for the installed yfinance.
ba_pr  = ba_all['Close'].reset_index()
# Reduce the timestamps to plain calendar dates.
ba_pr['Date']  = pd.to_datetime(ba_pr['Date']).dt.date
YF.download() has changed argument auto_adjust default to True
[*********************100%***********************]  1 of 1 completed




Test for stationarity

# Run both tests on the same BA price column.
ba_close = ba_pr['BA']

adf_ba  = adfuller(ba_close)[1]
kpss_ba = kpss(ba_close, regression='c', nlags='auto')[1]

print(f"ADF p-value: {adf_ba:.4f}")
print(f"KPSS p-value: {kpss_ba:.4f}")
ADF p-value: 0.0300
KPSS p-value: 0.0322

Is it white noise?

# Ljung-Box test on the BA prices; only lag 5 is requested, so the result
# table has a single row.
lb_ba    = acorr_ljungbox(ba_pr['BA'], lags=[5], return_df=True)
ljung_ba = lb_ba['lb_pvalue'].iat[0]

print(f"Ljung-Box p-value: {ljung_ba:.4f}")
Ljung-Box p-value: 0.0000