import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from datetime import datetime
import yfinance as yf
from pandas_datareader import data as pdr
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.stats.diagnostic import acorr_ljungbox
import plotly.graph_objects as go
Stationarity
Disclaimer: This document is intended for educational purposes only. It is part of my Econometrics courses at Tec de Monterrey.
Stationarity implies that the statistical properties of a time series are constant over time, meaning that the past is informative for predicting the future. It is a key requirement for ensuring the validity of forecasts based on time series models. In this document, we study standard tests used to assess whether a time series is stationary.
1 Stationary
This is a stationary process with positive autocorrelation
np.random.seed(42)
T = 100
y = np.zeros(T)
rho = 0.8
u = np.random.normal(0, 1, T)
for t in range(1, T):
y[t] = rho * y[t-1] + u[t]
data = pd.DataFrame({
'y':y}, index=pd.date_range(start='2000-01-01', periods=T, freq='QE'))
data = data.reset_index()
# Give the date column produced by reset_index a descriptive name.
data = data.rename(columns={'index': 'Date'})

# Line plot of the simulated AR(1) series against its dates.
plt.figure(figsize=(7, 5))
plt.plot(data['Date'], data['y'])
plt.xlabel('Date')
plt.ylabel('Value')
plt.title('Stationary Times Series (AR1)')
plt.tight_layout()
# plt.grid(True)
plt.show()
1.1 White noise
This is a stationary process with no autocorrelation
# White noise: T standard-normal draws taken from NumPy's global generator
# (no reseed here, so the draws continue from the generator's current state).
y1 = np.random.normal(loc=0, scale=1, size=T)
data['y1'] = y1

# Line plot of the white-noise series against the same dates.
plt.figure(figsize=(7, 5))
plt.plot(data['Date'], data['y1'])
plt.title('Stationary Times Series: White Noise')
plt.ylabel('Value')
plt.xlabel('Date')
plt.tight_layout()
# plt.grid(True)
plt.show()
2 Non-stationarity
# Simulate a random walk: y2[t] = rho1 * y2[t-1] + u2[t] with rho1 = 1 (unit root),
# so every shock is carried forward permanently.
np.random.seed(42)  # fixed seed so the simulated path is reproducible
T = 100  # number of observations
rho1 = 1  # unit autoregressive coefficient
y2 = np.zeros(T)  # y2[0] stays at 0 and acts as the initial condition
u2 = np.random.normal(0, 1, T)  # standard-normal shocks (u2[0] is drawn but never used)
for t in range(1, T):
    y2[t] = rho1 * y2[t-1] + u2[t]
# Store the random walk next to the other simulated series.
data['y2'] = y2

# Line plot of the random walk against the same dates.
plt.figure(figsize=(7, 5))
plt.plot(data['Date'], data['y2'])
plt.title('Non-Stationary Times Series: Random Walk')
plt.ylabel('Value')
plt.xlabel('Date')
plt.tight_layout()
# plt.grid(True)
plt.show()
3 Statistical tests
# Ljung-Box test on y, evaluated at lag 5 only; keep the p-value from the
# single-row result table.
ljung_table = acorr_ljungbox(y, lags=[5], return_df=True)
ljung_p = ljung_table['lb_pvalue'].iloc[0]

# ADF test: the p-value is the second element of the returned tuple.
adf_result = adfuller(y)
adf_p = adf_result[1]

# KPSS test with a constant ('c') and automatic lag selection; the p-value is
# again the second element of the returned tuple.
kpss_result = kpss(y, regression='c', nlags='auto')
kpss_p = kpss_result[1]

print(f"ADF p-value: {adf_p:.4f}")
print(f"KPSS p-value: {kpss_p:.4f}")
print(f"Ljung-Box p-value: {ljung_p:.4f}")
ADF p-value: 0.0082
KPSS p-value: 0.1000
Ljung-Box p-value: 0.0000
/var/folders/wl/12fdw3c55777609gp0_kvdrh0000gn/T/ipykernel_76994/551625575.py:3: InterpolationWarning:
The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.
def run_tests(series, lags=5):
    """Return the ADF, KPSS and Ljung-Box p-values for *series*.

    Args:
        series: One-dimensional array-like of observations.
        lags: Lag at which the Ljung-Box statistic is evaluated. Defaults
            to 5.

    Returns:
        A pandas Series labelled 'ADF p-value', 'KPSS p-value' and
        'Ljung-Box p-value', in that order. Callers read the values by
        position, so the order must not change.

    Notes:
        The ADF null hypothesis is a unit root; the KPSS null hypothesis
        (with ``regression='c'``) is stationarity around a constant; the
        Ljung-Box null hypothesis is no autocorrelation up to ``lags``.
        KPSS p-values come from a look-up table, so statsmodels emits an
        InterpolationWarning when the statistic falls outside its range.
    """
    adf_p = adfuller(series)[1]
    kpss_p = kpss(series, regression='c', nlags='auto')[1]
    # Request a single lag so the result table has exactly one row.
    ljung_p = acorr_ljungbox(series, lags=[lags], return_df=True)['lb_pvalue'].iloc[0]
    return pd.Series({
        'ADF p-value': adf_p,
        'KPSS p-value': kpss_p,
        'Ljung-Box p-value': ljung_p,
    })


# Apply the three tests to the white-noise series.
test_y1 = run_tests(data['y1'])
print(f"ADF p-value: {test_y1.iloc[0]:.4f}")
print(f"KPSS p-value: {test_y1.iloc[1]:.4f}")
print(f"Ljung-Box p-value: {test_y1.iloc[2]:.4f}")
ADF p-value: 0.0000
KPSS p-value: 0.1000
Ljung-Box p-value: 0.4135
/var/folders/wl/12fdw3c55777609gp0_kvdrh0000gn/T/ipykernel_76994/2372992191.py:3: InterpolationWarning:
The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is greater than the p-value returned.
# Apply the three tests to the random-walk series.
random_walk = data['y2']
test_y2 = run_tests(random_walk)

# run_tests returns the p-values in the order ADF, KPSS, Ljung-Box.
adf_p_y2 = test_y2.iloc[0]
kpss_p_y2 = test_y2.iloc[1]
print(f"ADF p-value: {adf_p_y2:.4f}")
print(f"KPSS p-value: {kpss_p_y2:.4f}")
print(f"Ljung-Box p-value: {test_y2.iloc[2]:.4f}")
ADF p-value: 0.6021
KPSS p-value: 0.0100
Ljung-Box p-value: 0.0000
/var/folders/wl/12fdw3c55777609gp0_kvdrh0000gn/T/ipykernel_76994/2372992191.py:3: InterpolationWarning:
The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.
4 Application to Stock Prices
Download the stock prices of The Boeing Company (BA) from May 2020 to the most recent available observation. Is the time series stationary? Visually inspect the plot and apply the appropriate statistical tests to assess stationarity.
Download the time series
# Download Boeing (BA) data from 2020-05-01 onward; end=None leaves the end
# date open.
ba_all = yf.download(
    "BA",
    start="2020-05-01",
    end=None,
)

# Keep only the closing prices and turn the date index into a regular column.
# NOTE(review): the code below reads the prices from a 'BA' column, so
# ba_all['Close'] is presumably a frame keyed by ticker - confirm against the
# installed yfinance version.
ba_close = ba_all['Close']
ba_pr = ba_close.reset_index()
ba_pr['Date'] = pd.to_datetime(ba_pr['Date']).dt.date
YF.download() has changed argument auto_adjust default to True
[*********************100%***********************] 1 of 1 completed
Test for stationarity
# ADF test on the BA prices; the p-value is the second element of the
# returned tuple.
adf_ba_result = adfuller(ba_pr['BA'])
adf_ba = adf_ba_result[1]

# KPSS test with a constant ('c') and automatic lag selection.
kpss_ba_result = kpss(ba_pr['BA'], regression='c', nlags='auto')
kpss_ba = kpss_ba_result[1]

print(f"ADF p-value: {adf_ba:.4f}")
print(f"KPSS p-value: {kpss_ba:.4f}")
ADF p-value: 0.0300
KPSS p-value: 0.0322
Is it white noise?
# Ljung-Box test on the BA prices, evaluated at lag 5 only; keep the p-value
# from the single-row result table.
ljung_ba_table = acorr_ljungbox(ba_pr['BA'], lags=[5], return_df=True)
ljung_ba = ljung_ba_table['lb_pvalue'].iloc[0]
print(f"Ljung-Box p-value: {ljung_ba:.4f}")
Ljung-Box p-value: 0.0000