# !pip install yfinance arch
from arch import arch_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from datetime import datetime
import yfinance as yf
from pandas_datareader import data as pdr
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_errorGARCH Models
Disclaimer: This document is intended for educational purposes only. It is part of my Econometrics courses at Tec de Monterrey.
In this document, we explore volatility modeling and forecasting using the Generalized Autoregressive Conditional Heteroskedasticity (GARCH) framework. Volatility clustering is a common characteristic of financial return series, where large changes tend to be followed by large changes, and small by small ones, regardless of direction. To capture this behavior, we estimate a GARCH(1,1) model using daily returns of the S&P 500 index.
stock_all = yf.download("^GSPC", start="2021-01-02", end="2025-01-01")
stock_pr = stock_all['Close'].reset_index()
stock_pr['Date'] = pd.to_datetime(stock_pr['Date']).dt.date
stock_pr['ret'] = np.log(stock_pr['^GSPC']).diff() * 100
stock_pr = stock_pr.dropna()YF.download() has changed argument auto_adjust default to True
[*********************100%***********************] 1 of 1 completed
plt.figure(figsize=(7, 5))
plt.plot(stock_pr['Date'], stock_pr['ret'], linewidth=1)
plt.title('Daily log rate of returns ')
plt.xlabel('Date')
plt.ylabel('Percent')
plt.tight_layout()
plt.show()1 Fit the GARCH model
The model of interest is:
\[ Ret_t = \mu + \varepsilon_t \] with \[ \sigma^2_t=\omega +\alpha_1\varepsilon^2_{t-1}+\beta_1\sigma^2_{t-1} \] Note that we specified no lagged predictors in equation for the returns, why?
model = arch_model(stock_pr['ret'], vol='GARCH', p=1, q=1, mean='Constant', dist='normal')
model_result = model.fit(disp="off")
print(model_result.summary()) Constant Mean - GARCH Model Results
==============================================================================
Dep. Variable: ret R-squared: 0.000
Mean Model: Constant Mean Adj. R-squared: 0.000
Vol Model: GARCH Log-Likelihood: -1372.39
Distribution: Normal AIC: 2752.79
Method: Maximum Likelihood BIC: 2772.43
No. Observations: 1004
Date: Sun, Jun 01 2025 Df Residuals: 1003
Time: 12:25:29 Df Model: 1
Mean Model
==========================================================================
coef std err t P>|t| 95.0% Conf. Int.
--------------------------------------------------------------------------
mu 0.0728 2.804e-02 2.598 9.379e-03 [1.789e-02, 0.128]
Volatility Model
=============================================================================
coef std err t P>|t| 95.0% Conf. Int.
-----------------------------------------------------------------------------
omega 0.0171 1.355e-02 1.260 0.208 [-9.483e-03,4.362e-02]
alpha[1] 0.0646 3.270e-02 1.975 4.826e-02 [4.937e-04, 0.129]
beta[1] 0.9186 4.365e-02 21.045 2.555e-98 [ 0.833, 1.004]
=============================================================================
Covariance estimator: robust
The two coefficients in the GARCH model (the coefficients on \(\alpha_1\) and \(\beta_1\)) are both individually statistically significant at the 5% significance level.
2 Diagnotic checking
residuals = model_result.std_resid.dropna()
plt.figure(figsize=(7, 5))
plot_acf(residuals, zero =False, lags=40, alpha=0.05)
plt.title("ACF of Standardized Residuals")
plt.xlabel("Lag")
plt.ylabel("Autocorrelation")
plt.tight_layout()
plt.show()<Figure size 672x480 with 0 Axes>
print("\nLjung-Box Test Residuals:")
print(acorr_ljungbox(residuals, lags=[10], return_df=True))
Ljung-Box Test Residuals:
lb_stat lb_pvalue
10 7.181708 0.708186
sq_residuals = residuals ** 2
plt.figure(figsize=(7, 5))
plot_acf(sq_residuals, zero=False, lags=40, alpha=0.05)
plt.title("ACF of Squared Standardized Residuals")
plt.xlabel("Lag")
plt.ylabel("Autocorrelation")
plt.tight_layout()
plt.show()<Figure size 672x480 with 0 Axes>
print("\nLjung-Box Test Squared Residuals:")
print(acorr_ljungbox(sq_residuals, lags=[10], return_df=True))
Ljung-Box Test Squared Residuals:
lb_stat lb_pvalue
10 3.94348 0.949861
3 Forecasting
conditional_vol = model_result.conditional_volatility
f_horizon = 5
forecast = model_result.forecast(horizon=f_horizon)
forecast_vol = np.sqrt(forecast.variance.iloc[-1].values)
last_date = stock_pr['Date'].iloc[-1]
forecast_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=f_horizon, freq='B')
plt.figure(figsize=(10, 5))
plt.plot(stock_pr['Date'], conditional_vol*np.sqrt(252), label='Estimated Volatility')
plt.plot(forecast_dates, forecast_vol*np.sqrt(252), color='orange', label='Forecasted Volatility')
plt.title(' Estimated and Forecasted Annualized Volatility')
plt.xlabel('Date')
plt.ylabel('Volatility (%)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()annualized_vol_forecast = forecast_vol * np.sqrt(252)
vol_table = pd.DataFrame({
'Date': forecast_dates,
'Daily Volatility (%)': forecast_vol.round(4),
'Annualized Volatility (%)': annualized_vol_forecast.round(4)
})
# Display table
print(vol_table) Date Daily Volatility (%) Annualized Volatility (%)
0 2025-01-01 0.9497 15.0753
1 2025-01-02 0.9506 15.0909
2 2025-01-03 0.9516 15.1063
3 2025-01-06 0.9526 15.1214
4 2025-01-07 0.9535 15.1363