library(PerformanceAnalytics)
library(quantmod)
library(ggplot2)
library(forecast)
library(tseries)
library(FinTS)
library(rugarch)
library(dplyr)
library(gridExtra)
library(stats)
library(fGarch)
library(xts)
library(evir)
library(tidyr)

getSymbols("AAPL", src = "yahoo", from = "2015-01-01")
## [1] "AAPL"
closing_prices <- AAPL$AAPL.Adjusted

1 Exploratory Data Analysis

par(mfrow = c(1,2))

hist(closing_prices, main = "Distribution of Closing Prices", col = "steelblue", xlab = "Price")
plot(closing_prices, type='l', main = "Closing Prices over Time", col = "blue", ylab = "Closing Prices")

par(mfrow = c(1,1))

plot(decompose(ts(closing_prices, frequency = 30)))

1.1 Assessing Stationarity

kpss.test(closing_prices)
## 
##  KPSS Test for Level Stationarity
## 
## data:  closing_prices
## KPSS Level = 26.094, Truncation lag parameter = 9, p-value = 0.01

Closing Prices are not stationarity. Using log returns to achieve stationarity.

returns <- na.omit(diff(log(closing_prices)))

adf.test(returns)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  returns
## Dickey-Fuller = -13.495, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(returns)
## 
##  KPSS Test for Level Stationarity
## 
## data:  returns
## KPSS Level = 0.05924, Truncation lag parameter = 9, p-value = 0.1

Log returns are stationary.

2 Predictive Modelling

2.1 Fitting a Seasonal ARIMA Model

Identifying an appropriate SARIMA model and validating its fit by analyzing the residuals.

best_fit <- auto.arima(returns, seasonal = FALSE, stepwise = FALSE, approximation = FALSE)

checkresiduals(best_fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,1) with non-zero mean
## Q* = 17.021, df = 5, p-value = 0.00446
## 
## Model df: 5.   Total lags used: 10

Model is a good fit.

2.2 Fitting an ARIMAX Model

Enhancing the model by adding external market factors (S&P 500, VIX) to build an ARIMAX model.

getSymbols(c("SPY", "VIX"), from = "2015-01-01")
## [1] "SPY" "VIX"
SPY_close  <- SPY$SPY.Adjusted
VIX_close  <- VIX$VIX.Adjusted

spy_ret  <- na.omit(diff(log(SPY_close)))
vix_ret  <- na.omit(diff(log(VIX_close)))

spy_lag1 <- lag(spy_ret, 1)

xreg <- na.omit(cbind(spy_ret, spy_lag1, vix_ret))
y <- returns[index(xreg)]

fit_arimax <- auto.arima(y, xreg = xreg)

checkresiduals(fit_arimax)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(0,0,0) errors
## Q* = 9.4136, df = 10, p-value = 0.4934
## 
## Model df: 0.   Total lags used: 10

2.3 Identifying the Best Fit Model.

accuracy(forecast(best_fit, h=30))
##                        ME       RMSE        MAE MPE MAPE      MASE        ACF1
## Training set 1.153351e-05 0.01812921 0.01263537 NaN  Inf 0.6913076 0.001816722
accuracy(forecast(fit_arimax, xreg = xreg, h=30))
##                        ME       RMSE         MAE MPE MAPE      MASE      ACF1
## Training set 0.0001809882 0.01147114 0.007844145 Inf  Inf 0.5343221 0.0341105

ARIMAX has lower RMSE, MAE, and MASE than baseline ARIMA; better forecast accuracy.

arch_test <- ArchTest(residuals(fit_arimax), lags = 12)
arch_test
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  residuals(fit_arimax)
## Chi-squared = 5.8694, df = 12, p-value = 0.9225

No significant ARCH effects in the ARIMAX residuals ie conditional volatility is roughly constant.

The ARIMAX model adequately captures the return dynamics without requiring a GARCH model.

3 Extreme Value Analysis

Forecasting volatility and computing Value-at-Risk (VaR) and Expected Shortfall (ES).

3.1 Parametric VaR

Assuming Student-t Distribution with 5 degrees of freedom.

sigma <- sd(residuals(fit_arimax))

probs <- c(0.95, 0.99, 0.999)
mu <- 0
VaR <- -qt(probs, df = 5) * sigma + mu
ES  <- -sigma * (dt(qt(probs, df=5), df=5) / (1 - probs)) *
       ((5 + qt(probs, df=5)^2)/(5 - 1)) + mu

VaR <- VaR * -100
ES <- ES * -100

risk_table <- data.frame(
  Confidence = probs,
  VaR = round(VaR, 5),
  ES = round(ES, 5)
)

3.2 Historical VaR

Using actual historical returns.

hist_VaR_95 <- -quantile(returns, probs = 0.05) * 100
hist_VaR_99 <- -quantile(returns, probs = 0.01) * 100
hist_VaR_999 <- -quantile(returns, probs = 0.001) * 100

hist_ES_95 <- -mean(returns[returns <= quantile(returns, 0.05)]) * 100
hist_ES_99 <- -mean(returns[returns <= quantile(returns, 0.01)]) * 100
hist_ES_999 <- -mean(returns[returns <= quantile(returns, 0.001)]) * 100

risk_table_hist <- data.frame(
  Confidence = c(0.95, 0.99, 0.999),
  Historical_VaR = c(hist_VaR_95, hist_VaR_99, hist_VaR_999),
  Historical_ES  = c(hist_ES_95, hist_ES_99, hist_ES_999)
)

3.3 Monte-Carlo VaR

Using 10000 simulated returns generated from the ARIMAX model.

set.seed(123)

probs <- c(0.95, 0.99, 0.999)

sim_returns <- sample(residuals(fit_arimax), size = 10000, replace = TRUE)

mc_VaR <- -quantile(sim_returns, probs = 1 - probs)  * 100

mc_ES <- sapply(1 - probs, function(p) -mean(sim_returns[sim_returns <= quantile(sim_returns, p)]))  * 100

mc_table <- data.frame(
  Confidence = probs,
  MonteCarlo_VaR = mc_VaR,
  MonteCarlo_ES  = mc_ES
)

3.4 Conclusion

Key insight:

  • Parametric method tends to overestimate tail losses if returns are non-normal.

  • Historical and Monte-Carlo methods provide more data-driven risk estimates, capturing real or simulated extreme events.

  • Using all three together gives a comprehensive risk profile for the portfolio.

risk_combined <- data.frame(
  Confidence = c(0.95, 0.99, 0.999),
  Parametric_VaR   = round(VaR, 5),
  Parametric_ES    = round(ES, 5),
  Historical_VaR   = c(hist_VaR_95, hist_VaR_99, hist_VaR_999),
  Historical_ES    = c(hist_ES_95, hist_ES_99, hist_ES_999),
  MonteCarlo_VaR   = mc_table$MonteCarlo_VaR,
  MonteCarlo_ES    = mc_table$MonteCarlo_ES
)

risk_long <- risk_combined %>%
  pivot_longer(
    cols = -Confidence,
    names_to = c("Method", "Measure"),
    names_sep = "_",
    values_to = "Value"
  )


ggplot(risk_long, aes(x = as.factor(Confidence), y = Value, fill = Method, shape = Measure, color = Method)) +
  geom_bar(data = subset(risk_long, Measure == "VaR"),
           stat = "identity", position = position_dodge(width = 0.8)) +
  geom_point(data = subset(risk_long, Measure == "ES"),
             position = position_dodge(width = 0.8), size = 3) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Comparison of Parametric, Historical, and Monte-Carlo VaR & ES",
    x = "Confidence Level",
    y = "Potential Loss of Portfolio Value (%)"
  ) +
  theme_minimal(base_size = 13)

risk_long
## # A tibble: 18 × 4
##    Confidence Method     Measure Value
##         <dbl> <chr>      <chr>   <dbl>
##  1      0.95  Parametric VaR      2.31
##  2      0.95  Parametric ES       3.32
##  3      0.95  Historical VaR      2.80
##  4      0.95  Historical ES       4.25
##  5      0.95  MonteCarlo VaR      1.80
##  6      0.95  MonteCarlo ES       2.75
##  7      0.99  Parametric VaR      3.86
##  8      0.99  Parametric ES       5.11
##  9      0.99  Historical VaR      4.93
## 10      0.99  Historical ES       6.83
## 11      0.99  MonteCarlo VaR      3.05
## 12      0.99  MonteCarlo ES       4.28
## 13      0.999 Parametric VaR      6.76
## 14      0.999 Parametric ES       8.62
## 15      0.999 Historical VaR      9.89
## 16      0.999 Historical ES      11.6 
## 17      0.999 MonteCarlo VaR      6.69
## 18      0.999 MonteCarlo ES       6.69