#install.packages(c("quantmod", "PerformanceAnalytics", "ggplot2", "dplyr", "zoo", "quarks", "rugarch"))
#remove hash if install of libraries needed

library(quantmod)
library(PerformanceAnalytics)
library(ggplot2)
library(dplyr)
library(zoo)
library(quarks)
library(rugarch)

#Stock data of nvidia + closing prices
getSymbols("NVDA", from = "2021-01-01", to = "2026-05-01")
stock_prices = Cl(NVDA)

#Calculate log returns and clean up
returns = na.omit(CalculateReturns(stock_prices, method = "log"))
returns_clean = as.numeric(returns)

summary(returns_clean) #Display returns

##### VaR Methods

var_h = VaR(returns_clean, p=0.95, method="historical") #Historical

var_p = VaR(returns_clean, p=0.95, method="gaussian") #Parametric

#Monte Carlo
mean_nvda = mean(returns_clean) #calculate mean
sd_nvda = sd(returns_clean)#calculate standard dev

set.seed(123)
simulated_returns = rnorm(10000, mean = mean_nvda, sd = sd_nvda)

var_mc = quantile(simulated_returns, 0.05) #monte carlo

var_comp = data.frame(
  method = c("Historical Simulation", "Parametric Method", "Monte Carlo"),
  Var_95 = c(var_h, var_p, var_mc)
)

print("VaR Comparison Results:")
print(var_comp)

#results:
#Historical: -0.04838 (~4.83%)
#Parametric: -0.050784 (~5.08%)
#Monte Carlo: -0.050706 (~5.07%)

## Visualisation

past_data <- as.numeric(returns_clean)

p1 <- ggplot(data.frame(returns = past_data), aes(x = returns)) +
  geom_histogram(aes(y = ..density..), bins = 50, fill = "lightblue", color = "black") +
  geom_density(color = "red", size = 1) +
  geom_vline(xintercept = var_h, color = "blue", size = 1.2, linetype = "dashed") +
  geom_vline(xintercept = var_p, color = "red", size = 0.8, linetype = "solid") +
  geom_vline(xintercept = var_mc, color = "green", size = 0.8, linetype = "dashed") +
  labs(title = "VaR Comparison - 95% Confidence Level",
       x = "Daily Returns", y = "Density") +
  annotate("text", x = var_h, y = 5, label = "Historical", 
           angle = 90, hjust = -0.5, color = "blue") +
  annotate("text", x = var_p, y = 5, label = "Parametric", 
           angle = 90, vjust = -0.5, color = "red") +
  annotate("text", x = var_mc, y = 7, label = "MC", 
           angle = 90, vjust = 1.5, color = "green") +
  theme_minimal()

print(p1)

###### VaR threshold vis

vp <- as.numeric(var_p) #Parametric chosen as it's assuming more losses. Value almost identical 
#to Monte Carlo therefore functionally the same
returns_vector <- as.numeric(returns_clean)

#data frame
returns_df <- data.frame(
  Date = index(returns_clean),
  Returns = returns_vector,
  VaR_historical = vp,
  Exceedance = returns_vector < vp
)

# Plot
p2 <- ggplot(returns_df, aes(x = Date, y = Returns)) +
  geom_line(color = "darkgray") +
  geom_point(data = returns_df[returns_df$Exceedance, ], 
             aes(x = Date, y = Returns), 
             color = "red", size = 1, alpha = 0.6) +
  geom_hline(yintercept = vp, 
             color = "blue", 
             linetype = "dashed", 
             size = 1) +
  labs(title = "NVDA Returns with VaR Breaches (Historical Simulation)",
       subtitle = paste("95% VaR Threshold:", round(vp * 100, 2), "%"),
       x = "Date", y = "Daily Returns") +
  theme_minimal() +
  annotate("text", 
           x = min(returns_df$Date), 
           y = vp, 
           label = "VaR Threshold", 
           hjust = 0, 
           vjust = -0.5, 
           color = "blue")

p2

##### CVaR calculations

#Expected Shortfall (CVaR)
cvar_h = ES(returns_clean, p= 0.95, method = "historical")
cvar_p = ES(returns_clean, p= 0.95, method = "gaussian")
#monte carlo cvar
mc_tail = simulated_returns[simulated_returns <= var_mc]
cvar_mc = mean(mc_tail)

cvar_comp <- data.frame(
  Method = c("Historical Simulation", "Parametric (Gaussian)", "Monte Carlo"),
  CVaR_95 = c(as.numeric(cvar_h), as.numeric(cvar_p), as.numeric(cvar_mc))
)

print("Conditional VaR (Expected Shortfall) Results:")
print(cvar_comp)

#Results:
#Method     CVaR_95
#1 Historical Simulation -0.06895066 (~6.89%)
#2 Parametric (Gaussian) -0.06420359 (~6.42%)
#3           Monte Carlo -0.06415743 (~6.42%)

##### Backtesting
#number of exceptions

var_threshold <- as.numeric(var_h)
returns_vector <- as.numeric(returns_clean)  # Convert xts to numeric
exceptions <- sum(returns_vector < var_threshold)
total_obs <- length(returns_clean)
exception_rate <- exceptions / total_obs

# Calculate expected exceptions (5% for 95% VaR)
expected_exceptions <- total_obs * 0.05

# Calculate exception rate as percentage
exception_rate_pct <- exception_rate * 100

backtest_results <- data.frame(
  Metric = c("Total Observations", "Expected Exceptions", "Actual Exceptions", 
             "Exception Rate (%)", "VaR Confidence Level"),
  Value = c(total_obs, round(expected_exceptions, 2), exceptions, 
            round(exception_rate_pct, 2), "95%")
)

print("Backtesting Results:")
print(backtest_results)

##Kupiec Test

kupiec_test <- binom.test(exceptions, total_obs, p = 0.05, 
                          alternative = "two.sided")

print("Kupiec Proportion of Failures Test:")
print(kupiec_test)

#Visualization of Kupiec Test
kupiec_plot <- data.frame(
  Exception_Rate = seq(0, 0.12, 0.001),
  Density = dnorm(seq(0, 0.12, 0.001), 
                  mean = 0.05053, 
                  sd = sqrt(0.05053 * (1-0.05053) / 1336))
)

ggplot() +
  geom_area(data = kupiec_plot, aes(x = Exception_Rate, y = Density), 
            fill = "lightblue", alpha = 0.5) +
  geom_vline(xintercept = 0.05, color = "red", size = 1.2, linetype = "dashed") +
  geom_vline(xintercept = 0.05053, color = "darkgreen", size = 1.2) +
  geom_vline(xintercept = c(0.0360, 0.0687), color = "blue", size = 0.8, linetype = "dotted") +
  annotate("rect", xmin = 0.0360, xmax = 0.0687, ymin = 0, ymax = 30, 
           alpha = 0.2, fill = "blue") +
  annotate("text", x = 0.05, y = 28, label = "Expected Rate (5%)", 
           color = "red", hjust = -0.1) +
  annotate("text", x = 0.05053, y = 30, label = "Observed Rate (5.05%)", 
           color = "darkgreen", hjust = -0.1) +
  annotate("text", x = 0.052, y = 25, label = "95% Confidence Interval", 
           color = "blue", hjust = -0.1) +
  labs(title = "Kupiec Test: VaR Exception Rate Distribution",
       subtitle = "Historical Simulation Method (95% Confidence Level)",
       x = "Exception Rate", y = "Density") +
  theme_minimal()

Results:

Value at Risk

## [1] "VaR Comparison Results:"
##                  method      Var_95
## 1 Historical Simulation -0.04837900
## 2     Parametric Method -0.05078457
## 3           Monte Carlo -0.05070649

Visualization of Data

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Backtesting

##                 Metric Value
## 1   Total Observations  1336
## 2  Expected Exceptions  66.8
## 3    Actual Exceptions    67
## 4   Exception Rate (%)  5.01
## 5 VaR Confidence Level   95%

Kupiec Test:

## [1] "Kupiec Proportion of Failures Test:"
## 
##  Exact binomial test
## 
## data:  exceptions and total_obs
## number of successes = 67, number of trials = 1336, p-value = 0.95
## alternative hypothesis: true probability of success is not equal to 0.05
## 95 percent confidence interval:
##  0.03907353 0.06325387
## sample estimates:
## probability of success 
##              0.0501497

Visualize the Kupiec Test:

Conditional Value at Risk

## [1] "Conditional VaR (Expected Shortfall) Results:"
##                  Method     CVaR_95
## 1 Historical Simulation -0.06895066
## 2 Parametric (Gaussian) -0.06420359
## 3           Monte Carlo -0.06422956