Copula-Based Portfolio Risk Analysis

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
suppressPackageStartupMessages({
  library(quantmod); library(dplyr); library(ggplot2)
  library(gamlss); library(VineCopula); library(copula)
})

1. Data & Monthly Loss Rates

get_monthly_losses <- function(ticker) {
  raw <- getSymbols(ticker, src = "yahoo", from = "2021-03-01", to = "2026-03-01",
                    auto.assign = FALSE)[, 6]
  monthly <- as.numeric(Cl(to.monthly(raw)))
  round(-diff(monthly) / head(monthly, -1) * 100, 4)
}

nvda_loss   <- get_monthly_losses("NVDA")
aramco_loss <- get_monthly_losses("2222.SR")
byd_loss    <- get_monthly_losses("BYDDF")

losses <- data.frame(NVDA = nvda_loss, Aramco = aramco_loss, BYD = byd_loss) |>
  na.omit()

cat("Observations:", nrow(losses), "\n")

## Observations: 59

summary(losses)

##       NVDA             Aramco              BYD         
##  Min.   :-36.344   Min.   :-12.3839   Min.   :-36.922  
##  1st Qu.:-14.755   1st Qu.: -2.8376   1st Qu.: -6.975  
##  Median : -5.620   Median :  0.3610   Median : -2.511  
##  Mean   : -5.539   Mean   : -0.1617   Mean   : -1.554  
##  3rd Qu.:  4.827   3rd Qu.:  2.9672   3rd Qu.:  6.653  
##  Max.   : 32.027   Max.   :  7.7175   Max.   : 19.610

2. Marginal Distribution Fitting

fit_best <- function(x, name) {
  families <- list(NO = NO, TF = TF, GU = GU, LO = LO)
  fits <- lapply(families, \(f) try(gamlss(x ~ 1, family = f, trace = FALSE), silent = TRUE))
  aics <- sapply(fits, \(m) if (inherits(m, "gamlss")) AIC(m) else Inf)
  best_name <- names(which.min(aics))
  cat(name, "-> best family:", best_name, "| AIC:", round(min(aics), 2), "\n")
  fits[[best_name]]
}

fit_nvda   <- fit_best(losses$NVDA,   "NVDA")

## NVDA -> best family: NO | AIC: 488.55

fit_aramco <- fit_best(losses$Aramco, "Aramco")

## Aramco -> best family: GU | AIC: 339.47

fit_byd    <- fit_best(losses$BYD,    "BYD")

## BYD -> best family: GU | AIC: 456.53

3. PIT — Parametric Uniform Transforms

get_pit <- function(model, x) {
  fam  <- model$family[1]
  pfun <- get(paste0("p", fam))
  mu    <- fitted(model, "mu")
  sigma <- fitted(model, "sigma")
  if ("nu" %in% model$parameters) {
    nu <- fitted(model, "nu")
    return(pfun(x, mu = mu, sigma = sigma, nu = nu))
  }
  pfun(x, mu = mu, sigma = sigma)
}

pit_list <- list(NVDA = get_pit(fit_nvda,   losses$NVDA), 
                 Aramco = get_pit(fit_aramco,   losses$Aramco),
                 BYD = get_pit(fit_byd,   losses$BYD))

par(mfrow = c(1, 3))
for (name in names(pit_list)) {
  hist(pit_list[[name]], breaks = 10, freq = FALSE, col = "steelblue", border = "white",
       main = paste("PIT –", name), xlab = "u", ylim = c(0, 2))
  abline(h = 1, col = "red", lty = 2, lwd = 2)
}

par(mfrow = c(1, 1))

4. Pseudo-Observations (Empirical Uniform Transform)

# pobs is the standard non-parametric route for copula estimation
U <- pobs(as.matrix(losses))
colnames(U) <- c("NVDA", "Aramco", "BYD")

par(mfrow = c(1, 3))
for (i in 1:3) {
  hist(U[, i], breaks = 10, freq = FALSE, col = "steelblue", border = "white",
       main = colnames(U)[i], xlab = "u", ylim = c(0, 2))
  abline(h = 1, col = "red", lty = 2, lwd = 2)
}

par(mfrow = c(1, 1))

5. C-Vine Construction

The general pair copula decomposition (PCC) for three variables without the simplifying assumption is: \[f(x_1,x_2,x_3) = f_1 \cdot f_2 \cdot f_3 \cdot c_{12}(F_1,F_2) \cdot c_{13}(F_1,F_3) \cdot c_{23|1}(F_{2|1}, F_{3|1}| x_1)\] where the conditional copula \(c_{23;1}\) and its parameters may depend on the specific value of the root \(x_1\).

To minimize this dependency and simplify the model, we select the root node as the variable with the highest sum of absolute pairwise correlations (Kendall’s \(\tau\)). This captures the most significant dependence in the first tree.

By applying the Simplifying Assumption, we assume the copula type and parameters of \(c_{23|1}\) are constant regardless of \(x_1\), yielding the operational C-vine formula: \[f(x_1,x_2,x_3) = f_1 \cdot f_2 \cdot f_3 \cdot c_{12}(F_1,F_2) \cdot c_{13}(F_1,F_3) \cdot c_{23|1}(F_{2|1}, F_{3|1})\]

tau_matrix <- TauMatrix(U)

tau_sums <- colSums(abs(tau_matrix) - diag(3))
cat("Sum of |tau| per variable:\n")

## Sum of |tau| per variable:

print(round(tau_sums, 4))

##   NVDA Aramco    BYD 
## 0.1788 0.0725 0.2221

root <- names(which.max(tau_sums))
cat("Suggested root:", root, "\n")

## Suggested root: BYD

# Tree 1: unconditional pairs (BYD as root)
bc_byd_nvda   <- BiCopSelect(U[,"BYD"], U[,"NVDA"],   familyset = NA, selectioncrit = "AIC",
                              indeptest = TRUE, level = 0.05)
bc_byd_aramco <- BiCopSelect(U[,"BYD"], U[,"Aramco"], familyset = NA, selectioncrit = "AIC",
                              indeptest = TRUE, level = 0.05)

# H-functions: F(NVDA|BYD), F(Aramco|BYD)
h_nvda_byd   <- pmax(pmin(BiCopHfunc2(U[,"BYD"], U[,"NVDA"],   bc_byd_nvda),   1-1e-10), 1e-10)
h_aramco_byd <- pmax(pmin(BiCopHfunc2(U[,"BYD"], U[,"Aramco"], bc_byd_aramco), 1-1e-10), 1e-10)

# Tree 2: conditional pair (NVDA – Aramco | BYD)
bc_nvda_aramco_given_byd <- BiCopSelect(h_nvda_byd, h_aramco_byd, familyset = NA,
                                         selectioncrit = "AIC", indeptest = TRUE, level = 0.05)

cat("=== C-VINE STRUCTURE (root: BYD) ===\n")

## === C-VINE STRUCTURE (root: BYD) ===

cat(sprintf("c_BYD-NVDA              %-20s tau=%+.4f\n", bc_byd_nvda$familyname,              bc_byd_nvda$tau))

## c_BYD-NVDA              Independence         tau=+0.0000

cat(sprintf("c_BYD-Aramco            %-20s tau=%+.4f\n", bc_byd_aramco$familyname,            bc_byd_aramco$tau))

## c_BYD-Aramco            Independence         tau=+0.0000

cat(sprintf("c_NVDA-Aramco|BYD       %-20s tau=%+.4f\n", bc_nvda_aramco_given_byd$familyname, bc_nvda_aramco_given_byd$tau))

## c_NVDA-Aramco|BYD       t                    tau=+0.9910

6. Copula Visualization

par(mfrow = c(1, 3))

plot(U[,"BYD"], U[,"NVDA"], pch = 19, col = rgb(.2,.4,.8,.6), cex = .8,
     xlab = "BYD", ylab = "NVDA",
     main = sprintf("c_BYD-NVDA: %s", bc_byd_nvda$familyname)); grid()

plot(U[,"BYD"], U[,"Aramco"], pch = 19, col = rgb(.8,.3,.2,.6), cex = .8,
     xlab = "BYD", ylab = "Aramco",
     main = sprintf("c_BYD-Aramco: %s", bc_byd_aramco$familyname)); grid()

plot(h_nvda_byd, h_aramco_byd, pch = 19, col = rgb(.2,.7,.3,.6), cex = .8,
     xlab = "F(NVDA|BYD)", ylab = "F(Aramco|BYD)",
     main = sprintf("c_NVDA-Aramco|BYD: %s", bc_nvda_aramco_given_byd$familyname)); grid()

par(mfrow = c(1, 1))

7. Simulation (100 000 paths)

set.seed(42)
N <- 100000

# Step 1: simulate (u_BYD, u_NVDA) from Tree 1 pair c_BYD-NVDA
sim_byd_nvda <- BiCopSim(N, bc_byd_nvda)
u_byd  <- sim_byd_nvda[, 1]
u_nvda <- sim_byd_nvda[, 2]

# Step 2: simulate u_Aramco using Tree 1 pair c_BYD-Aramco and Tree 2 pair c_NVDA-Aramco|BYD
# Draw v from Tree 2 conditional copula, then invert h-function from Tree 1
v_tree2  <- BiCopHinv2(
              BiCopHfunc2(u_byd, u_nvda, bc_byd_nvda),  # F(NVDA|BYD)
              runif(N),
              bc_nvda_aramco_given_byd)                  # invert c_NVDA-Aramco|BYD

u_aramco <- BiCopHinv2(u_byd, v_tree2, bc_byd_aramco)  # invert c_BYD-Aramco

sim_U <- cbind(u_nvda, u_aramco, u_byd)
colnames(sim_U) <- c("NVDA", "Aramco", "BYD")

# Back-transform to loss scale using empirical quantiles
q_nvda   <- quantile(losses$NVDA,   probs = u_nvda,   type = 1)
q_aramco <- quantile(losses$Aramco, probs = u_aramco, type = 1)
q_byd    <- quantile(losses$BYD,    probs = u_byd,    type = 1)

portfolio_loss <- (q_nvda + q_aramco + q_byd) / 3

VaR_95 <- quantile(portfolio_loss, 0.95)
ES_95  <- mean(portfolio_loss[portfolio_loss > VaR_95])

cat(sprintf("VaR (95%%):  %.4f%%\nES  (95%%):  %.4f%%\n", VaR_95, ES_95))

## VaR (95%):  9.0054%
## ES  (95%):  11.4795%

hist(portfolio_loss, breaks = 100, freq = FALSE, col = "lightblue", border = "white",
     main = "Simulated Portfolio Loss Distribution",
     xlab = "Monthly Loss (%)")
abline(v = VaR_95, col = "red",     lwd = 2, lty = 2)
abline(v = ES_95,  col = "darkred", lwd = 2, lty = 1)
legend("topright", legend = c(sprintf("VaR 95%% = %.2f%%", VaR_95),
                               sprintf("ES  95%% = %.2f%%", ES_95)),
       col = c("red", "darkred"), lty = c(2, 1), lwd = 2, bty = "n")