library(readxl)
## Warning: package 'readxl' was built under R version 4.5.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.5.2
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survival
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
setwd("C:/Users/user/OneDrive/Documents/CAS")
loss_data <- read_excel("DATASET_CLEANED_PREDICTION.xlsx")
colnames(loss_data) <- c("Year", "Original_Loss","Normalized_Loss","Frequency")
loss_data <- loss_data %>%
mutate(severity_per_event = Normalized_Loss / Frequency)
# ====================================================
# Fitting Frequency Model
lambda = mean(loss_data$Frequency)
lambda
## [1] 5.762712
variance = var(loss_data$Frequency)
variance
## [1] 14.83928
# Variance more than Mean : Use Negative Binomial
nb_fit <- fitdistr(loss_data$Frequency, "Negative Binomial")
nb_fit
## size mu
## 4.2428762 5.7626903
## (1.3576007) (0.4799279)
# =======================================================
# Fitting Severity Model
sev_lnorm <- fitdist(loss_data$severity_per_event,"lnorm")
sev_lnorm
## Fitting of the distribution ' lnorm ' by maximum likelihood
## Parameters:
## estimate Std. Error
## meanlog 19.7243412 0.11883717
## sdlog 0.9128056 0.08403012
# =============================================
# Monte Carlo Simulation
set.seed(123)
n_sim = 10000
sim_total_loss = numeric(n_sim)
for(i in 1:n_sim){
# Simulate number of events for 2026
frequency_2026 <- rnbinom(n = 1,
size = nb_fit$estimate["size"],
mu = nb_fit$estimate["mu"])
# Simulate severity of events for 2026
severity_2026 <- rlnorm(
frequency_2026,
meanlog = sev_lnorm$estimate["meanlog"],
sdlog = sev_lnorm$estimate["sdlog"]
)
sim_total_loss[i] <- sum(severity_2026)
}
EL_2026 <- mean(sim_total_loss)
SD_2026 <- sd(sim_total_loss)
print(paste("Forecasted Expected Loss (EL) for 2026:", EL_2026))
## [1] "Forecasted Expected Loss (EL) for 2026: 3218888034.27489"
print(paste("Forecasted Standard Deviation (SD) for 2026:", SD_2026))
## [1] "Forecasted Standard Deviation (SD) for 2026: 2560215995.31354"
# =================================================
# Risk Metrics
# TVaR (Expected Shortfall)
TVaR_98 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.98)])
TVaR_99 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.99)])
TVaR_995 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.995)])
TVaR_998 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.998)])
# VaR / PML
return_periods <- c(50, 100, 200, 500)
rp_losses <- quantile(sim_total_loss, 1 - 1/return_periods)
rp_names <- c("1-in-50", "1-in-100", "1-in-200", "1-in-500")
aep_values <- c("2.00%", "1.00%", "0.50%", "0.20%")
#=================================================
# TABLE
data.frame(
"Return_Period" = rp_names,
"AEP" = aep_values,
"PML_VaR" = rp_losses / 1e9,
"TVaR" = c(TVaR_98 / 1e9, TVaR_99 / 1e9, TVaR_995 / 1e9, TVaR_998 / 1e9)
)
## Return_Period AEP PML_VaR TVaR
## 98% 1-in-50 2.00% 10.21044 12.40809
## 99% 1-in-100 1.00% 11.79447 13.90777
## 99.5% 1-in-200 0.50% 13.26526 15.31025
## 99.8% 1-in-500 0.20% 15.37509 17.08181
#================================================
# GRAPHS
#================================================
# Histogram
sim_total_loss_mill = sim_total_loss/1000000
hist(sim_total_loss/1e9, breaks = 100,
col = "skyblue",
main = "Monte Carlo Simulated Loss Distribution",
xlab = "Annual Loss (Billion AUD)",
ylab = "Frequency",
freq = TRUE)

#==================================================
# EP Curve
sorted_losses <- sort(sim_total_loss/1e9)
exceed_prob <- 1 - (1:n_sim)/(n_sim + 1)
tvar_losses <- numeric(n_sim)
for (i in 1:n_sim) {
tvar_losses[i] <- mean(sorted_losses[i:n_sim])
}
plot(sorted_losses, exceed_prob,
log="y",
type="l",
main = "Exceedance Probability (EP) Curve",
xlab = "Loss Value (Billion AUD)",
ylab = "Annual Exceedance Probability (%)",
col = "blue",
lwd = 2)
lines(tvar_losses, exceed_prob,
col = "red",
lwd = 3,
lty =2)
rp_losses_billion <- quantile(sim_total_loss/1e9, c(0.98, 0.99, 0.995, 0.998))
rp_aep <- 1 - c(0.98, 0.99, 0.995, 0.998)
tvar_losses_billion <- quantile(tvar_losses, c(0.98, 0.99, 0.995, 0.998))
points(rp_losses_billion, rp_aep, pch=19, col="black", cex=1.0)
points(tvar_losses_billion, rp_aep, pch=19, col="black", cex=1.0)
legend("topright",
legend = c("VaR Curve", "TVaR Curve"),
col = c("blue", "red"),
lty = c(1, 2),
lwd = 3,
bty = "n")
grid(col="gray", lty="dotted")

#================================================================
# Bootstrap Validation
hist_losses <- loss_data$Normalized_Loss
set.seed(456)
boot_loss <- replicate(10000, sample(hist_losses,1, replac = TRUE))
quantile(boot_loss, c(0.50, 0.75, 0.90,0.95,0.99))
## 50% 75% 90% 95% 99%
## 1844300000 3793500000 7157000000 8253386925 16180600000
boxplot(list(Bootstrap = boot_loss/1e9, MonteCarlo = sim_total_loss/1e9),
main = "Historical vs. Forecasted Loss Distributions",
xlab = "Simulation Type",
ylab = "Annual Loss (Billion AUD)",
col = c("orange", "lightblue"))

df_plot <- data.frame(
loss = c(boot_loss/1e9, sim_total_loss/1e9),
type = rep(c("Bootstrap", "Monte Carlo"),
times = c(length(boot_loss/1e9), length(sim_total_loss/1e9)))
)
ggplot(df_plot, aes(x=type, y=loss, fill=type)) +
geom_violin(alpha = 0.4,
draw_quantiles = c(0.50)) +
geom_boxplot(width = 0.1, notch = TRUE, outlier.shape = NA) +
scale_y_continuous(labels = scales::comma) +
labs(title = "Catastrophe Loss Distribution Comparison",
x = "",
y = "Annual Loss (Billion AUD)") +
theme_minimal() +
theme(legend.position = "none",
plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
axis.text.x = element_text(size = 11, face = "bold"),
axis.title.y = element_text(size = 12, face = "bold"))
## Warning: The `draw_quantiles` argument of `geom_violin()` is deprecated as of ggplot2
## 4.0.0.
## ℹ Please use the `quantiles.linetype` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
