library(readxl)
## Warning: package 'readxl' was built under R version 4.5.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.5.2
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: survival
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.1
setwd("C:/Users/user/OneDrive/Documents/CAS")

loss_data <- read_excel("DATASET_CLEANED_PREDICTION.xlsx")
colnames(loss_data) <- c("Year", "Original_Loss","Normalized_Loss","Frequency")


loss_data <- loss_data %>%
  mutate(severity_per_event = Normalized_Loss / Frequency)



# ====================================================
# Fitting Frequency Model
lambda = mean(loss_data$Frequency)
lambda
## [1] 5.762712
variance = var(loss_data$Frequency)
variance
## [1] 14.83928
# Variance more than Mean : Use Negative Binomial
nb_fit <- fitdistr(loss_data$Frequency, "Negative Binomial")
nb_fit
##      size         mu    
##   4.2428762   5.7626903 
##  (1.3576007) (0.4799279)
# =======================================================
# Fitting Severity Model
sev_lnorm <- fitdist(loss_data$severity_per_event,"lnorm")
sev_lnorm
## Fitting of the distribution ' lnorm ' by maximum likelihood 
## Parameters:
##           estimate Std. Error
## meanlog 19.7243412 0.11883717
## sdlog    0.9128056 0.08403012
# =============================================
# Monte Carlo Simulation

set.seed(123)
n_sim = 10000
sim_total_loss = numeric(n_sim)

for(i in 1:n_sim){
  
  # Simulate number of events for 2026
  frequency_2026 <- rnbinom(n = 1, 
                            size = nb_fit$estimate["size"],
                            mu = nb_fit$estimate["mu"])
  
  # Simulate severity of events for 2026
  severity_2026 <- rlnorm(
    frequency_2026,
    meanlog = sev_lnorm$estimate["meanlog"],
    sdlog   = sev_lnorm$estimate["sdlog"]
  )

  sim_total_loss[i] <- sum(severity_2026)
}

EL_2026 <- mean(sim_total_loss)
SD_2026 <- sd(sim_total_loss)
print(paste("Forecasted Expected Loss (EL) for 2026:", EL_2026))
## [1] "Forecasted Expected Loss (EL) for 2026: 3218888034.27489"
print(paste("Forecasted Standard Deviation (SD) for 2026:", SD_2026))
## [1] "Forecasted Standard Deviation (SD) for 2026: 2560215995.31354"
# =================================================
# Risk Metrics

# TVaR (Expected Shortfall)
TVaR_98 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.98)])
TVaR_99 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.99)])
TVaR_995 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.995)])
TVaR_998 <- mean(sim_total_loss[sim_total_loss > quantile(sim_total_loss,0.998)])


# VaR / PML
return_periods <- c(50, 100, 200, 500)
rp_losses <- quantile(sim_total_loss, 1 - 1/return_periods)

rp_names <- c("1-in-50", "1-in-100", "1-in-200", "1-in-500")
aep_values <- c("2.00%", "1.00%", "0.50%", "0.20%")



#=================================================
# TABLE
data.frame(
  "Return_Period" = rp_names,
  "AEP" = aep_values,
  "PML_VaR" = rp_losses / 1e9,
  "TVaR" = c(TVaR_98 / 1e9, TVaR_99 / 1e9, TVaR_995 / 1e9, TVaR_998 / 1e9)
)
##       Return_Period   AEP  PML_VaR     TVaR
## 98%         1-in-50 2.00% 10.21044 12.40809
## 99%        1-in-100 1.00% 11.79447 13.90777
## 99.5%      1-in-200 0.50% 13.26526 15.31025
## 99.8%      1-in-500 0.20% 15.37509 17.08181
#================================================
# GRAPHS

#================================================
# Histogram
sim_total_loss_mill = sim_total_loss/1000000

hist(sim_total_loss/1e9, breaks = 100,
     col = "skyblue",
     main = "Monte Carlo Simulated Loss Distribution",
     xlab = "Annual Loss (Billion AUD)",
     ylab = "Frequency",
     freq = TRUE)

#==================================================
# EP Curve
sorted_losses <- sort(sim_total_loss/1e9)
exceed_prob <- 1 - (1:n_sim)/(n_sim + 1)


tvar_losses <- numeric(n_sim)
for (i in 1:n_sim) {
    tvar_losses[i] <- mean(sorted_losses[i:n_sim])
}

plot(sorted_losses, exceed_prob, 
     log="y",                  
     type="l",                
     main = "Exceedance Probability (EP) Curve",
     xlab = "Loss Value (Billion AUD)", 
     ylab = "Annual Exceedance Probability (%)",
     col = "blue",
     lwd = 2)

lines(tvar_losses, exceed_prob,
      col = "red",
      lwd = 3,
      lty =2)

rp_losses_billion <- quantile(sim_total_loss/1e9, c(0.98, 0.99, 0.995, 0.998))
rp_aep <- 1 - c(0.98, 0.99, 0.995, 0.998)

tvar_losses_billion <- quantile(tvar_losses, c(0.98, 0.99, 0.995, 0.998))

points(rp_losses_billion, rp_aep, pch=19, col="black", cex=1.0)
points(tvar_losses_billion, rp_aep, pch=19, col="black", cex=1.0)


legend("topright", 
       legend = c("VaR Curve", "TVaR Curve"), 
       col = c("blue", "red"), 
       lty = c(1, 2), 
       lwd = 3, 
       bty = "n")

grid(col="gray", lty="dotted")

#================================================================
# Bootstrap Validation
hist_losses <- loss_data$Normalized_Loss

set.seed(456)
boot_loss <- replicate(10000, sample(hist_losses,1, replac = TRUE))
quantile(boot_loss, c(0.50, 0.75, 0.90,0.95,0.99))
##         50%         75%         90%         95%         99% 
##  1844300000  3793500000  7157000000  8253386925 16180600000
boxplot(list(Bootstrap = boot_loss/1e9, MonteCarlo = sim_total_loss/1e9),
        main = "Historical vs. Forecasted Loss Distributions",
        xlab = "Simulation Type",
        ylab = "Annual Loss (Billion AUD)",
        col = c("orange", "lightblue"))

df_plot <- data.frame(
  loss = c(boot_loss/1e9, sim_total_loss/1e9),
  type = rep(c("Bootstrap", "Monte Carlo"),
             times = c(length(boot_loss/1e9), length(sim_total_loss/1e9)))
)


ggplot(df_plot, aes(x=type, y=loss, fill=type)) +
  geom_violin(alpha = 0.4,
              draw_quantiles = c(0.50)) +
  geom_boxplot(width = 0.1, notch = TRUE, outlier.shape = NA) +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Catastrophe Loss Distribution Comparison",
       x = "",
       y = "Annual Loss (Billion AUD)") +
  theme_minimal() +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
        axis.text.x = element_text(size = 11, face = "bold"),
        axis.title.y = element_text(size = 12, face = "bold"))
## Warning: The `draw_quantiles` argument of `geom_violin()` is deprecated as of ggplot2
## 4.0.0.
## ℹ Please use the `quantiles.linetype` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.