data <- readRDS("C:/Users/aidan/Downloads/data.rds")
View(data)
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.4.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.4.2
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.4.3
#Code for sorting the data by individual Claim amount:
# Estimated above reinsurance ($25,000)
EstM <- 0.23*nrow(data[data$clmcost=='0',])
# Total Claims Number
Total_claims <- sum(data$clmcost) + EstM*25000 ; Total_claims
## [1] 54798154
# Average Claim net reinsurance
Avg_claims <- (sum(data$clmcost)+EstM*25000)/(sum(data$numclms) + EstM) ; Avg_claims
## [1] 18261.91
# A is a list of all non-zero claims. X is a list of all claims UNDER retention.
A<- subset(data[c("numclms","clmcosts")], numclms>0)
X <- c()
for (row in 1:nrow(A)) {
if (A[row,1]==1) {
X <- append(X, A[row,2])
} else {
for (claimno in 1:A[row,1]) {
X <- append(X,A[row,2]/A[row,1])
}
}
}
hist(X, main = "Non-Zero claims without retention")
#Question 1 pt. 2 lognormal
library(fitdistrplus)
min(X)
## [1] 10
max(X)
## [1] 7355
# Log normal Distribution:
fit_ln <- fitdist(X, "lnorm", method = "mme") # Moment Matching Estimation
fit_ln2 <- fitdist(X, "lnorm", method = "mle") # Method Maxium Likelihood
fit_ln3 <- fitdist(X, "lnorm", method = "mse") # Maxium goodness of fit
gofstat(list(fit_ln, fit_ln2, fit_ln3), fitnames = c("MME","MLE", "MSE"))
## Goodness-of-fit statistics
## MME MLE MSE
## Kolmogorov-Smirnov statistic 0.1933909 0.07359625 0.1434873
## Cramer-von Mises statistic 8.3596999 1.39874800 5.7000458
## Anderson-Darling statistic 132.7899165 9.43582444 61.0036246
##
## Goodness-of-fit criteria
## MME MLE MSE
## Akaike's Information Criterion 13345.70 12691.55 12873.75
## Bayesian Information Criterion 13355.15 12701.01 12883.20
par(mfrow = c(3,3))
plot(fit_ln)
plot(fit_ln2)
plot(fit_ln3)
# on a level of AIC, BIC and other, MLE dominates the other methods
gofstat(fit_ln2)
## Goodness-of-fit statistics
## 1-mle-lnorm
## Kolmogorov-Smirnov statistic 0.07359625
## Cramer-von Mises statistic 1.39874800
## Anderson-Darling statistic 9.43582444
##
## Goodness-of-fit criteria
## 1-mle-lnorm
## Akaike's Information Criterion 12691.55
## Bayesian Information Criterion 12701.01
gof_ln <- gofstat(fit_ln2)
# Code written for quick change between models
model <- fit_ln2
set.seed(1234)
sim1<- rlnorm(length(X), meanlog = model$estimate[1], sdlog = model$estimate[2])
par(mfrow=c(2,2))
hist(X)
hist(sim1)
# Modelled and Simulation quantiles compared to observed quantiles
data.frame( Modelled = round(c(qlnorm(0, meanlog = model$estimate[1], sdlog = model$estimate[2]), qlnorm(0.25, meanlog = model$estimate[1], sdlog = model$estimate[2] ), qlnorm(0.5,meanlog = model$estimate[1], sdlog = model$estimate[2]), qlnorm(0.75, meanlog = model$estimate[1], sdlog = model$estimate[2]), qlnorm(1, meanlog = model$estimate[1], sdlog = model$estimate[2])),1), Simulation = round((quantile(sim1)),1), Observed = quantile(X))
summary(model)
## Fitting of the distribution ' lnorm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## meanlog 5.697840 0.05595784
## sdlog 1.616978 0.03956810
## Loglikelihood: -6343.777 AIC: 12691.55 BIC: 12701.01
## Correlation matrix:
## meanlog sdlog
## meanlog 1 0
## sdlog 0 1
# fitted distribution mean
exp(model$estimate[1]+0.5*model$estimate[2]^2)
## meanlog
## 1102.294
# mean of simulation
data.frame(statistic = c("mean", "standard deviation"), lognormal_Sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
#Fitting Exponential
# Fitting of exponential distribution
x <- as.numeric(X)
x <- x[is.finite(x) & x >= 0]
x_pos <- x[x > 0]
fit_exp <- fitdist(
x_pos, "exp",
start = list(rate = 1/mean(x_pos)),
lower = 1e-12, # keep rate > 0
method = "mle",
optim.method = "Nelder-Mead" # more stable for 1-param cases
)
summary(fit_exp)
## Fitting of the distribution ' exp ' by maximum likelihood
## Parameters :
## estimate
## rate 0.001272567
## Loglikelihood: -6401.71 AIC: 12805.42 BIC: 12810.15
gofstat(fit_exp)
## Goodness-of-fit statistics
## 1-mle-exp
## Kolmogorov-Smirnov statistic 0.1489007
## Cramer-von Mises statistic 6.3393067
## Anderson-Darling statistic 46.8470937
##
## Goodness-of-fit criteria
## 1-mle-exp
## Akaike's Information Criterion 12805.42
## Bayesian Information Criterion 12810.15
gof_exp <- gofstat(fit_exp)
coef(fit_exp)
## rate
## 0.001272567
plot(fit_exp)
# Code written for quick change between models
model <- fit_exp
set.seed(1234)
sim1<- rexp(length(X), rate = fit_exp$estimate)
par(mfrow=c(2,2))
hist(X)
hist(sim1)
# Simulation quantiles compared to observed quantiles
data.frame( Modelled = round(c(qexp(0, rate = fit_exp$estimate), qexp(0.25, rate = fit_exp$estimate ), qexp(0.5, rate = fit_exp$estimate), qexp(0.75, rate = fit_exp$estimate), qexp(1, rate = fit_exp$estimate)),1), Simulation = round((quantile(sim1)),1), Observed = quantile(X))
summary(model)
## Fitting of the distribution ' exp ' by maximum likelihood
## Parameters :
## estimate
## rate 0.001272567
## Loglikelihood: -6401.71 AIC: 12805.42 BIC: 12810.15
# pure mean under fitted model
1/fit_exp$estimate
## rate
## 785.8132
# observed mean
mean(X)
## [1] 785.8132
# mean of simulation
data.frame(statistic = c("mean", "standard deviation"), Exponetial_sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
#Gamma model fitted through mme
### Gamma ###
fit_gam <- fitdist(X, "gamma", method = "mme")
gofstat(fit_gam)
## Goodness-of-fit statistics
## 1-mme-gamma
## Kolmogorov-Smirnov statistic 0.07455777
## Cramer-von Mises statistic 0.21349555
## Anderson-Darling statistic 3.47019630
##
## Goodness-of-fit criteria
## 1-mme-gamma
## Akaike's Information Criterion 12679.84
## Bayesian Information Criterion 12689.29
gof_gamma <- gofstat(fit_gam)
summary(fit_gam)
## Fitting of the distribution ' gamma ' by matching moments
## Parameters :
## estimate Std. Error
## shape 0.5456635627 4.494607e-02
## rate 0.0006943935 6.580093e-05
## Loglikelihood: -6337.918 AIC: 12679.84 BIC: 12689.29
## Correlation matrix:
## shape rate
## shape 1.0000000 0.8692414
## rate 0.8692414 1.0000000
plot(fit_gam)
# Code written for quick change between models
model <- fit_gam
set.seed(1234)
fit_gam$estimate
## shape rate
## 0.5456635627 0.0006943935
sim1<- rgamma(length(X), shape = fit_gam$estimate[1], rate = fit_gam$estimate[2])
# Graph
par(mfrow=c(2,2))
hist(X)
hist(sim1)
# Simulation quantiles compared to observed quantiles
data.frame(
Modelled = round(c(qgamma(0, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(0.25, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(0.5, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(0.75,shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(1, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2])),1),
Simulation = round((quantile(sim1)),1),
Observed = quantile(X)
)
summary(model)
## Fitting of the distribution ' gamma ' by matching moments
## Parameters :
## estimate Std. Error
## shape 0.5456635627 4.494607e-02
## rate 0.0006943935 6.580093e-05
## Loglikelihood: -6337.918 AIC: 12679.84 BIC: 12689.29
## Correlation matrix:
## shape rate
## shape 1.0000000 0.8692414
## rate 0.8692414 1.0000000
# fitted distribution mean
mean(sim1)
## [1] 784.6038
# Mean and variance of simulation compared to Observed
data.frame(statistic = c("mean", "standard deviation"), Gamma_Sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
#Weibull distribution MLE
fit_wei <- fitdist(X, "weibull", method = "mle")
gofstat(fit_wei)
## Goodness-of-fit statistics
## 1-mle-weibull
## Kolmogorov-Smirnov statistic 0.04605632
## Cramer-von Mises statistic 0.23584832
## Anderson-Darling statistic 2.50396357
##
## Goodness-of-fit criteria
## 1-mle-weibull
## Akaike's Information Criterion 12652.32
## Bayesian Information Criterion 12661.77
gof_wei <- gofstat(fit_wei)
plot(fit_wei)
model <- fit_wei
set.seed(1234)
fit_wei$estimate
## shape scale
## 0.7330834 644.9072807
sim1<- rweibull(length(X), shape = fit_wei$estimate[1], scale = fit_wei$estimate[2])
par(mfrow=c(2,2))
hist(X)
hist(sim1)
# Modelled, Simulation quantiles compared to observed quantiles
data.frame(
Modelled = round(c(qweibull(0, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]), qweibull(0.25, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2] ), qweibull(0.5, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]), qweibull(0.75, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]), qweibull(1, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2])),1),
Simulation = round((quantile(sim1)),1),
Observed = quantile(X)
)
summary(model)
## Fitting of the distribution ' weibull ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 0.7330834 0.01975975
## scale 644.9072807 32.13525400
## Loglikelihood: -6324.159 AIC: 12652.32 BIC: 12661.77
## Correlation matrix:
## shape scale
## shape 1.0000000 0.3205504
## scale 0.3205504 1.0000000
# fitted distribution mean
mean(sim1)
## [1] 771.2906
# Mean and variance of simulation compared to Observed
data.frame(statistic = c("mean", "standard deviation"), Weibull_sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
#Goodness of fit Summary for all 4 models
library(caret)
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
set.seed(1234)
data.frame(Goodness_of_fit = c("AIC", "log-likelihood", "RMSE", "AD - Test", "KS - Test") ,
Lognormal = c(round(fit_ln2$aic,1), round(fit_ln2$loglik,1), round(RMSE(rlnorm(length(X), meanlog = fit_ln2$estimate[1], sdlog = fit_ln2$estimate[2]), X),1), gof_ln$adtest, gof_ln$kstest),
Exponetial = c(round(fit_exp$aic,1), round(fit_exp$loglik,1), round(RMSE(rexp(length(X), rate = fit_exp$estimate),X),1), gof_exp$adtest, gof_exp$kstest),
Gamma = c(round(fit_gam$aic,1), round(fit_gam$loglik,1), round(RMSE(rgamma(length(X), shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]),X),1), gof_gamma$adtest, gof_gamma$kstest),
Weibull = c(round(fit_wei$aic,1), round(fit_wei$loglik,1), round(RMSE(rweibull(length(X), shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]),X),1), gof_wei$adtest, gof_wei$kstest)
)
#Resulting effect of assuming no distribution between 8,400 -> 25,000 under gamma, weibull distribution and rule of 3
# Using our gamma to estimate expected loss which would accure associated with P[8,400 < X < 25,000]
set.seed(1234)
sim1<- rgamma(length(X), shape = fit_gam$estimate[1], rate = fit_gam$estimate[2])
max <- max(sim1)
q <- pgamma(25000, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]) - pgamma(max, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2])
upper_impact <- 25000 * q # <= loss in LEV if you assume no mass in (8.4k,25k)
lambda <- sum(data$numclms)/sum(data$exposure)
upper_impact*lambda
## [1] 2.951265
# Using our weibul to estimate expected loss which would accure associated with P[8,400 < X < 25,000]
set.seed(1234)
sim1<- rweibull(length(X), shape = fit_wei$estimate[1], scale = fit_wei$estimate[2])
max <- max(sim1)
q <- pweibull(25000, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]) - pweibull(max, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2])
upper_impact <- 25000 * q # <= loss in LEV if you assume no mass in (8.4k,25k)
lambda <- sum(data$numclms)/sum(data$exposure)
upper_impact*lambda
## [1] 1.361063
# Rule of 3: for distribution free upper limit estimation
n_nonzero <- sum(X > 0) ; n_nonzero
## [1] 835
p_upper <- 3 / n_nonzero # 95% upper bound with 0 events
upper_impact_emp_per_claim <- 25000 * p_upper
lambda <- sum(data$numclms) / sum(data$exposure)
upper_impact_emp_per_exposure <- upper_impact_emp_per_claim * lambda
upper_impact_emp_total <- upper_impact_emp_per_claim * sum(X)
upper_impact_emp_per_exposure
## [1] 14.32528
upper_impact_emp_total
## [1] 58935988
upper_impact_emp_per_claim
## [1] 89.82036
Adding RAdding RAdding Retention level / Cap
# Following on from original code with the addition of the retention level cap:
numRet <- round(EstM, digits=0)
X<-append(X,rep(25000,times=numRet))
hist(X,main = "Non-Zero claims with retention")
Threshold exceedance using Pareto and swiss Re and Lloyds curve for X<M
## ---------------------------------------------------------------
## Inputs
## ---------------------------------------------------------------
M <- 25000 # retention / cap
X <- as.numeric(X) # severities (each capped at M in your data)
lambda <- sum(data$numclms, na.rm = TRUE) / sum(data$exposure, na.rm = TRUE)
## Normalized severities for exposure-curve fit
Z <- pmin(X, M) / M
Z_obs <- Z[Z < 1]
ec <- ecdf(Z_obs)
grid <- seq(0, max(Z_obs), length.out = 200)
Fn_emp <- ec(grid)
## ---------------------------------------------------------------
## Exposure curves
## ---------------------------------------------------------------
F_swiss <- function(z, alpha) 1 - (1 - pmin(pmax(z,0),1))^alpha # alpha>0
F_lloyds <- function(z, beta) pmin(pmax(z,0),1)^beta # beta>=1
sse_swiss <- function(par) { alpha <- exp(par); mean((F_swiss(grid, alpha) - Fn_emp)^2) }
sse_lloyds <- function(par) { beta <- 1 + exp(par); mean((F_lloyds(grid, beta) - Fn_emp)^2) }
set.seed(1)
fit_sw <- optim(par = log(0.3), fn = sse_swiss, method = "BFGS")
fit_ll <- optim(par = log(2 - 1.0), fn = sse_lloyds, method = "BFGS")
alpha_hat <- exp(fit_sw$par)
beta_hat <- 1 + exp(fit_ll$par)
## Limited expected value from each curve: LEV(M) = M * ∫_0^1 [1 - F(z)] dz
area_sw <- integrate(function(z) 1 - F_swiss(z, alpha_hat), 0, 1)$value
area_ll <- integrate(function(z) 1 - F_lloyds(z, beta_hat), 0, 1)$value
LEV_M_swiss <- M * area_sw
LEV_M_lloyds <- M * area_ll
## ---------------------------------------------------------------
## Exceedance probability at M (observable)
## ---------------------------------------------------------------
S_M <- length(X[X>=M])/length(X) # proportion of capped observations among non-zero claims
S_M
## [1] 0.7217594
## ---------------------------------------------------------------
## Pareto tail continuation above M
## E[X - M | X > M] = M / (alpha_tail - 1), alpha_tail > 1
##
## With sensitivity testing to alpha levels
## ---------------------------------------------------------------
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3) # sensitivity set, smaller the value of alpha the heavier the tail, i.e. more prudent (Changes Pareto distribition, that is the distribution of claims above M)
EX_ceded_per_claim <- function(alpha_tail) {
if (alpha_tail <= 1) return(Inf)
S_M * (M / (alpha_tail - 1))
}
EX_ceded_list <- sapply(alpha_tail_vec, EX_ceded_per_claim)
names(EX_ceded_list) <- paste0("alpha_", alpha_tail_vec)
## Combine with retained LEV from exposure curves (choose Swiss or Lloyd's)
LEV_ret <- c(Swiss = LEV_M_swiss, Lloyds = LEV_M_lloyds)
## Ground-up mean severity for each curve & tail alpha:
make_summary <- function(LEV_retained) {
EX_ground <- LEV_retained + EX_ceded_list # per claim
cbind(
LEV_retained = LEV_retained, # Come back and change this to value from question 1 distribution
EX_ceded = EX_ceded_list,
EX_ground = EX_ground,
PP_retained = lambda * LEV_retained,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * EX_ground
)
}
summary_swiss <- make_summary(LEV_M_swiss)
summary_lloyds <- make_summary(LEV_M_lloyds)
summary_swiss
## LEV_retained EX_ceded EX_ground PP_retained PP_ceded PP_ground
## alpha_1.1 110.0596 180439.853 180549.913 17.5532 28778.011 28795.564
## alpha_1.2 110.0596 90219.927 90329.986 17.5532 14389.006 14406.559
## alpha_1.5 110.0596 36087.971 36198.030 17.5532 5755.602 5773.155
## alpha_1.7 110.0596 25777.122 25887.182 17.5532 4111.144 4128.698
## alpha_2 110.0596 18043.985 18154.045 17.5532 2877.801 2895.354
## alpha_2.2 110.0596 15036.654 15146.714 17.5532 2398.168 2415.721
## alpha_2.5 110.0596 12029.324 12139.383 17.5532 1918.534 1936.087
## alpha_3 110.0596 9021.993 9132.052 17.5532 1438.901 1456.454
summary_lloyds
## LEV_retained EX_ceded EX_ground PP_retained PP_ceded PP_ground
## alpha_1.1 12500 180439.853 192939.85 1993.601 28778.011 30771.613
## alpha_1.2 12500 90219.927 102719.93 1993.601 14389.006 16382.607
## alpha_1.5 12500 36087.971 48587.97 1993.601 5755.602 7749.204
## alpha_1.7 12500 25777.122 38277.12 1993.601 4111.144 6104.746
## alpha_2 12500 18043.985 30543.99 1993.601 2877.801 4871.402
## alpha_2.2 12500 15036.654 27536.65 1993.601 2398.168 4391.769
## alpha_2.5 12500 12029.324 24529.32 1993.601 1918.534 3912.135
## alpha_3 12500 9021.993 21521.99 1993.601 1438.901 3432.502
## ---------------------------------------------------------------
## Simple plots (optional)
## ---------------------------------------------------------------
par(mfrow=c(1,2))
plot(ec, main = "ECDF of Z and fitted curves", xlab = "z = X/M", ylab = "F(z)")
curve(F_swiss(x, alpha_hat), add = TRUE, col = 2, lwd = 2)
curve(F_lloyds(x, beta_hat), add = TRUE, col = 4, lwd = 2)
legend("bottomright", c("Empirical", "Swiss (fit)", "Lloyd's (fit)"),
lty = 1, col = c(1,2,4), bty = "n")
curve(1 - F_swiss(x, alpha_hat), 0, 1, n = 200, col = 2, lwd = 2,
main = "1 - F(z) (area × M = LEV(M))", xlab = "z", ylab = "1 - F(z)")
curve(1 - F_lloyds(x, beta_hat), add = TRUE, col = 4, lwd = 2)
legend("topright", c("Swiss", "Lloyd's"), col = c(2,4), lty = 1, bty = "n")
make_summary <- function(lev) {
rn <- names(EX_ceded_list) # e.g. "alpha_1.1", ...
df <- data.frame(
alpha = alpha_tail_vec,
LEV_retained = lev,
EX_ceded = EX_ceded_list,
EX_ground = lev + EX_ceded_list,
PP_retained = lambda * lev,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * (lev + EX_ceded_list)
)
rownames(df) <- rn
df
}
summary_swiss <- make_summary(LEV_M_swiss)
summary_lloyds <- make_summary(LEV_M_lloyds)
round(summary_swiss, 2)
round(summary_lloyds, 2)
plot(alpha_tail_vec, EX_ceded_list, type="b", pch=16, lwd=2,
xlab="Tail alpha", ylab="E[(X-M)^+]", main="Ceded Expectation vs Tail α")
abline(v=1.5, lty=3, col=8); text(1.5, max(EX_ceded_list), "α=1.5", pos=4, col=8)
Weibull and Gamma for X < M
S_M <- length(X[X>=M])/length(X) # proportion of capped observations among non-zero claims
S_M
## [1] 0.7217594
# Pareto
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3) # sensitivity set, smaller the value of alpha the heavier the tail, i.e. more prudent (Changes Pareto distribition, that is the distribution of claims above M)
EX_ceded_per_claim <- function(alpha_tail) {
if (alpha_tail <= 1) return(Inf)
S_M * (M / (alpha_tail - 1))
}
EX_ceded_list <- sapply(alpha_tail_vec, EX_ceded_per_claim)
names(EX_ceded_list) <- paste0("alpha_", alpha_tail_vec)
# Summary function:
make_summary <- function(lev) {
rn <- names(EX_ceded_list) # e.g. "alpha_1.1", ...
df <- data.frame(
alpha = alpha_tail_vec,
LEV_retained = lev,
EX_ceded = EX_ceded_list,
EX_ground = lev + EX_ceded_list,
PP_retained = lambda * lev,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * (lev + EX_ceded_list)
)
rownames(df) <- rn
df
}
# Weibull
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
# LEV for Weibull
lev_weibull <- function(u, shape, scale) {
t <- (u/scale)^shape
scale * gamma(1 + 1/shape) * pgamma(t, shape = 1 + 1/shape, rate = 1) +
u * exp(-t)
}
LEV_M_weibull <- lev_weibull(M, shape = k, scale = th)
summary_weibull <- make_summary(LEV_M_weibull)
round(summary_weibull,1)
# Gamma
shape<-fit_gam$estimate[1]
rate<-fit_gam$estimate[2]
LEV_gamma <- function(M, shape, rate) {
term1 <- pgamma(rate * M, shape = shape + 1) / rate
term2 <- M * (1 - pgamma(rate * M, shape = shape))
term1 + term2
}
LEV_M_gamma <- LEV_gamma(M, shape = shape, rate = rate)
LEV_M_gamma
## rate
## 1440.106
summary_gamma <- make_summary(LEV_M_gamma)
## Warning in data.frame(alpha = alpha_tail_vec, LEV_retained = lev, EX_ceded =
## EX_ceded_list, : row names were found from a short variable and have been
## discarded
round(summary_gamma,1)
Spliced distributions:
## 1) Tail weight among non-zero claims
S_M <- length(X[X >= M & X > 0]) / length(X[X > 0])
lambda <- sum(data$numclms, na.rm = TRUE) / sum(data$exposure, na.rm = TRUE)
## 2) Pareto EX above M (unchanged — correct)
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3)
EX_ceded_per_claim <- function(alpha_tail) {
if (alpha_tail <= 1) return(Inf)
S_M * (M / (alpha_tail - 1))
}
EX_ceded_list <- sapply(alpha_tail_vec, EX_ceded_per_claim)
names(EX_ceded_list) <- paste0("alpha_", alpha_tail_vec)
## 3) Summary builder
make_summary <- function(lev_retained) {
data.frame(
alpha = alpha_tail_vec,
LEV_retained= lev_retained, # same across alphas at u = M
EX_ceded = EX_ceded_list, # varies with alpha
EX_ground = lev_retained + EX_ceded_list, # = E[X] under splice
PP_retained = lambda * lev_retained,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * (lev_retained + EX_ceded_list)
)
}
## WEIBULL BODY (truncated at M)
k <- unname(fit_wei$estimate[1]) # shape
th <- unname(fit_wei$estimate[2]) # scale
# FULL-distribution LEV up to u for Weibull body (you already had this)
lev_weibull_full <- function(u, shape, scale) {
t <- (u/scale)^shape
scale * gamma(1 + 1/shape) * pgamma(t, shape = 1 + 1/shape, rate = 1) +
u * exp(-t)
}
# Body CDF at M
F_M_weib <- pweibull(M, shape = k, scale = th)
# *** Retained LEV at u = M under the splice ***
# (1 - S_M) * [ FULL body LEV at M / F_b(M) ] + M * S_M
LEV_M_weibull_retained <- (1 - S_M) * (lev_weibull_full(M, k, th) / F_M_weib) + M * S_M
summary_weibull <- make_summary(LEV_M_weibull_retained)
round(summary_weibull, 1)
## ==== GAMMA BODY (truncated at M) ====
shape <- unname(fit_gam$estimate[1])
rate <- unname(fit_gam$estimate[2])
# FULL-distribution LEV up to M for Gamma body
LEV_gamma_full <- function(u, shape, rate) {
term1 <- pgamma(rate * u, shape = shape + 1) / rate
term2 <- u * (1 - pgamma(rate * u, shape = shape))
term1 + term2
}
F_M_gamma <- pgamma(M, shape = shape, rate = rate)
LEV_M_gamma_retained <- (1 - S_M) * (LEV_gamma_full(M, shape, rate) / F_M_gamma) + M * S_M
summary_gamma <- make_summary(LEV_M_gamma_retained)
round(summary_gamma, 1)
## 1) Tail weight among non-zero claims
S_M <- length(X[X >= M & X > 0]) / length(X[X > 0])
## 2) Pareto EX above M (your function is correct)
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3)
EX_ceded_per_claim <- function(alpha_tail) {
if (alpha_tail <= 1) return(Inf)
S_M * (M / (alpha_tail - 1))
}
EX_ceded_list <- sapply(alpha_tail_vec, EX_ceded_per_claim)
names(EX_ceded_list) <- paste0("alpha_", alpha_tail_vec)
## 3) Summary builder (lambda must be defined earlier)
make_summary <- function(lev_retained_M) {
data.frame(
alpha = alpha_tail_vec,
LEV_retained = lev_retained_M, # same across alphas at u = M
EX_ceded = EX_ceded_list, # varies with alpha
EX_ground = lev_retained_M + EX_ceded_list, # E[X] under splice
PP_retained = lambda * lev_retained_M,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * (lev_retained_M + EX_ceded_list)
)
}
## ===== WEIBULL BODY (fit on X < M) =====
k <- unname(fit_wei$estimate[1]) # shape
th <- unname(fit_wei$estimate[2]) # scale
# Full (one-piece) body LEV up to u
lev_weibull_full <- function(u, shape, scale) {
t <- (u/scale)^shape
scale * gamma(1 + 1/shape) * pgamma(t, shape = 1 + 1/shape, rate = 1) +
u * exp(-t)
}
# Body CDF at M
F_M_weib <- pweibull(M, shape = k, scale = th)
# *** Spliced retained LEV at u = M ***
# (1 - S_M) * [ full-body LEV at M / F_b(M) ] + M * S_M
LEV_M_weibull_retained <- (1 - S_M) * (lev_weibull_full(M, k, th) / F_M_weib) + M * S_M
summary_weibull <- make_summary(LEV_M_weibull_retained)
round(summary_weibull, 1)
## ===== GAMMA BODY (fit on X < M) =====
shape <- unname(fit_gam$estimate[1]) # Gamma shape
rate <- unname(fit_gam$estimate[2]) # Gamma rate (1/scale)
# Full (one-piece) body LEV up to u for Gamma
LEV_gamma_full <- function(u, shape, rate) {
term1 <- pgamma(rate * u, shape = shape + 1) / rate
term2 <- u * (1 - pgamma(rate * u, shape = shape))
term1 + term2
}
F_M_gamma <- pgamma(M, shape = shape, rate = rate)
LEV_M_gamma_retained <- (1 - S_M) * (LEV_gamma_full(M, shape, rate) / F_M_gamma) + M * S_M
summary_gamma <- make_summary(LEV_M_gamma_retained)
round(summary_gamma, 1)
Plot of Spliced Distribution based on Weibull under alpha = 1.5
## ---- Inputs you already have ----
# X: your data (non-zero + zeros ok)
# M: cap / splice point (e.g. 25000)
S_M <- mean(X >= M & X > 0) # tail weight among non-zero claims
# WEIBULL body fit on X < M
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
# Gamme distribution has being hashed out in all cases below
# a <- unname(fit_gam$estimate[1]);
#b <- unname(fit_gam$estimate[2])
## ---- Body CDF/PDF helpers ----
F_b <- function(x) pweibull(x, shape = k, scale = th)
f_b <- function(x) dweibull(x, shape = k, scale = th)
# For Gamma instead:
# F_b <- function(x) pgamma(x, shape = a, rate = b)
# f_b <- function(x) dgamma(x, shape = a, rate = b)
F_M <- F_b(M); stopifnot(F_M > 0 && S_M < 1)
w <- 1 - S_M
## ---- Choose a Pareto tail shape alpha ----
# Option B: set alpha by judgement/sensitivity
alpha <- 1.5 # e.g., pick from c(1.2, 1.5, 1.7, 2.0, 2.5)
stopifnot(alpha > 1)
# Truncated body parts:
F_M <- F_b(M); w <- 1 - S_M
f_b_trunc <- function(x) f_b(x) / F_M
F_b_trunc <- function(x) F_b(x) / F_M
S_b_trunc <- function(x) 1 - F_b_trunc(x)
# Spliced survival / pdf:
S_total <- function(x) ifelse(x < M, w * S_b_trunc(x) + S_M, S_M * (M/x)^alpha)
f_total <- function(x) ifelse(x < M, w * f_b_trunc(x), S_M * alpha * M^alpha / x^(alpha+1))
## ---- Spliced PDF/CDF/Survival ----
# Truncated body pieces on [0, M)
f_b_trunc <- function(x) f_b(x) / F_M
F_b_trunc <- function(x) F_b(x) / F_M
S_b_trunc <- function(x) 1 - F_b_trunc(x)
# Survival (global)
S_total <- function(x) ifelse(
x < M,
w * S_b_trunc(x) + S_M, # below M
S_M * (M / x)^alpha # tail
)
# CDF (global)
F_total <- function(x) 1 - S_total(x)
# PDF (global)
f_total <- function(x) ifelse(
x < M,
w * f_b_trunc(x), # below M
S_M * alpha * M^alpha / (x^(alpha + 1)) # tail density
)
## ---- Plot grids ----
xs <- sort(unique(c(
seq(0, M, length.out = 800),
seq(M, max(M*5, quantile(X[X>0], 0.999, na.rm=TRUE)), length.out = 800)
)))
# Base R plots (simple & dependency-free)
par(mfrow = c(1,3), mar = c(4,4,2,1))
# 1) PDF
plot(xs, f_total(xs), type="l", lwd=2, xlab="x", ylab="pdf f(x)",
main="Spliced PDF")
abline(v=M, lty=3)
# 2) CDF
plot(xs, F_total(xs), type="l", lwd=2, xlab="x", ylab="CDF F(x)",
main="Spliced CDF")
abline(v=M, lty=3)
# 3) Survival (log–log tail view helps)
plot(xs, S_total(xs), type="l", lwd=2, xlab="x", ylab="Survival S(x)",
main="Spliced Survival"); abline(v=M, lty=3)
## ----------------------------
## Spliced severity simulator
## ----------------------------
# Inputs:
# M : splice/cap threshold (e.g., 25000)
# S_M : tail weight among non-zero claims = P(X >= M | X > 0)
# alpha : Pareto tail shape (>1), pick by judgment/sensitivity
# body : "weibull" or "gamma"
# pars : list of body params
# - Weibull: list(shape = k, scale = th)
# - Gamma : list(shape = a, rate = b)
# Returns a function r_splice(n) that simulates n severities.
make_r_splice <- function(M, S_M, alpha, body = c("weibull","gamma"), pars) {
stopifnot(S_M >= 0, S_M < 1, alpha > 1)
body <- match.arg(body)
if (body == "weibull") {
k <- pars$shape; th <- pars$scale
F_b <- function(x) pweibull(x, shape=k, scale=th)
q_b <- function(p) qweibull(p, shape=k, scale=th)
} else {
a <- pars$shape; b <- pars$rate
F_b <- function(x) pgamma(x, shape=a, rate=b)
q_b <- function(p) qgamma(p, shape=a, rate=b)
}
F_M <- F_b(M); stopifnot(F_M > 0)
w <- 1 - S_M
function(n) {
u_mix <- runif(n)
x <- numeric(n)
# Body draws: truncated at M → inverse-CDF on [0, F_b(M)]
idx_b <- which(u_mix > S_M)
nb <- length(idx_b)
if (nb > 0) {
u_body <- runif(nb, min = 0, max = F_M) # uniform on [0, F_b(M)]
x[idx_b] <- q_b(u_body) # ensures x < M almost surely
}
# Tail draws: Pareto on [M, ∞) with scale M, shape alpha
idx_t <- which(u_mix <= S_M)
nt <- length(idx_t)
if (nt > 0) {
u_tail <- runif(nt) # on (0,1)
x[idx_t] <- M / (u_tail)^(1/alpha) # qPareto(u)
}
x
}
}
## ---------- Example usage ----------
set.seed(1)
M <- 25000
# Tail weight among non-zero claims:
S_M <- mean(X >= M & X > 0) # use your data
# Choose a tail shape (sensitivity)
alpha <- 1.5 # try c(1.2, 1.5, 1.7, 2.0, 2.5)
# WEIBULL body (from your fit)
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
r_splice_weib <- make_r_splice(
M=M, S_M=S_M, alpha=alpha,
body="weibull", pars=list(shape=k, scale=th)
)
# Or GAMMA body:
# a <- unname(fit_gam$estimate[1]); b <- unname(fit_gam$estimate[2])
# r_splice_gam <- make_r_splice(M, S_M, alpha, body="gamma", pars=list(shape=a, rate=b))
# Simulate
n <- 1e5
X_sim <- r_splice_weib(n)
# Retained and ceded pieces
Retained <- pmin(X_sim, M)
Ceded <- pmax(X_sim - M, 0)
# SanCeded# Sanity checks
mean(Retained) # ≈ LEV(M) under splice
## [1] 18254.45
mean(Ceded) # ≈ S_M * M / (alpha - 1)
## [1] 36224.79
mean(Retained) + mean(Ceded) # ≈ ground-up mean E[X] under splice
## [1] 54479.24
# Quick visuals
hist(X_sim[X_sim < M], breaks=50, freq=FALSE,
main="Spliced PDF (below M) + Tail overlay", xlab="x")
# Overlay model pdf below M (scaled by w/F_M if you want exact heights);
# for quick check, just add a kernel density for simulated below-M:
lines(density(X_sim[X_sim < M]), lwd=2)
# Survival on log-log to inspect Pareto tail
xs <- seq(M, quantile(X_sim, 0.999), length.out=300)
S_emp <- sapply(xs, function(t) mean(X_sim >= t))
plot(xs, S_emp, log="xy", type="l", lwd=2,
xlab="x (log)", ylab="S(x) (log)", main="Tail ~ Pareto (log-log)")
abline(v=M, lty=3)
## Assumes you already have:
## X_sim (simulated from the splice), M, S_M, alpha,
## and a body fit (Weibull or Gamma) with its params.
## --- Define body pdf f_b(x) and body CDF at M ---
# If WEIBULL body:
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
f_b <- function(x) dweibull(x, shape = k, scale = th)
F_M <- pweibull(M, shape = k, scale = th)
# (If GAMMA body instead, swap the two lines above for:)
# a <- unname(fit_gam$estimate[1]); b <- unname(fit_gam$estimate[2])
# f_b <- function(x) dgamma(x, shape = a, rate = b)
# F_M <- pgamma(M, shape = a, rate = b)
stopifnot(F_M > 0)
w <- 1 - S_M
## --- Spliced pdf components ---
f_body_spliced <- function(x) ifelse(x < M, w * f_b(x) / F_M, 0) # truncated body, weighted
f_pareto <- function(x) ifelse(x >= M, S_M * alpha * M^alpha / x^(alpha + 1), 0)
f_total <- function(x) f_body_spliced(x) + f_pareto(x)
## --- Plot: histogram + KDE for below M + overlays for body/pareto/total ---
# pick a wider max for the axis
xmax <- max(M * 3, quantile(X_sim, 0.9995, na.rm = TRUE)) # widen as needed
par(mar = c(4,4,2,1)) # optional: add a bit of right margin
hist(X_sim, breaks = 120, freq = FALSE,
border = "gray80", col = "gray95",
xlim = c(0, xmax), xaxs = "i", # "i" = no extra padding
main = "Spliced PDF with Pareto Tail Overlay", xlab = "Claim size")
abline(v = M, lty = 3)
# re-add your overlays after hist():
lines(density(X_sim[X_sim < M]), lwd = 2)
xs_left <- seq(0, M, length.out = 800)
xs_right <- seq(M, xmax, length.out = 800)
lines(xs_left, f_body_spliced(xs_left), lwd = 2, lty = 2)
lines(xs_right, f_pareto(xs_right), lwd = 2, lty = 3)
lines(c(xs_left, xs_right), f_total(c(xs_left, xs_right)), lwd = 2)
# Kernel density of simulated values (optional visual smooth)
lines(density(X_sim[X_sim < M]), lwd = 2) # default color = black
# Overlays from the model
xs_left <- seq(0, M, length.out = 800)
xs_right <- seq(M, xmax, length.out = 800)
# Body-only (spliced, below M)
lines(xs_left, f_body_spliced(xs_left), lwd = 2, lty = 2) # dashed
# Pareto-only (tail, >= M)
lines(xs_right, f_pareto(xs_right), lwd = 2, lty = 3) # dotted
# Total spliced pdf
lines(c(xs_left, xs_right), f_total(c(xs_left, xs_right)), lwd = 2)
abline(v = M, lty = 3)
legend("topright",
c("KDE (sim, <M)", "Body (spliced)", "Pareto tail", "Total spliced pdf"),
lty = c(1, 2, 3, 1), lwd = c(2, 2, 2, 2),
col = c("black", "black", "black", "black"),
bty = "n")