Load in the data
data <- readRDS("C:/Users/aidan/Downloads/data.rds")
View(data)
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.4.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.4.2
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.4.3
Manipulate data to represent Individual Claim Amount, X
# Estimated above reinsurance ($25,000)
EstM <- 0.23*nrow(data[data$clmcost=='0',])
# Total Claims Number
Total_claims <- sum(data$clmcost) + EstM*25000 ; Total_claims
## [1] 54798154
# Average Claim net reinsurance
Avg_claims <- (sum(data$clmcost)+EstM*25000)/(sum(data$numclms) + EstM) ; Avg_claims
## [1] 18261.91
# A is a list of all non-zero claims. X is a list of all claims UNDER retention.
A<- subset(data[c("numclms","clmcosts")], numclms>0)
X <- c()
for (row in 1:nrow(A)) {
if (A[row,1]==1) {
X <- append(X, A[row,2])
} else {
for (claimno in 1:A[row,1]) {
X <- append(X,A[row,2]/A[row,1])
}
}
}
hist(X, main = "Non-Zero claims without retention")
Log-normal Model Fit:
(Log-normal model fitted under MLE, MME, MSE)
library(fitdistrplus)
min(X)
## [1] 10
max(X)
## [1] 7355
# Log normal Distribution:
fit_ln <- fitdist(X, "lnorm", method = "mme") # Moment Matching Estimation
fit_ln2 <- fitdist(X, "lnorm", method = "mle") # Method Maxium Likelihood
fit_ln3 <- fitdist(X, "lnorm", method = "mse") # Maxium goodness of fit
gofstat(list(fit_ln, fit_ln2, fit_ln3), fitnames = c("MME","MLE", "MSE"))
## Goodness-of-fit statistics
## MME MLE MSE
## Kolmogorov-Smirnov statistic 0.1933909 0.07359625 0.1434873
## Cramer-von Mises statistic 8.3596999 1.39874800 5.7000458
## Anderson-Darling statistic 132.7899165 9.43582444 61.0036246
##
## Goodness-of-fit criteria
## MME MLE MSE
## Akaike's Information Criterion 13345.70 12691.55 12873.75
## Bayesian Information Criterion 13355.15 12701.01 12883.20
par(mfrow = c(3,3))
plot(fit_ln)
plot(fit_ln2)
plot(fit_ln3)
# on a level of AIC, BIC and other, MLE dominates the other methods
gofstat(fit_ln2)
## Goodness-of-fit statistics
## 1-mle-lnorm
## Kolmogorov-Smirnov statistic 0.07359625
## Cramer-von Mises statistic 1.39874800
## Anderson-Darling statistic 9.43582444
##
## Goodness-of-fit criteria
## 1-mle-lnorm
## Akaike's Information Criterion 12691.55
## Bayesian Information Criterion 12701.01
gof_ln <- gofstat(fit_ln2)
# Code written for quick change between models
model <- fit_ln2
set.seed(1234)
sim1<- rlnorm(length(X), meanlog = model$estimate[1], sdlog = model$estimate[2])
par(mfrow=c(2,2))
# choose a common upper limit
xlim <- c(0, 8000)
par(mfrow = c(1,2), mar=c(4,4,2,1))
hist(X, xlim = xlim, xaxs = "i", main="Histogram of X", breaks = 50)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1", breaks = 50)
par(mfrow=c(1,1))
xmax <- max(c(X, sim1))
xlim <- c(0, xmax)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1 non scaled x axis")
# Modelled and Simulation quantiles compared to observed quantiles
data.frame( Modelled = round(c(qlnorm(0, meanlog = model$estimate[1], sdlog = model$estimate[2]), qlnorm(0.25, meanlog = model$estimate[1], sdlog = model$estimate[2] ), qlnorm(0.5,meanlog = model$estimate[1], sdlog = model$estimate[2]), qlnorm(0.75, meanlog = model$estimate[1], sdlog = model$estimate[2]), qlnorm(1, meanlog = model$estimate[1], sdlog = model$estimate[2])),1), Simulation = round((quantile(sim1)),1), Observed = quantile(X))
summary(model)
## Fitting of the distribution ' lnorm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## meanlog 5.697840 0.05595784
## sdlog 1.616978 0.03956810
## Loglikelihood: -6343.777 AIC: 12691.55 BIC: 12701.01
## Correlation matrix:
## meanlog sdlog
## meanlog 1 0
## sdlog 0 1
# fitted distribution mean
exp(model$estimate[1]+0.5*model$estimate[2]^2)
## meanlog
## 1102.294
# mean of simulation
data.frame(statistic = c("mean", "standard deviation"), lognormal_Sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
Exponential Fitting under only MLE:
# Fitting of exponential distribution
x <- as.numeric(X)
x <- x[is.finite(x) & x >= 0]
x_pos <- x[x > 0]
fit_exp <- fitdist(
x_pos, "exp",
start = list(rate = 1/mean(x_pos)),
lower = 1e-12, # keep rate > 0
method = "mle",
optim.method = "Nelder-Mead" # more stable for 1-param cases
)
summary(fit_exp)
## Fitting of the distribution ' exp ' by maximum likelihood
## Parameters :
## estimate
## rate 0.001272567
## Loglikelihood: -6401.71 AIC: 12805.42 BIC: 12810.15
gofstat(fit_exp)
## Goodness-of-fit statistics
## 1-mle-exp
## Kolmogorov-Smirnov statistic 0.1489007
## Cramer-von Mises statistic 6.3393067
## Anderson-Darling statistic 46.8470937
##
## Goodness-of-fit criteria
## 1-mle-exp
## Akaike's Information Criterion 12805.42
## Bayesian Information Criterion 12810.15
gof_exp <- gofstat(fit_exp)
coef(fit_exp)
## rate
## 0.001272567
plot(fit_exp)
# Code written for quick change between models
model <- fit_exp
set.seed(1234)
sim1<- rexp(length(X), rate = fit_exp$estimate)
par(mfrow=c(2,2))
# choose a common upper limit
xlim <- c(0, 8000)
par(mfrow = c(1,2), mar=c(4,4,2,1))
hist(X, xlim = xlim, xaxs = "i", main="Histogram of X", breaks = 50)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1", breaks = 50)
par(mfrow=c(1,1))
xmax <- max(c(X, sim1))
xlim <- c(0, xmax)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1 non scaled x axis")
# Simulation quantiles compared to observed quantiles
data.frame( Modelled = round(c(qexp(0, rate = fit_exp$estimate), qexp(0.25, rate = fit_exp$estimate ), qexp(0.5, rate = fit_exp$estimate), qexp(0.75, rate = fit_exp$estimate), qexp(1, rate = fit_exp$estimate)),1), Simulation = round((quantile(sim1)),1), Observed = quantile(X))
summary(model)
## Fitting of the distribution ' exp ' by maximum likelihood
## Parameters :
## estimate
## rate 0.001272567
## Loglikelihood: -6401.71 AIC: 12805.42 BIC: 12810.15
# pure mean under fitted model
1/fit_exp$estimate
## rate
## 785.8132
# observed mean
mean(X)
## [1] 785.8132
# mean of simulation
data.frame(statistic = c("mean", "standard deviation"), Exponetial_sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
Gamma Fitting through MME:
### Gamma ###
fit_gam <- fitdist(X, "gamma", method = "mme")
gofstat(fit_gam)
## Goodness-of-fit statistics
## 1-mme-gamma
## Kolmogorov-Smirnov statistic 0.07455777
## Cramer-von Mises statistic 0.21349555
## Anderson-Darling statistic 3.47019630
##
## Goodness-of-fit criteria
## 1-mme-gamma
## Akaike's Information Criterion 12679.84
## Bayesian Information Criterion 12689.29
gof_gamma <- gofstat(fit_gam)
summary(fit_gam)
## Fitting of the distribution ' gamma ' by matching moments
## Parameters :
## estimate Std. Error
## shape 0.5456635627 4.494607e-02
## rate 0.0006943935 6.580093e-05
## Loglikelihood: -6337.918 AIC: 12679.84 BIC: 12689.29
## Correlation matrix:
## shape rate
## shape 1.0000000 0.8692414
## rate 0.8692414 1.0000000
plot(fit_gam)
# Code written for quick change between models
model <- fit_gam
set.seed(1234)
fit_gam$estimate
## shape rate
## 0.5456635627 0.0006943935
sim1<- rgamma(length(X), shape = fit_gam$estimate[1], rate = fit_gam$estimate[2])
# Graph
par(mfrow=c(2,2))
# choose a common upper limit
xlim <- c(0, 8000)
par(mfrow = c(1,2), mar=c(4,4,2,1))
hist(X, xlim = xlim, xaxs = "i", main="Histogram of X", breaks = 50)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1", breaks = 50)
par(mfrow=c(1,1))
xmax <- max(c(X, sim1))
xlim <- c(0, xmax)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1 non scaled x axis")
# Simulation quantiles compared to observed quantiles
data.frame(
Modelled = round(c(qgamma(0, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(0.25, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(0.5, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(0.75,shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]), qgamma(1, shape = fit_gam$estimate[1], rate = fit_gam$estimate[2])),1),
Simulation = round((quantile(sim1)),1),
Observed = quantile(X)
)
summary(model)
## Fitting of the distribution ' gamma ' by matching moments
## Parameters :
## estimate Std. Error
## shape 0.5456635627 4.494607e-02
## rate 0.0006943935 6.580093e-05
## Loglikelihood: -6337.918 AIC: 12679.84 BIC: 12689.29
## Correlation matrix:
## shape rate
## shape 1.0000000 0.8692414
## rate 0.8692414 1.0000000
# fitted distribution mean
mean(sim1)
## [1] 784.6038
# Mean and variance of simulation compared to Observed
data.frame(statistic = c("mean", "standard deviation"), Gamma_Sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
Weibull Distribution through MLE:
fit_wei <- fitdist(X, "weibull", method = "mle")
gofstat(fit_wei)
## Goodness-of-fit statistics
## 1-mle-weibull
## Kolmogorov-Smirnov statistic 0.04605632
## Cramer-von Mises statistic 0.23584832
## Anderson-Darling statistic 2.50396357
##
## Goodness-of-fit criteria
## 1-mle-weibull
## Akaike's Information Criterion 12652.32
## Bayesian Information Criterion 12661.77
gof_wei <- gofstat(fit_wei)
plot(fit_wei)
model <- fit_wei
set.seed(1234)
fit_wei$estimate
## shape scale
## 0.7330834 644.9072807
sim1<- rweibull(length(X), shape = fit_wei$estimate[1], scale = fit_wei$estimate[2])
# choose a common upper limit
xlim <- c(0, 8000)
par(mfrow = c(1,2), mar=c(4,4,2,1))
hist(X, xlim = xlim, xaxs = "i", main="Histogram of X", breaks = 50)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1", breaks = 50)
par(mfrow=c(1,1))
xmax <- max(c(X, sim1))
xlim <- c(0, xmax)
hist(sim1, xlim = xlim, xaxs = "i", main="Histogram of sim1 non scaled x axis")
# Modelled, Simulation quantiles compared to observed quantiles
data.frame(
Modelled = round(c(qweibull(0, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]), qweibull(0.25, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2] ), qweibull(0.5, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]), qweibull(0.75, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]), qweibull(1, shape = fit_wei$estimate[1], scale = fit_wei$estimate[2])),1),
Simulation = round((quantile(sim1)),1),
Observed = quantile(X)
)
summary(model)
## Fitting of the distribution ' weibull ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 0.7330834 0.01975975
## scale 644.9072807 32.13525400
## Loglikelihood: -6324.159 AIC: 12652.32 BIC: 12661.77
## Correlation matrix:
## shape scale
## shape 1.0000000 0.3205504
## scale 0.3205504 1.0000000
# fitted distribution mean
mean(sim1)
## [1] 771.2906
# Mean and variance of simulation compared to Observed
data.frame(statistic = c("mean", "standard deviation"), Weibull_sim = c(round(mean(sim1),1), round(sd(sim1),1)), Observed = c(round(mean(X),1), round(sd(X),1)))
Goodness-of-fit Summary table for 4 models:
library(caret)
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
set.seed(1234)
data.frame(Goodness_of_fit = c("AIC", "log-likelihood", "RMSE", "AD - Test", "KS - Test") ,
Lognormal = c(round(fit_ln2$aic,1), round(fit_ln2$loglik,1), round(RMSE(rlnorm(length(X), meanlog = fit_ln2$estimate[1], sdlog = fit_ln2$estimate[2]), X),1), gof_ln$adtest, gof_ln$kstest),
Exponetial = c(round(fit_exp$aic,1), round(fit_exp$loglik,1), round(RMSE(rexp(length(X), rate = fit_exp$estimate),X),1), gof_exp$adtest, gof_exp$kstest),
Gamma = c(round(fit_gam$aic,1), round(fit_gam$loglik,1), round(RMSE(rgamma(length(X), shape = fit_gam$estimate[1], rate = fit_gam$estimate[2]),X),1), gof_gamma$adtest, gof_gamma$kstest),
Weibull = c(round(fit_wei$aic,1), round(fit_wei$loglik,1), round(RMSE(rweibull(length(X), shape = fit_wei$estimate[1], scale = fit_wei$estimate[2]),X),1), gof_wei$adtest, gof_wei$kstest)
)
# Assessing GLM fit for Poisson, try incoporate covariates in rate estimation
B<- subset(data, numclms>0)
B$status <- factor(B$status)
B$type <- factor(B$type)
B$licyrs <- as.integer(B$licyrs)
B$fuel <- factor(B$fuel)
# poisson family cuz >1 claims possible (can't be binomial)
B.glm <- glm(numclms ~ status + drvage + ncd + licyrs + vehage + type + fuel + exposure, data = B, family = "poisson")
summary(B.glm)
##
## Call:
## glm(formula = numclms ~ status + drvage + ncd + licyrs + vehage +
## type + fuel + exposure, family = "poisson", data = B)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.0678769 0.2927988 0.232 0.817
## statusMarried -0.0636297 0.2341879 -0.272 0.786
## statusSingle -0.1107747 0.2885774 -0.384 0.701
## statusWidow 0.0412625 0.3501313 0.118 0.906
## drvage -0.0005002 0.0035515 -0.141 0.888
## ncd -0.0126230 0.0249743 -0.505 0.613
## licyrs 0.0029012 0.0259188 0.112 0.911
## vehage -0.0006595 0.0155972 -0.042 0.966
## typeB -0.0647488 0.3602613 -0.180 0.857
## typeC 0.0618769 0.1593522 0.388 0.698
## typeD 0.0254617 0.0936180 0.272 0.786
## typeE 0.0028157 0.0930145 0.030 0.976
## typeF -0.0346429 0.4538569 -0.076 0.939
## typeG 0.1572354 0.1833972 0.857 0.391
## typeH 0.1097014 0.3894203 0.282 0.778
## fuelLPG 0.6512332 0.7140966 0.912 0.362
## fuelPetrol 0.0706388 0.2662102 0.265 0.791
## exposure 0.1248079 0.1512130 0.825 0.409
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 34.344 on 789 degrees of freedom
## Residual deviance: 31.279 on 772 degrees of freedom
## AIC: 1674.4
##
## Number of Fisher Scoring iterations: 4
# under MLE for Poisson
var(data$numclms)
## [1] 0.08433952
mean(data$numclms)
## [1] 0.08181462
# Similar mean and variance so we use Poisson MLE
RateEst <- sum(data$numclms)/sum(data$exposure)
# Following on from original code with the addition of the retention level cap:
numRet <- round(EstM, digits=0)
X<-append(X,rep(25000,times=numRet))
hist(X,main = "Non-Zero claims with retention", breaks = 30)
# new lambda estimate:
lambda <- length(X)/sum(data$exposure)
lambda
## [1] 0.5732022
## ---------------------------------------------------------------
## Inputs
## ---------------------------------------------------------------
M <- 25000 # retention / cap
X <- as.numeric(X) # severities
lambda <- length(X)/sum(data$exposure) # Under Poisson MLE
## Normalized severities for exposure-curve fit
Z <- pmin(X, M) / M
Z_obs <- Z[Z < 1]
ec <- ecdf(Z_obs)
grid <- seq(0, max(Z_obs), length.out = 200)
Fn_emp <- ec(grid)
## ---------------------------------------------------------------
## Exposure curves
## ---------------------------------------------------------------
F_swiss <- function(z, alpha) 1 - (1 - pmin(pmax(z,0),1))^alpha # alpha>0
F_lloyds <- function(z, beta) pmin(pmax(z,0),1)^beta # beta>=1
sse_swiss <- function(par) { alpha <- exp(par); mean((F_swiss(grid, alpha) - Fn_emp)^2) }
sse_lloyds <- function(par) { beta <- 1 + exp(par); mean((F_lloyds(grid, beta) - Fn_emp)^2) }
set.seed(1)
fit_sw <- optim(par = log(0.3), fn = sse_swiss, method = "BFGS")
fit_ll <- optim(par = log(2 - 1.0), fn = sse_lloyds, method = "BFGS")
alpha_hat <- exp(fit_sw$par)
beta_hat <- 1 + exp(fit_ll$par)
## Limited expected value from each curve: LEV(M) = M * ∫_0^1 [1 - F(z)] dz
area_sw <- integrate(function(z) 1 - F_swiss(z, alpha_hat), 0, 1)$value
area_ll <- integrate(function(z) 1 - F_lloyds(z, beta_hat), 0, 1)$value
LEV_M_swiss <- M * area_sw
LEV_M_lloyds <- M * area_ll
## ---------------------------------------------------------------
## Exceedance probability at M (observable)
## ---------------------------------------------------------------
S_M <- length(X[X>=M])/length(X) # proportion of capped observations among non-zero claims
S_M
## [1] 0.7217594
## ---------------------------------------------------------------
## Pareto tail continuation above M
## E[X - M | X > M] = M / (alpha_tail - 1), alpha_tail > 1
## With sensitivity testing to alpha levels
## ---------------------------------------------------------------
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3) # sensitivity set, smaller the value of alpha the heavier the tail, i.e. more prudent (Changes Pareto distribition, that is the distribution of claims above M)
EX_ceded_per_claim <- function(alpha_tail) {
if (alpha_tail <= 1) return(Inf)
S_M * (M / (alpha_tail - 1))
}
EX_ceded_list <- sapply(alpha_tail_vec, EX_ceded_per_claim)
names(EX_ceded_list) <- paste0("alpha_", alpha_tail_vec)
## Combine with retained LEV from exposure curves
LEV_ret <- c(Swiss = LEV_M_swiss, Lloyds = LEV_M_lloyds)
## Ground-up mean severity for each curve & tail alpha:
make_summary <- function(LEV_retained) {
EX_ground <- LEV_retained + EX_ceded_list # per claim
cbind(
LEV_retained = LEV_retained,
EX_ceded = EX_ceded_list,
EX_ground = EX_ground,
PP_retained = lambda * LEV_retained,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * EX_ground
)
}
summary_swiss <- make_summary(LEV_M_swiss)
summary_lloyds <- make_summary(LEV_M_lloyds)
summary_swiss
## LEV_retained EX_ceded EX_ground PP_retained PP_ceded PP_ground
## alpha_1.1 110.0596 180439.853 180549.913 63.08642 103428.517 103491.603
## alpha_1.2 110.0596 90219.927 90329.986 63.08642 51714.258 51777.345
## alpha_1.5 110.0596 36087.971 36198.030 63.08642 20685.703 20748.790
## alpha_1.7 110.0596 25777.122 25887.182 63.08642 14775.502 14838.589
## alpha_2 110.0596 18043.985 18154.045 63.08642 10342.852 10405.938
## alpha_2.2 110.0596 15036.654 15146.714 63.08642 8619.043 8682.129
## alpha_2.5 110.0596 12029.324 12139.383 63.08642 6895.234 6958.321
## alpha_3 110.0596 9021.993 9132.052 63.08642 5171.426 5234.512
summary_lloyds
## LEV_retained EX_ceded EX_ground PP_retained PP_ceded PP_ground
## alpha_1.1 12500 180439.853 192939.85 7165.027 103428.517 110593.54
## alpha_1.2 12500 90219.927 102719.93 7165.027 51714.258 58879.29
## alpha_1.5 12500 36087.971 48587.97 7165.027 20685.703 27850.73
## alpha_1.7 12500 25777.122 38277.12 7165.027 14775.502 21940.53
## alpha_2 12500 18043.985 30543.99 7165.027 10342.852 17507.88
## alpha_2.2 12500 15036.654 27536.65 7165.027 8619.043 15784.07
## alpha_2.5 12500 12029.324 24529.32 7165.027 6895.234 14060.26
## alpha_3 12500 9021.993 21521.99 7165.027 5171.426 12336.45
## ---------------------------------------------------------------
## Simple plots
## ---------------------------------------------------------------
par(mfrow=c(1,2))
plot(ec, main = "ECDF of Z and fitted curves", xlab = "z = X/M", ylab = "F(z)")
curve(F_swiss(x, alpha_hat), add = TRUE, col = 2, lwd = 2)
curve(F_lloyds(x, beta_hat), add = TRUE, col = 4, lwd = 2)
legend("bottomright", c("Empirical", "Swiss (fit)", "Lloyd's (fit)"),
lty = 1, col = c(1,2,4), bty = "n")
curve(1 - F_swiss(x, alpha_hat), 0, 1, n = 200, col = 2, lwd = 2,
main = "1 - F(z) (area × M = LEV(M))", xlab = "z", ylab = "1 - F(z)")
curve(1 - F_lloyds(x, beta_hat), add = TRUE, col = 4, lwd = 2)
legend("topright", c("Swiss", "Lloyd's"), col = c(2,4), lty = 1, bty = "n")
make_summary <- function(lev) {
rn <- names(EX_ceded_list)
df <- data.frame(
alpha = alpha_tail_vec,
LEV_retained = lev,
EX_ceded = EX_ceded_list,
EX_ground = lev + EX_ceded_list,
PP_retained = lambda * lev,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * (lev + EX_ceded_list)
)
rownames(df) <- rn
df
}
summary_swiss <- make_summary(LEV_M_swiss)
summary_lloyds <- make_summary(LEV_M_lloyds)
round(summary_swiss, 2)
round(summary_lloyds, 2)
## 1) Tail weight among non-zero claims
S_M <- length(X[X >= M & X > 0]) / length(X[X > 0])
lambda <- length(X)/sum(data$exposure)
lambda
## [1] 0.5732022
## 2) Pareto EX above M
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3)
EX_ceded_per_claim <- function(alpha_tail) {
if (alpha_tail <= 1) return(Inf)
S_M * (M / (alpha_tail - 1))
}
EX_ceded_list <- sapply(alpha_tail_vec, EX_ceded_per_claim)
names(EX_ceded_list) <- paste0("alpha_", alpha_tail_vec)
## 3) Summary builder
make_summary <- function(ex_retained) {
data.frame(
alpha = alpha_tail_vec,
EX_retained = ex_retained,
EX_ceded = EX_ceded_list,
EX_ground = ex_retained + EX_ceded_list,
PP_retained = lambda * ex_retained,
PP_ceded = lambda * EX_ceded_list,
PP_ground = lambda * (ex_retained + EX_ceded_list)
)
}
## WEIBULL (truncated at M) for X<=M
k <- unname(fit_wei$estimate[1]) # shape
th <- unname(fit_wei$estimate[2]) # scale
# FULL-distribution LEV up to u for Weibull Dist.
ex_weibull_full <- function(u, shape, scale) {
t <- (u/scale)^shape
scale * gamma(1 + 1/shape) * pgamma(t, shape = 1 + 1/shape, rate = 1) +
u * exp(-t)
}
# Body CDF at M
F_M_weib <- pweibull(M, shape = k, scale = th)
# Retained LEV at u = M under the splice
# (1 - S_M) * [ FULL body LEV at M / F_b(M) ] + M * S_M
EX_M_weibull_retained <- (1 - S_M) * (ex_weibull_full(M, k, th) / F_M_weib) + M * S_M
summary_weibull <- make_summary(EX_M_weibull_retained)
round(summary_weibull, 1)
## GAMMA (truncated at M) for X<=M
shape <- unname(fit_gam$estimate[1])
rate <- unname(fit_gam$estimate[2])
# FULL-distribution LEV up to M for Gamma Dist.
EX_gamma_full <- function(u, shape, rate) {
term1 <- pgamma(rate * u, shape = shape + 1) / rate
term2 <- u * (1 - pgamma(rate * u, shape = shape))
term1 + term2
}
F_M_gamma <- pgamma(M, shape = shape, rate = rate)
EX_M_gamma_retained <- (1 - S_M) * (EX_gamma_full(M, shape, rate) / F_M_gamma) + M * S_M
summary_gamma <- make_summary(EX_M_gamma_retained)
round(summary_gamma, 1)
# Elbow Graph
plot(alpha_tail_vec, EX_ceded_list, type="b", pch=16, lwd=2,
xlab="Tail alpha", ylab="E[(X-M)^+]", main="Ceded Expectation vs Tail α")
abline(v=1.5, lty=3, col=8); text(1.5, max(EX_ceded_list), "α=1.5", pos=4, col=8)
S_M <- mean(X >= M & X > 0) # tail weight among non-zero claims
# WEIBULL body fit on X < M
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
# Gamme distribution has being hashed out in all cases below
# a <- unname(fit_gam$estimate[1]);
#b <- unname(fit_gam$estimate[2])
## ---- Body CDF/PDF helpers ----
F_b <- function(x) pweibull(x, shape = k, scale = th)
f_b <- function(x) dweibull(x, shape = k, scale = th)
# For Gamma instead:
# F_b <- function(x) pgamma(x, shape = a, rate = b)
# f_b <- function(x) dgamma(x, shape = a, rate = b)
F_M <- F_b(M); stopifnot(F_M > 0 && S_M < 1)
w <- 1 - S_M
## ---- Choose a Pareto tail shape alpha ----
# Option B: set alpha by judgement/sensitivity
alpha <- 1.5
stopifnot(alpha > 1)
# Truncated body parts:
F_M <- F_b(M); w <- 1 - S_M
f_b_trunc <- function(x) f_b(x) / F_M
F_b_trunc <- function(x) F_b(x) / F_M
S_b_trunc <- function(x) 1 - F_b_trunc(x)
# Spliced survival / pdf:
S_total <- function(x) ifelse(x < M, w * S_b_trunc(x) + S_M, S_M * (M/x)^alpha)
f_total <- function(x) ifelse(x < M, w * f_b_trunc(x), S_M * alpha * M^alpha / x^(alpha+1))
## Spliced PDF/CDF/Survival
# Truncated body pieces on [0, M)
f_b_trunc <- function(x) f_b(x) / F_M
F_b_trunc <- function(x) F_b(x) / F_M
S_b_trunc <- function(x) 1 - F_b_trunc(x)
# Survival (global)
S_total <- function(x) ifelse(
x < M,
w * S_b_trunc(x) + S_M, # below M
S_M * (M / x)^alpha # tail
)
# CDF (global)
F_total <- function(x) 1 - S_total(x)
# PDF (global)
f_total <- function(x) ifelse(
x < M,
w * f_b_trunc(x), # below M
S_M * alpha * M^alpha / (x^(alpha + 1)) # tail density
)
## Plot grids
xs <- sort(unique(c(
seq(0, M, length.out = 800),
seq(M, max(M*5, quantile(X[X>0], 0.999, na.rm=TRUE)), length.out = 800)
)))
# Base R plots
par(mfrow = c(1, 2))
# 1) PDF
plot(xs, f_total(xs), type="l", lwd=2, xlab="x", ylab="pdf f(x)",
main="Spliced PDF")
abline(v=M, lty=3)
# 2) CDF
plot(xs, F_total(xs), type="l", lwd=2, xlab="x", ylab="CDF F(x)",
main="Spliced CDF")
abline(v=M, lty=3)
# Vector of Pareto tail parameters
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2.0, 2.2, 2.5, 3)
# Common x-grid
xs <- sort(unique(c(
seq(0, M, length.out = 800),
seq(M, max(M*5, quantile(X[X>0], 0.999, na.rm=TRUE)), length.out = 800)
)))
# Set up layout
par(mfrow = c(1, 2), mar = c(5, 5, 2, 1))
cols <- rainbow(length(alpha_tail_vec))
# Expand x-axis by 10 %
xlim_expanded <- range(xs) * c(0.9, 1.1)
# PDF PLOT
plot(NA, NA,
xlim = xlim_expanded,
ylim = c(0, 0.0002),
xlab = "x", ylab = "PDF f(x)",
main = "Spliced PDF (stretched x-axis)")
abline(v = M, lty = 3)
for (i in seq_along(alpha_tail_vec)) {
alpha <- alpha_tail_vec[i]
f_total <- function(x) ifelse(
x < M,
(1 - S_M) * (dweibull(x, k, th) / pweibull(M, k, th)),
S_M * alpha * M^alpha / (x^(alpha + 1))
)
lines(xs, f_total(xs), col = cols[i], lwd = 2)
}
legend("topright", legend = paste0("α = ", alpha_tail_vec),
col = cols, lwd = 2, cex = 0.8, bty = "n")
# CDF PLOT
plot(NA, NA,
xlim = xlim_expanded,
ylim = c(0, 1),
xlab = "x", ylab = "CDF F(x)",
main = "Spliced CDF (stretched x-axis)")
abline(v = M, lty = 3)
for (i in seq_along(alpha_tail_vec)) {
alpha <- alpha_tail_vec[i]
S_total <- function(x) ifelse(
x < M,
(1 - S_M) * (1 - pweibull(x, k, th) / pweibull(M, k, th)) + S_M,
S_M * (M / x)^alpha
)
lines(xs, 1 - S_total(xs), col = cols[i], lwd = 2)
}
legend("bottomright", legend = paste0("α = ", alpha_tail_vec),
col = cols, lwd = 2, cex = 0.8, bty = "n")
## ----------------------------
## Spliced severity simulator
## ----------------------------
make_r_splice <- function(M, S_M, alpha, body = c("weibull","gamma"), pars) {
stopifnot(S_M >= 0, S_M < 1, alpha > 1)
body <- match.arg(body)
if (body == "weibull") {
k <- pars$shape; th <- pars$scale
F_b <- function(x) pweibull(x, shape=k, scale=th)
q_b <- function(p) qweibull(p, shape=k, scale=th)
} else {
a <- pars$shape; b <- pars$rate
F_b <- function(x) pgamma(x, shape=a, rate=b)
q_b <- function(p) qgamma(p, shape=a, rate=b)
}
F_M <- F_b(M); stopifnot(F_M > 0)
w <- 1 - S_M
function(n) {
u_mix <- runif(n)
x <- numeric(n)
# Body draws (Weibull truncated at M)
idx_b <- which(u_mix > S_M)
if (length(idx_b) > 0) {
u_body <- runif(length(idx_b), 0, F_M)
x[idx_b] <- q_b(u_body)
}
# Tail draws (Pareto )
idx_t <- which(u_mix <= S_M)
if (length(idx_t) > 0) {
u_tail <- runif(length(idx_t))
x[idx_t] <- M / (u_tail)^(1/alpha)
}
x
}
}
## ----------------------------
## Our context
## ----------------------------
set.seed(1)
M <- 25000
S_M <- mean(X >= M & X > 0)
alpha <- 1.5 # Chosen tail weight
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
# Simulator
r_splice_weib <- make_r_splice(
M=M, S_M=S_M, alpha=alpha,
body="weibull", pars=list(shape=k, scale=th)
)
# Simulate
n <- 1e5
X_sim <- r_splice_weib(n)
## ----------------------------
## Histogram with fitted Weibull overlay
## ----------------------------
par(mfrow = c(1, 2), mar = c(5,5,3,1))
# Histogram of observed data (only body portion)
hist(X[X < M], breaks = 40, freq = FALSE,
main = "Observed Data + Fitted Weibull", xlab = "Loss Amount")
# Overlay Weibull PDF (fitted to X < M)
xs <- seq(0, M, length.out = 400)
lines(xs, dweibull(xs, shape = k, scale = th), lwd = 2, col = "blue")
# Overlay simulated density for comparison
lines(density(X_sim[X_sim < M]), col = "red", lwd = 2, lty = 2)
legend("topright", c("Weibull fit", "Simulated (body)"),
col = c("blue","red"), lwd = 2, lty = c(1,2), cex = 0.8, bty = "n")
## ----------------------------
## Log–log survival (Pareto tail)
## ----------------------------
xs_tail <- seq(M, quantile(X_sim, 0.999), length.out = 300)
S_emp <- sapply(xs_tail, function(t) mean(X_sim >= t))
plot(xs_tail, S_emp, log = "xy", type = "l", lwd = 2,
xlab = "x (log scale)", ylab = "S(x) = P(X ≥ x) (log scale)",
main = "Simulated Pareto Tail (log–log)")
abline(v = M, lty = 3)
lines(xs_tail, S_M * (M / xs_tail)^alpha, col = "blue", lwd = 2, lty = 2)
legend("topright", c("Empirical (simulated)", "Theoretical Pareto"),
col = c("black", "blue"), lwd = 2, lty = c(1,2), cex = 0.8, bty = "n")
## ===================================================================================
## Variance of CAPPED Spliced Severity Model (Capped at 500,000, shown by blue line)
## ===================================================================================
set.seed(123)
# Inputs
M <- 25000 # splice threshold
Umax <- 500000 # cap level (change to test sensitivity)
X_nonzero <- X[X > 0 & is.finite(X)]
S_M <- length(X_nonzero[X_nonzero >= M]) / length(X_nonzero) # empirical tail weight
# Weibull body parameters
k <- unname(fit_wei$estimate[1])
th <- unname(fit_wei$estimate[2])
# alpha sensitivity vector
alpha_tail_vec <- c(1.1, 1.2, 1.5, 1.7, 2, 2.2, 2.5, 3)
# Helper generators
rweibull_trunc_upper <- function(n, shape, scale, M){
Fu <- pweibull(M, shape=shape, scale=scale)
u <- runif(n, 0, Fu)
qweibull(u, shape=shape, scale=scale)
}
rpareto_unlimited <- function(n, M, alpha){
M * (runif(n))^(-1/alpha)
}
# Simulation setup
n <- 1e6
u_mix <- runif(n)
is_tail <- (u_mix < S_M)
nb <- sum(!is_tail); nt <- sum(is_tail)
# Dist component (same for all α) (i.e. Weibull)
X_body <- if (nb > 0) rweibull_trunc_upper(nb, k, th, M) else numeric(0)
# Table initialisation
results <- data.frame(
alpha = alpha_tail_vec,
E_Xcap = NA_real_,
Var_Xcap = NA_real_,
SD_Xcap = NA_real_
)
# Main loop
for (i in seq_along(alpha_tail_vec)) {
alpha <- alpha_tail_vec[i]
# Tail simulation for this α
X_tail <- if (nt > 0) rpareto_unlimited(nt, M, alpha) else numeric(0)
# Combine body & tail
X_sim <- numeric(n)
if (nb > 0) X_sim[!is_tail] <- X_body
if (nt > 0) X_sim[is_tail] <- X_tail
# Apply cap
X_cap <- pmin(X_sim, Umax)
# Stats
results$E_Xcap[i] <- mean(X_cap)
results$Var_Xcap[i] <- var(X_cap)
results$SD_Xcap[i] <- sqrt(results$Var_Xcap[i])
}
# Output neatly formatted
print(
data.frame(
alpha = results$alpha,
Mean = format(round(results$E_Xcap, 2), big.mark=","),
Variance = format(round(results$Var_Xcap, 2), big.mark=","),
SD = format(round(results$SD_Xcap, 2), big.mark=",")
),
row.names = FALSE
)
## alpha Mean Variance SD
## 1.1 64,908.53 10,069,449,370 100,346.65
## 1.2 58,951.82 8,238,067,165 90,763.80
## 1.5 46,301.13 4,569,953,885 67,601.43
## 1.7 40,901.97 3,161,783,565 56,229.74
## 2.0 35,480.31 1,924,919,865 43,873.91
## 2.2 32,868.65 1,393,530,662 37,330.02
## 2.5 30,161.10 939,174,869 30,645.96
## 3.0 27,286.24 567,178,599 23,815.51
# Graphing (one plot per alpha)
par(mfrow = c(2, 2))
for (i in seq_along(alpha_tail_vec)) {
alpha <- alpha_tail_vec[i]
X_tail <- if (nt > 0) rpareto_unlimited(nt, M, alpha) else numeric(0)
X_sim <- numeric(n)
if (nb > 0) X_sim[!is_tail] <- X_body
if (nt > 0) X_sim[is_tail] <- X_tail
X_cap <- pmin(X_sim, Umax)
hist(X_cap[X_cap < Umax], breaks=80, freq=FALSE,
main=sprintf("α = %.1f", alpha),
xlab="Claim Size", col="lightgrey", border="white")
abline(v=c(M,Umax), col=c("red","blue"), lty=2, lwd=2)
legend("topright", legend=c("Splice M","Cap Umax"), col=c("red","blue"), lwd=2, lty=2)
}