library(ChainLadder)
##
## Welcome to ChainLadder version 0.2.18
##
##
## To cite package 'ChainLadder' in publications use:
##
## Gesmann M, Murphy D, Zhang Y, Carrato A, Wuthrich M, Concina F, Dal
## Moro E (2023). _ChainLadder: Statistical Methods and Models for
## Claims Reserving in General Insurance_. R package version 0.2.18,
## <https://CRAN.R-project.org/package=ChainLadder>.
##
## To suppress this message use:
## suppressPackageStartupMessages(library(ChainLadder))
data(auto)
auto
## $PersonalAutoPaid
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 101125 209921 266618 305107 327850 340669 348430 351193 353353 353584
## [2,] 102541 203213 260677 303182 328932 340948 347333 349813 350523 NA
## [3,] 114932 227704 298120 345542 367760 377999 383611 385224 NA NA
## [4,] 114452 227761 301072 340669 359979 369248 373325 NA NA NA
## [5,] 115597 243611 315215 354490 372376 382738 NA NA NA NA
## [6,] 127760 259416 326975 365780 386725 NA NA NA NA NA
## [7,] 135616 262294 327086 367357 NA NA NA NA NA NA
## [8,] 127177 244249 317972 NA NA NA NA NA NA NA
## [9,] 128631 246803 NA NA NA NA NA NA NA NA
## [10,] 126288 NA NA NA NA NA NA NA NA NA
##
## $PersonalAutoIncurred
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 325423 336426 346061 347726 350995 353598 354797 355025 354986 355363
## [2,] 323627 339267 344507 349295 351038 351583 352050 352231 352193 NA
## [3,] 358410 386330 385684 384699 387678 387954 388540 389436 NA NA
## [4,] 405319 396641 391833 384819 380914 380163 379706 NA NA NA
## [5,] 434065 429311 422181 409322 394154 392802 NA NA NA NA
## [6,] 417178 422307 413486 406711 406503 NA NA NA NA NA
## [7,] 398929 398787 398020 400540 NA NA NA NA NA NA
## [8,] 378754 361097 369328 NA NA NA NA NA NA NA
## [9,] 351081 335507 NA NA NA NA NA NA NA NA
## [10,] 329236 NA NA NA NA NA NA NA NA NA
##
## $CommercialAutoPaid
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,] 19827 44449 61205 77398 88079 95695 99853 104789 105427 106690
## [2,] 22331 48480 68789 92356 104958 112399 115638 117415 118571 NA
## [3,] 22533 44484 65691 88435 102044 112672 115973 118359 NA NA
## [4,] 23128 51328 81542 98063 113149 121515 124347 NA NA NA
## [5,] 25053 57220 84607 104936 117663 126180 NA NA NA NA
## [6,] 30136 64767 92288 108835 121326 NA NA NA NA NA
## [7,] 34764 69125 91354 111987 NA NA NA NA NA NA
## [8,] 31803 63471 92439 NA NA NA NA NA NA NA
## [9,] 40559 77667 NA NA NA NA NA NA NA NA
## [10,] 46285 NA NA NA NA NA NA NA NA NA
personal_paid=as.triangle(auto$PersonalAutoPaid)
commercial_paid=as.triangle(auto$CommercialAutoPaid)
incremental_personal_paid=cum2incr(personal_paid)
incremental_commercial_paid=cum2incr(commercial_paid)
plot(personal_paid)
plot(commercial_paid)
plot(incremental_personal_paid)
plot(incremental_commercial_paid)
plot(personal_paid,lattice=TRUE)
plot(commercial_paid,lattice=TRUE)
#Test for Independence of Subsequent Dev Factors before modeling: Triangle 1 failed the test, later it might not meet Assumption 2
cortest_1=dfCorTest(personal_paid,ci=0.95)
plot(cortest_1)
summary(cortest_1)
## $Results
## Value
## T 0.54965986
## E[T] 0.00000000
## Var[T] 0.03571429
##
## $Range
## Value
## Lower -0.3703984
## Upper 0.3703984
print(cortest_1)
## Development Factor Correlation
##
## T = 0.5496599
##
## 95%-Range = ( -0.3703984 ; 0.3703984 )
##
## Development Factor Correlation: TRUE
cortest_2=dfCorTest(commercial_paid,ci=0.95)
plot(cortest_2)
summary(cortest_2)
## $Results
## Value
## T 0.35034014
## E[T] 0.00000000
## Var[T] 0.03571429
##
## $Range
## Value
## Lower -0.3703984
## Upper 0.3703984
print(cortest_2)
## Development Factor Correlation
##
## T = 0.3503401
##
## 95%-Range = ( -0.3703984 ; 0.3703984 )
##
## Development Factor Correlation: FALSE
#Test for calendar year effect before modeling: Triangle 1 failed the test, suggesting it might violate Assumption 1
indep_1=cyEffTest(personal_paid,ci=0.95)
print(indep_1)
## Calendar Year Effect
##
## Z = 5
##
## 95%-Range = ( 8.914978 ; 16.08502 )
##
## Calendar Year Effect: TRUE
plot(indep_1)
indep_2=cyEffTest(commercial_paid,ci=0.95)
print(indep_2)
## Calendar Year Effect
##
## Z = 11
##
## 95%-Range = ( 8.914978 ; 16.08502 )
##
## Calendar Year Effect: FALSE
plot(indep_2)
mack1=MackChainLadder(personal_paid,est.sigma = "Mack")
mack1
## MackChainLadder(Triangle = personal_paid, est.sigma = "Mack")
##
## Latest Dev.To.Date Ultimate IBNR Mack.S.E CV(IBNR)
## 1 353,584 1.000 353,584 0 0 NaN
## 2 350,523 0.999 350,752 229 998 4.3544
## 3 385,224 0.995 387,054 1,830 1,713 0.9360
## 4 373,325 0.989 377,481 4,156 1,885 0.4537
## 5 382,738 0.973 393,454 10,716 2,872 0.2680
## 6 386,725 0.943 409,932 23,207 3,847 0.1658
## 7 367,357 0.887 414,305 46,948 6,405 0.1364
## 8 317,972 0.780 407,609 89,637 9,177 0.1024
## 9 246,803 0.607 406,593 159,790 12,532 0.0784
## 10 126,288 0.305 414,021 287,733 19,085 0.0663
##
## Totals
## Latest: 3,290,539.00
## Dev: 0.84
## Ultimate: 3,914,785.82
## IBNR: 624,246.82
## Mack.S.E 30,358.21
## CV(IBNR): 0.05
plot(mack1) #residual plots for Assumption 3 verification
plot(mack1, lattice=TRUE)
mack2=MackChainLadder(commercial_paid,est.sigma = "Mack")
mack2
## MackChainLadder(Triangle = commercial_paid, est.sigma = "Mack")
##
## Latest Dev.To.Date Ultimate IBNR Mack.S.E CV(IBNR)
## 1 106,690 1.000 106,690 0 0 NaN
## 2 118,571 0.988 119,991 1,420 66 0.0465
## 3 118,359 0.980 120,744 2,385 387 0.1622
## 4 124,347 0.954 130,335 5,988 2,540 0.4241
## 5 126,180 0.926 136,302 10,122 2,861 0.2826
## 6 121,326 0.856 141,667 20,341 3,491 0.1716
## 7 111,987 0.754 148,471 36,484 4,211 0.1154
## 8 92,439 0.603 153,230 60,791 8,896 0.1463
## 9 77,667 0.419 185,254 107,587 13,705 0.1274
## 10 46,285 0.201 229,947 183,662 20,180 0.1099
##
## Totals
## Latest: 1,043,851.00
## Dev: 0.71
## Ultimate: 1,472,631.58
## IBNR: 428,780.58
## Mack.S.E 31,488.35
## CV(IBNR): 0.07
plot(mack2) #residual plots
plot(mack2,lattice=TRUE)
CDR(mack2)
CDR(mack2,dev="all")
#C-C plots for personal auto claims (full triangle) to verify Assumption 2
personal_C_i1=mack1$FullTriangle[1,]
personal_C_i2=mack1$FullTriangle[2,]
df1=data.frame(personal_C_i2,personal_C_i1)
personal_C_i3=mack1$FullTriangle[3,]
df2=data.frame(personal_C_i3,personal_C_i2)
personal_C_i4=mack1$FullTriangle[4,]
df3=data.frame(personal_C_i4,personal_C_i3)
personal_C_i9=mack1$FullTriangle[9,]
personal_C_i10=mack1$FullTriangle[10,]
df4=data.frame(personal_C_i10,personal_C_i9)
m1=lm(personal_C_i2~personal_C_i1,data=df1)
with(df1,plot(personal_C_i1,personal_C_i2))
abline(m1,col = 4, lwd = 3)
m2=lm(personal_C_i3~personal_C_i2,data=df2)
with(df1,plot(personal_C_i2,personal_C_i3))
abline(m1,col = 3, lwd = 3)
m1=lm(personal_C_i4~personal_C_i3,data=df3)
with(df1,plot(personal_C_i3,personal_C_i4))
abline(m1,col = 2, lwd = 3)
m1=lm(personal_C_i10~personal_C_i9,data=df4)
with(df1,plot(personal_C_i9,personal_C_i10))
abline(m1,col = 5, lwd = 3)
#basically the same thing but in a more clever way
m11=(mack1[["Models"]][[1]])
summary(m11)
##
## Call:
## lm(formula = y ~ x + 0, data = data.frame(x = Triangle[, i],
## y = Triangle[, i + 1]), weights = weights[, i]/Triangle[,
## i]^delta[i])
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -25.572 -20.584 -2.631 14.478 39.925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.98999 0.02232 89.16 2.8e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.06 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.999, Adjusted R-squared: 0.9989
## F-statistic: 7949 on 1 and 8 DF, p-value: 2.795e-13
op=par(mfrow=c(2,2))
plot(m11)
par(op)
Alternative: Bootstrap
#Bootstrap for personal triangle
b1=BootChainLadder(personal_paid,R=999,process.distr = "gamma")
b1
## BootChainLadder(Triangle = personal_paid, R = 999, process.distr = "gamma")
##
## Latest Mean Ultimate Mean IBNR IBNR.S.E IBNR 75% IBNR 95%
## 1 353,584 353,584 0 0 0 0
## 2 350,523 350,715 192 476 316 1,086
## 3 385,224 387,085 1,861 1,263 2,622 4,307
## 4 373,325 377,544 4,219 1,732 5,317 7,416
## 5 382,738 393,526 10,788 2,642 12,497 15,162
## 6 386,725 410,056 23,331 3,950 25,909 30,177
## 7 367,357 414,619 47,262 5,597 50,739 57,114
## 8 317,972 407,522 89,550 7,543 94,546 102,594
## 9 246,803 406,530 159,727 11,725 167,237 179,017
## 10 126,288 413,514 287,226 21,907 302,305 325,892
##
## Totals
## Latest: 3,290,539
## Mean Ultimate: 3,914,696
## Mean IBNR: 624,157
## IBNR.S.E 29,142
## Total IBNR 75%: 643,210
## Total IBNR 95%: 674,718
plot(b1)
quantile(b1, c(0.75,0.95,0.99, 0.995))
## $ByOrigin
## IBNR 75% IBNR 95% IBNR 99% IBNR 99.5%
## 1 0.000 0.000 0.000 0.000
## 2 316.382 1085.749 1881.005 2110.217
## 3 2622.095 4306.882 5708.486 6113.093
## 4 5317.002 7415.863 8518.720 9088.369
## 5 12496.705 15162.100 18214.033 18973.197
## 6 25908.746 30176.553 33572.218 35247.916
## 7 50738.764 57114.493 61139.995 62037.959
## 8 94545.696 102594.021 106959.431 109424.251
## 9 167237.105 179016.808 187830.259 191326.841
## 10 302304.763 325892.361 337280.218 341847.706
##
## $Totals
## Totals
## IBNR 75%: 643210.4
## IBNR 95%: 674718.1
## IBNR 99%: 697112.0
## IBNR 99.5%: 701600.6
library(MASS)
plot(ecdf(b1$IBNR.Totals))
fit <- fitdistr(b1$IBNR.Totals[b1$IBNR.Totals>0], "lognormal")
fit
## meanlog sdlog
## 13.343071048 0.046604024
## ( 0.001474486) ( 0.001042619)
curve(plnorm(x,fit$estimate["meanlog"], fit$estimate["sdlog"]),
col="red", add=TRUE)
b2=BootChainLadder(personal_paid,R=999,process.distr = "od.pois")
b2
## BootChainLadder(Triangle = personal_paid, R = 999, process.distr = "od.pois")
##
## Latest Mean Ultimate Mean IBNR IBNR.S.E IBNR 75% IBNR 95%
## 1 353,584 353,584 0 0 0 0
## 2 350,523 350,753 230 472 380 1,218
## 3 385,224 387,076 1,852 1,166 2,562 4,000
## 4 373,325 377,499 4,174 1,707 5,150 7,198
## 5 382,738 393,492 10,754 2,534 12,376 15,136
## 6 386,725 410,097 23,372 3,902 26,078 29,971
## 7 367,357 414,691 47,334 5,452 50,636 56,491
## 8 317,972 407,228 89,256 8,011 94,254 102,342
## 9 246,803 406,910 160,107 12,230 168,300 179,792
## 10 126,288 413,322 287,034 22,498 302,246 327,146
##
## Totals
## Latest: 3,290,539
## Mean Ultimate: 3,914,652
## Mean IBNR: 624,113
## IBNR.S.E 31,191
## Total IBNR 75%: 645,044
## Total IBNR 95%: 677,121
plot(b2)
Another alternative: GLM
#GLM for personal triangle
No=nrow(personal_paid)
origin=NA
devv=NA
for(i in 1:No)
{origin=c(origin,1:(No-i+1)); devv=c(devv,rep(i,No-i+1))}
origin
## [1] NA 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 1 2 3 4 5
## [26] 6 7 8 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5 1 2 3 4
## [51] 1 2 3 1 2 1
devv
## [1] NA 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 3 3 3 3 3
## [26] 3 3 3 4 4 4 4 4 4 4 5 5 5 5 5 5 6 6 6 6 6 7 7 7 7
## [51] 8 8 8 9 9 10
origin=origin[is.na(origin)==FALSE]
origin
## [1] 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6
## [26] 7 8 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5 1 2 3 4 1
## [51] 2 3 1 2 1
devv=devv[is.na(devv)==FALSE]
devv
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3
## [26] 3 3 4 4 4 4 4 4 4 5 5 5 5 5 5 6 6 6 6 6 7 7 7 7 8
## [51] 8 8 9 9 10
devF=as.factor(devv)
yrF=as.factor(origin)
devF
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3
## [26] 3 3 4 4 4 4 4 4 4 5 5 5 5 5 5 6 6 6 6 6 7 7 7 7 8
## [51] 8 8 9 9 10
## Levels: 1 2 3 4 5 6 7 8 9 10
yrF
## [1] 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 1 2 3 4 5 6
## [26] 7 8 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5 1 2 3 4 1
## [51] 2 3 1 2 1
## Levels: 1 2 3 4 5 6 7 8 9 10
vec.personal=as.vector(incremental_personal_paid)[is.na(as.vector(incremental_personal_paid))==FALSE]
vec.personal
## [1] 101125 102541 114932 114452 115597 127760 135616 127177 128631 126288
## [11] 108796 100672 112772 113309 128014 131656 126678 117072 118172 56697
## [21] 57464 70416 73311 71604 67559 64792 73723 38489 42505 47422
## [31] 39597 39275 38805 40271 22743 25750 22218 19310 17886 20945
## [41] 12819 12016 10239 9269 10362 7761 6385 5612 4077 2763
## [51] 2480 1613 2160 710 231
data.new = data.frame(origin,devv,vec.personal)
data.new
glm_result1 <- glm(vec.personal ~ yrF + devF, family = Gamma(link = log))
summary(glm_result1)
##
## Call:
## glm(formula = vec.personal ~ yrF + devF, family = Gamma(link = log))
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.767531 0.084221 139.722 < 2e-16 ***
## yrF2 -0.141775 0.082206 -1.725 0.0932 .
## yrF3 -0.126462 0.085973 -1.471 0.1500
## yrF4 -0.182343 0.090085 -2.024 0.0504 .
## yrF5 -0.116818 0.094976 -1.230 0.2267
## yrF6 -0.067147 0.101177 -0.664 0.5111
## yrF7 -0.047350 0.109593 -0.432 0.6683
## yrF8 -0.031898 0.122094 -0.261 0.7954
## yrF9 -0.039817 0.143557 -0.277 0.7831
## yrF10 -0.021211 0.193657 -0.110 0.9134
## devF2 -0.009408 0.082206 -0.114 0.9095
## devF3 -0.563219 0.085973 -6.551 1.28e-07 ***
## devF4 -1.047838 0.090085 -11.632 9.44e-14 ***
## devF5 -1.686576 0.094976 -17.758 < 2e-16 ***
## devF6 -2.357462 0.101177 -23.300 < 2e-16 ***
## devF7 -2.973480 0.109593 -27.132 < 2e-16 ***
## devF8 -3.950015 0.122094 -32.352 < 2e-16 ***
## devF9 -4.461617 0.143557 -31.079 < 2e-16 ***
## devF10 -6.325113 0.193657 -32.661 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Gamma family taken to be 0.03040991)
##
## Null deviance: 72.2803 on 54 degrees of freedom
## Residual deviance: 1.1714 on 36 degrees of freedom
## AIC: 1122.2
##
## Number of Fisher Scoring iterations: 6
glm_result2 <- glm(vec.personal ~ yrF + devF, family = gaussian(link = log))
summary(glm_result2)
##
## Call:
## glm(formula = vec.personal ~ yrF + devF, family = gaussian(link = log))
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.56666 0.02986 387.318 < 2e-16 ***
## yrF2 -0.01712 0.04003 -0.428 0.671417
## yrF3 0.10264 0.03780 2.715 0.010103 *
## yrF4 0.09668 0.03790 2.551 0.015149 *
## yrF5 0.14633 0.03710 3.945 0.000354 ***
## yrF6 0.19204 0.03643 5.271 6.55e-06 ***
## yrF7 0.19961 0.03645 5.477 3.48e-06 ***
## yrF8 0.16041 0.03751 4.276 0.000134 ***
## yrF9 0.16247 0.03895 4.171 0.000183 ***
## yrF10 0.17966 0.04740 3.790 0.000553 ***
## devF2 -0.01147 0.01850 -0.620 0.539124
## devF3 -0.56905 0.02795 -20.357 < 2e-16 ***
## devF4 -1.06064 0.04531 -23.406 < 2e-16 ***
## devF5 -1.69332 0.09036 -18.741 < 2e-16 ***
## devF6 -2.34411 0.19216 -12.199 2.39e-14 ***
## devF7 -2.93351 0.39469 -7.432 8.97e-09 ***
## devF8 -3.87601 1.18893 -3.260 0.002437 **
## devF9 -4.28497 2.28078 -1.879 0.068398 .
## devF10 -6.12424 20.12225 -0.304 0.762612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 21606093)
##
## Null deviance: 1.2343e+11 on 54 degrees of freedom
## Residual deviance: 7.7782e+08 on 36 degrees of freedom
## AIC: 1101.6
##
## Number of Fisher Scoring iterations: 5
quantile(glm_result1$residuals)
## 0% 25% 50% 75% 100%
## -0.45056971 -0.07084502 0.00146344 0.08534259 0.45056047
GMCL Approach by Zhang 2010
auto1 = as(auto, "triangles")
fit.scl = MultiChainLadder(auto1, "OLS") #the regular CL method, or SCL
fit.mcl = MultiChainLadder2(auto1) #MCL
These codes above won’t run because the input residual covariance is almost singular Solution: Split the data so that SCL is used for years 7:10 in MCL and GMCL
da1 <- auto1[, 1:7]
da2 <- auto1[, 7:10]