library(ChainLadder)
## 
## Welcome to ChainLadder version 0.2.18
## 
## 
## To cite package 'ChainLadder' in publications use:
## 
##   Gesmann M, Murphy D, Zhang Y, Carrato A, Wuthrich M, Concina F, Dal
##   Moro E (2023). _ChainLadder: Statistical Methods and Models for
##   Claims Reserving in General Insurance_. R package version 0.2.18,
##   <https://CRAN.R-project.org/package=ChainLadder>.
## 
## To suppress this message use:
## suppressPackageStartupMessages(library(ChainLadder))
data(auto)
auto
## $PersonalAutoPaid
##         [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
##  [1,] 101125 209921 266618 305107 327850 340669 348430 351193 353353 353584
##  [2,] 102541 203213 260677 303182 328932 340948 347333 349813 350523     NA
##  [3,] 114932 227704 298120 345542 367760 377999 383611 385224     NA     NA
##  [4,] 114452 227761 301072 340669 359979 369248 373325     NA     NA     NA
##  [5,] 115597 243611 315215 354490 372376 382738     NA     NA     NA     NA
##  [6,] 127760 259416 326975 365780 386725     NA     NA     NA     NA     NA
##  [7,] 135616 262294 327086 367357     NA     NA     NA     NA     NA     NA
##  [8,] 127177 244249 317972     NA     NA     NA     NA     NA     NA     NA
##  [9,] 128631 246803     NA     NA     NA     NA     NA     NA     NA     NA
## [10,] 126288     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 
## $PersonalAutoIncurred
##         [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
##  [1,] 325423 336426 346061 347726 350995 353598 354797 355025 354986 355363
##  [2,] 323627 339267 344507 349295 351038 351583 352050 352231 352193     NA
##  [3,] 358410 386330 385684 384699 387678 387954 388540 389436     NA     NA
##  [4,] 405319 396641 391833 384819 380914 380163 379706     NA     NA     NA
##  [5,] 434065 429311 422181 409322 394154 392802     NA     NA     NA     NA
##  [6,] 417178 422307 413486 406711 406503     NA     NA     NA     NA     NA
##  [7,] 398929 398787 398020 400540     NA     NA     NA     NA     NA     NA
##  [8,] 378754 361097 369328     NA     NA     NA     NA     NA     NA     NA
##  [9,] 351081 335507     NA     NA     NA     NA     NA     NA     NA     NA
## [10,] 329236     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 
## $CommercialAutoPaid
##        [,1]  [,2]  [,3]   [,4]   [,5]   [,6]   [,7]   [,8]   [,9]  [,10]
##  [1,] 19827 44449 61205  77398  88079  95695  99853 104789 105427 106690
##  [2,] 22331 48480 68789  92356 104958 112399 115638 117415 118571     NA
##  [3,] 22533 44484 65691  88435 102044 112672 115973 118359     NA     NA
##  [4,] 23128 51328 81542  98063 113149 121515 124347     NA     NA     NA
##  [5,] 25053 57220 84607 104936 117663 126180     NA     NA     NA     NA
##  [6,] 30136 64767 92288 108835 121326     NA     NA     NA     NA     NA
##  [7,] 34764 69125 91354 111987     NA     NA     NA     NA     NA     NA
##  [8,] 31803 63471 92439     NA     NA     NA     NA     NA     NA     NA
##  [9,] 40559 77667    NA     NA     NA     NA     NA     NA     NA     NA
## [10,] 46285    NA    NA     NA     NA     NA     NA     NA     NA     NA
personal_paid=as.triangle(auto$PersonalAutoPaid)
commercial_paid=as.triangle(auto$CommercialAutoPaid)
incremental_personal_paid=cum2incr(personal_paid)
incremental_commercial_paid=cum2incr(commercial_paid)
plot(personal_paid)

plot(commercial_paid)

plot(incremental_personal_paid)

plot(incremental_commercial_paid)

plot(personal_paid,lattice=TRUE)

plot(commercial_paid,lattice=TRUE)

#Test for Independence of Subsequent Dev Factors before modeling: Triangle 1 failed the test, later it might not meet Assumption 2
cortest_1=dfCorTest(personal_paid,ci=0.95) 
plot(cortest_1)

summary(cortest_1)
## $Results
##             Value
## T      0.54965986
## E[T]   0.00000000
## Var[T] 0.03571429
## 
## $Range
##            Value
## Lower -0.3703984
## Upper  0.3703984
print(cortest_1)
## Development Factor Correlation
## 
## T = 0.5496599
## 
## 95%-Range = ( -0.3703984 ; 0.3703984 )
## 
## Development Factor Correlation: TRUE
cortest_2=dfCorTest(commercial_paid,ci=0.95) 
plot(cortest_2)

summary(cortest_2)
## $Results
##             Value
## T      0.35034014
## E[T]   0.00000000
## Var[T] 0.03571429
## 
## $Range
##            Value
## Lower -0.3703984
## Upper  0.3703984
print(cortest_2)
## Development Factor Correlation
## 
## T = 0.3503401
## 
## 95%-Range = ( -0.3703984 ; 0.3703984 )
## 
## Development Factor Correlation: FALSE
#Test for calendar year effect before modeling: Triangle 1 failed the test, suggesting it might violate Assumption 1
indep_1=cyEffTest(personal_paid,ci=0.95)
print(indep_1)
## Calendar Year Effect
## 
## Z = 5
## 
## 95%-Range = ( 8.914978 ; 16.08502 )
## 
## Calendar Year Effect: TRUE
plot(indep_1)

indep_2=cyEffTest(commercial_paid,ci=0.95)
print(indep_2)
## Calendar Year Effect
## 
## Z = 11
## 
## 95%-Range = ( 8.914978 ; 16.08502 )
## 
## Calendar Year Effect: FALSE
plot(indep_2)

mack1=MackChainLadder(personal_paid,est.sigma = "Mack")
mack1
## MackChainLadder(Triangle = personal_paid, est.sigma = "Mack")
## 
##     Latest Dev.To.Date Ultimate    IBNR Mack.S.E CV(IBNR)
## 1  353,584       1.000  353,584       0        0      NaN
## 2  350,523       0.999  350,752     229      998   4.3544
## 3  385,224       0.995  387,054   1,830    1,713   0.9360
## 4  373,325       0.989  377,481   4,156    1,885   0.4537
## 5  382,738       0.973  393,454  10,716    2,872   0.2680
## 6  386,725       0.943  409,932  23,207    3,847   0.1658
## 7  367,357       0.887  414,305  46,948    6,405   0.1364
## 8  317,972       0.780  407,609  89,637    9,177   0.1024
## 9  246,803       0.607  406,593 159,790   12,532   0.0784
## 10 126,288       0.305  414,021 287,733   19,085   0.0663
## 
##                 Totals
## Latest:   3,290,539.00
## Dev:              0.84
## Ultimate: 3,914,785.82
## IBNR:       624,246.82
## Mack.S.E     30,358.21
## CV(IBNR):         0.05
plot(mack1) #residual plots for Assumption 3 verification

plot(mack1, lattice=TRUE)

mack2=MackChainLadder(commercial_paid,est.sigma = "Mack")
mack2
## MackChainLadder(Triangle = commercial_paid, est.sigma = "Mack")
## 
##     Latest Dev.To.Date Ultimate    IBNR Mack.S.E CV(IBNR)
## 1  106,690       1.000  106,690       0        0      NaN
## 2  118,571       0.988  119,991   1,420       66   0.0465
## 3  118,359       0.980  120,744   2,385      387   0.1622
## 4  124,347       0.954  130,335   5,988    2,540   0.4241
## 5  126,180       0.926  136,302  10,122    2,861   0.2826
## 6  121,326       0.856  141,667  20,341    3,491   0.1716
## 7  111,987       0.754  148,471  36,484    4,211   0.1154
## 8   92,439       0.603  153,230  60,791    8,896   0.1463
## 9   77,667       0.419  185,254 107,587   13,705   0.1274
## 10  46,285       0.201  229,947 183,662   20,180   0.1099
## 
##                 Totals
## Latest:   1,043,851.00
## Dev:              0.71
## Ultimate: 1,472,631.58
## IBNR:       428,780.58
## Mack.S.E     31,488.35
## CV(IBNR):         0.07
plot(mack2) #residual plots

plot(mack2,lattice=TRUE)

CDR(mack2)
CDR(mack2,dev="all")
#C-C plots for personal auto claims (full triangle) to verify Assumption 2
personal_C_i1=mack1$FullTriangle[1,]
personal_C_i2=mack1$FullTriangle[2,]
df1=data.frame(personal_C_i2,personal_C_i1)
personal_C_i3=mack1$FullTriangle[3,]
df2=data.frame(personal_C_i3,personal_C_i2)
personal_C_i4=mack1$FullTriangle[4,]
df3=data.frame(personal_C_i4,personal_C_i3)
personal_C_i9=mack1$FullTriangle[9,]
personal_C_i10=mack1$FullTriangle[10,]
df4=data.frame(personal_C_i10,personal_C_i9)

m1=lm(personal_C_i2~personal_C_i1,data=df1)
with(df1,plot(personal_C_i1,personal_C_i2))
abline(m1,col = 4, lwd = 3)

m2=lm(personal_C_i3~personal_C_i2,data=df2)
with(df1,plot(personal_C_i2,personal_C_i3))
abline(m1,col = 3, lwd = 3)

m1=lm(personal_C_i4~personal_C_i3,data=df3)
with(df1,plot(personal_C_i3,personal_C_i4))
abline(m1,col = 2, lwd = 3)

m1=lm(personal_C_i10~personal_C_i9,data=df4)
with(df1,plot(personal_C_i9,personal_C_i10))
abline(m1,col = 5, lwd = 3)

#basically the same thing but in a more clever way
m11=(mack1[["Models"]][[1]])
summary(m11)
## 
## Call:
## lm(formula = y ~ x + 0, data = data.frame(x = Triangle[, i], 
##     y = Triangle[, i + 1]), weights = weights[, i]/Triangle[, 
##     i]^delta[i])
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.572 -20.584  -2.631  14.478  39.925 
## 
## Coefficients:
##   Estimate Std. Error t value Pr(>|t|)    
## x  1.98999    0.02232   89.16  2.8e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.06 on 8 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.999,  Adjusted R-squared:  0.9989 
## F-statistic:  7949 on 1 and 8 DF,  p-value: 2.795e-13
op=par(mfrow=c(2,2))
plot(m11)

par(op)

Alternative: Bootstrap

#Bootstrap for personal triangle
b1=BootChainLadder(personal_paid,R=999,process.distr = "gamma")
b1
## BootChainLadder(Triangle = personal_paid, R = 999, process.distr = "gamma")
## 
##     Latest Mean Ultimate Mean IBNR IBNR.S.E IBNR 75% IBNR 95%
## 1  353,584       353,584         0        0        0        0
## 2  350,523       350,715       192      476      316    1,086
## 3  385,224       387,085     1,861    1,263    2,622    4,307
## 4  373,325       377,544     4,219    1,732    5,317    7,416
## 5  382,738       393,526    10,788    2,642   12,497   15,162
## 6  386,725       410,056    23,331    3,950   25,909   30,177
## 7  367,357       414,619    47,262    5,597   50,739   57,114
## 8  317,972       407,522    89,550    7,543   94,546  102,594
## 9  246,803       406,530   159,727   11,725  167,237  179,017
## 10 126,288       413,514   287,226   21,907  302,305  325,892
## 
##                    Totals
## Latest:         3,290,539
## Mean Ultimate:  3,914,696
## Mean IBNR:        624,157
## IBNR.S.E           29,142
## Total IBNR 75%:   643,210
## Total IBNR 95%:   674,718
plot(b1)

quantile(b1, c(0.75,0.95,0.99, 0.995))
## $ByOrigin
##      IBNR 75%   IBNR 95%   IBNR 99% IBNR 99.5%
## 1       0.000      0.000      0.000      0.000
## 2     316.382   1085.749   1881.005   2110.217
## 3    2622.095   4306.882   5708.486   6113.093
## 4    5317.002   7415.863   8518.720   9088.369
## 5   12496.705  15162.100  18214.033  18973.197
## 6   25908.746  30176.553  33572.218  35247.916
## 7   50738.764  57114.493  61139.995  62037.959
## 8   94545.696 102594.021 106959.431 109424.251
## 9  167237.105 179016.808 187830.259 191326.841
## 10 302304.763 325892.361 337280.218 341847.706
## 
## $Totals
##               Totals
## IBNR 75%:   643210.4
## IBNR 95%:   674718.1
## IBNR 99%:   697112.0
## IBNR 99.5%: 701600.6
library(MASS)
plot(ecdf(b1$IBNR.Totals))

fit <- fitdistr(b1$IBNR.Totals[b1$IBNR.Totals>0], "lognormal")
fit
##      meanlog         sdlog    
##   13.343071048    0.046604024 
##  ( 0.001474486) ( 0.001042619)
curve(plnorm(x,fit$estimate["meanlog"], fit$estimate["sdlog"]),
      col="red", add=TRUE)

b2=BootChainLadder(personal_paid,R=999,process.distr = "od.pois")
b2
## BootChainLadder(Triangle = personal_paid, R = 999, process.distr = "od.pois")
## 
##     Latest Mean Ultimate Mean IBNR IBNR.S.E IBNR 75% IBNR 95%
## 1  353,584       353,584         0        0        0        0
## 2  350,523       350,753       230      472      380    1,218
## 3  385,224       387,076     1,852    1,166    2,562    4,000
## 4  373,325       377,499     4,174    1,707    5,150    7,198
## 5  382,738       393,492    10,754    2,534   12,376   15,136
## 6  386,725       410,097    23,372    3,902   26,078   29,971
## 7  367,357       414,691    47,334    5,452   50,636   56,491
## 8  317,972       407,228    89,256    8,011   94,254  102,342
## 9  246,803       406,910   160,107   12,230  168,300  179,792
## 10 126,288       413,322   287,034   22,498  302,246  327,146
## 
##                    Totals
## Latest:         3,290,539
## Mean Ultimate:  3,914,652
## Mean IBNR:        624,113
## IBNR.S.E           31,191
## Total IBNR 75%:   645,044
## Total IBNR 95%:   677,121
plot(b2)

Another alternative: GLM

#GLM for personal triangle
No=nrow(personal_paid)
origin=NA 
devv=NA 
for(i in 1:No)
{origin=c(origin,1:(No-i+1)); devv=c(devv,rep(i,No-i+1))} 
origin
##  [1] NA  1  2  3  4  5  6  7  8  9 10  1  2  3  4  5  6  7  8  9  1  2  3  4  5
## [26]  6  7  8  1  2  3  4  5  6  7  1  2  3  4  5  6  1  2  3  4  5  1  2  3  4
## [51]  1  2  3  1  2  1
devv
##  [1] NA  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  3  3  3  3  3
## [26]  3  3  3  4  4  4  4  4  4  4  5  5  5  5  5  5  6  6  6  6  6  7  7  7  7
## [51]  8  8  8  9  9 10
origin=origin[is.na(origin)==FALSE]
origin
##  [1]  1  2  3  4  5  6  7  8  9 10  1  2  3  4  5  6  7  8  9  1  2  3  4  5  6
## [26]  7  8  1  2  3  4  5  6  7  1  2  3  4  5  6  1  2  3  4  5  1  2  3  4  1
## [51]  2  3  1  2  1
devv=devv[is.na(devv)==FALSE] 
devv
##  [1]  1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  3  3  3  3  3  3
## [26]  3  3  4  4  4  4  4  4  4  5  5  5  5  5  5  6  6  6  6  6  7  7  7  7  8
## [51]  8  8  9  9 10
devF=as.factor(devv)
yrF=as.factor(origin) 
devF
##  [1] 1  1  1  1  1  1  1  1  1  1  2  2  2  2  2  2  2  2  2  3  3  3  3  3  3 
## [26] 3  3  4  4  4  4  4  4  4  5  5  5  5  5  5  6  6  6  6  6  7  7  7  7  8 
## [51] 8  8  9  9  10
## Levels: 1 2 3 4 5 6 7 8 9 10
yrF
##  [1] 1  2  3  4  5  6  7  8  9  10 1  2  3  4  5  6  7  8  9  1  2  3  4  5  6 
## [26] 7  8  1  2  3  4  5  6  7  1  2  3  4  5  6  1  2  3  4  5  1  2  3  4  1 
## [51] 2  3  1  2  1 
## Levels: 1 2 3 4 5 6 7 8 9 10
vec.personal=as.vector(incremental_personal_paid)[is.na(as.vector(incremental_personal_paid))==FALSE] 
vec.personal
##  [1] 101125 102541 114932 114452 115597 127760 135616 127177 128631 126288
## [11] 108796 100672 112772 113309 128014 131656 126678 117072 118172  56697
## [21]  57464  70416  73311  71604  67559  64792  73723  38489  42505  47422
## [31]  39597  39275  38805  40271  22743  25750  22218  19310  17886  20945
## [41]  12819  12016  10239   9269  10362   7761   6385   5612   4077   2763
## [51]   2480   1613   2160    710    231
data.new = data.frame(origin,devv,vec.personal)
data.new
glm_result1 <- glm(vec.personal ~ yrF + devF, family = Gamma(link = log))
summary(glm_result1)
## 
## Call:
## glm(formula = vec.personal ~ yrF + devF, family = Gamma(link = log))
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.767531   0.084221 139.722  < 2e-16 ***
## yrF2        -0.141775   0.082206  -1.725   0.0932 .  
## yrF3        -0.126462   0.085973  -1.471   0.1500    
## yrF4        -0.182343   0.090085  -2.024   0.0504 .  
## yrF5        -0.116818   0.094976  -1.230   0.2267    
## yrF6        -0.067147   0.101177  -0.664   0.5111    
## yrF7        -0.047350   0.109593  -0.432   0.6683    
## yrF8        -0.031898   0.122094  -0.261   0.7954    
## yrF9        -0.039817   0.143557  -0.277   0.7831    
## yrF10       -0.021211   0.193657  -0.110   0.9134    
## devF2       -0.009408   0.082206  -0.114   0.9095    
## devF3       -0.563219   0.085973  -6.551 1.28e-07 ***
## devF4       -1.047838   0.090085 -11.632 9.44e-14 ***
## devF5       -1.686576   0.094976 -17.758  < 2e-16 ***
## devF6       -2.357462   0.101177 -23.300  < 2e-16 ***
## devF7       -2.973480   0.109593 -27.132  < 2e-16 ***
## devF8       -3.950015   0.122094 -32.352  < 2e-16 ***
## devF9       -4.461617   0.143557 -31.079  < 2e-16 ***
## devF10      -6.325113   0.193657 -32.661  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Gamma family taken to be 0.03040991)
## 
##     Null deviance: 72.2803  on 54  degrees of freedom
## Residual deviance:  1.1714  on 36  degrees of freedom
## AIC: 1122.2
## 
## Number of Fisher Scoring iterations: 6
glm_result2 <- glm(vec.personal ~ yrF + devF, family = gaussian(link = log))
summary(glm_result2)
## 
## Call:
## glm(formula = vec.personal ~ yrF + devF, family = gaussian(link = log))
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.56666    0.02986 387.318  < 2e-16 ***
## yrF2        -0.01712    0.04003  -0.428 0.671417    
## yrF3         0.10264    0.03780   2.715 0.010103 *  
## yrF4         0.09668    0.03790   2.551 0.015149 *  
## yrF5         0.14633    0.03710   3.945 0.000354 ***
## yrF6         0.19204    0.03643   5.271 6.55e-06 ***
## yrF7         0.19961    0.03645   5.477 3.48e-06 ***
## yrF8         0.16041    0.03751   4.276 0.000134 ***
## yrF9         0.16247    0.03895   4.171 0.000183 ***
## yrF10        0.17966    0.04740   3.790 0.000553 ***
## devF2       -0.01147    0.01850  -0.620 0.539124    
## devF3       -0.56905    0.02795 -20.357  < 2e-16 ***
## devF4       -1.06064    0.04531 -23.406  < 2e-16 ***
## devF5       -1.69332    0.09036 -18.741  < 2e-16 ***
## devF6       -2.34411    0.19216 -12.199 2.39e-14 ***
## devF7       -2.93351    0.39469  -7.432 8.97e-09 ***
## devF8       -3.87601    1.18893  -3.260 0.002437 ** 
## devF9       -4.28497    2.28078  -1.879 0.068398 .  
## devF10      -6.12424   20.12225  -0.304 0.762612    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 21606093)
## 
##     Null deviance: 1.2343e+11  on 54  degrees of freedom
## Residual deviance: 7.7782e+08  on 36  degrees of freedom
## AIC: 1101.6
## 
## Number of Fisher Scoring iterations: 5
quantile(glm_result1$residuals)
##          0%         25%         50%         75%        100% 
## -0.45056971 -0.07084502  0.00146344  0.08534259  0.45056047

GMCL Approach by Zhang 2010

auto1 = as(auto, "triangles")
fit.scl = MultiChainLadder(auto1, "OLS") #the regular CL method, or SCL
fit.mcl = MultiChainLadder2(auto1) #MCL

These codes above won’t run because the input residual covariance is almost singular Solution: Split the data so that SCL is used for years 7:10 in MCL and GMCL

da1 <- auto1[, 1:7]
da2 <- auto1[, 7:10]