Read the file on Claim Levels
# Switch to the folder that holds the course material, then load the
# claim-level CSV from it and preview the first rows.
# NOTE(review): the setwd() path is specific to one machine; confirm it
# before running this elsewhere.
setwd("~/Dropbox/UDLAP/Cursos/2022 Primavera/Tema Selecto/Presentaciones")
ClaimLev <- read.csv(file = "CLAIMLEVEL.csv")
head(x = ClaimLev)
## PolicyNum ClaimNum Year ClaimStatus Claim Deduct EntityType
## 1 120002 20100192 2010 Closed 6838.87 1000 County
## 2 120003 20080726 2007 Closed 2085.00 5000 County
## 3 120003 20081656 2007 Closed 7835.00 5000 County
## 4 120003 20081657 2007 Closed 3500.00 5000 County
## 5 120003 20081656 2007 Closed 1480.00 5000 County
## 6 120003 20081656 2007 Closed 600.00 5000 County
## Description CoverageGroup CoverageCode Fire5 CountyCode
## 1 lightningdamage BC VF 4 ASH
## 2 lightningdamageatComm.Center BC VF 0 BAR
## 3 lightningdamageatwatertower BC VF 0 BAR
## 4 lightningdamageatwatertower BC VF 0 BAR
## 5 lightningdamgetoradiotower BC VF 0 BAR
## 6 vandalismdamageatrecyclecenter BC VE 0 BAR
## county
## 1 Ashland
## 2 Barron
## 3 Barron
## 4 Barron
## 5 Barron
## 6 Barron
We select 2009; for that year we have 301 policyholders and 1356 claims.
# Keep only the rows whose Year equals 2009.
ClaimData <- ClaimLev[which(ClaimLev$Year == 2009), ]
# Number of distinct policies among the selected claims.
length(unique(ClaimData[["PolicyNum"]]))
## [1] 301
# Total number of selected claims (one row per claim).
NTot <- nrow(ClaimData)
NTot
## [1] 1356
We can now analyze the claims for 2009; always remember that a descriptive analysis is a necessary first step.
# Five-number summary, mean and standard deviation of the claim amounts.
with(ClaimData, summary(Claim))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 274.5 1203.4 8150.9 3401.2 623260.3
with(ClaimData, sd(Claim))
## [1] 39101.12
# Same statistics on the natural-log scale.
with(ClaimData, summary(log(Claim)))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 5.615 7.093 7.050 8.132 13.343
with(ClaimData, sd(log(Claim)))
## [1] 1.845126
And plotting is always a good idea:
# Histograms of the claims on the raw and on the log scale, side by side.
# par() returns the previous settings, so they are saved and restored
# afterwards; otherwise the 1 x 2 layout would persist and affect every
# later plot drawn on the same device.
old_par <- par(mfrow = c(1, 2))
hist(ClaimData$Claim, main = "", xlab = "Claims")
hist(log(ClaimData$Claim), main = "", xlab = "Logarithmic Claims")
par(old_par)
Now we will use a different library (there are many options). To fit our model, we will use the fitdistrplus library.
library("fitdistrplus")
## Loading required package: MASS
## Loading required package: survival
# Histogram/density and empirical CDF of the log-claims.
plotdist(log(ClaimData$Claim), histo = TRUE, demp = TRUE)
# Store the log-claims as a new column and keep only the strictly positive
# ones (a claim of exactly 1 has a log of 0 and is dropped).
# NOTE(review): presumably done because the distributions fitted afterwards
# need strictly positive data -- confirm.
ClaimData[["LClaim"]] <- log(ClaimData[["Claim"]])
ClaimDataP <- subset(ClaimData, LClaim > 0)
After we select our desired population, we can fit the distribution:
# Fit a Weibull distribution to the positive log-claims and report the fit.
f1 <- fitdist(data = ClaimDataP$LClaim, distr = "weibull")
summary(object = f1)
## Fitting of the distribution ' weibull ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 4.139630 0.08377941
## scale 7.756231 0.05381304
## Loglikelihood: -2760.015 AIC: 5524.03 BIC: 5534.453
## Correlation matrix:
## shape scale
## shape 1.0000000 0.3244447
## scale 0.3244447 1.0000000
The nice thing here is that we obtain the parameter estimates together with their names. Now we can look at some plots:
# Goodness-of-fit plots for the Weibull fit: density, CDF, Q-Q and P-P.
denscomp(ft = f1, legendtext = "Weibull")
cdfcomp(ft = f1, legendtext = "Weibull")
qqcomp(ft = f1, legendtext = "Weibull")
ppcomp(ft = f1, legendtext = "Weibull")
# Print the fit summary again next to the plots.
summary(object = f1)
## Fitting of the distribution ' weibull ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 4.139630 0.08377941
## scale 7.756231 0.05381304
## Loglikelihood: -2760.015 AIC: 5524.03 BIC: 5534.453
## Correlation matrix:
## shape scale
## shape 1.0000000 0.3244447
## scale 0.3244447 1.0000000
We can also fit the lognormal distribution:
# Fit a lognormal distribution to the positive log-claims and report the fit.
data <- ClaimDataP[["LClaim"]]
f2 <- fitdist(data = data, distr = "lnorm")
summary(object = f2)
## Fitting of the distribution ' lnorm ' by maximum likelihood
## Parameters :
## estimate Std. Error
## meanlog 1.9182014 0.007386692
## sdlog 0.2719065 0.005222862
## Loglikelihood: -2757.212 AIC: 5518.425 BIC: 5528.848
## Correlation matrix:
## meanlog sdlog
## meanlog 1 0
## sdlog 0 1
# Goodness-of-fit plots for the lognormal fit: density, CDF, Q-Q and P-P.
denscomp(ft = f2, legendtext = "Lognormal")
cdfcomp(ft = f2, legendtext = "Lognormal")
qqcomp(ft = f2, legendtext = "Lognormal")
ppcomp(ft = f2, legendtext = "Lognormal")
We can also fit the gamma distribution
# Fit a gamma distribution to the positive log-claims and report the fit.
data <- ClaimDataP[["LClaim"]]
f3 <- fitdist(data = data, distr = "gamma")
summary(object = f3)
## Fitting of the distribution ' gamma ' by maximum likelihood
## Parameters :
## estimate Std. Error
## shape 14.227058 0.54029927
## rate 2.016708 0.07795341
## Loglikelihood: -2738.743 AIC: 5481.487 BIC: 5491.91
## Correlation matrix:
## shape rate
## shape 1.0000000 0.9824874
## rate 0.9824874 1.0000000
# Goodness-of-fit plots for the gamma fit: density, CDF, Q-Q and P-P.
denscomp(ft = f3, legendtext = "Gamma")
cdfcomp(ft = f3, legendtext = "Gamma")
qqcomp(ft = f3, legendtext = "Gamma")
ppcomp(ft = f3, legendtext = "Gamma")
And we can compare all the distributions:
# Overlay the three fits on each diagnostic plot; the legend labels are in
# the same order as the fitted objects in the list.
plot.legend <- c("Weibull", "lognormal", "gamma")
denscomp(ft = list(f1, f2, f3), legendtext = plot.legend)
qqcomp(ft = list(f1, f2, f3), legendtext = plot.legend)
cdfcomp(ft = list(f1, f2, f3), legendtext = plot.legend)
ppcomp(ft = list(f1, f2, f3), legendtext = plot.legend)