# Work from the course data folder so that the relative file name used when
# reading the CSV below resolves. The path is specific to one machine.
setwd(
  dir = "C:/Users/23043/Dropbox/UDLAP/Cursos/2022 Primavera/Tema Selecto/data"
)
Now we open the file. It has many components, but for now we need only a small part of the information.
For this session, we need the claim frequencies for 2010:
# Read the in-sample data and keep the rows recorded for 2010.
Insample <- read.csv(file = "insample.csv")
Insample2010 <- subset(Insample, subset = Year == 2010)
# Tabulate the Freq column: one row per distinct value ("number") together
# with how many 2010 rows carry that value ("Freq").
claims <- setNames(
  as.data.frame(table(Insample2010$Freq)),
  c("number", "Freq")
)
# table() hands the distinct values back as a factor; converting through
# character keeps the original values rather than the factor level codes.
claims[] <- lapply(claims, function(column) as.numeric(as.character(column)))
claims
## number Freq
## 1 0 707
## 2 1 209
## 3 2 86
## 4 3 40
## 5 4 18
## 6 5 12
## 7 6 9
## 8 7 4
## 9 8 6
## 10 9 1
## 11 10 3
## 12 11 2
## 13 13 1
## 14 14 2
## 15 15 1
## 16 16 2
## 17 17 1
## 18 18 1
## 19 19 1
## 20 30 1
## 21 39 1
## 22 103 1
## 23 239 1
Now, the total number of claims can be computed as:
# Claims contributed by each row: the claim count multiplied by the number of
# policies that reported that count.
claims[["claims"]] <- with(claims, number * Freq)
# Summing the row contributions gives the overall number of claims.
TotalClaims <- sum(claims[["claims"]])
Let's analyse this in a little more detail:
# Proportion of policies at each claim count.
# The number of policies is derived from the frequency table instead of being
# typed in by hand, so it stays in step with the data if the file or the year
# changes. For the 2010 table shown above, the Freq column sums to 1110, the
# value that was previously hard-coded, so the results are unchanged.
policies <- sum(claims$Freq)
claims$Prop <- claims$Freq / policies
claims
## number Freq claims Prop
## 1 0 707 0 0.6369369369
## 2 1 209 209 0.1882882883
## 3 2 86 172 0.0774774775
## 4 3 40 120 0.0360360360
## 5 4 18 72 0.0162162162
## 6 5 12 60 0.0108108108
## 7 6 9 54 0.0081081081
## 8 7 4 28 0.0036036036
## 9 8 6 48 0.0054054054
## 10 9 1 9 0.0009009009
## 11 10 3 30 0.0027027027
## 12 11 2 22 0.0018018018
## 13 13 1 13 0.0009009009
## 14 14 2 28 0.0018018018
## 15 15 1 15 0.0009009009
## 16 16 2 32 0.0018018018
## 17 17 1 17 0.0009009009
## 18 18 1 18 0.0009009009
## 19 19 1 19 0.0009009009
## 20 30 1 30 0.0009009009
## 21 39 1 39 0.0009009009
## 22 103 1 103 0.0009009009
## 23 239 1 239 0.0009009009
# Mean number of claims per policy: total claims over the number of policies.
ANC <- TotalClaims / policies
ANC
## [1] 1.240541
We keep only the policies with a positive average claim (yAvg > 0):
# Keep only the 2010 rows whose yAvg is strictly positive.
InsamplePos2010 <- subset(
  Insample2010,
  subset = yAvg > 0
)
Some descriptive statistics:
# Five-number summary plus the mean of the positive yAvg values.
summary(InsamplePos2010[["yAvg"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 167 2226 4951 56332 11900 12922218
And some plots:
# Two panels side by side: yAvg on its raw scale on the left and on the
# natural-log scale on the right.
par(mfrow = c(1, 2))
hist(
  InsamplePos2010[["yAvg"]],
  main = "",
  xlab = "Average Claims"
)
hist(
  log(InsamplePos2010[["yAvg"]]),
  main = "",
  xlab = "Logarithmic Average Claims"
)