Analytic models for identifying Auto Insurance Loss KPIs with an external IIHS dataset.
Step 1: Load and inspect the data
# Read the IIHS loss export into a data frame. The header and separator
# arguments below are read.csv()'s own defaults, spelled out for readability.
# NOTE(review): this absolute Windows path presumably resolves only on the
# original author's machine - confirm where the CSV should live before re-running.
iihs_data <- read.csv(
  file = "C:/Users/joshu/Desktop/data.csv",
  header = TRUE,
  sep = ","
)

# Print column types and leading values to check the import.
str(iihs_data)
## 'data.frame': 150 obs. of 8 variables:
## $ Vehicle : Factor w/ 76 levels "","Acura TSX",..: 1 2 1 6 1 9 1 12 1 16 ...
## $ Average.Loss : num NA 93.2 NA 87.5 NA ...
## $ Collision. : int NA 106 NA 82 NA 95 NA 93 NA 114 ...
## $ Property.damage.: int NA 83 NA 68 NA 97 NA 88 NA 124 ...
## $ Comprehensive. : int NA 110 NA 86 NA 101 NA 97 NA 98 ...
## $ Personal.injury.: int NA 94 NA 114 NA 122 NA 144 NA 176 ...
## $ Medical.payment.: int NA 84 NA NA NA 136 NA 134 NA 178 ...
## $ Bodily.injury. : int NA 82 NA NA NA 123 NA 125 NA 169 ...
Principal Component Analysis
# Principal components of the six coverage-type loss columns (columns 3 to 8),
# fitted through princomp()'s formula interface; rows with any NA are dropped
# by na.omit.
# NOTE(review): the row subset 1:75 covers only the first half of the 150-row
# frame, and str() shows blank rows alternating with vehicle rows at the top of
# the data - confirm whether all rows were meant to be included.
model <- princomp(
  formula = ~ .,
  data = iihs_data[1:75, 3:8],
  na.action = na.omit
)

# Standard deviation and (cumulative) proportion of variance per component.
summary(model)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4
## Standard deviation 45.0800282 22.9448636 15.03564219 7.04920774
## Proportion of Variance 0.7061193 0.1829282 0.07855136 0.01726595
## Cumulative Proportion 0.7061193 0.8890475 0.96759889 0.98486483
## Comp.5 Comp.6
## Standard deviation 5.228119963 4.02811545
## Proportion of Variance 0.009497315 0.00563785
## Cumulative Proportion 0.994362150 1.00000000
# Plot the variance of each principal component (scree plot).
screeplot(x = model)

# Print the loadings matrix: the weight of each loss column in each component.
loadings(model)
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Collision. -0.415 0.485 0.721 -0.224 0.125
## Property.damage. -0.230 0.502 0.371 -0.738
## Comprehensive. -0.867 -0.443 0.222
## Personal.injury. -0.578 -0.151 -0.281 -0.105 -0.656 -0.350
## Medical.payment. -0.648 -0.356 0.295 0.565 0.215
## Bodily.injury. -0.432 0.208 0.554 -0.426 -0.114 0.518
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.167 0.167 0.167 0.167 0.167 0.167
## Cumulative Var 0.167 0.333 0.500 0.667 0.833 1.000
Transform Dataset for Linear Modeling: add a binary Buy indicator (1 when Average.Loss is below 100, otherwise 0)
# Add the modeling target Buy: 1 when Average.Loss is below 100, 0 otherwise.
# Rows whose Average.Loss is NA keep NA in Buy. transform() evaluates the
# expression inside the data frame, so the column is referenced by bare name.
iihs_data_tr <- transform(
  iihs_data,
  Buy = as.numeric(Average.Loss < 100)
)

# Confirm the new column was appended.
str(iihs_data_tr)
## 'data.frame': 150 obs. of 9 variables:
## $ Vehicle : Factor w/ 76 levels "","Acura TSX",..: 1 2 1 6 1 9 1 12 1 16 ...
## $ Average.Loss : num NA 93.2 NA 87.5 NA ...
## $ Collision. : int NA 106 NA 82 NA 95 NA 93 NA 114 ...
## $ Property.damage.: int NA 83 NA 68 NA 97 NA 88 NA 124 ...
## $ Comprehensive. : int NA 110 NA 86 NA 101 NA 97 NA 98 ...
## $ Personal.injury.: int NA 94 NA 114 NA 122 NA 144 NA 176 ...
## $ Medical.payment.: int NA 84 NA NA NA 136 NA 134 NA 178 ...
## $ Bodily.injury. : int NA 82 NA NA NA 123 NA 125 NA 169 ...
## $ Buy : num NA 1 NA 1 NA 0 NA 0 NA 0 ...
Multiple Linear Regression
# Least-squares fit of the 0/1 Buy indicator on the six coverage-type loss
# columns. The columns are referenced as iihs_data_tr$<name> inside the
# formula, so the coefficient labels in the summary carry that prefix.
lm_model <- lm(
  formula = iihs_data_tr$Buy ~
    iihs_data_tr$Collision. +
    iihs_data_tr$Property.damage. +
    iihs_data_tr$Comprehensive. +
    iihs_data_tr$Personal.injury. +
    iihs_data_tr$Medical.payment. +
    iihs_data_tr$Bodily.injury.
)

# Coefficient table, residual summary and overall fit statistics.
summary(lm_model)
##
## Call:
## lm(formula = iihs_data_tr$Buy ~ iihs_data_tr$Collision. + iihs_data_tr$Property.damage. +
## iihs_data_tr$Comprehensive. + iihs_data_tr$Personal.injury. +
## iihs_data_tr$Medical.payment. + iihs_data_tr$Bodily.injury.)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.46425 -0.25853 -0.03087 0.23089 0.55220
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0586715 0.4619836 4.456 6.84e-05 ***
## iihs_data_tr$Collision. -0.0007052 0.0048424 -0.146 0.885
## iihs_data_tr$Property.damage. -0.0006774 0.0072098 -0.094 0.926
## iihs_data_tr$Comprehensive. -0.0039661 0.0031103 -1.275 0.210
## iihs_data_tr$Personal.injury. -0.0047340 0.0062795 -0.754 0.455
## iihs_data_tr$Medical.payment. -0.0057862 0.0058844 -0.983 0.332
## iihs_data_tr$Bodily.injury. 0.0005263 0.0060277 0.087 0.931
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3124 on 39 degrees of freedom
## (104 observations deleted due to missingness)
## Multiple R-squared: 0.5452, Adjusted R-squared: 0.4752
## F-statistic: 7.792 on 6 and 39 DF, p-value: 1.51e-05
Logistic Regression
# Logistic-type fit of the 0/1 Buy indicator on the same six loss columns,
# using the quasibinomial family. Rows with NA are dropped via na.omit and the
# fitting iteration limit is raised to 50.
# NOTE(review): the recorded fit shows a residual deviance near zero, a
# dispersion estimate around 1.6e-11 and 27 Fisher scoring iterations, which
# looks like perfect separation (Buy is derived from Average.Loss, which
# appears to be the mean of these predictors) - treat the very small standard
# errors and p-values with caution.
lg_model <- glm(
  formula = iihs_data_tr$Buy ~
    iihs_data_tr$Collision. +
    iihs_data_tr$Property.damage. +
    iihs_data_tr$Comprehensive. +
    iihs_data_tr$Personal.injury. +
    iihs_data_tr$Medical.payment. +
    iihs_data_tr$Bodily.injury.,
  family = "quasibinomial",
  na.action = na.omit,
  control = list(maxit = 50)
)

# Coefficient table, dispersion estimate and deviance summary.
summary(lg_model)
##
## Call:
## glm(formula = iihs_data_tr$Buy ~ iihs_data_tr$Collision. + iihs_data_tr$Property.damage. +
## iihs_data_tr$Comprehensive. + iihs_data_tr$Personal.injury. +
## iihs_data_tr$Medical.payment. + iihs_data_tr$Bodily.injury.,
## family = "quasibinomial", na.action = na.omit, control = list(maxit = 50))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.344e-06 -2.110e-08 -2.110e-08 -2.110e-08 1.305e-05
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 741.27518 6.81383 108.790 < 2e-16 ***
## iihs_data_tr$Collision. -0.96299 0.03903 -24.673 < 2e-16 ***
## iihs_data_tr$Property.damage. -2.39416 0.04395 -54.470 < 2e-16 ***
## iihs_data_tr$Comprehensive. -2.40964 0.03125 -77.099 < 2e-16 ***
## iihs_data_tr$Personal.injury. -0.45680 0.06394 -7.144 1.36e-08 ***
## iihs_data_tr$Medical.payment. -2.44471 0.09719 -25.154 < 2e-16 ***
## iihs_data_tr$Bodily.injury. 1.21074 0.09802 12.352 4.69e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.609654e-11)
##
## Null deviance: 5.0607e+01 on 45 degrees of freedom
## Residual deviance: 4.6209e-10 on 39 degrees of freedom
## (104 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 27