Analytic models for identifying Auto Insurance Loss KPIs with an external IIHS dataset.

Step 1: Load data, Inspect

iihs_data <- read.csv("C:/Users/joshu/Desktop/data.csv")

str(iihs_data)
## 'data.frame':    150 obs. of  8 variables:
##  $ Vehicle         : Factor w/ 76 levels "","Acura TSX",..: 1 2 1 6 1 9 1 12 1 16 ...
##  $ Average.Loss    : num  NA 93.2 NA 87.5 NA ...
##  $ Collision.      : int  NA 106 NA 82 NA 95 NA 93 NA 114 ...
##  $ Property.damage.: int  NA 83 NA 68 NA 97 NA 88 NA 124 ...
##  $ Comprehensive.  : int  NA 110 NA 86 NA 101 NA 97 NA 98 ...
##  $ Personal.injury.: int  NA 94 NA 114 NA 122 NA 144 NA 176 ...
##  $ Medical.payment.: int  NA 84 NA NA NA 136 NA 134 NA 178 ...
##  $ Bodily.injury.  : int  NA 82 NA NA NA 123 NA 125 NA 169 ...

Principal Component Analysis

model <- princomp(~.,iihs_data[1:75,3:8], na.action=na.omit)

summary(model)
## Importance of components:
##                            Comp.1     Comp.2      Comp.3     Comp.4
## Standard deviation     45.0800282 22.9448636 15.03564219 7.04920774
## Proportion of Variance  0.7061193  0.1829282  0.07855136 0.01726595
## Cumulative Proportion   0.7061193  0.8890475  0.96759889 0.98486483
##                             Comp.5     Comp.6
## Standard deviation     5.228119963 4.02811545
## Proportion of Variance 0.009497315 0.00563785
## Cumulative Proportion  0.994362150 1.00000000
screeplot(model)

model$loadings
## 
## Loadings:
##                  Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Collision.              -0.415  0.485  0.721 -0.224  0.125
## Property.damage. -0.230         0.502         0.371 -0.738
## Comprehensive.          -0.867        -0.443  0.222       
## Personal.injury. -0.578 -0.151 -0.281 -0.105 -0.656 -0.350
## Medical.payment. -0.648        -0.356  0.295  0.565  0.215
## Bodily.injury.   -0.432  0.208  0.554 -0.426 -0.114  0.518
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.167  0.167  0.167  0.167  0.167  0.167
## Cumulative Var  0.167  0.333  0.500  0.667  0.833  1.000

Transform Dataset for Linear Modeling

iihs_data_tr <- transform(iihs_data,Buy = as.numeric(iihs_data$Average.Loss < 100))

str(iihs_data_tr)
## 'data.frame':    150 obs. of  9 variables:
##  $ Vehicle         : Factor w/ 76 levels "","Acura TSX",..: 1 2 1 6 1 9 1 12 1 16 ...
##  $ Average.Loss    : num  NA 93.2 NA 87.5 NA ...
##  $ Collision.      : int  NA 106 NA 82 NA 95 NA 93 NA 114 ...
##  $ Property.damage.: int  NA 83 NA 68 NA 97 NA 88 NA 124 ...
##  $ Comprehensive.  : int  NA 110 NA 86 NA 101 NA 97 NA 98 ...
##  $ Personal.injury.: int  NA 94 NA 114 NA 122 NA 144 NA 176 ...
##  $ Medical.payment.: int  NA 84 NA NA NA 136 NA 134 NA 178 ...
##  $ Bodily.injury.  : int  NA 82 NA NA NA 123 NA 125 NA 169 ...
##  $ Buy             : num  NA 1 NA 1 NA 0 NA 0 NA 0 ...

Simple Linear Regression

lm_model=lm(iihs_data_tr$Buy ~ iihs_data_tr$Collision. + iihs_data_tr$Property.damage. + iihs_data_tr$Comprehensive. + iihs_data_tr$Personal.injury.
+ iihs_data_tr$Medical.payment. + iihs_data_tr$Bodily.injury.)

summary(lm_model)
## 
## Call:
## lm(formula = iihs_data_tr$Buy ~ iihs_data_tr$Collision. + iihs_data_tr$Property.damage. + 
##     iihs_data_tr$Comprehensive. + iihs_data_tr$Personal.injury. + 
##     iihs_data_tr$Medical.payment. + iihs_data_tr$Bodily.injury.)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.46425 -0.25853 -0.03087  0.23089  0.55220 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    2.0586715  0.4619836   4.456 6.84e-05 ***
## iihs_data_tr$Collision.       -0.0007052  0.0048424  -0.146    0.885    
## iihs_data_tr$Property.damage. -0.0006774  0.0072098  -0.094    0.926    
## iihs_data_tr$Comprehensive.   -0.0039661  0.0031103  -1.275    0.210    
## iihs_data_tr$Personal.injury. -0.0047340  0.0062795  -0.754    0.455    
## iihs_data_tr$Medical.payment. -0.0057862  0.0058844  -0.983    0.332    
## iihs_data_tr$Bodily.injury.    0.0005263  0.0060277   0.087    0.931    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3124 on 39 degrees of freedom
##   (104 observations deleted due to missingness)
## Multiple R-squared:  0.5452, Adjusted R-squared:  0.4752 
## F-statistic: 7.792 on 6 and 39 DF,  p-value: 1.51e-05

Logistic Regression

lg_model=glm(iihs_data_tr$Buy ~ iihs_data_tr$Collision. + iihs_data_tr$Property.damage. + iihs_data_tr$Comprehensive. +
iihs_data_tr$Personal.injury. + iihs_data_tr$Medical.payment. + iihs_data_tr$Bodily.injury.,family = "quasibinomial",na.action=na.omit,control =
list(maxit = 50))

summary(lg_model)
## 
## Call:
## glm(formula = iihs_data_tr$Buy ~ iihs_data_tr$Collision. + iihs_data_tr$Property.damage. + 
##     iihs_data_tr$Comprehensive. + iihs_data_tr$Personal.injury. + 
##     iihs_data_tr$Medical.payment. + iihs_data_tr$Bodily.injury., 
##     family = "quasibinomial", na.action = na.omit, control = list(maxit = 50))
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -9.344e-06  -2.110e-08  -2.110e-08  -2.110e-08   1.305e-05  
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   741.27518    6.81383 108.790  < 2e-16 ***
## iihs_data_tr$Collision.        -0.96299    0.03903 -24.673  < 2e-16 ***
## iihs_data_tr$Property.damage.  -2.39416    0.04395 -54.470  < 2e-16 ***
## iihs_data_tr$Comprehensive.    -2.40964    0.03125 -77.099  < 2e-16 ***
## iihs_data_tr$Personal.injury.  -0.45680    0.06394  -7.144 1.36e-08 ***
## iihs_data_tr$Medical.payment.  -2.44471    0.09719 -25.154  < 2e-16 ***
## iihs_data_tr$Bodily.injury.     1.21074    0.09802  12.352 4.69e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.609654e-11)
## 
##     Null deviance: 5.0607e+01  on 45  degrees of freedom
## Residual deviance: 4.6209e-10  on 39  degrees of freedom
##   (104 observations deleted due to missingness)
## AIC: NA
## 
## Number of Fisher Scoring iterations: 27