Link to the project in RPubs: http://rpubs.com/ofomicheva86/382640
#required packages
library(corrplot)
library(PerformanceAnalytics)
library(GGally)
library(RColorBrewer)
library(VIM)
library(dplyr)
library(mice)
library(pROC)
library(caret)
library(pscl)
library(ResourceSelection)
library(stringr)
library(vcd)
#read training data set
data <- read.csv(file=
"https://raw.githubusercontent.com/olga0503/DATA-621/master/insurance_training_data.csv",
stringsAsFactors=T, header=T)
#read testing data set
data_testing <- read.csv(file=
"https://raw.githubusercontent.com/olga0503/DATA-621/master/insurance-evaluation-data.csv",
stringsAsFactors=T, header=T)
#display first six entries
head(data)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 1 1 0 0 0 60 0 11 $67,349 No
## 2 2 0 0 0 43 0 11 $91,449 No
## 3 4 0 0 0 35 1 10 $16,039 No
## 4 5 0 0 0 51 0 14 No
## 5 6 0 0 0 50 0 NA $114,986 No
## 6 7 1 2946 0 34 1 12 $125,301 Yes
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 1 $0 z_No M PhD Professional 14 Private
## 2 $257,252 z_No M z_High School z_Blue Collar 22 Commercial
## 3 $124,191 Yes z_F z_High School Clerical 5 Private
## 4 $306,251 Yes M <High School z_Blue Collar 32 Private
## 5 $243,925 Yes z_F PhD Doctor 36 Private
## 6 $0 z_No z_F Bachelors z_Blue Collar 46 Commercial
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 1 $14,230 11 Minivan yes $4,461 2 No 3
## 2 $14,940 1 Minivan yes $0 0 No 0
## 3 $4,010 4 z_SUV no $38,690 2 No 3
## 4 $15,440 7 Minivan yes $0 0 No 0
## 5 $18,000 1 z_SUV no $19,217 2 Yes 3
## 6 $17,430 1 Sports Car no $0 0 No 0
## CAR_AGE URBANICITY
## 1 18 Highly Urban/ Urban
## 2 1 Highly Urban/ Urban
## 3 10 Highly Urban/ Urban
## 4 6 Highly Urban/ Urban
## 5 17 Highly Urban/ Urban
## 6 7 Highly Urban/ Urban
head(data_testing)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 1 3 NA NA 0 48 0 11 $52,881 No
## 2 9 NA NA 1 40 1 11 $50,815 Yes
## 3 10 NA NA 0 44 2 12 $43,486 Yes
## 4 18 NA NA 0 35 2 NA $21,204 Yes
## 5 21 NA NA 0 59 0 12 $87,460 No
## 6 30 NA NA 0 46 0 14 No
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 1 $0 z_No M Bachelors Manager 26 Private
## 2 $0 z_No M z_High School Manager 21 Private
## 3 $0 z_No z_F z_High School z_Blue Collar 30 Commercial
## 4 $0 z_No M z_High School Clerical 74 Private
## 5 $0 z_No M z_High School Manager 45 Private
## 6 $207,519 Yes M Bachelors Professional 7 Commercial
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 1 $21,970 1 Van yes $0 0 No 2
## 2 $18,930 6 Minivan no $3,295 1 No 2
## 3 $5,900 10 z_SUV no $0 0 No 0
## 4 $9,230 6 Pickup no $0 0 Yes 0
## 5 $15,420 1 Minivan yes $44,857 2 No 4
## 6 $25,660 1 Panel Truck no $2,119 1 No 2
## CAR_AGE URBANICITY
## 1 10 Highly Urban/ Urban
## 2 1 Highly Urban/ Urban
## 3 10 z_Highly Rural/ Rural
## 4 4 z_Highly Rural/ Rural
## 5 1 Highly Urban/ Urban
## 6 12 Highly Urban/ Urban
#find dimentions
dim(data)
## [1] 8161 26
#build function that counts missing values
count_nas <- function(data){
variable_name_column <- c()
number_missing_column <- c()
for (i in 2:ncol(data)){
variable_name <- colnames(data[i])
number_missing <- sum(is.na(data[i]))
variable_name_column <- c(variable_name_column,variable_name)
number_missing_column <- c(number_missing_column,number_missing)
}
missing_table <- data.frame(variable_name_column,number_missing_column)
missing_table <- missing_table %>% mutate(percentage=round(number_missing_column*100/nrow(data),2)) %>% arrange(desc(percentage))
missing_table
}
#chart for missing values
aggr(data[-1], prop = T, numbers = T, cex.axis=.5, cex.numbers = 0.8,
ylab=c("Proportion of missingness","Missingness Pattern"),
labels=names(data[-1]))
#missing values
count_nas(data)
## variable_name_column number_missing_column percentage
## 1 CAR_AGE 510 6.25
## 2 YOJ 454 5.56
## 3 AGE 6 0.07
## 4 TARGET_FLAG 0 0.00
## 5 TARGET_AMT 0 0.00
## 6 KIDSDRIV 0 0.00
## 7 HOMEKIDS 0 0.00
## 8 INCOME 0 0.00
## 9 PARENT1 0 0.00
## 10 HOME_VAL 0 0.00
## 11 MSTATUS 0 0.00
## 12 SEX 0 0.00
## 13 EDUCATION 0 0.00
## 14 JOB 0 0.00
## 15 TRAVTIME 0 0.00
## 16 CAR_USE 0 0.00
## 17 BLUEBOOK 0 0.00
## 18 TIF 0 0.00
## 19 CAR_TYPE 0 0.00
## 20 RED_CAR 0 0.00
## 21 OLDCLAIM 0 0.00
## 22 CLM_FREQ 0 0.00
## 23 REVOKED 0 0.00
## 24 MVR_PTS 0 0.00
## 25 URBANICITY 0 0.00
count_nas(data_testing[4:length(data_testing)])
## variable_name_column number_missing_column percentage
## 1 CAR_AGE 129 6.03
## 2 YOJ 94 4.39
## 3 AGE 1 0.05
## 4 HOMEKIDS 0 0.00
## 5 INCOME 0 0.00
## 6 PARENT1 0 0.00
## 7 HOME_VAL 0 0.00
## 8 MSTATUS 0 0.00
## 9 SEX 0 0.00
## 10 EDUCATION 0 0.00
## 11 JOB 0 0.00
## 12 TRAVTIME 0 0.00
## 13 CAR_USE 0 0.00
## 14 BLUEBOOK 0 0.00
## 15 TIF 0 0.00
## 16 CAR_TYPE 0 0.00
## 17 RED_CAR 0 0.00
## 18 OLDCLAIM 0 0.00
## 19 CLM_FREQ 0 0.00
## 20 REVOKED 0 0.00
## 21 MVR_PTS 0 0.00
## 22 URBANICITY 0 0.00
#omit NAs
data <- na.omit(data)
data_testing_no_na <- (data_testing[4:length(data_testing)])
data_testing <- data.frame(data_testing[1:3],data_testing_no_na)
#confirm no NAs
count_nas(data)
## variable_name_column number_missing_column percentage
## 1 TARGET_FLAG 0 0
## 2 TARGET_AMT 0 0
## 3 KIDSDRIV 0 0
## 4 AGE 0 0
## 5 HOMEKIDS 0 0
## 6 YOJ 0 0
## 7 INCOME 0 0
## 8 PARENT1 0 0
## 9 HOME_VAL 0 0
## 10 MSTATUS 0 0
## 11 SEX 0 0
## 12 EDUCATION 0 0
## 13 JOB 0 0
## 14 TRAVTIME 0 0
## 15 CAR_USE 0 0
## 16 BLUEBOOK 0 0
## 17 TIF 0 0
## 18 CAR_TYPE 0 0
## 19 RED_CAR 0 0
## 20 OLDCLAIM 0 0
## 21 CLM_FREQ 0 0
## 22 REVOKED 0 0
## 23 MVR_PTS 0 0
## 24 CAR_AGE 0 0
## 25 URBANICITY 0 0
count_nas(data_testing[4:length(data_testing)])
## variable_name_column number_missing_column percentage
## 1 CAR_AGE 129 6.03
## 2 YOJ 94 4.39
## 3 AGE 1 0.05
## 4 HOMEKIDS 0 0.00
## 5 INCOME 0 0.00
## 6 PARENT1 0 0.00
## 7 HOME_VAL 0 0.00
## 8 MSTATUS 0 0.00
## 9 SEX 0 0.00
## 10 EDUCATION 0 0.00
## 11 JOB 0 0.00
## 12 TRAVTIME 0 0.00
## 13 CAR_USE 0 0.00
## 14 BLUEBOOK 0 0.00
## 15 TIF 0 0.00
## 16 CAR_TYPE 0 0.00
## 17 RED_CAR 0 0.00
## 18 OLDCLAIM 0 0.00
## 19 CLM_FREQ 0 0.00
## 20 REVOKED 0 0.00
## 21 MVR_PTS 0 0.00
## 22 URBANICITY 0 0.00
Clean data and convert data to appropriate formats.
#remove "$" and "z_"
data <- data %>% mutate(INCOME = str_replace(INCOME, "[^[:alnum:]]", ""), HOME_VAL = str_replace(HOME_VAL, "[^[:alnum:]]", ""),SEX = as.factor(str_replace(SEX, "z_", "")), OLDCLAIM = as.factor(str_replace(OLDCLAIM, "[^[:alnum:]]", "")), MSTATUS = as.factor(str_replace(MSTATUS, "z_", "")),EDUCATION = as.factor(str_replace(EDUCATION, "z_", "")), BLUEBOOK = as.factor(str_replace(BLUEBOOK, "[^[:alnum:]]", "")),EDUCATION = as.factor(str_replace(EDUCATION, "<", "")),JOB = as.factor(str_replace(JOB, "z_", "")),CAR_TYPE = as.factor(str_replace(CAR_TYPE, "z_", "")),URBANICITY = as.factor(str_replace(URBANICITY, "z_", "")))
data_testing <- data_testing %>% mutate(INCOME = str_replace(INCOME, "[^[:alnum:]]", ""), HOME_VAL = str_replace(HOME_VAL, "[^[:alnum:]]", ""),SEX = as.factor(str_replace(SEX, "z_", "")), OLDCLAIM = as.factor(str_replace(OLDCLAIM, "[^[:alnum:]]", "")), MSTATUS = as.factor(str_replace(MSTATUS, "z_", "")),EDUCATION = as.factor(str_replace(EDUCATION, "z_", "")), BLUEBOOK = as.factor(str_replace(BLUEBOOK, "[^[:alnum:]]", "")),EDUCATION = as.factor(str_replace(EDUCATION, "<", "")),JOB = as.factor(str_replace(JOB, "z_", "")),CAR_TYPE = as.factor(str_replace(CAR_TYPE, "z_", "")),URBANICITY = as.factor(str_replace(URBANICITY, "z_", "")))
#list all variables
colnames(data)
## [1] "INDEX" "TARGET_FLAG" "TARGET_AMT" "KIDSDRIV" "AGE"
## [6] "HOMEKIDS" "YOJ" "INCOME" "PARENT1" "HOME_VAL"
## [11] "MSTATUS" "SEX" "EDUCATION" "JOB" "TRAVTIME"
## [16] "CAR_USE" "BLUEBOOK" "TIF" "CAR_TYPE" "RED_CAR"
## [21] "OLDCLAIM" "CLM_FREQ" "REVOKED" "MVR_PTS" "CAR_AGE"
## [26] "URBANICITY"
#convert INCOME, HOME_VAL and OLDCLAIM to numeric
data <- data %>% mutate(INCOME = as.numeric(as.factor(INCOME)), HOME_VAL = as.numeric(as.factor(HOME_VAL)), OLDCLAIM = as.numeric(as.factor(OLDCLAIM)),BLUEBOOK = as.numeric(as.factor(BLUEBOOK)))
data_testing <- data_testing %>% mutate(INCOME = as.numeric(as.factor(INCOME)), HOME_VAL = as.numeric(as.factor(HOME_VAL)), OLDCLAIM = as.numeric(as.factor(OLDCLAIM)),BLUEBOOK = as.numeric(as.factor(BLUEBOOK)))
head(data)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 1 1 0 0 0 60 0 11 4506 No
## 2 2 0 0 0 43 0 11 5630 No
## 3 4 0 0 0 35 1 10 1114 No
## 4 5 0 0 0 51 0 14 1 No
## 5 7 1 2946 0 34 1 12 662 Yes
## 6 12 1 2501 0 34 0 10 4278 No
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 1 2 No M PhD Professional 14 Private
## 2 2886 No M High School Blue Collar 22 Commercial
## 3 310 Yes F High School Clerical 5 Private
## 4 3484 Yes M High School Blue Collar 32 Private
## 5 2 No F Bachelors Blue Collar 46 Commercial
## 6 2 No F Bachelors Clerical 34 Private
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 1 429 11 Minivan yes 1285 2 No 3
## 2 498 1 Minivan yes 1 0 No 0
## 3 2120 4 SUV no 1164 2 No 3
## 4 548 7 Minivan yes 1 0 No 0
## 5 739 1 Sports Car no 1 0 No 0
## 6 132 1 SUV no 1 0 No 0
## CAR_AGE URBANICITY
## 1 18 Highly Urban/ Urban
## 2 1 Highly Urban/ Urban
## 3 10 Highly Urban/ Urban
## 4 6 Highly Urban/ Urban
## 5 7 Highly Urban/ Urban
## 6 1 Highly Urban/ Urban
head(data_testing)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 1 3 NA NA 0 48 0 11 1154 No
## 2 9 NA NA 1 40 1 11 1119 Yes
## 3 10 NA NA 0 44 2 12 974 Yes
## 4 18 NA NA 0 35 2 NA 513 Yes
## 5 21 NA NA 0 59 0 12 1686 No
## 6 30 NA NA 0 46 0 14 1 No
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 1 2 No M Bachelors Manager 26 Private
## 2 2 No M High School Manager 21 Private
## 3 2 No F High School Blue Collar 30 Commercial
## 4 2 No M High School Clerical 74 Private
## 5 2 No M High School Manager 45 Private
## 6 636 Yes M Bachelors Professional 7 Commercial
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 1 703 1 Van yes 1 0 No 2
## 2 540 6 Minivan no 272 1 No 2
## 3 1189 10 SUV no 1 0 No 0
## 4 1373 6 Pickup no 1 0 Yes 0
## 5 345 1 Minivan yes 494 2 No 4
## 6 864 1 Panel Truck no 137 1 No 2
## CAR_AGE URBANICITY
## 1 10 Highly Urban/ Urban
## 2 1 Highly Urban/ Urban
## 3 10 Highly Rural/ Rural
## 4 4 Highly Rural/ Rural
## 5 1 Highly Urban/ Urban
## 6 12 Highly Urban/ Urban
The following assumption of logistic regression must be verified:
#correlation between variables
corrplot(cor(data[4:length(data_testing)] %>% select_if(is.numeric)), type = "upper", method = "number", tl.cex = 0.5, tl.col="black",number.cex = .5)
Analyze scatter plots and mosaic plots.
#create separate boxplots for each numeric variable
par(mfrow=c(1,5))
for(i in 4:ncol(data)) {
if (is.numeric(data[,i])=="TRUE") {
boxplot(data[,i], main=names(data)[i])
}
}
#create mosaic plots for each nominal/ordinal variable
par(mfrow=c(1,2))
for(i in 4:ncol(data)) {
if (is.numeric(data[,i])=="FALSE") {
count <- table(data$TARGET_FLAG, data[,i])
mosaicplot(count, main = names(data)[i],
xlab = "TARGET_FLAG",
ylab = names(data)[i],
las = 1,
border = "black",
shade = TRUE
)
}
}
data_linearity_test <- data %>% select(-TARGET_AMT,-INDEX)
#replacing each numeric variable with variable*log(variable)
for (i in 4:(length(data_linearity_test))){
for (j in 1:nrow(data_linearity_test)){
if (is.double(data_linearity_test[j,i]) == "TRUE" && (data_linearity_test[j,i] < 0 | data_linearity_test[j,i] > 0)){
data_linearity_test[j,i] <- data_linearity_test[j,i]*log(data_linearity_test[j,i])
}
}
}
head(data_linearity_test)
## TARGET_FLAG KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1 HOME_VAL
## 1 0 0 60 0 11 37909.722 No 1.386294
## 2 0 0 43 0 11 48619.918 No 22994.570770
## 3 0 0 35 1 10 7815.504 No 1778.337412
## 4 0 0 51 0 14 0.000 No 28415.282201
## 5 1 0 34 1 12 4299.866 Yes 1.386294
## 6 1 0 34 0 10 35769.389 No 1.386294
## MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE BLUEBOOK TIF
## 1 No M PhD Professional 14 Private 2600.3650 11
## 2 No M High School Blue Collar 22 Commercial 3092.8788 1
## 3 Yes F High School Clerical 5 Private 16237.4433 4
## 4 Yes M High School Blue Collar 32 Private 3455.8389 7
## 5 No F Bachelors Blue Collar 46 Commercial 4881.3152 1
## 6 No F Bachelors Clerical 34 Private 644.5299 1
## CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS CAR_AGE
## 1 Minivan yes 9198.690 2 No 3 18
## 2 Minivan yes 0.000 0 No 0 1
## 3 SUV no 8217.395 2 No 3 10
## 4 Minivan yes 0.000 0 No 0 6
## 5 Sports Car no 0.000 0 No 0 7
## 6 SUV no 0.000 0 No 0 1
## URBANICITY
## 1 Highly Urban/ Urban
## 2 Highly Urban/ Urban
## 3 Highly Urban/ Urban
## 4 Highly Urban/ Urban
## 5 Highly Urban/ Urban
## 6 Highly Urban/ Urban
#run regression model that includes all independent variables
model <- glm(formula = TARGET_FLAG ~ . , family = binomial(link = "logit"),
data = data_linearity_test)
summary(model)
##
## Call:
## glm(formula = TARGET_FLAG ~ ., family = binomial(link = "logit"),
## data = data_linearity_test)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5938 -0.7130 -0.4048 0.6235 3.1627
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.960e+00 3.439e-01 -11.517 < 2e-16 ***
## KIDSDRIV 3.569e-01 6.587e-02 5.419 6.00e-08 ***
## AGE -3.512e-03 4.235e-03 -0.829 0.406915
## HOMEKIDS 6.132e-02 3.941e-02 1.556 0.119729
## YOJ -1.909e-02 9.141e-03 -2.088 0.036815 *
## INCOME -3.011e-08 2.194e-06 -0.014 0.989048
## PARENT1Yes 3.826e-01 1.159e-01 3.301 0.000963 ***
## HOME_VAL -1.161e-05 2.933e-06 -3.958 7.56e-05 ***
## MSTATUSYes -4.905e-01 8.207e-02 -5.976 2.29e-09 ***
## SEXM 2.912e-01 1.099e-01 2.650 0.008058 **
## EDUCATIONHigh School 4.533e-01 9.307e-02 4.871 1.11e-06 ***
## EDUCATIONMasters 7.600e-02 1.505e-01 0.505 0.613603
## EDUCATIONPhD 1.199e-01 1.901e-01 0.631 0.528153
## JOBBlue Collar 3.774e-01 1.970e-01 1.916 0.055415 .
## JOBClerical 5.933e-01 2.066e-01 2.872 0.004079 **
## JOBDoctor -2.707e-01 2.731e-01 -0.991 0.321481
## JOBHome Maker 5.968e-01 2.147e-01 2.779 0.005445 **
## JOBLawyer 2.467e-01 1.795e-01 1.374 0.169315
## JOBManager -5.858e-01 1.835e-01 -3.193 0.001410 **
## JOBProfessional 2.259e-01 1.894e-01 1.193 0.232955
## JOBStudent 4.648e-01 2.227e-01 2.088 0.036824 *
## TRAVTIME 1.508e-02 2.007e-03 7.513 5.78e-14 ***
## CAR_USEPrivate -7.844e-01 9.288e-02 -8.445 < 2e-16 ***
## BLUEBOOK 5.076e-06 4.702e-06 1.080 0.280334
## TIF -5.410e-02 7.804e-03 -6.933 4.12e-12 ***
## CAR_TYPEPanel Truck 2.302e-01 1.512e-01 1.522 0.127954
## CAR_TYPEPickup 5.775e-01 1.082e-01 5.338 9.42e-08 ***
## CAR_TYPESports Car 1.266e+00 1.308e-01 9.679 < 2e-16 ***
## CAR_TYPESUV 9.639e-01 1.101e-01 8.756 < 2e-16 ***
## CAR_TYPEVan 4.600e-01 1.284e-01 3.584 0.000338 ***
## RED_CARyes -1.900e-02 9.183e-02 -0.207 0.836094
## OLDCLAIM 1.686e-05 6.495e-06 2.596 0.009427 **
## CLM_FREQ 1.137e-01 3.354e-02 3.389 0.000701 ***
## REVOKEDYes 7.300e-01 8.584e-02 8.504 < 2e-16 ***
## MVR_PTS 1.080e-01 1.452e-02 7.438 1.02e-13 ***
## CAR_AGE -7.041e-03 7.960e-03 -0.885 0.376367
## URBANICITYHighly Urban/ Urban 2.342e+00 1.189e-01 19.696 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8303.6 on 7212 degrees of freedom
## Residual deviance: 6465.0 on 7176 degrees of freedom
## AIC: 6539
##
## Number of Fisher Scoring iterations: 5
#store TARGET_AMT separately
TARGET_AMT <- data$TARGET_AMT
INDEX <- data$INDEX
amount_index <- data.frame(INDEX,TARGET_AMT)
#replace variables that have non-linear relationships with logit function by variables logs
data_logistic_regression <- data %>% select(-INDEX,-TARGET_AMT) %>% mutate(HOME_VAL = ifelse(HOME_VAL > 0,log(HOME_VAL),""), TRAVTIME = ifelse(TRAVTIME > 0,log(TRAVTIME),""), TIF = ifelse(TIF > 0,log(TIF),""),OLDCLAIM = ifelse(OLDCLAIM > 0,log(OLDCLAIM),""))
#count NAs
count_nas(data_logistic_regression)
## variable_name_column number_missing_column percentage
## 1 KIDSDRIV 0 0
## 2 AGE 0 0
## 3 HOMEKIDS 0 0
## 4 YOJ 0 0
## 5 INCOME 0 0
## 6 PARENT1 0 0
## 7 HOME_VAL 0 0
## 8 MSTATUS 0 0
## 9 SEX 0 0
## 10 EDUCATION 0 0
## 11 JOB 0 0
## 12 TRAVTIME 0 0
## 13 CAR_USE 0 0
## 14 BLUEBOOK 0 0
## 15 TIF 0 0
## 16 CAR_TYPE 0 0
## 17 RED_CAR 0 0
## 18 OLDCLAIM 0 0
## 19 CLM_FREQ 0 0
## 20 REVOKED 0 0
## 21 MVR_PTS 0 0
## 22 CAR_AGE 0 0
## 23 URBANICITY 0 0
head(data_logistic_regression)
## TARGET_FLAG KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1 HOME_VAL MSTATUS
## 1 0 0 60 0 11 4506 No 0.6931472 No
## 2 0 0 43 0 11 5630 No 7.9676267 No
## 3 0 0 35 1 10 1114 No 5.7365723 Yes
## 4 0 0 51 0 14 1 No 8.1559363 Yes
## 5 1 0 34 1 12 662 Yes 0.6931472 No
## 6 1 0 34 0 10 4278 No 0.6931472 No
## SEX EDUCATION JOB TRAVTIME CAR_USE BLUEBOOK TIF
## 1 M PhD Professional 2.639057 Private 429 2.397895
## 2 M High School Blue Collar 3.091042 Commercial 498 0.000000
## 3 F High School Clerical 1.609438 Private 2120 1.386294
## 4 M High School Blue Collar 3.465736 Private 548 1.945910
## 5 F Bachelors Blue Collar 3.828641 Commercial 739 0.000000
## 6 F Bachelors Clerical 3.526361 Private 132 0.000000
## CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS CAR_AGE
## 1 Minivan yes 7.158514 2 No 3 18
## 2 Minivan yes 0.000000 0 No 0 1
## 3 SUV no 7.059618 2 No 3 10
## 4 Minivan yes 0.000000 0 No 0 6
## 5 Sports Car no 0.000000 0 No 0 7
## 6 SUV no 0.000000 0 No 0 1
## URBANICITY
## 1 Highly Urban/ Urban
## 2 Highly Urban/ Urban
## 3 Highly Urban/ Urban
## 4 Highly Urban/ Urban
## 5 Highly Urban/ Urban
## 6 Highly Urban/ Urban
#build glm model using stepwise approach
regression_model.null = glm(TARGET_FLAG ~ 1,
data = data_logistic_regression,
family = binomial(link="logit")
)
regression_model.full = glm(TARGET_FLAG ~ .,
data = data_logistic_regression,
family = binomial(link="logit")
)
step(regression_model.null,
scope = list(upper=regression_model.full),
direction = "both",
test = "Chisq",
data = data_logistic_regression)
## Start: AIC=8305.56
## TARGET_FLAG ~ 1
##
## Df Deviance AIC LRT Pr(>Chi)
## + URBANICITY 1 7864.2 7868.2 439.40 < 2.2e-16 ***
## + OLDCLAIM 1 7871.9 7875.9 431.69 < 2.2e-16 ***
## + MVR_PTS 1 7960.4 7964.4 343.20 < 2.2e-16 ***
## + CLM_FREQ 1 7964.9 7968.9 338.66 < 2.2e-16 ***
## + JOB 8 8054.6 8072.6 248.97 < 2.2e-16 ***
## + PARENT1 1 8143.7 8147.7 159.86 < 2.2e-16 ***
## + CAR_USE 1 8149.6 8153.6 153.93 < 2.2e-16 ***
## + HOME_VAL 1 8151.2 8155.2 152.33 < 2.2e-16 ***
## + CAR_TYPE 5 8145.5 8157.5 158.05 < 2.2e-16 ***
## + EDUCATION 3 8155.2 8163.2 148.40 < 2.2e-16 ***
## + REVOKED 1 8168.7 8172.7 134.90 < 2.2e-16 ***
## + MSTATUS 1 8181.7 8185.7 121.83 < 2.2e-16 ***
## + HOMEKIDS 1 8211.6 8215.6 92.00 < 2.2e-16 ***
## + CAR_AGE 1 8223.8 8227.8 79.80 < 2.2e-16 ***
## + AGE 1 8225.7 8229.7 77.83 < 2.2e-16 ***
## + KIDSDRIV 1 8240.9 8244.9 62.61 2.518e-15 ***
## + TIF 1 8260.9 8264.9 42.67 6.481e-11 ***
## + YOJ 1 8272.4 8276.4 31.19 2.340e-08 ***
## + TRAVTIME 1 8277.5 8281.5 26.05 3.330e-07 ***
## + BLUEBOOK 1 8278.6 8282.6 24.92 5.981e-07 ***
## + INCOME 1 8298.3 8302.3 5.27 0.02174 *
## + SEX 1 8300.4 8304.4 3.20 0.07356 .
## <none> 8303.6 8305.6
## + RED_CAR 1 8302.9 8306.9 0.70 0.40163
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=7868.15
## TARGET_FLAG ~ URBANICITY
##
## Df Deviance AIC LRT Pr(>Chi)
## + JOB 8 7389.8 7409.8 474.32 < 2.2e-16 ***
## + EDUCATION 3 7562.8 7572.8 301.40 < 2.2e-16 ***
## + OLDCLAIM 1 7602.0 7608.0 262.19 < 2.2e-16 ***
## + MVR_PTS 1 7611.5 7617.5 252.68 < 2.2e-16 ***
## + CLM_FREQ 1 7660.1 7666.1 204.03 < 2.2e-16 ***
## + HOME_VAL 1 7676.8 7682.8 187.37 < 2.2e-16 ***
## + PARENT1 1 7681.6 7687.6 182.52 < 2.2e-16 ***
## + CAR_TYPE 5 7674.1 7688.1 190.04 < 2.2e-16 ***
## + CAR_USE 1 7694.3 7700.3 169.90 < 2.2e-16 ***
## + CAR_AGE 1 7706.0 7712.0 158.14 < 2.2e-16 ***
## + MSTATUS 1 7736.4 7742.4 127.76 < 2.2e-16 ***
## + HOMEKIDS 1 7740.2 7746.2 123.94 < 2.2e-16 ***
## + REVOKED 1 7760.0 7766.0 104.19 < 2.2e-16 ***
## + AGE 1 7763.3 7769.3 100.83 < 2.2e-16 ***
## + KIDSDRIV 1 7784.6 7790.6 79.55 < 2.2e-16 ***
## + TRAVTIME 1 7801.5 7807.5 62.65 2.474e-15 ***
## + YOJ 1 7806.2 7812.2 57.95 2.684e-14 ***
## + TIF 1 7816.9 7822.9 47.30 6.102e-12 ***
## + BLUEBOOK 1 7838.0 7844.0 26.11 3.227e-07 ***
## + INCOME 1 7851.6 7857.6 12.59 0.0003887 ***
## + SEX 1 7855.4 7861.4 8.73 0.0031222 **
## + RED_CAR 1 7860.7 7866.7 3.46 0.0629175 .
## <none> 7864.2 7868.2
## - URBANICITY 1 8303.6 8305.6 439.40 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=7409.83
## TARGET_FLAG ~ URBANICITY + JOB
##
## Df Deviance AIC LRT Pr(>Chi)
## + MVR_PTS 1 7193.8 7215.8 196.00 < 2.2e-16 ***
## + OLDCLAIM 1 7196.7 7218.7 193.13 < 2.2e-16 ***
## + CLM_FREQ 1 7234.6 7256.6 155.26 < 2.2e-16 ***
## + MSTATUS 1 7236.5 7258.5 153.34 < 2.2e-16 ***
## + CAR_TYPE 5 7233.5 7263.5 156.29 < 2.2e-16 ***
## + PARENT1 1 7242.7 7264.7 147.13 < 2.2e-16 ***
## + HOME_VAL 1 7268.8 7290.8 121.00 < 2.2e-16 ***
## + REVOKED 1 7295.2 7317.2 94.59 < 2.2e-16 ***
## + CAR_USE 1 7310.7 7332.7 79.18 < 2.2e-16 ***
## + KIDSDRIV 1 7327.9 7349.9 61.98 3.478e-15 ***
## + TRAVTIME 1 7335.3 7357.3 54.54 1.519e-13 ***
## + TIF 1 7336.9 7358.9 52.96 3.412e-13 ***
## + HOMEKIDS 1 7341.6 7363.6 48.26 3.727e-12 ***
## + AGE 1 7365.7 7387.7 24.12 9.065e-07 ***
## + EDUCATION 3 7363.6 7389.6 26.20 8.666e-06 ***
## + YOJ 1 7372.0 7394.0 17.88 2.355e-05 ***
## + BLUEBOOK 1 7377.5 7399.5 12.32 0.0004491 ***
## + CAR_AGE 1 7381.6 7403.6 8.23 0.0041287 **
## + SEX 1 7385.2 7407.2 4.66 0.0308302 *
## <none> 7389.8 7409.8
## + RED_CAR 1 7388.1 7410.1 1.73 0.1878538
## + INCOME 1 7388.4 7410.4 1.39 0.2387805
## - JOB 8 7864.2 7868.2 474.32 < 2.2e-16 ***
## - URBANICITY 1 8054.6 8072.6 664.75 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=7215.83
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS
##
## Df Deviance AIC LRT Pr(>Chi)
## + MSTATUS 1 7056.1 7080.1 137.72 < 2.2e-16 ***
## + CAR_TYPE 5 7055.8 7087.8 138.04 < 2.2e-16 ***
## + PARENT1 1 7064.1 7088.1 129.68 < 2.2e-16 ***
## + HOME_VAL 1 7090.3 7114.3 103.50 < 2.2e-16 ***
## + REVOKED 1 7103.5 7127.5 90.31 < 2.2e-16 ***
## + OLDCLAIM 1 7119.6 7143.6 74.27 < 2.2e-16 ***
## + CAR_USE 1 7123.7 7147.7 70.10 < 2.2e-16 ***
## + CLM_FREQ 1 7131.3 7155.3 62.49 2.681e-15 ***
## + KIDSDRIV 1 7141.6 7165.6 52.28 4.815e-13 ***
## + TRAVTIME 1 7143.6 7167.6 50.27 1.338e-12 ***
## + TIF 1 7146.6 7170.6 47.20 6.399e-12 ***
## + HOMEKIDS 1 7154.0 7178.0 39.82 2.784e-10 ***
## + EDUCATION 3 7168.9 7196.9 24.95 1.579e-05 ***
## + AGE 1 7176.6 7200.6 17.22 3.326e-05 ***
## + YOJ 1 7180.9 7204.9 12.89 0.0003311 ***
## + BLUEBOOK 1 7181.1 7205.1 12.70 0.0003652 ***
## + CAR_AGE 1 7185.8 7209.8 8.00 0.0046663 **
## + SEX 1 7190.4 7214.4 3.38 0.0659479 .
## + INCOME 1 7191.8 7215.8 2.03 0.1544778
## <none> 7193.8 7215.8
## + RED_CAR 1 7192.1 7216.1 1.69 0.1930480
## - MVR_PTS 1 7389.8 7409.8 196.00 < 2.2e-16 ***
## - JOB 8 7611.5 7617.5 417.64 < 2.2e-16 ***
## - URBANICITY 1 7735.6 7755.6 541.74 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=7080.11
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS
##
## Df Deviance AIC LRT Pr(>Chi)
## + CAR_TYPE 5 6913.8 6947.8 142.30 < 2.2e-16 ***
## + REVOKED 1 6972.8 6998.8 83.30 < 2.2e-16 ***
## + CAR_USE 1 6989.7 7015.7 66.42 3.639e-16 ***
## + OLDCLAIM 1 6991.6 7017.6 64.55 9.403e-16 ***
## + KIDSDRIV 1 6993.1 7019.1 62.97 2.100e-15 ***
## + CLM_FREQ 1 7002.8 7028.8 53.36 2.780e-13 ***
## + TRAVTIME 1 7003.7 7029.7 52.39 4.552e-13 ***
## + HOMEKIDS 1 7006.6 7032.6 49.54 1.941e-12 ***
## + TIF 1 7006.9 7032.9 49.23 2.281e-12 ***
## + PARENT1 1 7014.4 7040.4 41.75 1.035e-10 ***
## + EDUCATION 3 7025.6 7055.6 30.49 1.090e-06 ***
## + HOME_VAL 1 7035.8 7061.8 20.33 6.516e-06 ***
## + BLUEBOOK 1 7045.0 7071.0 11.13 0.0008497 ***
## + CAR_AGE 1 7045.9 7071.9 10.23 0.0013797 **
## + AGE 1 7047.7 7073.7 8.42 0.0037186 **
## + SEX 1 7052.6 7078.6 3.48 0.0622212 .
## + YOJ 1 7053.1 7079.1 3.03 0.0815317 .
## + RED_CAR 1 7054.0 7080.0 2.16 0.1420570
## <none> 7056.1 7080.1
## + INCOME 1 7054.2 7080.2 1.93 0.1644498
## - MSTATUS 1 7193.8 7215.8 137.72 < 2.2e-16 ***
## - MVR_PTS 1 7236.5 7258.5 180.39 < 2.2e-16 ***
## - JOB 8 7498.9 7506.9 442.74 < 2.2e-16 ***
## - URBANICITY 1 7617.7 7639.7 561.59 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6947.81
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE
##
## Df Deviance AIC LRT Pr(>Chi)
## + REVOKED 1 6832.6 6868.6 81.23 < 2.2e-16 ***
## + KIDSDRIV 1 6848.2 6884.2 65.62 5.466e-16 ***
## + CAR_USE 1 6855.6 6891.6 58.16 2.412e-14 ***
## + OLDCLAIM 1 6858.1 6894.1 55.68 8.524e-14 ***
## + TIF 1 6859.8 6895.8 54.01 1.999e-13 ***
## + TRAVTIME 1 6860.6 6896.6 53.17 3.064e-13 ***
## + HOMEKIDS 1 6864.8 6900.8 49.01 2.546e-12 ***
## + CLM_FREQ 1 6867.2 6903.2 46.62 8.614e-12 ***
## + PARENT1 1 6871.9 6907.9 41.93 9.470e-11 ***
## + EDUCATION 3 6885.2 6925.2 28.65 2.647e-06 ***
## + HOME_VAL 1 6894.1 6930.1 19.67 9.186e-06 ***
## + AGE 1 6903.9 6939.9 9.87 0.001681 **
## + CAR_AGE 1 6904.5 6940.5 9.30 0.002292 **
## + SEX 1 6906.5 6942.5 7.30 0.006886 **
## + YOJ 1 6911.6 6947.6 2.22 0.136168
## <none> 6913.8 6947.8
## + RED_CAR 1 6912.4 6948.4 1.38 0.240029
## + BLUEBOOK 1 6912.7 6948.7 1.12 0.290811
## + INCOME 1 6912.7 6948.7 1.07 0.301813
## - CAR_TYPE 5 7056.1 7080.1 142.30 < 2.2e-16 ***
## - MSTATUS 1 7055.8 7087.8 141.99 < 2.2e-16 ***
## - MVR_PTS 1 7075.7 7107.7 161.94 < 2.2e-16 ***
## - JOB 8 7327.3 7345.3 413.48 < 2.2e-16 ***
## - URBANICITY 1 7495.0 7527.0 581.17 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6868.58
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED
##
## Df Deviance AIC LRT Pr(>Chi)
## + KIDSDRIV 1 6772.8 6810.8 59.77 1.064e-14 ***
## + CAR_USE 1 6776.8 6814.8 55.78 8.096e-14 ***
## + TRAVTIME 1 6778.1 6816.1 54.50 1.553e-13 ***
## + OLDCLAIM 1 6779.2 6817.2 53.36 2.783e-13 ***
## + TIF 1 6780.7 6818.7 51.88 5.911e-13 ***
## + CLM_FREQ 1 6787.1 6825.1 45.43 1.582e-11 ***
## + HOMEKIDS 1 6788.7 6826.7 43.89 3.481e-11 ***
## + PARENT1 1 6792.5 6830.5 40.11 2.406e-10 ***
## + EDUCATION 3 6804.5 6846.5 28.04 3.557e-06 ***
## + HOME_VAL 1 6814.4 6852.4 18.17 2.018e-05 ***
## + CAR_AGE 1 6823.1 6861.1 9.49 0.002061 **
## + AGE 1 6823.9 6861.9 8.67 0.003231 **
## + SEX 1 6825.7 6863.7 6.89 0.008668 **
## + YOJ 1 6830.0 6868.0 2.56 0.109288
## <none> 6832.6 6868.6
## + BLUEBOOK 1 6831.4 6869.4 1.22 0.268757
## + RED_CAR 1 6831.4 6869.4 1.14 0.285530
## + INCOME 1 6831.6 6869.6 0.93 0.334478
## - REVOKED 1 6913.8 6947.8 81.23 < 2.2e-16 ***
## - CAR_TYPE 5 6972.8 6998.8 140.23 < 2.2e-16 ***
## - MSTATUS 1 6967.4 7001.4 134.80 < 2.2e-16 ***
## - MVR_PTS 1 6991.6 7025.6 159.07 < 2.2e-16 ***
## - JOB 8 7237.9 7257.9 405.34 < 2.2e-16 ***
## - URBANICITY 1 7384.5 7418.5 551.89 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6810.8
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV
##
## Df Deviance AIC LRT Pr(>Chi)
## + CAR_USE 1 6713.5 6753.5 59.31 1.346e-14 ***
## + TRAVTIME 1 6716.1 6756.1 56.75 4.946e-14 ***
## + OLDCLAIM 1 6720.5 6760.5 52.34 4.672e-13 ***
## + TIF 1 6722.1 6762.1 50.67 1.093e-12 ***
## + CLM_FREQ 1 6728.4 6768.4 44.42 2.648e-11 ***
## + EDUCATION 3 6745.4 6789.4 27.41 4.830e-06 ***
## + PARENT1 1 6751.5 6791.5 21.30 3.931e-06 ***
## + HOME_VAL 1 6755.5 6795.5 17.28 3.222e-05 ***
## + HOMEKIDS 1 6760.6 6800.6 12.21 0.0004753 ***
## + CAR_AGE 1 6763.7 6803.7 9.08 0.0025847 **
## + SEX 1 6763.9 6803.9 8.89 0.0028640 **
## + AGE 1 6766.3 6806.3 6.47 0.0109717 *
## + YOJ 1 6768.6 6808.6 4.16 0.0413811 *
## <none> 6772.8 6810.8
## + RED_CAR 1 6771.1 6811.1 1.70 0.1918995
## + BLUEBOOK 1 6771.7 6811.7 1.14 0.2850905
## + INCOME 1 6772.2 6812.2 0.63 0.4287105
## - KIDSDRIV 1 6832.6 6868.6 59.77 1.064e-14 ***
## - REVOKED 1 6848.2 6884.2 75.38 < 2.2e-16 ***
## - CAR_TYPE 5 6915.8 6943.8 142.95 < 2.2e-16 ***
## - MSTATUS 1 6918.2 6954.2 145.43 < 2.2e-16 ***
## - MVR_PTS 1 6921.7 6957.7 148.94 < 2.2e-16 ***
## - JOB 8 7167.1 7189.1 394.33 < 2.2e-16 ***
## - URBANICITY 1 7341.2 7377.2 568.44 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6753.49
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE
##
## Df Deviance AIC LRT Pr(>Chi)
## + TRAVTIME 1 6657.1 6699.1 56.37 6.003e-14 ***
## + TIF 1 6662.3 6704.3 51.20 8.341e-13 ***
## + OLDCLAIM 1 6664.7 6706.7 48.79 2.852e-12 ***
## + CLM_FREQ 1 6672.2 6714.2 41.27 1.325e-10 ***
## + EDUCATION 3 6674.6 6720.6 38.87 1.849e-08 ***
## + PARENT1 1 6692.2 6734.2 21.25 4.035e-06 ***
## + HOME_VAL 1 6694.5 6736.5 18.99 1.313e-05 ***
## + CAR_AGE 1 6700.2 6742.2 13.31 0.0002633 ***
## + HOMEKIDS 1 6700.6 6742.6 12.92 0.0003255 ***
## + SEX 1 6703.4 6745.4 10.13 0.0014561 **
## + AGE 1 6706.0 6748.0 7.50 0.0061788 **
## + YOJ 1 6710.1 6752.1 3.39 0.0654142 .
## <none> 6713.5 6753.5
## + RED_CAR 1 6711.6 6753.6 1.92 0.1662546
## + BLUEBOOK 1 6711.9 6753.9 1.63 0.2020085
## + INCOME 1 6712.8 6754.8 0.74 0.3890476
## - CAR_USE 1 6772.8 6810.8 59.31 1.346e-14 ***
## - KIDSDRIV 1 6776.8 6814.8 63.30 1.773e-15 ***
## - REVOKED 1 6786.2 6824.2 72.75 < 2.2e-16 ***
## - CAR_TYPE 5 6846.1 6876.1 132.57 < 2.2e-16 ***
## - MVR_PTS 1 6853.0 6891.0 139.52 < 2.2e-16 ***
## - MSTATUS 1 6855.6 6893.6 142.14 < 2.2e-16 ***
## - JOB 8 6990.6 7014.6 277.14 < 2.2e-16 ***
## - URBANICITY 1 7292.7 7330.7 579.20 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6699.12
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME
##
## Df Deviance AIC LRT Pr(>Chi)
## + TIF 1 6608.1 6652.1 48.98 2.581e-12 ***
## + OLDCLAIM 1 6610.0 6654.0 47.08 6.800e-12 ***
## + CLM_FREQ 1 6618.4 6662.4 38.73 4.860e-10 ***
## + EDUCATION 3 6614.6 6662.6 42.51 3.125e-09 ***
## + PARENT1 1 6633.5 6677.5 23.64 1.162e-06 ***
## + HOME_VAL 1 6639.8 6683.8 17.33 3.139e-05 ***
## + HOMEKIDS 1 6641.7 6685.7 15.45 8.472e-05 ***
## + CAR_AGE 1 6642.8 6686.8 14.31 0.0001553 ***
## + SEX 1 6647.4 6691.4 9.68 0.0018668 **
## + AGE 1 6648.2 6692.2 8.92 0.0028243 **
## + YOJ 1 6653.7 6697.7 3.39 0.0655056 .
## <none> 6657.1 6699.1
## + RED_CAR 1 6655.4 6699.4 1.77 0.1834767
## + BLUEBOOK 1 6655.5 6699.5 1.57 0.2097381
## + INCOME 1 6656.4 6700.4 0.73 0.3935436
## - TRAVTIME 1 6713.5 6753.5 56.37 6.003e-14 ***
## - CAR_USE 1 6716.1 6756.1 58.93 1.633e-14 ***
## - KIDSDRIV 1 6722.6 6762.6 65.53 5.732e-16 ***
## - REVOKED 1 6731.2 6771.2 74.10 < 2.2e-16 ***
## - CAR_TYPE 5 6790.3 6822.3 133.14 < 2.2e-16 ***
## - MVR_PTS 1 6793.4 6833.4 136.29 < 2.2e-16 ***
## - MSTATUS 1 6802.3 6842.3 145.19 < 2.2e-16 ***
## - JOB 8 6931.8 6957.8 274.67 < 2.2e-16 ***
## - URBANICITY 1 7272.9 7312.9 615.81 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6652.14
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF
##
## Df Deviance AIC LRT Pr(>Chi)
## + OLDCLAIM 1 6561.5 6607.5 46.61 8.642e-12 ***
## + EDUCATION 3 6565.2 6615.2 42.91 2.573e-09 ***
## + CLM_FREQ 1 6570.1 6616.1 38.07 6.841e-10 ***
## + PARENT1 1 6584.6 6630.6 23.56 1.211e-06 ***
## + HOME_VAL 1 6591.0 6637.0 17.12 3.504e-05 ***
## + HOMEKIDS 1 6592.6 6638.6 15.59 7.879e-05 ***
## + CAR_AGE 1 6594.2 6640.2 13.90 0.0001925 ***
## + SEX 1 6598.1 6644.1 10.00 0.0015629 **
## + AGE 1 6599.3 6645.3 8.86 0.0029128 **
## + YOJ 1 6605.4 6651.4 2.70 0.1003124
## <none> 6608.1 6652.1
## + RED_CAR 1 6606.2 6652.2 1.95 0.1625458
## + BLUEBOOK 1 6606.3 6652.3 1.87 0.1720231
## + INCOME 1 6607.7 6653.7 0.47 0.4908447
## - TIF 1 6657.1 6699.1 48.98 2.581e-12 ***
## - TRAVTIME 1 6662.3 6704.3 54.15 1.854e-13 ***
## - CAR_USE 1 6667.5 6709.5 59.36 1.315e-14 ***
## - KIDSDRIV 1 6672.3 6714.3 64.12 1.172e-15 ***
## - REVOKED 1 6680.4 6722.4 72.27 < 2.2e-16 ***
## - CAR_TYPE 5 6745.8 6779.8 137.71 < 2.2e-16 ***
## - MVR_PTS 1 6739.3 6781.3 131.17 < 2.2e-16 ***
## - MSTATUS 1 6755.2 6797.2 147.06 < 2.2e-16 ***
## - JOB 8 6886.4 6914.4 278.25 < 2.2e-16 ***
## - URBANICITY 1 7232.1 7274.1 624.01 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6607.52
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM
##
## Df Deviance AIC LRT Pr(>Chi)
## + EDUCATION 3 6518.8 6570.8 42.69 2.865e-09 ***
## + PARENT1 1 6537.7 6585.7 23.79 1.073e-06 ***
## + HOME_VAL 1 6545.5 6593.5 16.00 6.345e-05 ***
## + HOMEKIDS 1 6546.5 6594.5 15.02 0.0001063 ***
## + CAR_AGE 1 6547.7 6595.7 13.79 0.0002046 ***
## + AGE 1 6552.3 6600.3 9.20 0.0024243 **
## + SEX 1 6552.6 6600.6 8.92 0.0028162 **
## + YOJ 1 6558.4 6606.4 3.13 0.0767678 .
## <none> 6561.5 6607.5
## + BLUEBOOK 1 6559.7 6607.7 1.87 0.1713670
## + RED_CAR 1 6559.7 6607.7 1.78 0.1819681
## + CLM_FREQ 1 6560.1 6608.1 1.41 0.2345579
## + INCOME 1 6560.9 6608.9 0.59 0.4441139
## - OLDCLAIM 1 6608.1 6652.1 46.61 8.642e-12 ***
## - TIF 1 6610.0 6654.0 48.51 3.279e-12 ***
## - TRAVTIME 1 6614.0 6658.0 52.45 4.423e-13 ***
## - MVR_PTS 1 6616.0 6660.0 54.47 1.581e-13 ***
## - CAR_USE 1 6617.3 6661.3 55.83 7.919e-14 ***
## - KIDSDRIV 1 6624.7 6668.7 63.22 1.848e-15 ***
## - REVOKED 1 6632.0 6676.0 70.49 < 2.2e-16 ***
## - CAR_TYPE 5 6692.7 6728.7 131.21 < 2.2e-16 ***
## - MSTATUS 1 6701.6 6745.6 140.03 < 2.2e-16 ***
## - JOB 8 6829.3 6859.3 267.83 < 2.2e-16 ***
## - URBANICITY 1 7080.8 7124.8 519.32 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6570.83
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION
##
## Df Deviance AIC LRT Pr(>Chi)
## + PARENT1 1 6495.6 6549.6 23.23 1.437e-06 ***
## + HOME_VAL 1 6504.9 6558.9 13.95 0.0001881 ***
## + HOMEKIDS 1 6505.4 6559.4 13.41 0.0002504 ***
## + AGE 1 6510.8 6564.8 8.03 0.0046033 **
## + SEX 1 6510.9 6564.9 7.89 0.0049758 **
## + YOJ 1 6515.6 6569.6 3.20 0.0737714 .
## <none> 6518.8 6570.8
## + RED_CAR 1 6517.1 6571.1 1.71 0.1912727
## + CLM_FREQ 1 6517.4 6571.4 1.47 0.2249206
## + BLUEBOOK 1 6517.6 6571.6 1.23 0.2666877
## + CAR_AGE 1 6517.9 6571.9 0.95 0.3299285
## + INCOME 1 6518.7 6572.7 0.10 0.7560501
## - EDUCATION 3 6561.5 6607.5 42.69 2.865e-09 ***
## - OLDCLAIM 1 6565.2 6615.2 46.39 9.669e-12 ***
## - TIF 1 6567.8 6617.8 49.01 2.549e-12 ***
## - MVR_PTS 1 6572.3 6622.3 53.49 2.600e-13 ***
## - TRAVTIME 1 6574.8 6624.8 55.99 7.274e-14 ***
## - KIDSDRIV 1 6581.3 6631.3 62.51 2.647e-15 ***
## - CAR_USE 1 6586.6 6636.6 67.78 < 2.2e-16 ***
## - REVOKED 1 6588.1 6638.1 69.29 < 2.2e-16 ***
## - JOB 8 6625.0 6661.0 106.20 < 2.2e-16 ***
## - CAR_TYPE 5 6649.1 6691.1 130.26 < 2.2e-16 ***
## - MSTATUS 1 6665.9 6715.9 147.08 < 2.2e-16 ***
## - URBANICITY 1 7051.0 7101.0 532.15 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6549.6
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1
##
## Df Deviance AIC LRT Pr(>Chi)
## + HOME_VAL 1 6480.8 6536.8 14.77 0.0001215 ***
## + SEX 1 6486.5 6542.5 9.06 0.0026149 **
## + YOJ 1 6491.6 6547.6 4.04 0.0445329 *
## + AGE 1 6493.4 6549.4 2.20 0.1376563
## + HOMEKIDS 1 6493.5 6549.5 2.15 0.1428139
## + RED_CAR 1 6493.5 6549.5 2.10 0.1468804
## <none> 6495.6 6549.6
## + CLM_FREQ 1 6493.9 6549.9 1.74 0.1877295
## + BLUEBOOK 1 6494.4 6550.4 1.23 0.2677490
## + CAR_AGE 1 6494.8 6550.8 0.84 0.3585227
## + INCOME 1 6495.5 6551.5 0.08 0.7764266
## - PARENT1 1 6518.8 6570.8 23.23 1.437e-06 ***
## - EDUCATION 3 6537.7 6585.7 42.13 3.770e-09 ***
## - KIDSDRIV 1 6537.8 6589.8 42.23 8.115e-11 ***
## - OLDCLAIM 1 6542.2 6594.2 46.59 8.738e-12 ***
## - TIF 1 6544.6 6596.6 48.97 2.605e-12 ***
## - MVR_PTS 1 6547.1 6599.1 51.54 7.006e-13 ***
## - TRAVTIME 1 6554.1 6606.1 58.51 2.019e-14 ***
## - MSTATUS 1 6558.9 6610.9 63.32 1.757e-15 ***
## - CAR_USE 1 6563.2 6615.2 67.63 < 2.2e-16 ***
## - REVOKED 1 6564.5 6616.5 68.90 < 2.2e-16 ***
## - JOB 8 6598.1 6636.1 102.51 < 2.2e-16 ***
## - CAR_TYPE 5 6623.5 6667.5 127.94 < 2.2e-16 ***
## - URBANICITY 1 7030.6 7082.6 534.96 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6536.83
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL
##
## Df Deviance AIC LRT Pr(>Chi)
## + SEX 1 6471.9 6529.9 8.94 0.0027828 **
## + YOJ 1 6477.2 6535.2 3.64 0.0563974 .
## + HOMEKIDS 1 6478.8 6536.8 2.02 0.1550890
## <none> 6480.8 6536.8
## + RED_CAR 1 6478.8 6536.8 1.99 0.1587449
## + AGE 1 6478.9 6536.9 1.90 0.1685293
## + CLM_FREQ 1 6479.2 6537.2 1.68 0.1952595
## + BLUEBOOK 1 6479.6 6537.6 1.20 0.2726206
## + CAR_AGE 1 6479.9 6537.9 0.93 0.3341152
## + INCOME 1 6480.8 6538.8 0.05 0.8238025
## - HOME_VAL 1 6495.6 6549.6 14.77 0.0001215 ***
## - PARENT1 1 6504.9 6558.9 24.05 9.368e-07 ***
## - MSTATUS 1 6506.3 6560.3 25.43 4.589e-07 ***
## - EDUCATION 3 6520.8 6570.8 40.00 1.063e-08 ***
## - KIDSDRIV 1 6522.3 6576.3 41.42 1.228e-10 ***
## - OLDCLAIM 1 6526.4 6580.4 45.59 1.457e-11 ***
## - TIF 1 6529.5 6583.5 48.64 3.077e-12 ***
## - MVR_PTS 1 6530.8 6584.8 49.99 1.545e-12 ***
## - TRAVTIME 1 6537.7 6591.7 56.86 4.676e-14 ***
## - REVOKED 1 6548.5 6602.5 67.70 < 2.2e-16 ***
## - CAR_USE 1 6549.7 6603.7 68.88 < 2.2e-16 ***
## - JOB 8 6575.2 6615.2 94.33 < 2.2e-16 ***
## - CAR_TYPE 5 6607.7 6653.7 126.91 < 2.2e-16 ***
## - URBANICITY 1 7016.8 7070.8 536.01 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6529.89
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX
##
## Df Deviance AIC LRT Pr(>Chi)
## + YOJ 1 6468.4 6528.4 3.45 0.063346 .
## + AGE 1 6469.3 6529.3 2.63 0.105150
## + HOMEKIDS 1 6469.4 6529.4 2.49 0.114846
## <none> 6471.9 6529.9
## + CLM_FREQ 1 6470.4 6530.4 1.49 0.222077
## + BLUEBOOK 1 6470.6 6530.6 1.34 0.247682
## + CAR_AGE 1 6471.0 6531.0 0.88 0.348468
## + INCOME 1 6471.9 6531.9 0.03 0.858636
## + RED_CAR 1 6471.9 6531.9 0.02 0.895218
## - SEX 1 6480.8 6536.8 8.94 0.002783 **
## - HOME_VAL 1 6486.5 6542.5 14.66 0.000129 ***
## - PARENT1 1 6497.1 6553.1 25.25 5.046e-07 ***
## - MSTATUS 1 6497.2 6553.2 25.34 4.811e-07 ***
## - EDUCATION 3 6510.9 6562.9 38.97 1.762e-08 ***
## - KIDSDRIV 1 6514.5 6570.5 42.60 6.709e-11 ***
## - OLDCLAIM 1 6516.4 6572.4 44.51 2.528e-11 ***
## - TIF 1 6520.9 6576.9 49.00 2.566e-12 ***
## - MVR_PTS 1 6522.2 6578.2 50.32 1.303e-12 ***
## - TRAVTIME 1 6528.5 6584.5 56.63 5.253e-14 ***
## - REVOKED 1 6539.3 6595.3 67.39 2.228e-16 ***
## - CAR_USE 1 6541.8 6597.8 69.96 < 2.2e-16 ***
## - JOB 8 6568.5 6610.5 96.63 < 2.2e-16 ***
## - CAR_TYPE 5 6595.7 6643.7 123.82 < 2.2e-16 ***
## - URBANICITY 1 7009.2 7065.2 537.28 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6528.44
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ
##
## Df Deviance AIC LRT Pr(>Chi)
## + HOMEKIDS 1 6464.8 6526.8 3.62 0.0572029 .
## + AGE 1 6466.4 6528.4 2.00 0.1570267
## <none> 6468.4 6528.4
## + CLM_FREQ 1 6467.1 6529.1 1.36 0.2434744
## + BLUEBOOK 1 6467.1 6529.1 1.31 0.2527242
## + CAR_AGE 1 6467.5 6529.5 0.91 0.3400224
## - YOJ 1 6471.9 6529.9 3.45 0.0633461 .
## + INCOME 1 6468.4 6530.4 0.04 0.8474347
## + RED_CAR 1 6468.4 6530.4 0.02 0.8985257
## - SEX 1 6477.2 6535.2 8.75 0.0030928 **
## - HOME_VAL 1 6482.7 6540.7 14.26 0.0001588 ***
## - MSTATUS 1 6491.1 6549.1 22.63 1.961e-06 ***
## - PARENT1 1 6494.5 6552.5 26.01 3.388e-07 ***
## - EDUCATION 3 6507.5 6561.5 39.03 1.708e-08 ***
## - KIDSDRIV 1 6512.1 6570.1 43.64 3.948e-11 ***
## - OLDCLAIM 1 6513.4 6571.4 44.95 2.022e-11 ***
## - TIF 1 6516.7 6574.7 48.22 3.802e-12 ***
## - MVR_PTS 1 6517.4 6575.4 48.99 2.569e-12 ***
## - TRAVTIME 1 6525.1 6583.1 56.67 5.149e-14 ***
## - REVOKED 1 6536.1 6594.1 67.69 < 2.2e-16 ***
## - CAR_USE 1 6537.4 6595.4 68.97 < 2.2e-16 ***
## - JOB 8 6558.4 6602.4 89.92 4.832e-16 ***
## - CAR_TYPE 5 6591.1 6641.1 122.68 < 2.2e-16 ***
## - URBANICITY 1 7006.4 7064.4 537.96 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Step: AIC=6526.83
## TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ + HOMEKIDS
##
## Df Deviance AIC LRT Pr(>Chi)
## <none> 6464.8 6526.8
## + CLM_FREQ 1 6463.5 6527.5 1.37 0.2423159
## + BLUEBOOK 1 6463.7 6527.7 1.14 0.2855922
## + CAR_AGE 1 6463.9 6527.9 0.89 0.3459988
## + AGE 1 6464.2 6528.2 0.58 0.4457354
## - HOMEKIDS 1 6468.4 6528.4 3.62 0.0572029 .
## + INCOME 1 6464.8 6528.8 0.05 0.8163774
## + RED_CAR 1 6464.8 6528.8 0.03 0.8660822
## - YOJ 1 6469.4 6529.4 4.58 0.0323854 *
## - SEX 1 6474.1 6534.1 9.29 0.0023082 **
## - PARENT1 1 6477.1 6537.1 12.23 0.0004705 ***
## - HOME_VAL 1 6478.9 6538.9 14.03 0.0001796 ***
## - MSTATUS 1 6490.7 6550.7 25.85 3.685e-07 ***
## - KIDSDRIV 1 6493.2 6553.2 28.33 1.023e-07 ***
## - EDUCATION 3 6503.0 6559.0 38.18 2.588e-08 ***
## - OLDCLAIM 1 6509.5 6569.5 44.62 2.387e-11 ***
## - TIF 1 6513.0 6573.0 48.21 3.822e-12 ***
## - MVR_PTS 1 6513.3 6573.3 48.49 3.323e-12 ***
## - TRAVTIME 1 6522.3 6582.3 57.49 3.393e-14 ***
## - REVOKED 1 6531.5 6591.5 66.64 3.265e-16 ***
## - CAR_USE 1 6534.0 6594.0 69.19 < 2.2e-16 ***
## - JOB 8 6551.7 6597.7 86.90 1.975e-15 ***
## - CAR_TYPE 5 6587.7 6639.7 122.87 < 2.2e-16 ***
## - URBANICITY 1 7002.6 7062.6 537.73 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call: glm(formula = TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS +
## CAR_TYPE + REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF +
## OLDCLAIM + EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ + HOMEKIDS,
## family = binomial(link = "logit"), data = data_logistic_regression)
##
## Coefficients:
## (Intercept) URBANICITYHighly Urban/ Urban
## -4.97393 2.31267
## JOBBlue Collar JOBClerical
## 0.40049 0.62287
## JOBDoctor JOBHome Maker
## -0.28342 0.56190
## JOBLawyer JOBManager
## 0.22651 -0.60161
## JOBProfessional JOBStudent
## 0.21678 0.41912
## MVR_PTS MSTATUSYes
## 0.10286 -0.44613
## CAR_TYPEPanel Truck CAR_TYPEPickup
## 0.24276 0.61511
## CAR_TYPESports Car CAR_TYPESUV
## 1.29462 0.96939
## CAR_TYPEVan REVOKEDYes
## 0.44868 0.70437
## KIDSDRIV CAR_USEPrivate
## 0.34490 -0.76485
## TRAVTIME TIF
## 0.40872 -0.22491
## OLDCLAIM EDUCATIONHigh School
## 0.06630 0.51158
## EDUCATIONMasters EDUCATIONPhD
## 0.02798 0.04218
## PARENT1Yes HOME_VAL
## 0.40297 -0.04277
## SEXM YOJ
## 0.28733 -0.01888
## HOMEKIDS
## 0.06966
##
## Degrees of Freedom: 7212 Total (i.e. Null); 7182 Residual
## Null Deviance: 8304
## Residual Deviance: 6465 AIC: 6527
Test Goodness of Fit.
#build the final model for logistic regression
final_logistic_model <- glm(formula = TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE + REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM + EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ + HOMEKIDS, family = binomial(link = "logit"), data = data_logistic_regression)
summary(final_logistic_model)
##
## Call:
## glm(formula = TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS +
## CAR_TYPE + REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF +
## OLDCLAIM + EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ + HOMEKIDS,
## family = binomial(link = "logit"), data = data_logistic_regression)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4136 -0.7171 -0.4019 0.6191 3.1037
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.973933 0.339749 -14.640 < 2e-16 ***
## URBANICITYHighly Urban/ Urban 2.312670 0.118587 19.502 < 2e-16 ***
## JOBBlue Collar 0.400491 0.196687 2.036 0.041732 *
## JOBClerical 0.622865 0.205623 3.029 0.002452 **
## JOBDoctor -0.283421 0.271886 -1.042 0.297215
## JOBHome Maker 0.561895 0.213592 2.631 0.008521 **
## JOBLawyer 0.226513 0.179133 1.264 0.206052
## JOBManager -0.601606 0.183188 -3.284 0.001023 **
## JOBProfessional 0.216776 0.189090 1.146 0.251625
## JOBStudent 0.419121 0.223734 1.873 0.061027 .
## MVR_PTS 0.102862 0.014837 6.933 4.12e-12 ***
## MSTATUSYes -0.446125 0.087484 -5.099 3.41e-07 ***
## CAR_TYPEPanel Truck 0.242756 0.148254 1.637 0.101540
## CAR_TYPEPickup 0.615111 0.105469 5.832 5.47e-09 ***
## CAR_TYPESports Car 1.294620 0.129511 9.996 < 2e-16 ***
## CAR_TYPESUV 0.969394 0.109561 8.848 < 2e-16 ***
## CAR_TYPEVan 0.448676 0.128326 3.496 0.000472 ***
## REVOKEDYes 0.704373 0.085524 8.236 < 2e-16 ***
## KIDSDRIV 0.344905 0.064709 5.330 9.82e-08 ***
## CAR_USEPrivate -0.764853 0.092848 -8.238 < 2e-16 ***
## TRAVTIME 0.408717 0.055212 7.403 1.33e-13 ***
## TIF -0.224914 0.032452 -6.931 4.19e-12 ***
## OLDCLAIM 0.066303 0.009890 6.704 2.03e-11 ***
## EDUCATIONHigh School 0.511577 0.084712 6.039 1.55e-09 ***
## EDUCATIONMasters 0.027985 0.144192 0.194 0.846113
## EDUCATIONPhD 0.042183 0.183744 0.230 0.818421
## PARENT1Yes 0.402971 0.115354 3.493 0.000477 ***
## HOME_VAL -0.042768 0.011411 -3.748 0.000178 ***
## SEXM 0.287326 0.094891 3.028 0.002462 **
## YOJ -0.018883 0.008826 -2.139 0.032401 *
## HOMEKIDS 0.069662 0.036508 1.908 0.056372 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8303.6 on 7212 degrees of freedom
## Residual deviance: 6464.8 on 7182 degrees of freedom
## AIC: 6526.8
##
## Number of Fisher Scoring iterations: 5
#reduced models with fewer parameters
logistic_model2 <- glm(formula =TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE + REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM + EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ, family = binomial(link = "logit"), data = data_logistic_regression)
logistic_model3 <- glm(formula = TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE + REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM + EDUCATION + PARENT1 + HOME_VAL + SEX, family = binomial(link = "logit"), data = data_logistic_regression)
#residual deviance test
p_value = 1 - pchisq(final_logistic_model$deviance,final_logistic_model$df.residual)
p_value
## [1] 1
#Likelihood Ratio Test
anova(final_logistic_model, logistic_model2, test ="Chisq")
## Analysis of Deviance Table
##
## Model 1: TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ + HOMEKIDS
## Model 2: TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 7182 6464.8
## 2 7183 6468.4 -1 -3.6167 0.0572 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(final_logistic_model, logistic_model3, test ="Chisq")
## Analysis of Deviance Table
##
## Model 1: TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX + YOJ + HOMEKIDS
## Model 2: TARGET_FLAG ~ URBANICITY + JOB + MVR_PTS + MSTATUS + CAR_TYPE +
## REVOKED + KIDSDRIV + CAR_USE + TRAVTIME + TIF + OLDCLAIM +
## EDUCATION + PARENT1 + HOME_VAL + SEX
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 7182 6464.8
## 2 7184 6471.9 -2 -7.0642 0.02924 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Pseudo R^2 Test
pR2(final_logistic_model)
## llh llhNull G2 McFadden r2ML
## -3232.4129169 -4151.7778788 1838.7299237 0.2214389 0.2250206
## r2CU
## 0.3291023
#Hosmer-Lemeshow Test
hoslem.test(data_logistic_regression$TARGET_FLAG, fitted(final_logistic_model), g=10)
##
## Hosmer and Lemeshow goodness of fit (GOF) test
##
## data: data_logistic_regression$TARGET_FLAG, fitted(final_logistic_model)
## X-squared = 13.434, df = 8, p-value = 0.09775
Create dummy variables for training data set.
#check variable factor levels
data_logistic_regression$EDUCATION_HighSchool <- ifelse(data_logistic_regression$EDUCATION == "High School",1,0)
data_logistic_regression$EDUCATION_Masters <- ifelse(data_logistic_regression$EDUCATION == "Masters",1,0)
data_logistic_regression$EDUCATION_PhD <- ifelse(data_logistic_regression$EDUCATION == "PhD",1,0)
data_logistic_regression$URBANICITY_HighlyUrban <- ifelse(data_logistic_regression$URBANICITY == "Highly Urban/ Urban",1,0)
data_logistic_regression$JOB_BlueCollar <- ifelse(data_logistic_regression$JOB == "Blue Collar",1,0)
data_logistic_regression$JOB_Clerical <- ifelse(data_logistic_regression$JOB == "Clerical",1,0)
data_logistic_regression$JOB_HomeMaker <- ifelse(data_logistic_regression$JOB == "Home Maker",1,0)
data_logistic_regression$JOB_Manager <- ifelse(data_logistic_regression$JOB == "Manager",1,0)
data_logistic_regression$MSTATUS_Yes <- ifelse(data_logistic_regression$MSTATUS == "Yes",1,0)
data_logistic_regression$CAR_TYPE_Pickup <- ifelse(data_logistic_regression$CAR_TYPE == "Pickup",1,0)
data_logistic_regression$CAR_TYPE_Sports_Car <- ifelse(data_logistic_regression$CAR_TYPE == "Sports Car",1,0)
data_logistic_regression$CAR_TYPE_SUV <- ifelse(data_logistic_regression$CAR_TYPE == "SUV",1,0)
data_logistic_regression$CAR_TYPE_Van <- ifelse(data_logistic_regression$CAR_TYPE == "Van",1,0)
data_logistic_regression$REVOKED_Yes <- ifelse(data_logistic_regression$REVOKED == "Yes",1,0)
data_logistic_regression$CAR_USE_Private <- ifelse(data_logistic_regression$CAR_USE == "Private",1,0)
data_logistic_regression$EDUCATION_HighSchool <- ifelse(data_logistic_regression$EDUCATION == "High School",1,0)
data_logistic_regression$PARENT1_Yes <- ifelse(data_logistic_regression$PARENT1 == "Yes",1,0)
data_logistic_regression$CAR_TYPE_Sports_Car <- ifelse(data_logistic_regression$CAR_TYPE == "Sports Car",1,0)
data_logistic_regression$SEX_M <- ifelse(data_logistic_regression$SEX == "M",1,0)
#create dummy varibles for testing dataset
data_testing$URBANICITY_HighlyUrban <- ifelse(data_testing$URBANICITY == "Highly Urban/ Urban",1,0)
data_testing$JOB_BlueCollar <- ifelse(data_testing$JOB == "Blue Collar",1,0)
data_testing$JOB_Clerical <- ifelse(data_testing$JOB == "Clerical",1,0)
data_testing$JOB_HomeMaker <- ifelse(data_testing$JOB == "Home Maker",1,0)
data_testing$JOB_Manager <- ifelse(data_testing$JOB == "Manager",1,0)
data_testing$MSTATUS_Yes <- ifelse(data_testing$MSTATUS == "Yes",1,0)
data_testing$CAR_TYPE_Pickup <- ifelse(data_testing$CAR_TYPE == "Pickup",1,0)
data_testing$CAR_TYPE_Sports_Car <- ifelse(data_testing$CAR_TYPE == "Sports Car",1,0)
data_testing$CAR_TYPE_SUV <- ifelse(data_testing$CAR_TYPE == "SUV",1,0)
data_testing$CAR_TYPE_Van <- ifelse(data_testing$CAR_TYPE == "Van",1,0)
data_testing$REVOKED_Yes <- ifelse(data_testing$REVOKED == "Yes",1,0)
data_testing$CAR_USE_Private <- ifelse(data_testing$CAR_USE == "Private",1,0)
data_testing$EDUCATION_HighSchool <- ifelse(data_testing$EDUCATION == "High School",1,0)
data_testing$PARENT1_Yes <- ifelse(data_testing$PARENT1 == "Yes",1,0)
data_testing$CAR_TYPE_Sports_Car <- ifelse(data_testing$CAR_TYPE == "Sports Car",1,0)
data_testing$SEX_M <- ifelse(data_testing$SEX == "M",1,0)
Calculate log odds.
#create a new variable 'probability'
data_logistic_regression$probability <- c()
data_testing$probability <- c()
#calculate logit function using optimal model equasion
logit_p <- -4.93738 + 2.30350*data_logistic_regression$URBANICITY_HighlyUrban + 0.400491*data_logistic_regression$JOB_BlueCollar + 0.62651*data_logistic_regression$JOB_Clerical + 0.561895*data_logistic_regression$JOB_HomeMaker - 0.601606*data_logistic_regression$JOB_Manager + 0.102862*data_logistic_regression$MVR_PTS - 0.446125*data_logistic_regression$MSTATUS_Yes + 0.61657*data_logistic_regression$CAR_TYPE_Pickup + 1.29135*data_logistic_regression$CAR_TYPE_Sports_Car + 0.96726*data_logistic_regression$CAR_TYPE_SUV + 0.44968*data_logistic_regression$CAR_TYPE_Van + 0.70753*data_logistic_regression$REVOKED_Yes + 0.344905*data_logistic_regression$KIDSDRIV - 0.76242*data_logistic_regression$CAR_USE_Private + 0.40831*data_logistic_regression$TRAVTIME - 0.22526*data_logistic_regression$TIF + 0.06899*data_logistic_regression$OLDCLAIM + 0.50684*data_logistic_regression$EDUCATION_HighSchool + 0.43558*data_logistic_regression$PARENT1_Yes - 0.04297*data_logistic_regression$HOME_VAL + 0.28046*data_logistic_regression$SEX_M - 0.018883*data$YOJ
#calculate logit function using alternative model equasion
logit_p_alternative <- -4.973933 + 2.312670*data$URBANICITY_HighlyUrban + 0.400491*data$JOB_BlueCollar + 0.622865*data$JOB_Clerical + 0.561895*data$JOB_HomeMaker - 0.601606*data$JOB_Manager + 0.102862*data$MVR_PTS - 0.446125*data$MSTATUS_Yes + 0.61657*data$CAR_TYPE_Pickup + 1.29135*data$CAR_TYPE_Sports_Car + 0.96726*data$CAR_TYPE_SUV + 0.44968*data$CAR_TYPE_Van + 0.70753*data$REVOKED_Yes + 0.344905*data$KIDSDRIV - 0.76242*data$CAR_USE_Private + 0.40831*data$TRAVTIME - 0.22526*data$TIF + 0.06899*data$OLDCLAIM + 0.50684*data$EDUCATION_HighSchool + 0.43558*data$PARENT1_Yes - 0.04297*data$HOME_VAL + 0.28046*data$SEX_M - 0.018883*data$YOJ
logit_p_testing <- -4.973933 + 2.312670*data_testing$URBANICITY_HighlyUrban + 0.400491*data_testing$JOB_BlueCollar + 0.622865*data_testing$JOB_Clerical + 0.561895*data_testing$JOB_HomeMaker - 0.601606*data_testing$JOB_Manager + 0.102862*data_testing$MVR_PTS - 0.446125*data_testing$MSTATUS_Yes + 0.61657*data_testing$CAR_TYPE_Pickup + 1.29135*data_testing$CAR_TYPE_Sports_Car + 0.96726*data_testing$CAR_TYPE_SUV + 0.44968*data_testing$CAR_TYPE_Van + 0.70753*data_testing$REVOKED_Yes + 0.344905*data_testing$KIDSDRIV - 0.76242*data_testing$CAR_USE_Private + 0.40831*data_testing$TRAVTIME - 0.22526*data_testing$TIF + 0.06899*data_testing$OLDCLAIM + 0.50684*data_testing$EDUCATION_HighSchool + 0.43558*data_testing$PARENT1_Yes - 0.04297*data_testing$HOME_VAL + 0.28046*data_testing$SEX_M - 0.018883*data_testing$YOJ
Calculate probability of getting into car accident. Predict ‘TARGET_FLAG’ class for testing and training data sets.
#calculate probability
data_logistic_regression$probability <- (exp(1)^logit_p)/(1+exp(1)^logit_p)
data_testing$probability <- exp(1)^logit_p_testing/(1+exp(1)^logit_p_testing)
data_testing$probability
## [1] 9.991562e-01 1.000000e+00 9.991668e-01 NA 1.000000e+00
## [6] 1.337410e-08 3.488676e-07 1.474999e-02 1.833339e-23 9.977398e-01
## [11] NA 5.199257e-01 1.000000e+00 9.172867e-01 6.637367e-01
## [16] 9.999998e-01 9.999963e-01 4.989484e-06 1.000000e+00 1.000000e+00
## [21] 1.000000e+00 4.006423e-20 4.223537e-20 9.999833e-01 7.541605e-22
## [26] 9.950475e-01 1.000000e+00 2.618009e-06 1.297758e-16 3.817256e-20
## [31] 5.446553e-20 1.000000e+00 2.541876e-15 1.924232e-19 1.737243e-01
## [36] NA 9.789095e-04 2.185046e-01 NA 9.999797e-01
## [41] 9.999701e-01 9.997209e-01 4.743812e-12 1.000000e+00 2.274956e-03
## [46] 4.100613e-15 1.000000e+00 1.491647e-05 9.999832e-01 6.075518e-01
## [51] 8.238502e-16 1.000000e+00 1.000000e+00 9.986468e-01 1.000000e+00
## [56] 4.772734e-14 1.000000e+00 1.000000e+00 3.671643e-12 9.983249e-01
## [61] 3.025951e-07 1.084019e-16 8.529005e-01 9.981782e-01 5.375234e-12
## [66] NA 1.000000e+00 1.000000e+00 9.982177e-01 9.644926e-01
## [71] 7.901832e-01 1.000000e+00 1.000000e+00 9.966134e-01 9.718959e-01
## [76] 1.000000e+00 4.562253e-03 1.000000e+00 9.847121e-04 1.358265e-01
## [81] 1.000000e+00 1.000000e+00 5.423676e-03 6.827702e-01 9.992215e-01
## [86] 9.995504e-01 2.808516e-02 1.000000e+00 1.014909e-04 1.000000e+00
## [91] 9.999947e-01 1.000000e+00 9.032302e-01 2.536132e-03 2.349733e-08
## [96] 5.409637e-10 1.218352e-03 6.760640e-01 5.824237e-07 8.579031e-10
## [101] 9.999704e-01 9.992760e-01 9.999999e-01 3.364843e-14 5.646662e-17
## [106] 8.088321e-02 7.047803e-07 9.967364e-01 1.000000e+00 9.999979e-01
## [111] 1.000000e+00 9.997268e-01 1.201364e-01 1.595884e-09 9.999940e-01
## [116] 3.120073e-03 6.572122e-19 1.000000e+00 1.000000e+00 1.000000e+00
## [121] 1.000000e+00 1.000000e+00 9.978326e-01 1.000000e+00 8.261186e-13
## [126] 1.000000e+00 6.652920e-10 9.697054e-01 1.000000e+00 6.378436e-20
## [131] 9.304442e-21 NA 3.923775e-14 3.033100e-01 3.786816e-05
## [136] 1.000000e+00 1.000000e+00 4.839932e-01 9.998999e-01 9.999999e-01
## [141] 6.807017e-07 1.000000e+00 8.966885e-01 9.989857e-01 1.874463e-13
## [146] 1.000000e+00 9.999761e-01 9.983475e-01 1.000000e+00 2.909540e-22
## [151] 1.000000e+00 1.070080e-07 1.000000e+00 4.337199e-20 9.999962e-01
## [156] 1.000000e+00 1.000000e+00 4.310062e-13 1.000000e+00 1.000000e+00
## [161] 1.000000e+00 1.323689e-09 9.983556e-01 3.587998e-04 1.000000e+00
## [166] 9.752234e-01 4.054002e-02 1.000000e+00 2.786234e-08 1.000000e+00
## [171] 1.290432e-03 1.000000e+00 9.924147e-01 1.000000e+00 8.077345e-13
## [176] 3.095271e-17 9.999825e-01 1.000000e+00 1.000000e+00 1.163981e-06
## [181] 1.000000e+00 7.863900e-06 9.741054e-14 1.593427e-20 1.000000e+00
## [186] 1.000000e+00 6.298621e-18 5.339705e-16 NA 1.000000e+00
## [191] 9.688631e-01 1.000000e+00 6.633404e-01 9.999620e-01 4.717433e-06
## [196] 9.999984e-01 1.000000e+00 1.227725e-12 5.669425e-03 8.717258e-01
## [201] 9.997371e-01 9.727366e-01 2.210473e-18 9.999700e-01 9.999975e-01
## [206] 4.622415e-13 9.999998e-01 8.965515e-01 9.428593e-01 8.628850e-13
## [211] 4.967659e-19 3.330193e-05 3.519627e-01 4.657784e-02 8.460254e-01
## [216] 1.000000e+00 3.478653e-06 1.000000e+00 5.952505e-13 2.553183e-21
## [221] 8.519176e-03 1.056105e-15 9.992095e-01 1.000000e+00 8.168794e-14
## [226] 1.000000e+00 9.785202e-01 9.999998e-01 7.486246e-01 5.610636e-04
## [231] 3.827775e-08 NA 1.867072e-03 1.880903e-11 1.616136e-21
## [236] 4.001906e-01 2.859045e-17 NA 3.765984e-24 9.122125e-01
## [241] 9.999054e-01 9.999977e-01 9.985073e-01 3.701373e-10 1.000000e+00
## [246] 5.138667e-01 1.000000e+00 9.876840e-01 3.182194e-01 1.000000e+00
## [251] 1.852952e-10 1.000000e+00 3.972133e-07 NA 9.999931e-01
## [256] 2.376649e-05 4.055179e-09 1.000000e+00 1.000000e+00 9.996932e-01
## [261] 9.938856e-01 9.655233e-01 2.962275e-06 1.090752e-03 3.260202e-13
## [266] 5.982201e-05 3.598859e-10 1.585833e-06 1.000000e+00 9.982667e-01
## [271] 9.999959e-01 8.223011e-16 2.553424e-07 1.000000e+00 1.247845e-08
## [276] 4.488421e-22 1.000000e+00 1.000000e+00 2.429337e-04 1.507925e-01
## [281] 6.286569e-15 1.000000e+00 1.000000e+00 1.000000e+00 NA
## [286] 1.000000e+00 1.127553e-09 1.000000e+00 9.999997e-01 9.999054e-01
## [291] 2.253533e-11 8.155802e-01 1.000000e+00 1.000000e+00 3.423680e-11
## [296] 9.999889e-01 3.496316e-08 1.000000e+00 5.137769e-10 1.000000e+00
## [301] 1.009203e-12 9.858302e-01 6.988856e-09 9.999997e-01 1.000000e+00
## [306] 7.928896e-01 9.999973e-01 1.000000e+00 4.693949e-14 9.981822e-01
## [311] 1.000000e+00 1.000000e+00 6.341284e-14 1.000000e+00 1.029988e-18
## [316] 9.842820e-01 6.470847e-06 3.371238e-01 1.000000e+00 5.793457e-13
## [321] 9.961125e-01 9.966951e-01 4.109367e-11 1.570584e-16 2.821716e-02
## [326] 9.992577e-01 1.000000e+00 9.002133e-01 9.999963e-01 9.999984e-01
## [331] 1.671764e-16 NA 2.194223e-04 8.113208e-01 1.000000e+00
## [336] 9.999999e-01 9.374320e-01 9.999997e-01 1.856337e-01 2.715044e-22
## [341] 9.999999e-01 2.684901e-06 1.000000e+00 1.000000e+00 4.371759e-15
## [346] 2.123889e-04 3.754087e-11 3.072307e-20 NA 2.599594e-13
## [351] NA 1.566188e-10 1.000000e+00 1.000000e+00 6.222192e-02
## [356] 5.332995e-04 1.000000e+00 9.534229e-01 2.320905e-23 6.442119e-16
## [361] 9.999992e-01 1.520427e-11 1.943526e-20 5.838187e-04 4.708507e-19
## [366] NA 2.380701e-22 4.463268e-07 9.999996e-01 9.999015e-01
## [371] 2.256447e-18 1.000000e+00 1.000000e+00 9.999987e-01 4.191013e-02
## [376] 1.000000e+00 5.260667e-20 9.999143e-01 1.164027e-13 3.980915e-06
## [381] 8.892652e-14 4.620424e-02 1.000000e+00 1.000000e+00 3.931986e-06
## [386] 9.993981e-01 9.999994e-01 1.954353e-02 5.686822e-05 9.999998e-01
## [391] NA 2.899067e-09 6.745375e-11 1.000000e+00 2.746774e-05
## [396] 3.363339e-03 5.500587e-19 NA 1.000000e+00 1.000000e+00
## [401] 9.999126e-01 9.998413e-01 4.154888e-15 9.999025e-01 4.763192e-22
## [406] 9.992728e-01 2.704512e-02 1.000000e+00 2.289895e-02 6.874294e-10
## [411] 1.838119e-01 9.999998e-01 9.998079e-01 6.207225e-21 1.034743e-01
## [416] 6.715091e-02 1.195037e-10 1.000000e+00 9.901904e-11 1.239737e-10
## [421] 9.999899e-01 1.000000e+00 1.000000e+00 5.320582e-01 1.000000e+00
## [426] 1.000000e+00 7.263333e-04 9.944578e-01 9.695398e-01 2.566057e-01
## [431] 9.999999e-01 1.627710e-22 2.941131e-05 9.304606e-01 1.000000e+00
## [436] 1.000000e+00 7.675514e-24 9.996293e-01 1.000000e+00 1.194774e-06
## [441] 5.133882e-23 4.216702e-06 4.111896e-14 9.999592e-01 1.000000e+00
## [446] 2.710324e-04 3.505717e-23 9.999895e-01 9.999998e-01 9.999645e-01
## [451] 5.274005e-12 1.813399e-01 1.000000e+00 2.885222e-21 1.000000e+00
## [456] 1.000000e+00 2.419765e-07 1.000000e+00 3.852036e-19 4.435915e-20
## [461] 1.373779e-07 2.143976e-05 9.999990e-01 9.318472e-01 NA
## [466] 6.917334e-16 1.000000e+00 9.999999e-01 7.006116e-03 1.000000e+00
## [471] 5.984604e-01 1.000000e+00 9.422565e-01 9.986503e-01 1.000000e+00
## [476] 1.679516e-06 1.000000e+00 1.000000e+00 2.021736e-02 9.999999e-01
## [481] 1.363687e-01 1.000000e+00 2.166450e-20 6.113691e-01 1.000000e+00
## [486] 1.000000e+00 9.997887e-01 1.000000e+00 9.999975e-01 1.000000e+00
## [491] NA 1.439298e-04 1.000000e+00 NA 1.172290e-15
## [496] 1.000000e+00 9.999999e-01 9.988784e-01 1.000000e+00 1.000000e+00
## [501] 1.000000e+00 5.812707e-01 1.000000e+00 1.727108e-01 1.000000e+00
## [506] 9.999350e-01 1.000000e+00 5.181745e-01 9.989977e-14 NA
## [511] 8.641808e-20 1.000000e+00 9.999797e-01 NA 9.999993e-01
## [516] 8.490683e-01 1.000000e+00 4.242594e-10 9.999983e-01 1.000000e+00
## [521] 9.551413e-01 9.953319e-01 7.385869e-05 1.000000e+00 1.000000e+00
## [526] NA NA 3.561306e-14 1.143423e-04 NA
## [531] 9.999999e-01 9.998128e-01 6.273518e-06 1.094297e-11 1.009467e-15
## [536] 1.503701e-16 9.948465e-01 2.790049e-02 4.393087e-01 1.602631e-16
## [541] 5.328524e-24 9.999918e-01 1.000000e+00 1.000000e+00 3.779447e-19
## [546] 1.000000e+00 9.962876e-01 1.000000e+00 9.995475e-01 1.000000e+00
## [551] 9.999993e-01 1.170845e-11 6.452779e-20 9.999908e-01 4.030420e-01
## [556] NA 1.000000e+00 NA 3.868957e-11 9.984978e-01
## [561] 1.430105e-20 2.022659e-13 9.998395e-01 1.000000e+00 1.000000e+00
## [566] 1.869859e-05 9.999999e-01 9.854630e-01 2.133315e-04 1.000000e+00
## [571] 7.595447e-22 9.681855e-01 NA 9.487488e-10 2.561293e-12
## [576] 6.879257e-06 9.999716e-01 5.884100e-03 2.541639e-04 1.000000e+00
## [581] 9.942086e-01 1.000000e+00 4.337442e-08 1.000000e+00 6.061236e-19
## [586] 7.984275e-11 3.921935e-23 1.534987e-01 9.999998e-01 1.000000e+00
## [591] 1.169033e-02 1.000000e+00 4.997557e-07 1.000000e+00 1.000000e+00
## [596] 4.502401e-03 1.000000e+00 9.958039e-01 4.224639e-08 9.999812e-01
## [601] 9.999893e-01 9.991262e-01 1.000000e+00 9.999993e-01 7.946445e-01
## [606] 1.000000e+00 1.000000e+00 9.999996e-01 9.732310e-01 1.763554e-13
## [611] 2.258484e-18 1.000000e+00 2.784143e-03 6.394152e-20 9.999955e-01
## [616] 9.997819e-01 2.376770e-17 1.000000e+00 9.999987e-01 1.000000e+00
## [621] 9.091605e-01 3.846451e-11 7.784843e-05 1.000000e+00 9.351599e-14
## [626] 9.999999e-01 NA 6.566844e-02 3.921619e-06 1.000000e+00
## [631] 9.789665e-01 9.999888e-01 5.741904e-20 1.208058e-08 4.887582e-18
## [636] 6.777441e-17 9.989519e-01 9.989070e-01 4.153671e-06 9.771372e-01
## [641] 1.000000e+00 1.000000e+00 9.671374e-01 7.011528e-17 1.361036e-03
## [646] 6.936542e-01 8.177915e-13 1.000000e+00 1.000000e+00 9.990083e-01
## [651] 9.823776e-01 1.608801e-12 1.000000e+00 NA 9.240185e-04
## [656] 1.107481e-14 9.998654e-01 6.824858e-16 6.899854e-01 9.995862e-01
## [661] 9.999992e-01 9.966609e-01 1.000000e+00 2.124163e-09 9.795351e-01
## [666] 9.731287e-05 2.374863e-03 7.960313e-14 1.000000e+00 9.842930e-01
## [671] 1.090538e-02 1.000000e+00 1.000000e+00 9.979324e-01 9.993281e-01
## [676] 5.255804e-10 1.000000e+00 1.099112e-03 6.619673e-22 5.313369e-23
## [681] 8.867394e-01 1.263421e-15 9.666521e-01 1.330903e-18 1.000000e+00
## [686] 9.914735e-03 5.209268e-04 2.958848e-03 1.000000e+00 1.102475e-16
## [691] 1.000000e+00 9.999988e-01 9.979183e-01 9.698801e-01 1.143315e-21
## [696] 9.999322e-01 3.868443e-22 3.370639e-07 9.680802e-01 6.417461e-01
## [701] 1.515519e-24 8.990871e-19 9.973084e-01 7.719807e-01 1.000000e+00
## [706] 1.733026e-08 9.999998e-01 1.000000e+00 2.850571e-07 2.859341e-14
## [711] 1.000000e+00 9.996219e-01 9.999866e-01 9.929055e-01 1.264909e-04
## [716] 1.424522e-17 1.691492e-12 9.440242e-01 7.764469e-11 1.305822e-01
## [721] 1.000000e+00 1.287217e-01 5.848466e-04 6.828672e-04 1.157504e-18
## [726] 1.741004e-09 9.999777e-01 2.219224e-07 9.999948e-01 4.644697e-07
## [731] 4.964628e-01 9.999996e-01 9.999536e-01 1.085689e-08 9.999999e-01
## [736] 1.000000e+00 9.997503e-01 5.528143e-10 1.000000e+00 1.223813e-14
## [741] 1.000000e+00 3.654590e-22 9.999128e-01 2.734777e-08 1.000000e+00
## [746] 2.584609e-02 9.999999e-01 9.935983e-01 3.864813e-15 9.999989e-01
## [751] 9.903460e-01 1.000000e+00 4.839469e-10 1.000000e+00 5.087945e-21
## [756] 1.052400e-15 9.999772e-01 1.000000e+00 NA 9.995450e-01
## [761] 9.997774e-01 7.892228e-05 1.000000e+00 1.000000e+00 1.000000e+00
## [766] 2.621857e-11 9.999980e-01 1.000000e+00 3.250405e-03 1.000000e+00
## [771] 1.000000e+00 1.000000e+00 9.992492e-01 2.584695e-18 NA
## [776] 6.027582e-01 9.999928e-01 7.776258e-01 5.679972e-14 1.038367e-04
## [781] 4.871833e-01 9.999752e-01 NA 1.257057e-01 5.557837e-11
## [786] 1.000000e+00 1.712751e-06 1.000000e+00 1.000000e+00 1.711158e-01
## [791] 4.373955e-05 9.999779e-01 6.130128e-17 7.047327e-15 1.980042e-12
## [796] 5.505864e-02 2.392870e-15 1.000000e+00 1.000000e+00 8.504784e-01
## [801] 9.999998e-01 9.990567e-01 5.132889e-17 1.280476e-01 9.833804e-01
## [806] 5.889707e-13 2.729982e-09 2.344183e-01 8.023373e-20 1.000000e+00
## [811] 1.019556e-01 7.971523e-17 1.595936e-12 2.235276e-04 1.000000e+00
## [816] 3.626835e-01 9.999998e-01 1.000000e+00 1.000000e+00 1.000000e+00
## [821] 1.000000e+00 9.927058e-01 NA 6.090454e-05 1.000000e+00
## [826] 9.995786e-01 9.999970e-01 2.201530e-16 6.399683e-15 9.999968e-01
## [831] 9.999934e-01 1.368263e-12 NA 3.241159e-05 1.430014e-11
## [836] 4.275187e-19 1.000000e+00 9.987315e-01 1.296422e-03 5.171303e-19
## [841] 9.999986e-01 9.998943e-01 9.999998e-01 9.993799e-01 1.357250e-19
## [846] 6.708374e-01 1.000000e+00 8.157909e-10 1.000000e+00 3.996953e-01
## [851] 4.375978e-11 9.135445e-03 9.999953e-01 1.722608e-18 1.000000e+00
## [856] 9.487880e-01 5.597974e-08 1.241390e-11 1.000000e+00 7.568908e-01
## [861] 5.558242e-20 1.000000e+00 1.000000e+00 1.698807e-11 1.000000e+00
## [866] 1.477880e-15 1.000000e+00 2.790839e-15 1.618941e-16 1.000000e+00
## [871] 1.082136e-16 1.000000e+00 2.712766e-06 9.999999e-01 2.254508e-04
## [876] 1.000000e+00 NA 4.915092e-15 1.817723e-04 8.766960e-01
## [881] 1.000000e+00 9.950477e-01 6.546283e-15 4.371365e-15 1.000000e+00
## [886] 2.381328e-03 1.000000e+00 4.379473e-14 4.477340e-13 NA
## [891] 1.061228e-16 9.914338e-01 6.502037e-17 1.174163e-17 9.999813e-01
## [896] 1.000000e+00 9.999912e-01 1.000000e+00 1.000000e+00 1.000000e+00
## [901] 9.999744e-01 2.634492e-15 1.000000e+00 4.520297e-18 1.087090e-01
## [906] 1.000000e+00 9.997258e-01 9.965478e-21 1.000000e+00 1.000000e+00
## [911] 1.000000e+00 2.176042e-02 1.624678e-16 2.198004e-05 5.981268e-01
## [916] 9.999997e-01 1.000000e+00 9.994737e-01 7.483488e-01 1.000000e+00
## [921] 9.997740e-01 2.399382e-22 6.581138e-08 9.952393e-01 9.999110e-01
## [926] 1.366586e-15 9.999992e-01 1.120099e-01 1.000000e+00 4.918310e-21
## [931] 4.802223e-01 4.959105e-13 NA 2.851642e-23 1.000000e+00
## [936] 1.000000e+00 2.557063e-16 1.000000e+00 1.000000e+00 6.250288e-02
## [941] 1.000000e+00 8.685233e-13 1.000000e+00 1.000000e+00 1.000000e+00
## [946] 9.953357e-01 1.000000e+00 8.602130e-01 9.994929e-01 9.593866e-21
## [951] 9.999990e-01 6.176959e-12 1.000000e+00 5.076749e-01 9.825757e-01
## [956] 1.000000e+00 1.000000e+00 2.314578e-13 9.999336e-01 9.905300e-01
## [961] 9.531057e-02 1.000000e+00 1.073139e-02 9.999939e-01 1.835101e-07
## [966] 1.000000e+00 9.997392e-01 1.000000e+00 1.000000e+00 1.000000e+00
## [971] 3.915258e-23 1.000000e+00 1.554460e-07 1.000000e+00 2.471853e-10
## [976] 1.000000e+00 9.997258e-01 9.999978e-01 3.302584e-12 1.000000e+00
## [981] 9.999967e-01 1.856028e-02 9.999993e-01 1.000000e+00 1.000000e+00
## [986] 1.000000e+00 3.112384e-11 3.573540e-02 3.623012e-01 NA
## [991] 1.306272e-10 2.798720e-11 1.000000e+00 1.000000e+00 9.893673e-01
## [996] 5.508458e-18 1.000000e+00 9.999112e-01 1.000000e+00 9.999910e-01
## [1001] 9.999997e-01 1.000000e+00 1.000000e+00 5.182325e-09 9.999373e-01
## [1006] 1.179682e-17 3.437426e-09 1.000000e+00 1.371268e-06 1.058572e-03
## [1011] 9.999059e-01 2.708175e-11 9.999740e-01 8.037672e-01 3.230547e-01
## [1016] 1.000000e+00 2.103904e-16 9.483419e-01 2.001485e-02 2.681285e-11
## [1021] 6.217246e-02 6.881544e-01 1.000000e+00 9.999993e-01 1.000000e+00
## [1026] 9.997865e-01 1.000000e+00 1.299356e-11 6.786563e-08 6.757177e-01
## [1031] NA 1.021648e-12 3.425692e-01 5.431101e-20 NA
## [1036] 1.304633e-09 9.999808e-01 9.999967e-01 1.674953e-13 1.215341e-14
## [1041] 2.961933e-21 2.490302e-09 9.979617e-01 9.988500e-01 7.451310e-09
## [1046] NA 9.921649e-01 7.766658e-01 9.999591e-01 9.931522e-01
## [1051] 1.000000e+00 1.000000e+00 1.000000e+00 9.998786e-01 1.790505e-01
## [1056] 9.999998e-01 1.000000e+00 3.548504e-01 9.999884e-01 1.000000e+00
## [1061] 4.676205e-09 1.000000e+00 1.000000e+00 1.000000e+00 2.202863e-01
## [1066] 9.002924e-26 NA 1.000000e+00 6.073911e-19 9.954402e-01
## [1071] 9.999998e-01 9.817188e-01 7.952640e-01 1.000000e+00 3.099743e-24
## [1076] 2.028395e-08 9.999475e-01 3.058349e-02 1.000000e+00 5.925240e-12
## [1081] 1.000000e+00 1.000000e+00 2.471801e-11 1.000000e+00 1.000000e+00
## [1086] 1.000000e+00 9.998894e-01 3.441855e-17 1.000000e+00 5.092955e-03
## [1091] 9.981710e-01 NA 3.042461e-17 9.998837e-01 9.952106e-01
## [1096] 1.000000e+00 6.608072e-06 9.990452e-01 9.999866e-01 1.000000e+00
## [1101] 4.723014e-12 9.227700e-09 4.424978e-05 2.707613e-04 2.254040e-14
## [1106] 9.999164e-01 8.602852e-19 1.000000e+00 1.000000e+00 1.000000e+00
## [1111] 9.996936e-01 NA 9.990565e-01 1.000000e+00 4.818888e-13
## [1116] 2.363184e-21 2.777195e-15 1.000000e+00 1.000000e+00 2.442968e-19
## [1121] 9.999091e-01 9.999854e-01 4.768563e-01 1.000000e+00 9.999752e-01
## [1126] 1.165371e-17 9.999964e-01 4.985419e-16 7.252622e-12 9.999993e-01
## [1131] 9.802263e-01 1.000000e+00 1.000000e+00 8.911459e-02 1.356028e-11
## [1136] 9.939776e-01 8.920898e-01 9.999955e-01 9.999514e-01 1.259366e-14
## [1141] 9.021923e-01 9.972352e-01 9.670878e-01 1.000000e+00 1.000000e+00
## [1146] NA 4.930404e-20 9.979674e-01 4.148612e-08 9.997322e-01
## [1151] 5.614592e-10 1.000000e+00 2.241389e-17 1.000000e+00 1.000000e+00
## [1156] 9.999959e-01 1.000000e+00 NA 9.965107e-01 4.877763e-18
## [1161] 9.661147e-01 1.000000e+00 1.000000e+00 4.329351e-01 7.699403e-19
## [1166] 1.000000e+00 2.080086e-11 8.069246e-11 1.000000e+00 1.162223e-04
## [1171] 1.000000e+00 1.000000e+00 1.000000e+00 9.999997e-01 5.012227e-15
## [1176] 5.661326e-02 1.555183e-13 3.592881e-16 3.516519e-01 NA
## [1181] 9.670120e-01 1.000000e+00 1.000000e+00 9.999153e-01 9.999994e-01
## [1186] 9.999764e-01 9.995202e-01 9.655273e-01 4.571760e-13 NA
## [1191] 1.000000e+00 5.556681e-10 4.480342e-16 1.000000e+00 1.000000e+00
## [1196] 5.523727e-04 6.246453e-19 9.795423e-01 1.000000e+00 9.970333e-01
## [1201] NA 1.244121e-16 1.364381e-09 1.823221e-17 9.133839e-01
## [1206] 4.189312e-26 1.000000e+00 9.999719e-01 6.426500e-07 1.000000e+00
## [1211] 1.451078e-15 4.711863e-03 1.000000e+00 1.308943e-06 4.319972e-20
## [1216] 4.508766e-16 1.000000e+00 3.103248e-05 9.999985e-01 2.968358e-09
## [1221] 5.223546e-16 6.029151e-04 1.000000e+00 2.809995e-17 9.999814e-01
## [1226] 9.999996e-01 1.963664e-12 2.825371e-08 9.999999e-01 1.000000e+00
## [1231] 7.276623e-09 9.834271e-01 5.954677e-21 1.000000e+00 3.000072e-02
## [1236] 5.289417e-12 1.031292e-24 9.999986e-01 6.889152e-11 1.171281e-14
## [1241] 1.000000e+00 1.692908e-07 9.992236e-01 1.151565e-14 2.001186e-04
## [1246] 1.000000e+00 9.999735e-01 5.036751e-14 7.306185e-24 4.517518e-02
## [1251] NA 3.872749e-16 1.315503e-15 1.241996e-03 3.086345e-04
## [1256] 9.939606e-01 1.000000e+00 3.854969e-21 1.000000e+00 9.979576e-01
## [1261] 9.999933e-01 1.000000e+00 1.572675e-19 1.000000e+00 7.494387e-01
## [1266] 9.446882e-07 1.000000e+00 1.277306e-11 2.842196e-11 3.845647e-19
## [1271] 9.997151e-01 9.992855e-01 1.831973e-18 6.226591e-01 1.000000e+00
## [1276] 3.238974e-01 1.000000e+00 3.800703e-22 2.386724e-03 1.000000e+00
## [1281] 1.000000e+00 1.587935e-17 3.891884e-07 1.000000e+00 9.999755e-01
## [1286] 9.999131e-01 1.000000e+00 2.249213e-16 7.479305e-01 9.986506e-01
## [1291] 1.000000e+00 9.926651e-01 3.191209e-09 9.999958e-01 1.000000e+00
## [1296] 5.725862e-07 9.997196e-01 1.000000e+00 1.000000e+00 9.999978e-01
## [1301] 9.245224e-02 1.000000e+00 1.157620e-09 1.618905e-21 2.333481e-22
## [1306] 5.536233e-15 1.000000e+00 1.000000e+00 1.680729e-15 1.000000e+00
## [1311] 1.330077e-02 1.000000e+00 1.000000e+00 9.999987e-01 6.355913e-04
## [1316] 2.143935e-03 2.817740e-14 9.999985e-01 9.623464e-14 9.999991e-01
## [1321] 8.232392e-08 1.000000e+00 9.999999e-01 8.498645e-19 9.341426e-21
## [1326] 1.000000e+00 2.404067e-20 9.992990e-01 6.717277e-20 2.560837e-01
## [1331] 2.019726e-18 4.489160e-05 4.520205e-15 5.048549e-03 1.000000e+00
## [1336] 9.999897e-01 1.871312e-06 5.854208e-19 1.415206e-10 9.559760e-01
## [1341] 1.000000e+00 9.997679e-01 4.267144e-18 9.999980e-01 NA
## [1346] 2.779540e-17 7.133935e-21 5.522515e-14 1.350608e-19 9.999976e-01
## [1351] 1.045057e-05 1.000000e+00 1.000000e+00 6.431451e-13 6.748216e-24
## [1356] 2.540680e-09 9.997580e-01 1.000000e+00 NA 1.166962e-17
## [1361] 7.007211e-09 2.808165e-12 9.185688e-01 4.425820e-18 2.416013e-17
## [1366] 3.028005e-10 1.000000e+00 1.000000e+00 1.045653e-05 2.459046e-04
## [1371] 1.898334e-07 3.101114e-07 1.843059e-18 5.409869e-10 1.000000e+00
## [1376] 9.168471e-01 NA 1.000000e+00 6.941115e-15 4.456437e-01
## [1381] 1.000000e+00 1.000000e+00 1.000000e+00 3.735825e-04 5.759823e-09
## [1386] 1.117151e-17 8.093202e-15 2.807417e-18 4.242832e-20 1.000000e+00
## [1391] 1.000000e+00 1.000000e+00 5.805870e-05 9.056923e-01 9.997757e-01
## [1396] 1.345313e-18 9.534968e-10 1.000000e+00 NA 2.310507e-01
## [1401] 1.000000e+00 1.985236e-17 9.994807e-01 1.000000e+00 1.000000e+00
## [1406] 9.999998e-01 7.833879e-19 9.999595e-01 9.931799e-01 9.997381e-01
## [1411] 6.786410e-22 4.657853e-01 9.995077e-01 3.699795e-07 1.181058e-17
## [1416] 2.847569e-04 1.003431e-13 1.445642e-08 1.000000e+00 5.123137e-10
## [1421] 3.771852e-04 1.000000e+00 1.000000e+00 1.000000e+00 NA
## [1426] 1.000000e+00 9.960369e-01 9.978161e-01 1.000000e+00 4.885912e-10
## [1431] 9.999658e-01 5.788648e-09 2.009775e-11 9.859200e-01 2.508557e-08
## [1436] 1.712255e-02 9.909302e-01 1.000000e+00 1.000000e+00 9.999770e-01
## [1441] 1.474555e-06 1.000000e+00 9.980573e-01 6.414261e-20 9.998287e-01
## [1446] 9.524424e-01 1.000000e+00 1.000000e+00 1.610079e-16 2.404218e-20
## [1451] 4.259593e-17 1.000000e+00 3.238851e-19 1.371294e-20 3.303293e-01
## [1456] 5.010851e-02 3.544995e-02 7.852028e-07 1.061335e-21 1.000000e+00
## [1461] 6.550675e-04 2.580609e-17 NA 1.000000e+00 9.950283e-01
## [1466] 9.999871e-01 2.773797e-13 2.809152e-18 3.690653e-02 1.000000e+00
## [1471] 9.999987e-01 4.206957e-17 4.346365e-19 1.000000e+00 9.999447e-01
## [1476] 5.176303e-12 1.000000e+00 1.000000e+00 9.999492e-01 9.999899e-01
## [1481] 9.999673e-01 9.999948e-01 1.000000e+00 4.711385e-11 9.820804e-01
## [1486] 9.999028e-01 9.999901e-01 NA 1.000000e+00 1.783796e-08
## [1491] 7.188745e-03 1.000000e+00 6.636707e-10 9.999993e-01 8.661777e-01
## [1496] 9.999092e-01 2.168586e-13 3.185760e-09 9.975177e-01 4.361918e-21
## [1501] 5.000793e-05 1.000000e+00 9.396631e-13 4.770491e-01 NA
## [1506] 4.854031e-01 1.167496e-07 9.998833e-01 1.000000e+00 1.000000e+00
## [1511] 9.966145e-01 9.899424e-01 9.998829e-01 NA 1.000000e+00
## [1516] 1.000000e+00 9.993433e-01 9.999412e-01 NA 2.218221e-25
## [1521] 1.000000e+00 3.855551e-11 NA 9.999880e-01 NA
## [1526] 1.765835e-01 9.999990e-01 1.000000e+00 7.596878e-03 1.000000e+00
## [1531] 9.847051e-01 1.000000e+00 7.015518e-06 6.237769e-17 1.000000e+00
## [1536] 9.989228e-01 4.557428e-04 9.999807e-01 1.000000e+00 9.999999e-01
## [1541] 5.488882e-18 1.136772e-02 6.137457e-02 1.762514e-04 9.999855e-01
## [1546] 9.999996e-01 2.441639e-15 9.999999e-01 3.399689e-20 NA
## [1551] 1.000000e+00 4.662930e-04 1.000000e+00 1.000000e+00 9.999863e-01
## [1556] 8.556184e-14 1.532708e-12 1.324501e-23 2.458028e-04 9.439324e-01
## [1561] 1.000000e+00 1.000000e+00 1.000000e+00 9.911859e-01 9.999979e-01
## [1566] 1.106031e-16 1.260305e-10 9.999155e-01 9.999903e-01 1.000000e+00
## [1571] 9.999903e-01 1.000000e+00 2.950800e-14 1.000000e+00 9.942329e-01
## [1576] 1.000000e+00 1.696246e-03 5.401610e-01 1.619864e-12 9.999982e-01
## [1581] 1.000000e+00 1.050002e-06 3.652540e-01 2.558637e-18 5.442017e-18
## [1586] 1.591156e-10 2.390662e-23 9.501074e-09 5.454283e-12 1.000000e+00
## [1591] 1.000000e+00 NA 1.000000e+00 9.999972e-01 9.999954e-01
## [1596] 9.999804e-01 9.973166e-01 2.615421e-11 3.366511e-19 9.648945e-01
## [1601] 1.000000e+00 9.999849e-01 1.000000e+00 1.120072e-19 1.000000e+00
## [1606] 9.999989e-01 9.648950e-01 3.071713e-15 6.994893e-01 9.998738e-01
## [1611] 1.000000e+00 9.982714e-01 4.906874e-12 9.960172e-01 2.797203e-12
## [1616] 1.000000e+00 1.162500e-20 1.098531e-06 9.153923e-04 9.999976e-01
## [1621] 9.999999e-01 NA 9.995538e-01 1.000000e+00 NA
## [1626] 9.999991e-01 4.754428e-25 3.084480e-10 1.220238e-11 1.000000e+00
## [1631] 3.618018e-05 1.000000e+00 1.000000e+00 1.000000e+00 9.832858e-04
## [1636] 1.000000e+00 1.000000e+00 1.994722e-05 2.801275e-17 9.635314e-01
## [1641] 2.049975e-17 6.329549e-10 9.980222e-01 1.728319e-14 9.999482e-01
## [1646] 1.240010e-08 9.999863e-01 7.550291e-01 NA 2.766915e-10
## [1651] 4.564187e-15 NA 1.981331e-20 5.938487e-15 1.000000e+00
## [1656] 7.832949e-12 2.948249e-17 1.243778e-15 9.997486e-01 1.579138e-24
## [1661] 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## [1666] 7.507040e-14 8.081452e-01 9.999998e-01 1.000000e+00 9.999676e-01
## [1671] 1.000000e+00 2.335614e-08 1.000000e+00 9.977180e-01 2.603252e-17
## [1676] 9.995223e-01 7.810823e-15 1.000000e+00 5.757485e-09 1.000000e+00
## [1681] 5.877206e-16 1.000000e+00 1.000000e+00 1.000000e+00 3.807763e-20
## [1686] 1.000000e+00 3.495961e-01 NA 1.256335e-20 8.599116e-15
## [1691] 3.810576e-16 9.999914e-01 1.000000e+00 NA 9.954277e-01
## [1696] 1.000000e+00 1.732515e-21 1.000000e+00 1.000000e+00 7.636102e-25
## [1701] 1.000000e+00 9.997089e-01 3.689321e-04 1.000000e+00 2.604604e-16
## [1706] 3.686330e-15 1.000000e+00 8.970585e-01 1.042647e-12 5.100409e-19
## [1711] 9.993807e-01 4.801911e-20 9.999999e-01 1.063157e-10 9.994956e-01
## [1716] 1.993970e-16 9.524541e-19 3.242297e-22 9.969374e-01 7.453020e-01
## [1721] 9.999992e-01 3.770647e-03 1.000000e+00 6.076001e-08 1.000000e+00
## [1726] 5.285071e-23 4.640100e-21 1.000673e-01 1.000000e+00 1.000000e+00
## [1731] 1.000000e+00 4.305449e-16 3.686926e-11 8.614835e-19 9.353600e-25
## [1736] 4.720133e-11 9.999996e-01 1.000000e+00 9.992811e-01 3.641705e-11
## [1741] 1.000000e+00 5.030373e-02 1.474255e-20 3.390660e-08 8.067138e-01
## [1746] 5.482023e-12 1.000000e+00 1.304255e-02 NA 9.991521e-01
## [1751] 9.997294e-01 9.999952e-01 5.683008e-05 5.041837e-17 9.519724e-20
## [1756] NA 4.610006e-23 9.999997e-01 5.824138e-11 1.000000e+00
## [1761] 9.999919e-01 3.068236e-18 3.627128e-07 2.273130e-19 9.998608e-01
## [1766] 1.000000e+00 NA 2.866193e-18 NA 2.022590e-20
## [1771] 7.500281e-16 2.925723e-08 1.388515e-01 1.000000e+00 7.638453e-01
## [1776] 1.183382e-04 1.000000e+00 1.000000e+00 4.066495e-06 1.000000e+00
## [1781] 4.975315e-04 5.138701e-15 1.000000e+00 4.095130e-05 9.782900e-01
## [1786] 1.000000e+00 3.281824e-07 9.997098e-01 9.999963e-01 3.444783e-09
## [1791] 1.000000e+00 5.584148e-20 1.000000e+00 9.999987e-01 9.833045e-01
## [1796] 1.842127e-11 1.267011e-07 NA 9.984848e-01 2.167942e-02
## [1801] 1.000000e+00 9.999909e-01 1.057802e-06 7.761040e-01 9.999998e-01
## [1806] 2.064967e-11 1.000000e+00 NA 3.159053e-15 6.012364e-22
## [1811] 2.456595e-06 3.312061e-19 1.000000e+00 1.000000e+00 9.999989e-01
## [1816] 5.164403e-16 9.999973e-01 9.998467e-01 5.402356e-01 1.000000e+00
## [1821] 2.754723e-03 9.999328e-01 8.471193e-14 9.913068e-01 9.999995e-01
## [1826] NA 1.000000e+00 9.999742e-01 8.374018e-07 1.000000e+00
## [1831] 2.847335e-04 9.948628e-01 1.000000e+00 3.212858e-09 4.654129e-08
## [1836] 7.309421e-11 3.568705e-09 1.000000e+00 1.379941e-15 9.836625e-01
## [1841] 3.201926e-05 9.999570e-01 NA 9.999998e-01 4.484342e-18
## [1846] 1.232034e-18 2.417478e-09 1.212987e-17 9.993310e-01 9.999615e-01
## [1851] 1.001033e-04 2.189489e-04 1.147786e-12 6.219471e-01 1.197074e-22
## [1856] 1.467355e-01 8.284841e-01 1.000000e+00 3.390894e-17 4.426540e-06
## [1861] 1.000000e+00 9.999740e-01 1.000000e+00 1.864440e-02 7.336774e-01
## [1866] 9.997399e-01 3.791306e-05 2.850529e-01 9.992548e-01 NA
## [1871] NA 1.000000e+00 2.447754e-20 1.719540e-10 9.964931e-01
## [1876] 9.992076e-01 9.944044e-01 1.000000e+00 2.297939e-02 9.701568e-01
## [1881] 4.458753e-12 9.999848e-01 1.223601e-15 9.991543e-01 4.945316e-09
## [1886] NA 2.091477e-06 1.000000e+00 9.889874e-01 9.999999e-01
## [1891] 3.895588e-06 1.004722e-01 1.700279e-05 9.982812e-01 9.997028e-01
## [1896] 9.961612e-01 1.000000e+00 1.041824e-03 3.498314e-11 1.000000e+00
## [1901] 9.486769e-12 1.524189e-16 3.065436e-08 1.000000e+00 1.000000e+00
## [1906] 1.054156e-19 9.215594e-01 9.964497e-01 8.030895e-03 9.999985e-01
## [1911] 1.000000e+00 4.828004e-23 9.386876e-17 6.788389e-08 9.801247e-08
## [1916] 7.659383e-03 9.999610e-01 1.176995e-01 2.683081e-02 1.000000e+00
## [1921] 9.998154e-01 NA 1.109582e-10 8.043427e-01 5.933439e-07
## [1926] 5.756320e-15 1.000000e+00 2.746531e-06 4.329773e-12 2.152519e-01
## [1931] 9.999964e-01 9.967299e-01 1.719664e-03 1.000000e+00 1.800453e-14
## [1936] 1.000000e+00 9.999985e-01 1.740434e-18 1.320803e-12 1.000000e+00
## [1941] 1.288316e-18 4.776766e-19 9.999995e-01 1.380480e-22 7.033695e-01
## [1946] 1.000000e+00 1.000000e+00 7.869808e-18 4.908215e-15 9.995969e-01
## [1951] 1.000000e+00 3.015658e-03 1.000000e+00 9.992042e-01 5.315615e-18
## [1956] 1.000000e+00 1.222353e-19 1.499681e-15 1.000000e+00 9.999992e-01
## [1961] 9.999485e-01 1.000000e+00 2.884319e-09 3.095813e-05 9.996121e-01
## [1966] 5.026826e-08 9.993382e-01 1.902457e-11 1.193061e-03 1.072451e-11
## [1971] 2.954703e-11 8.629093e-01 9.644940e-01 9.999954e-01 2.036249e-17
## [1976] 9.175817e-06 1.843370e-24 1.000000e+00 1.000000e+00 1.793904e-16
## [1981] 2.613376e-08 1.000000e+00 4.117395e-01 NA 1.000000e+00
## [1986] 8.376566e-11 1.766734e-04 3.087063e-02 5.024277e-01 1.000000e+00
## [1991] 1.632363e-19 1.000000e+00 1.000000e+00 9.995091e-01 NA
## [1996] 1.471904e-08 9.999448e-01 1.000000e+00 8.933317e-19 4.256961e-01
## [2001] 1.000000e+00 5.976574e-16 1.000000e+00 NA 1.000000e+00
## [2006] 1.000000e+00 9.999895e-01 7.668453e-20 8.724745e-03 1.000000e+00
## [2011] 1.000000e+00 9.998089e-01 1.000000e+00 1.936573e-20 9.999976e-01
## [2016] 1.000000e+00 9.868468e-17 9.999996e-01 1.000000e+00 8.891802e-02
## [2021] 4.183076e-04 9.998245e-01 9.999972e-01 2.790500e-03 2.639794e-05
## [2026] 6.026683e-19 9.758677e-01 NA 9.999997e-01 1.000000e+00
## [2031] 1.000000e+00 9.999992e-01 7.481084e-08 9.944458e-01 1.000000e+00
## [2036] 1.000000e+00 1.000000e+00 4.363948e-07 1.000000e+00 9.956262e-01
## [2041] 5.499191e-11 4.947837e-16 5.969351e-04 1.489023e-02 1.007883e-06
## [2046] 1.738778e-06 1.818919e-13 1.000000e+00 5.540074e-03 3.149157e-08
## [2051] 6.167468e-06 7.873677e-01 9.927192e-01 9.994217e-01 1.313571e-02
## [2056] 7.621567e-21 1.000000e+00 NA 1.484438e-06 1.000000e+00
## [2061] 7.272900e-12 NA 9.991653e-01 9.797690e-07 9.995833e-01
## [2066] NA 7.710150e-08 9.824848e-01 9.998230e-01 9.617866e-01
## [2071] 8.047236e-18 1.091597e-06 9.999989e-01 5.442220e-06 1.590952e-01
## [2076] 7.934462e-16 9.999790e-01 9.909290e-01 1.000000e+00 1.000000e+00
## [2081] 3.846653e-25 9.997986e-01 9.999967e-01 NA 4.846822e-07
## [2086] 1.141826e-18 4.014492e-19 1.000000e+00 2.529155e-12 1.000000e+00
## [2091] 1.000000e+00 6.340297e-17 1.000000e+00 1.122172e-20 1.000000e+00
## [2096] 1.000000e+00 3.812552e-06 1.000000e+00 1.000000e+00 1.000000e+00
## [2101] 1.000000e+00 NA NA 2.926968e-11 9.990667e-01
## [2106] 2.731640e-22 9.999924e-01 9.998809e-01 7.967273e-04 NA
## [2111] 1.000000e+00 2.907499e-05 1.000000e+00 1.639738e-19 7.686795e-20
## [2116] 9.219948e-15 9.999983e-01 1.000000e+00 1.000000e+00 1.579293e-11
## [2121] 9.494199e-01 2.214006e-02 1.000000e+00 9.999921e-01 1.000000e+00
## [2126] 3.056612e-23 2.550070e-08 2.867807e-08 6.676972e-17 5.200752e-04
## [2131] 9.997936e-01 2.991824e-12 9.966760e-01 1.000000e+00 1.271203e-15
## [2136] 1.431374e-22 1.000000e+00 9.999847e-01 2.708294e-19 1.000000e+00
## [2141] 3.058037e-07
#create a new variable that specifies predicted class
data_testing$TARGET_FLAG_pred <-c()
#calculate probability
data_logistic_regression = within(data_logistic_regression, {
TARGET_FLAG_pred = ifelse(data_logistic_regression$probability < 0.5, 0, 1)
})
data_testing = within(data_testing, {
TARGET_FLAG_pred = ifelse(data_testing$probability < 0.5, 0, 1)
})
head(data_testing)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 1 3 NA NA 0 48 0 11 1154 No
## 2 9 NA NA 1 40 1 11 1119 Yes
## 3 10 NA NA 0 44 2 12 974 Yes
## 4 18 NA NA 0 35 2 NA 513 Yes
## 5 21 NA NA 0 59 0 12 1686 No
## 6 30 NA NA 0 46 0 14 1 No
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 1 2 No M Bachelors Manager 26 Private
## 2 2 No M High School Manager 21 Private
## 3 2 No F High School Blue Collar 30 Commercial
## 4 2 No M High School Clerical 74 Private
## 5 2 No M High School Manager 45 Private
## 6 636 Yes M Bachelors Professional 7 Commercial
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 1 703 1 Van yes 1 0 No 2
## 2 540 6 Minivan no 272 1 No 2
## 3 1189 10 SUV no 1 0 No 0
## 4 1373 6 Pickup no 1 0 Yes 0
## 5 345 1 Minivan yes 494 2 No 4
## 6 864 1 Panel Truck no 137 1 No 2
## CAR_AGE URBANICITY URBANICITY_HighlyUrban JOB_BlueCollar
## 1 10 Highly Urban/ Urban 1 0
## 2 1 Highly Urban/ Urban 1 0
## 3 10 Highly Rural/ Rural 0 1
## 4 4 Highly Rural/ Rural 0 0
## 5 1 Highly Urban/ Urban 1 0
## 6 12 Highly Urban/ Urban 1 0
## JOB_Clerical JOB_HomeMaker JOB_Manager MSTATUS_Yes CAR_TYPE_Pickup
## 1 0 0 1 0 0
## 2 0 0 1 0 0
## 3 0 0 0 0 0
## 4 1 0 0 0 1
## 5 0 0 1 0 0
## 6 0 0 0 1 0
## CAR_TYPE_Sports_Car CAR_TYPE_SUV CAR_TYPE_Van REVOKED_Yes
## 1 0 0 1 0
## 2 0 0 0 0
## 3 0 1 0 0
## 4 0 0 0 1
## 5 0 0 0 0
## 6 0 0 0 0
## CAR_USE_Private EDUCATION_HighSchool PARENT1_Yes SEX_M probability
## 1 1 0 0 1 9.991562e-01
## 2 1 1 1 1 1.000000e+00
## 3 0 1 1 0 9.991668e-01
## 4 1 1 1 1 NA
## 5 1 1 0 1 1.000000e+00
## 6 0 0 0 1 1.337410e-08
## TARGET_FLAG_pred
## 1 1
## 2 1
## 3 1
## 4 NA
## 5 1
## 6 0
Calculate Classification Metrics.
#create confusion matrix
confusion_matrix <- table("Predicted" = data_logistic_regression$TARGET_FLAG_pred, "Actual" = data_logistic_regression$TARGET_FLAG)
confusion_matrix
## Actual
## Predicted 0 1
## 0 4983 1215
## 1 337 678
#calculate true positive
TP <- confusion_matrix[4]
#calculate true negative
TN <- confusion_matrix[1]
#calculate false negative
FN <- confusion_matrix[2]
#calculate false positive
FP <- confusion_matrix[3]
#calculate accuracy
accuracy <- (confusion_matrix[1,1] + confusion_matrix[2,2])/nrow(data_logistic_regression)
accuracy
## [1] 0.7848329
#calculate accuracy classification error rate
classification_error_rate = (FP + FN)/(TP + FP + TN + FN)
classification_error_rate
## [1] 0.2151671
#calculate precision
precision = TP/(TP + FP)
precision
## [1] 0.3581616
#calculate sensitivity
sensitivity = TP/(TP + FN)
sensitivity
## [1] 0.6679803
#calculate specificity
specificity <- TN/(TN + FP)
specificity
## [1] 0.803969
#calculate F1 score
F1_score <- (2*precision*sensitivity)/(precision + sensitivity)
F1_score
## [1] 0.4662999
roc.val <- roc(TARGET_FLAG ~ probability, data_logistic_regression)
plot(roc.val, main="ROC plot")
roc.val$auc
## Area under the curve: 0.8097
#create subset that includes only records with TARGET_AMT=1 (people that got into car crash)
data_crash <- subset(data, data$TARGET_FLAG==1)
head(data_crash)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 5 7 1 2946.000 0 34 1 12 662 Yes
## 6 12 1 2501.000 0 34 0 10 4278 No
## 8 14 1 6077.000 0 53 0 14 5021 No
## 11 17 1 1267.000 0 53 0 11 767 No
## 12 19 1 2920.167 0 45 0 0 2 No
## 17 25 1 6857.000 0 28 1 13 3076 No
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 5 2 No F Bachelors Blue Collar 46 Commercial
## 6 2 No F Bachelors Clerical 34 Private
## 8 2 No F Masters Lawyer 15 Private
## 11 2 No M PhD 64 Commercial
## 12 72 Yes F High School Home Maker 48 Private
## 17 1207 Yes F High School Blue Collar 29 Commercial
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 5 739 1 Sports Car no 1 0 No 0
## 6 132 1 SUV no 1 0 No 0
## 8 822 1 Sports Car no 1 0 No 0
## 11 1677 6 Panel Truck yes 1 0 No 3
## 12 2295 1 SUV no 1 0 No 3
## 17 2548 6 SUV no 2402 2 No 0
## CAR_AGE URBANICITY
## 5 7 Highly Urban/ Urban
## 6 1 Highly Urban/ Urban
## 8 11 Highly Urban/ Urban
## 11 10 Highly Urban/ Urban
## 12 5 Highly Urban/ Urban
## 17 1 Highly Urban/ Urban
dim(data_crash)
## [1] 1893 26
The following assumptions must be verified for linear regression.
# histograms and density lines
par(mfrow=c(2,2))
hist(data_crash$KIDSDRIV,breaks=seq(0, 10, 1),probability=TRUE, col="gray", border="white")
d <- density(data_crash$KIDSDRIV)
lines(d, col="red")
hist(data_crash$AGE,breaks=seq(0, 100, 1),probability=TRUE, col="gray", border="white")
d <- density(data_crash$AGE)
lines(d, col="red")
hist(data_crash$HOMEKIDS,breaks=seq(0, 10, 1),probability=TRUE, col="gray", border="white")
d <- density(data_crash$HOMEKIDS)
lines(d, col="red")
hist(data_crash$YOJ,breaks=seq(0, 25, 1),probability=TRUE, col="gray", border="white")
d <- density(data_crash$YOJ)
lines(d, col="red")
par(mfrow=c(2,2))
hist(data_crash$INCOME,breaks=seq(0, 10000, 500),probability=TRUE, col="gray", border="white")
d <- density(data_crash$INCOME)
lines(d, col="red")
hist(data_crash$HOME_VAL,probability=TRUE, col="gray", border="white")
d <- density(data_crash$HOME_VAL)
lines(d, col="red")
hist(data_crash$TRAVTIME,probability=TRUE, col="gray", border="white")
d <- density(data_crash$TRAVTIME)
lines(d, col="red")
hist(data_crash$BLUEBOOK,breaks=seq(0, 5000, 100),probability=TRUE, col="gray", border="white")
d <- density(data_crash$BLUEBOOK)
lines(d, col="red")
par(mfrow=c(2,2))
hist(data_crash$TIF,probability=TRUE, col="gray", border="white")
d <- density(data_crash$TIF)
lines(d, col="red")
hist(data_crash$OLDCLAIM,probability=TRUE, col="gray", border="white")
d <- density(data_crash$OLDCLAIM)
lines(d, col="red")
hist(data_crash$CLM_FREQ,breaks=seq(0, 6, 1),probability=TRUE, col="gray", border="white")
d <- density(data_crash$CLM_FREQ)
lines(d, col="red")
hist(data_crash$MVR_PTS,probability=TRUE, col="gray", border="white")
d <- density(data_crash$MVR_PTS)
lines(d, col="red")
hist(data_crash$CAR_AGE,probability=TRUE, col="gray", border="white")
d <- density(data_crash$CAR_AGE)
lines(d, col="red")
#verify lineriarity
par(mfrow=c(2,2))
#colnames <- dimnames(data)[[2]]
for (i in 4:ncol(data_crash)) {
if (is.numeric(data_crash[,i]) == "TRUE"){
plot(data_crash$TARGET_AMT ~ data_crash[,i],main=names(data_crash)[i], xlab=names(data_crash)[i])
reg_line <- lm(data_crash$TARGET_AMT ~ data_crash[,i])
abline(reg_line,col="red")
}
}
#replace variables that have non-linear relationships with logit function or which distribution is not normal(or near normal) by variables logs
data_linear_regression <- data_crash %>% select(-INDEX,-TARGET_FLAG) %>% mutate(HOME_VAL = ifelse(HOME_VAL > 0,log(HOME_VAL),""), BLUEBOOK = ifelse(BLUEBOOK > 0,log(BLUEBOOK),""),INCOME = ifelse(INCOME > 0,log(INCOME),""),TIF = ifelse(TIF > 0,log(TIF),""), OLDCLAIM = ifelse(OLDCLAIM > 0,log(OLDCLAIM),""))
count_nas(data_linear_regression)
## variable_name_column number_missing_column percentage
## 1 KIDSDRIV 0 0
## 2 AGE 0 0
## 3 HOMEKIDS 0 0
## 4 YOJ 0 0
## 5 INCOME 0 0
## 6 PARENT1 0 0
## 7 HOME_VAL 0 0
## 8 MSTATUS 0 0
## 9 SEX 0 0
## 10 EDUCATION 0 0
## 11 JOB 0 0
## 12 TRAVTIME 0 0
## 13 CAR_USE 0 0
## 14 BLUEBOOK 0 0
## 15 TIF 0 0
## 16 CAR_TYPE 0 0
## 17 RED_CAR 0 0
## 18 OLDCLAIM 0 0
## 19 CLM_FREQ 0 0
## 20 REVOKED 0 0
## 21 MVR_PTS 0 0
## 22 CAR_AGE 0 0
## 23 URBANICITY 0 0
#correlation between variables
corrplot(cor(data_linear_regression %>% select_if(is.numeric)), type = "upper", method = "number", tl.cex = 0.5, tl.col="black",number.cex = .5)
#build lm model using stepwise approach
linear_model.null = lm(TARGET_AMT ~ 1, data = data_linear_regression)
linear_model.full = lm(TARGET_AMT ~ ., data = data_linear_regression)
step(linear_model.null,
scope = list(upper=linear_model.full),
direction = "both",
data = data_linear_regression)
## Start: AIC=33841.23
## TARGET_AMT ~ 1
##
## Df Sum of Sq RSS AIC
## + CAR_TYPE 5 881040861 1.0892e+11 33836
## + MSTATUS 1 277922853 1.0952e+11 33838
## + CAR_USE 1 220700204 1.0958e+11 33839
## + SEX 1 192617922 1.0960e+11 33840
## + INCOME 1 128447239 1.0967e+11 33841
## + REVOKED 1 118967437 1.0968e+11 33841
## <none> 1.0980e+11 33841
## + PARENT1 1 109443748 1.0969e+11 33841
## + MVR_PTS 1 105520144 1.0969e+11 33841
## + YOJ 1 103068817 1.0969e+11 33841
## + BLUEBOOK 1 98244081 1.0970e+11 33842
## + AGE 1 40333734 1.0976e+11 33843
## + CAR_AGE 1 26556088 1.0977e+11 33843
## + HOMEKIDS 1 21854988 1.0978e+11 33843
## + TRAVTIME 1 15490316 1.0978e+11 33843
## + CLM_FREQ 1 15181150 1.0978e+11 33843
## + RED_CAR 1 12635915 1.0978e+11 33843
## + KIDSDRIV 1 7522012 1.0979e+11 33843
## + URBANICITY 1 4219827 1.0979e+11 33843
## + OLDCLAIM 1 3306186 1.0979e+11 33843
## + TIF 1 202114 1.0980e+11 33843
## + HOME_VAL 1 6774 1.0980e+11 33843
## + EDUCATION 3 172507522 1.0962e+11 33844
## + JOB 8 703216490 1.0909e+11 33845
##
## Step: AIC=33835.98
## TARGET_AMT ~ CAR_TYPE
##
## Df Sum of Sq RSS AIC
## + MSTATUS 1 251703068 1.0866e+11 33834
## + PARENT1 1 142021530 1.0877e+11 33836
## + MVR_PTS 1 127222706 1.0879e+11 33836
## <none> 1.0892e+11 33836
## + REVOKED 1 93890746 1.0882e+11 33836
## + INCOME 1 83566596 1.0883e+11 33837
## + YOJ 1 73539414 1.0884e+11 33837
## + CAR_AGE 1 66685043 1.0885e+11 33837
## + HOMEKIDS 1 45918691 1.0887e+11 33837
## + RED_CAR 1 42747717 1.0887e+11 33837
## + CAR_USE 1 32396980 1.0888e+11 33837
## + BLUEBOOK 1 31661512 1.0888e+11 33837
## + AGE 1 22473139 1.0889e+11 33838
## + TRAVTIME 1 14533771 1.0890e+11 33838
## + CLM_FREQ 1 13619726 1.0890e+11 33838
## + KIDSDRIV 1 7645685 1.0891e+11 33838
## + OLDCLAIM 1 6504566 1.0891e+11 33838
## + SEX 1 5274732 1.0891e+11 33838
## + HOME_VAL 1 1938459 1.0891e+11 33838
## + URBANICITY 1 138093 1.0892e+11 33838
## + TIF 1 12406 1.0892e+11 33838
## + EDUCATION 3 88562536 1.0883e+11 33840
## - CAR_TYPE 5 881040861 1.0980e+11 33841
## + JOB 8 416607293 1.0850e+11 33845
##
## Step: AIC=33833.6
## TARGET_AMT ~ CAR_TYPE + MSTATUS
##
## Df Sum of Sq RSS AIC
## + MVR_PTS 1 118525976 1.0855e+11 33834
## + YOJ 1 117276981 1.0855e+11 33834
## <none> 1.0866e+11 33834
## + REVOKED 1 99751169 1.0856e+11 33834
## + CAR_AGE 1 83560584 1.0858e+11 33834
## + INCOME 1 79478095 1.0858e+11 33834
## + HOME_VAL 1 67680007 1.0860e+11 33834
## + HOMEKIDS 1 64077606 1.0860e+11 33834
## + AGE 1 41863959 1.0862e+11 33835
## + CAR_USE 1 38465090 1.0863e+11 33835
## + RED_CAR 1 36858180 1.0863e+11 33835
## + BLUEBOOK 1 28181920 1.0864e+11 33835
## + PARENT1 1 18336946 1.0865e+11 33835
## + TRAVTIME 1 17957391 1.0865e+11 33835
## + CLM_FREQ 1 14920962 1.0865e+11 33835
## + SEX 1 12175700 1.0865e+11 33835
## + KIDSDRIV 1 12032899 1.0865e+11 33835
## + OLDCLAIM 1 7384154 1.0866e+11 33835
## + URBANICITY 1 1007899 1.0866e+11 33836
## + TIF 1 499702 1.0866e+11 33836
## - MSTATUS 1 251703068 1.0892e+11 33836
## + EDUCATION 3 72614014 1.0859e+11 33838
## - CAR_TYPE 5 854821076 1.0952e+11 33838
## + JOB 8 411288798 1.0825e+11 33842
##
## Step: AIC=33833.53
## TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS
##
## Df Sum of Sq RSS AIC
## + YOJ 1 125081593 1.0842e+11 33833
## <none> 1.0855e+11 33834
## - MVR_PTS 1 118525976 1.0866e+11 33834
## + REVOKED 1 94467874 1.0845e+11 33834
## + HOME_VAL 1 81523631 1.0846e+11 33834
## + INCOME 1 80168955 1.0847e+11 33834
## + CAR_AGE 1 73876351 1.0847e+11 33834
## + HOMEKIDS 1 57153156 1.0849e+11 33835
## + CLM_FREQ 1 56020772 1.0849e+11 33835
## + AGE 1 45800105 1.0850e+11 33835
## + RED_CAR 1 40678583 1.0851e+11 33835
## + CAR_USE 1 31202382 1.0851e+11 33835
## + BLUEBOOK 1 29940033 1.0852e+11 33835
## + TRAVTIME 1 17361415 1.0853e+11 33835
## + PARENT1 1 14181990 1.0853e+11 33835
## + SEX 1 12232883 1.0853e+11 33835
## + KIDSDRIV 1 10929573 1.0853e+11 33835
## + OLDCLAIM 1 1842795 1.0854e+11 33836
## + URBANICITY 1 257193 1.0855e+11 33836
## + TIF 1 169741 1.0855e+11 33836
## - MSTATUS 1 243006338 1.0879e+11 33836
## + EDUCATION 3 74606002 1.0847e+11 33838
## - CAR_TYPE 5 874882524 1.0942e+11 33839
## + JOB 8 394337535 1.0815e+11 33843
##
## Step: AIC=33833.35
## TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS + YOJ
##
## Df Sum of Sq RSS AIC
## <none> 1.0842e+11 33833
## + REVOKED 1 105936347 1.0831e+11 33834
## - YOJ 1 125081593 1.0855e+11 33834
## - MVR_PTS 1 126330588 1.0855e+11 33834
## + CAR_AGE 1 84989277 1.0834e+11 33834
## + CLM_FREQ 1 53926426 1.0837e+11 33834
## + HOME_VAL 1 47665441 1.0837e+11 33835
## + HOMEKIDS 1 45961587 1.0837e+11 33835
## + RED_CAR 1 36371947 1.0838e+11 33835
## + CAR_USE 1 34715713 1.0839e+11 33835
## + AGE 1 29453290 1.0839e+11 33835
## + BLUEBOOK 1 27862260 1.0839e+11 33835
## + TRAVTIME 1 14913152 1.0841e+11 33835
## + PARENT1 1 12241647 1.0841e+11 33835
## + SEX 1 11976117 1.0841e+11 33835
## + KIDSDRIV 1 6088028 1.0841e+11 33835
## + INCOME 1 5266242 1.0842e+11 33835
## + OLDCLAIM 1 2478421 1.0842e+11 33835
## + TIF 1 440341 1.0842e+11 33835
## + URBANICITY 1 495 1.0842e+11 33835
## - MSTATUS 1 287498666 1.0871e+11 33836
## - CAR_TYPE 5 835158704 1.0926e+11 33838
## + EDUCATION 3 73898025 1.0835e+11 33838
## + JOB 8 325899692 1.0809e+11 33844
##
## Call:
## lm(formula = TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS + YOJ,
## data = data_linear_regression)
##
## Coefficients:
## (Intercept) CAR_TYPEPanel Truck CAR_TYPEPickup
## 5059.38 2164.76 -168.90
## CAR_TYPESports Car CAR_TYPESUV CAR_TYPEVan
## 13.55 -158.22 876.20
## MSTATUSYes MVR_PTS YOJ
## -788.88 100.24 58.25
#optimal model
final_linear_model <- lm(formula = TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS + YOJ, data = data_linear_regression)
summary(final_linear_model)
##
## Call:
## lm(formula = TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS + YOJ,
## data = data_linear_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7251 -3085 -1595 276 79768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5059.38 624.28 8.104 9.44e-16 ***
## CAR_TYPEPanel Truck 2164.76 742.96 2.914 0.00361 **
## CAR_TYPEPickup -168.90 579.26 -0.292 0.77064
## CAR_TYPESports Car 13.55 635.51 0.021 0.98299
## CAR_TYPESUV -158.22 535.58 -0.295 0.76771
## CAR_TYPEVan 876.20 721.42 1.215 0.22469
## MSTATUSYes -788.88 352.94 -2.235 0.02553 *
## MVR_PTS 100.24 67.65 1.482 0.13861
## YOJ 58.25 39.51 1.474 0.14057
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7586 on 1884 degrees of freedom
## Multiple R-squared: 0.01254, Adjusted R-squared: 0.008342
## F-statistic: 2.99 on 8 and 1884 DF, p-value: 0.002472
#create new dummy variables
data_linear_regression$CAR_TYPE_PanelTruck <- ifelse(data_linear_regression$CAR_TYPE== "Panel Truck",1,0)
data_testing$CAR_TYPE_PanelTruck <- ifelse(data_testing$CAR_TYPE== "Panel Truck",1,0)
data_linear_regression$MSTATUS_Yes <- ifelse(data_linear_regression$MSTATUS== "Yes",1,0)
data_testing$MSTATUS_Yes <- ifelse(data_testing$MSTATUS== "Yes",1,0)
data_linear_regression$TARGET_AMT_pred <- c()
data_testing$TARGET_AMT_pred <- c()
data_testing$TARGET_AMT_pred <- ""
#optimal model equasion
data_linear_regression$TARGET_AMT_pred <- 5059.38 + 2164.76*data_linear_regression$CAR_TYPE_PanelTruck - 788.88*data_linear_regression$MSTATUS_Yes
head(data_testing)
## INDEX TARGET_FLAG TARGET_AMT KIDSDRIV AGE HOMEKIDS YOJ INCOME PARENT1
## 1 3 NA NA 0 48 0 11 1154 No
## 2 9 NA NA 1 40 1 11 1119 Yes
## 3 10 NA NA 0 44 2 12 974 Yes
## 4 18 NA NA 0 35 2 NA 513 Yes
## 5 21 NA NA 0 59 0 12 1686 No
## 6 30 NA NA 0 46 0 14 1 No
## HOME_VAL MSTATUS SEX EDUCATION JOB TRAVTIME CAR_USE
## 1 2 No M Bachelors Manager 26 Private
## 2 2 No M High School Manager 21 Private
## 3 2 No F High School Blue Collar 30 Commercial
## 4 2 No M High School Clerical 74 Private
## 5 2 No M High School Manager 45 Private
## 6 636 Yes M Bachelors Professional 7 Commercial
## BLUEBOOK TIF CAR_TYPE RED_CAR OLDCLAIM CLM_FREQ REVOKED MVR_PTS
## 1 703 1 Van yes 1 0 No 2
## 2 540 6 Minivan no 272 1 No 2
## 3 1189 10 SUV no 1 0 No 0
## 4 1373 6 Pickup no 1 0 Yes 0
## 5 345 1 Minivan yes 494 2 No 4
## 6 864 1 Panel Truck no 137 1 No 2
## CAR_AGE URBANICITY URBANICITY_HighlyUrban JOB_BlueCollar
## 1 10 Highly Urban/ Urban 1 0
## 2 1 Highly Urban/ Urban 1 0
## 3 10 Highly Rural/ Rural 0 1
## 4 4 Highly Rural/ Rural 0 0
## 5 1 Highly Urban/ Urban 1 0
## 6 12 Highly Urban/ Urban 1 0
## JOB_Clerical JOB_HomeMaker JOB_Manager MSTATUS_Yes CAR_TYPE_Pickup
## 1 0 0 1 0 0
## 2 0 0 1 0 0
## 3 0 0 0 0 0
## 4 1 0 0 0 1
## 5 0 0 1 0 0
## 6 0 0 0 1 0
## CAR_TYPE_Sports_Car CAR_TYPE_SUV CAR_TYPE_Van REVOKED_Yes
## 1 0 0 1 0
## 2 0 0 0 0
## 3 0 1 0 0
## 4 0 0 0 1
## 5 0 0 0 0
## 6 0 0 0 0
## CAR_USE_Private EDUCATION_HighSchool PARENT1_Yes SEX_M probability
## 1 1 0 0 1 9.991562e-01
## 2 1 1 1 1 1.000000e+00
## 3 0 1 1 0 9.991668e-01
## 4 1 1 1 1 NA
## 5 1 1 0 1 1.000000e+00
## 6 0 0 0 1 1.337410e-08
## TARGET_FLAG_pred CAR_TYPE_PanelTruck TARGET_AMT_pred
## 1 1 0
## 2 1 0
## 3 1 0
## 4 NA 0
## 5 1 0
## 6 0 1
Test Goodness of Fit.
#linearity
plot(final_linear_model$residuals ~ data_linear_regression$TARGET_AMT)
abline(h = 0, lty = 3) # adds a horizontal dashed line at y = 0
#normal residuals
par(mfrow=c(1,2))
hist(final_linear_model$residuals, probability=TRUE,col="gray", border="white", main="Distribution of residuals")
d <- density(final_linear_model$residuals)
lines(d, col="red")
#normal probability plot
qqnorm(final_linear_model$residuals)
qqline(final_linear_model$residuals)
#constant variability
plot(final_linear_model)
#alternative model
linear_model2 <- lm(formula = TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS, data = data_linear_regression)
linear_model3 <- lm(formula = TARGET_AMT ~ CAR_TYPE + MSTATUS, data = data_linear_regression)
#Likelihood Ratio Test
anova(final_linear_model, linear_model2, test ="Chisq")
## Analysis of Variance Table
##
## Model 1: TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS + YOJ
## Model 2: TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS
## Res.Df RSS Df Sum of Sq Pr(>Chi)
## 1 1884 1.0842e+11
## 2 1885 1.0855e+11 -1 -125081593 0.1404
anova(final_linear_model, linear_model3, test ="Chisq")
## Analysis of Variance Table
##
## Model 1: TARGET_AMT ~ CAR_TYPE + MSTATUS + MVR_PTS + YOJ
## Model 2: TARGET_AMT ~ CAR_TYPE + MSTATUS
## Res.Df RSS Df Sum of Sq Pr(>Chi)
## 1 1884 1.0842e+11
## 2 1886 1.0866e+11 -2 -243607569 0.1204
#calculate accuracy
data_linear_regression$TARGET_AMT_accuracy <- c()
data_linear_regression$TARGET_AMT_accuracy <- (data_linear_regression$TARGET_AMT_pred-data_linear_regression$TARGET_AMT)/data_linear_regression$TARGET_AMT
mean(data_linear_regression$TARGET_AMT_accuracy)
## [1] 0.770665
#draw plot predicted vs actual
plot(data_linear_regression$TARGET_AMT_pred,data_linear_regression$TARGET_AMT,
xlab="predicted",ylab="actual",col="blue")
abline(a=0,b=1,col="red")
#calculate car crash payout for testing dataset
data_testing <- data_testing %>% mutate(TARGET_AMT_pred=as.numeric(ifelse(TARGET_FLAG_pred==1,5059.38 + 2164.76*data_testing$CAR_TYPE_PanelTruck - 788.88*data_testing$MSTATUS_Yes,'')))
#export testing data file with predicted class
write.table(data_testing, file = "/Users/olga/downloads/HW4_data_evaluation.csv",append = FALSE)