libraries
library(caret)
## Warning: package 'caret' was built under R version 3.3.3
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.3
library(e1071)
## Warning: package 'e1071' was built under R version 3.3.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data
# Base names (without the ".csv" extension) of the training and test extracts.
trainsheet <- "life_Line_AME_sheet2_out"
testsheet <- "life_Line_AME_sheet8_out"
#Health <- function(trainsheet, testsheet) {
# Directory that holds both extracts. It is written once so the two reads
# cannot drift apart; the trailing separator is part of the string because the
# file name is appended with paste0().
relation_output_dir <-
  "E:\\Chandu\\From\\RSA\\RSA_Health\\POC\\Output\\Relation_Output\\"
# Read "<dir><sheet>.csv" for the training and the test sheet.
train <- read.csv(paste0(relation_output_dir, trainsheet, ".csv"))
test <- read.csv(paste0(relation_output_dir, testsheet, ".csv"))
# Preview the first rows of the training extract.
head(train)
## PolicyID claim_status claims_count familysize Main_insurred
## 1 HC00041886000107 1 1 2 0
## 2 HC00041885000107 1 2 3 Self
## 3 HC00041882000107 0 0 3 0
## 4 HC00041866000107 0 0 2 Self
## 5 HC00041864000107 0 0 1 Self
## 6 HC00041857000107 0 0 1 0
## Main_insurred_age age1 relationship1 age2 relationship2 age3
## 1 0 64 Mother in Law 35 Brother 0
## 2 43 12 Son 33 Wife 0
## 3 0 36 Husband 63 Father 56
## 4 48 46 Wife 0 0 0
## 5 57 0 0 0 0 0
## 6 0 8 Daughter 0 0 0
## relationship3 age4 relationship4 age5 relationship5 age6 relationship6
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 Mother 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## age7 relationship7 age8 relationship8 age9 relationship9 age10
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## relationship10 age11 relationship11 age12 relationship12 age13
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship13 age14 relationship14 age15 relationship15 age16
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship16 age17 relationship17 age18 relationship18 age19
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship19 age20 relationship20 age21 relationship21 age22
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship22 age23 relationship23 age24 relationship24
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
# Preview the first rows of the test extract.
head(test)
## PolicyID claim_status claims_count familysize Main_insurred
## 1 TQ00000128000101 0 0 2 Self
## 2 TQ00000127000101 0 0 3 Self
## 3 TQ00000124000101 0 0 4 Self
## 4 TQ00000123000101 0 0 3 Self
## 5 TQ00000123000100 0 0 3 Self
## 6 TQ00000122000101 0 0 4 0
## Main_insurred_age age1 relationship1 age2 relationship2 age3
## 1 38 39 Wife 0 0 0
## 2 31 29 Wife 8 Daughter 0
## 3 37 36 Wife 8 Son 6
## 4 34 30 Wife 4 Son 0
## 5 33 29 Wife 3 Son 0
## 6 0 42 Spouse 18 Father 13
## relationship3 age4 relationship4 age5 relationship5 age6 relationship6
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 Son 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 Father 43 Spouse 0 0 0 0
## age7 relationship7 age8 relationship8 age9 relationship9 age10
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0
## 4 0 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## relationship10 age11 relationship11 age12 relationship12 age13
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship13 age14 relationship14 age15 relationship15 age16
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship16 age17 relationship17 age18 relationship18 age19
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship19 age20 relationship20 age21 relationship21 age22
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## relationship22 age23 relationship23 age24 relationship24
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 0 0 0 0
# Drop columns 1 and 3 from both sets (PolicyID and claims_count in the
# previews printed above).
train <- train[, c(-1, -3)]
test <- test[, c(-1, -3)]
# Coerce relationship1 .. relationship24 to factors in both sets ------------
for (slot in seq_len(24)) {
  rel_col <- paste0("relationship", slot)
  train[[rel_col]] <- as.factor(train[[rel_col]])
  test[[rel_col]] <- as.factor(test[[rel_col]])
}
# levels combining --------------------------------------------------------
# Combined factor levels of one column across the training and test sets.
# Reads the data frames `train` and `test` from the enclosing environment.
#   na: name of the column to look up in both data frames.
# Returns the levels of train[, na] followed by any levels of test[, na] that
# are not already present (no duplicates).
le <- function(na) {
  train_levels <- levels(train[, na])
  test_levels <- levels(test[, na])
  unique(c(train_levels, test_levels))
}
# Give each relationship column the same level set in both data frames.
# The merged levels are computed once per column: after `train` has been
# re-levelled, a second call to le() would return the same vector.
for (slot in seq_len(24)) {
  rel_col <- paste0("relationship", slot)
  shared_levels <- le(na = rel_col)
  train[[rel_col]] <- factor(train[[rel_col]], levels = shared_levels)
  test[[rel_col]] <- factor(test[[rel_col]], levels = shared_levels)
}
# Removing uninformative columns --------------------------------------------
# Drop factor columns that have exactly one level. vapply() always returns a
# logical vector, and drop = FALSE keeps `train` a data frame even if only a
# single column survives.
single_level <- vapply(train, function(col) nlevels(col) == 1, logical(1))
train <- train[, !single_level, drop = FALSE]
# Drop columns in which no value differs from 0. NA cells are skipped
# (na.rm = TRUE) so a missing value cannot turn the column mask into NA and
# break the subsetting.
has_nonzero <- colSums(train != 0, na.rm = TRUE) != 0
train <- train[, has_nonzero, drop = FALSE]
# Keep in the test set only the columns that survived in the training set.
test <- test[, names(test) %in% names(train), drop = FALSE]
# svm ---------------------------------------------------------------------
# Fit a support vector machine with claim_status as the response and every
# other remaining column of `train` as a predictor, using svm()'s defaults.
# NOTE(review): the summary printed below reports "SVM-Type: eps-regression",
# i.e. claim_status is being modelled as a number rather than a class label.
# If classification is intended, claim_status would presumably need to be a
# factor, and the numeric cut-off applied to the predictions would have to
# change with it — confirm.
svm_model <- svm(claim_status ~ ., data = train)
summary(svm_model)
##
## Call:
## svm(formula = claim_status ~ ., data = train)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 1
## gamma: 0.01052632
## epsilon: 0.1
##
##
## Number of Support Vectors: 451
# Score the test set with the fitted SVM; the first column of `test`
# (presumably claim_status) is left out of the predictors. predict() for svm
# objects has no `type` argument, so the previously supplied
# type = 'response' was silently ignored and is omitted.
svm_predicted <- predict(svm_model, test[, -1])
# Turn the continuous SVM output into 0/1 labels.
# NOTE(review): the 0.03 cut-off is hard-coded and its origin is not visible
# here — confirm.
svm_pred <- ifelse(svm_predicted > 0.03, 1, 0)
# Hand confusionMatrix() two factors with the same explicit 0/1 levels. This
# keeps the table well-formed when a class is never predicted, and current
# caret releases require factor input.
# NOTE(review): assumes claim_status only takes the values 0 and 1 — confirm.
# NOTE(review): the positive class defaults to the first level ("0"); if a
# claim ("1") is the event of interest, pass positive = "1" — confirm.
svm_output <- confusionMatrix(
  factor(svm_pred, levels = c(0, 1)),
  factor(test$claim_status, levels = c(0, 1))
)
svm_output
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 231 6
## 1 1 0
##
## Accuracy : 0.9706
## 95% CI : (0.9403, 0.9881)
## No Information Rate : 0.9748
## P-Value [Acc > NIR] : 0.7458
##
## Kappa : -0.0073
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.9957
## Specificity : 0.0000
## Pos Pred Value : 0.9747
## Neg Pred Value : 0.0000
## Prevalence : 0.9748
## Detection Rate : 0.9706
## Detection Prevalence : 0.9958
## Balanced Accuracy : 0.4978
##
## 'Positive' Class : 0
##
# glm ---------------------------------------------------------------------
# Fit a binomial-family GLM (logistic regression) with claim_status as the
# response and every other remaining column of `train` as a predictor.
glm_model <-
glm(claim_status ~ .,
data = train,
family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Names of the columns of `train` whose name contains "relationship".
train_relation <- train %>%
  select(contains("relationship")) %>%
  names()
# Merge the test-set levels of each of those columns into the levels recorded
# on the fitted model (presumably so predict() accepts every level found in
# `test`).
for (rel_col in train_relation) {
  known_levels <- glm_model$xlevels[[rel_col]]
  glm_model$xlevels[[rel_col]] <- union(known_levels, levels(test[, rel_col]))
}
# Predicted values on the response scale for the test rows; the first column
# of `test` is left out of the predictors.
glm_predicted <- predict(glm_model, newdata = test[, -1], type = 'response')
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
# Label a row 1 when its predicted value exceeds 0.5, otherwise 0.
glm_predict <- ifelse(glm_predicted > 0.5, 1, 0)
# Hand confusionMatrix() two factors with the same explicit 0/1 levels. With
# bare numeric vectors the level sets can differ (presumably because only one
# class was predicted), which is what the "Levels are not in the same order"
# warning reports; current caret releases also require factor input.
# NOTE(review): assumes claim_status only takes the values 0 and 1 — confirm.
glm_output <- confusionMatrix(
  factor(glm_predict, levels = c(0, 1)),
  factor(test$claim_status, levels = c(0, 1))
)
## Warning in confusionMatrix.default(glm_predict, test$claim_status): Levels
## are not in the same order for reference and data. Refactoring data to
## match.
# Print the confusion matrix and statistics for the GLM predictions.
glm_output
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 232 6
## 1 0 0
##
## Accuracy : 0.9748
## 95% CI : (0.9459, 0.9907)
## No Information Rate : 0.9748
## P-Value [Acc > NIR] : 0.60631
##
## Kappa : 0
## Mcnemar's Test P-Value : 0.04123
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.9748
## Neg Pred Value : NaN
## Prevalence : 0.9748
## Detection Rate : 0.9748
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : 0
##
# Put the raw scores and 0/1 labels from both models in front of the test
# columns, one row per test record.
test1 <- data.frame(svm_predicted, svm_pred, glm_predicted, glm_predict, test)
#return(list(svm_output, glm_output,test1)) #returning multiple objects
#}
#a<-suppressWarnings(Health(trainsheet = "life_Line_AME_sheet2_out", testsheet = "life_Line_AME_sheet8_out"))
#result<-a[[3]]