The sensitivity (true positive rate) of this model is very low. This is a symptom of class imbalance: the event class ("yes") and the non-event class ("no") do not occur in the dependent variable in anywhere near equal proportions, so the model learns to favor the majority class. To correct this, the training data can be rebalanced with the ROSE package, which creates over-sampled, under-sampled, and 'both'-sampled versions of the data.
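Before resampling, the imbalance can be confirmed directly. A minimal check (assuming train and y are the training split and dependent variable used throughout this section):

table(train$y)              # raw counts of "no" vs "yes"
prop.table(table(train$y))  # class proportions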
# Using the ROSE package to create over-sampled, under-sampled and 'both'-sampled data
library(ROSE)
set.seed(1234)
# Over-sampling: replicate minority-class rows until the data set has N = 63876 rows
over <- ovun.sample(y ~ ., data = train, method = "over", N = 63876)$data
set.seed(1234)
# Under-sampling: drop majority-class rows until N = 8464 rows remain
under <- ovun.sample(y ~ ., data = train, method = "under", N = 8464)$data
set.seed(1234)
# Both: over- and under-sample to N = 36170 rows, with p = 0.5 targeting equal class shares
both <- ovun.sample(y ~ ., data = train, method = "both", N = 36170, p = 0.5)$data
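As a quick sanity check (not part of the original output), the class counts in each resampled set can be inspected to confirm they are now balanced:

table(over$y)   # over-sampled: classes roughly equal within N = 63876 rows
table(under$y)  # under-sampled: classes roughly equal within N = 8464 rows
table(both$y)   # 'both': p = 0.5 targets roughly equal shares of N = 36170 rows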
# Over-sampled data: prediction and ROC curve
model2 <- train(y ~ ., data = over, method = "rpart", trControl = cv, tuneLength = 10)
predicted2 <- predict(model2, newdata = test, type = "raw")
confusionMatrix(predicted2, test$y, positive = "yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 6447 190
## yes 1537 867
##
## Accuracy : 0.809
## 95% CI : (0.8007, 0.817)
## No Information Rate : 0.8831
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.4043
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8202
## Specificity : 0.8075
## Pos Pred Value : 0.3606
## Neg Pred Value : 0.9714
## Prevalence : 0.1169
## Detection Rate : 0.0959
## Detection Prevalence : 0.2659
## Balanced Accuracy : 0.8139
##
## 'Positive' Class : yes
##
# Sensitivity : 0.8202
predicted22 <- predict(model2, newdata = test, type = "prob")
par(pty = "s")  # square plotting region, so the ROC curve is drawn to scale
roc(test$y, predicted22$yes, plot = TRUE,
    legacy.axes = TRUE, percent = TRUE,
    xlab = "False Positive Percentage", ylab = "True Positive Percentage",
    col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases

##
## Call:
## roc.default(response = test$y, predictor = predicted22$yes, percent = TRUE, plot = TRUE, legacy.axes = TRUE, xlab = "False Positive Percentage", ylab = "True Positive Percentage", col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
##
## Data: predicted22$yes in 7984 controls (test$y no) < 1057 cases (test$y yes).
## Area under the curve: 87.21%
# Area under the curve: 87.21%
# Under-sampled data: prediction and ROC curve
model3 <- train(y ~ ., data = under, method = "rpart", trControl = cv, tuneLength = 10)
predicted3 <- predict(model3, newdata = test, type = "raw")
confusionMatrix(predicted3, test$y, positive = "yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 6326 168
## yes 1658 889
##
## Accuracy : 0.798
## 95% CI : (0.7896, 0.8063)
## No Information Rate : 0.8831
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.393
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.84106
## Specificity : 0.79233
## Pos Pred Value : 0.34904
## Neg Pred Value : 0.97413
## Prevalence : 0.11691
## Detection Rate : 0.09833
## Detection Prevalence : 0.28172
## Balanced Accuracy : 0.81670
##
## 'Positive' Class : yes
##
# Sensitivity : 0.84106
predicted33 <- predict(model3, newdata = test, type = "prob")
roc(test$y, predicted33$yes, plot = TRUE,
    legacy.axes = TRUE, percent = TRUE,
    xlab = "False Positive Percentage", ylab = "True Positive Percentage",
    col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases

##
## Call:
## roc.default(response = test$y, predictor = predicted33$yes, percent = TRUE, plot = TRUE, legacy.axes = TRUE, xlab = "False Positive Percentage", ylab = "True Positive Percentage", col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
##
## Data: predicted33$yes in 7984 controls (test$y no) < 1057 cases (test$y yes).
## Area under the curve: 87.12%
# Area under the curve: 87.12%
# 'Both'-sampled data: prediction and ROC curve
model4 <- train(y ~ ., data = both, method = "rpart", trControl = cv, tuneLength = 10)
predicted4 <- predict(model4, newdata = test, type = "raw")
confusionMatrix(predicted4, test$y, positive = "yes")
## Confusion Matrix and Statistics
##
## Reference
## Prediction no yes
## no 6362 166
## yes 1622 891
##
## Accuracy : 0.8022
## 95% CI : (0.7939, 0.8104)
## No Information Rate : 0.8831
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.4005
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.84295
## Specificity : 0.79684
## Pos Pred Value : 0.35456
## Neg Pred Value : 0.97457
## Prevalence : 0.11691
## Detection Rate : 0.09855
## Detection Prevalence : 0.27796
## Balanced Accuracy : 0.81990
##
## 'Positive' Class : yes
##
# Sensitivity : 0.84295
predicted44 <- predict(model4, newdata = test, type = "prob")
roc(test$y, predicted44$yes, plot = TRUE,
    legacy.axes = TRUE, percent = TRUE,
    xlab = "False Positive Percentage", ylab = "True Positive Percentage",
    col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases

##
## Call:
## roc.default(response = test$y, predictor = predicted44$yes, percent = TRUE, plot = TRUE, legacy.axes = TRUE, xlab = "False Positive Percentage", ylab = "True Positive Percentage", col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
##
## Data: predicted44$yes in 7984 controls (test$y no) < 1057 cases (test$y yes).
## Area under the curve: 86.76%
# Area under the curve: 86.76%
# Combined ROC curves of all three models
roc(test$y, predicted22$yes, plot = TRUE,
    legacy.axes = TRUE, percent = TRUE,
    xlab = "False Positive Percentage", ylab = "True Positive Percentage",
    col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
##
## Call:
## roc.default(response = test$y, predictor = predicted22$yes, percent = TRUE, plot = TRUE, legacy.axes = TRUE, xlab = "False Positive Percentage", ylab = "True Positive Percentage", col = "red", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 70)
##
## Data: predicted22$yes in 7984 controls (test$y no) < 1057 cases (test$y yes).
## Area under the curve: 87.21%
plot.roc(test$y, predicted33$yes, legacy.axes = TRUE, percent = TRUE,
         col = "blue", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 60, add = TRUE)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
plot.roc(test$y, predicted44$yes, legacy.axes = TRUE, percent = TRUE,
         col = "green", lwd = 1, print.auc = TRUE, print.auc.x = 50, print.auc.y = 50, add = TRUE)
## Setting levels: control = no, case = yes
## Setting direction: controls < cases
legend("bottomright", bg = "transparent", legend = c("Over", "Under", "Both"),
       col = c("red", "blue", "green"), cex = 0.55, lwd = 2)
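For a side-by-side view, the headline numbers from the three runs can be collected into a small data frame. This is only a convenience summary; the values are transcribed from the outputs above, not recomputed:

# Summary of the three balancing strategies (values copied from the outputs above)
results <- data.frame(
  strategy    = c("over", "under", "both"),
  sensitivity = c(0.8202, 0.84106, 0.84295),
  auc_percent = c(87.21, 87.12, 86.76)
)
results

All three balancing strategies perform comparably, lifting sensitivity to roughly 0.82-0.84 with an AUC near 87% in every case, a substantial improvement over the low sensitivity of the unbalanced model noted at the start of this section.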
