library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(InformationValue)
##
## Attaching package: 'InformationValue'
## The following objects are masked from 'package:caret':
##
## confusionMatrix, precision, sensitivity, specificity
library(ISLR)
Logistic regression is used when the response variable is binary. We will also look at a 2x2 confusion matrix, which compares the values predicted by the model with the actual values from the test data set.
# Work on a copy of the `Default` data set (ISLR is attached above).
data <- Default

# Fix the RNG seed so the random split below is reproducible.
set.seed(1)

# Draw one TRUE/FALSE flag per row; TRUE (probability 0.7) marks a training
# row, FALSE (probability 0.3) a test row.
is_train <- sample(
  c(TRUE, FALSE),
  size = nrow(data),
  replace = TRUE,
  prob = c(0.7, 0.3)
)

# Partition the rows with the flag and its complement.
train <- data[is_train, ]
test <- data[!is_train, ]
Fitting a logistic regression model to our data set :)
# Logistic regression of `default` on `student`, `balance` and `income`,
# estimated on the training rows only.
model <- glm(
  formula = default ~ student + balance + income,
  data = train,
  family = "binomial"
)
Create Confusion Matrix
# Predicted probability of default for every row of the test set.
# NOTE: the result is stored under the name `predict`, which is also the name
# of the generic function; the name is kept because later chunks refer to it.
predict <- predict(model, newdata = test, type = "response")

# Recode the observed response from "Yes"/"No" to 1/0 so it can be compared
# with the predicted probabilities.
# NOTE: this overwrites `test$default`; re-running this line on the already
# recoded column would turn every value into 0.
test$default <- ifelse(test$default == "Yes", 1, 0)

# Cutoff probability selected by optimalCutoff() (its default criterion is the
# misclassification error). NOTE(review): the cutoff is tuned on the same test
# rows it is evaluated on, so the reported metrics are likely optimistic.
optimal <- InformationValue::optimalCutoff(test$default, predict)[1]

# Confusion matrix at the cutoff chosen above. Previously `threshold` was
# omitted, so the matrix was built at the function's default cutoff and
# `optimal` was never applied. The matrix echoed below in this document comes
# from that earlier call and must be refreshed by re-running.
# The call is namespace-qualified because caret exports a function with the
# same name.
InformationValue::confusionMatrix(test$default, predict, threshold = optimal)
## 0 1
## 0 2912 64
## 1 21 39
Evaluate the confusion matrix
# Sensitivity (true positive rate), evaluated at the `optimal` cutoff so that
# it agrees with the misclassification error computed further down.
# Previously `threshold` was omitted and the default cutoff was used; the
# value echoed below comes from that earlier call and must be refreshed.
InformationValue::sensitivity(test$default, predict, threshold = optimal)
## [1] 0.3786408
# Specificity (true negative rate), evaluated at the `optimal` cutoff so that
# it agrees with the misclassification error computed further down.
# Previously `threshold` was omitted and the default cutoff was used; the
# value echoed below comes from that earlier call and must be refreshed.
InformationValue::specificity(test$default, predict, threshold = optimal)
## [1] 0.9928401
# Total misclassification error rate on the test set, evaluated at the
# `optimal` cutoff.
InformationValue::misClassError(
  test$default,
  predict,
  threshold = optimal
)
## [1] 0.027
From our calculation, the total misclassification error rate for this model is 2.7%.