library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(InformationValue)
## 
## Attaching package: 'InformationValue'
## The following objects are masked from 'package:caret':
## 
##     confusionMatrix, precision, sensitivity, specificity
library(ISLR)

Logistic regression is used when the response variable is binary. We will also look at a 2x2 confusion matrix, which compares the values predicted by the model with the actual values from the test data set.

# Work on the `Default` data set (expected to come from the ISLR package
# loaded above).
data <- Default

# Randomly flag each row as training (TRUE, ~70%) or testing (FALSE, ~30%).
# The seed is fixed so the split is reproducible.
set.seed(1)
in_train <- sample(
  c(TRUE, FALSE),
  size = nrow(data),
  replace = TRUE,
  prob = c(0.7, 0.3)
)

# Flagged rows form the training set; the remaining rows form the test set.
train <- data[in_train, ]
test <- data[!in_train, ]

Fitting a logistic regression model to our data set :)

# Fit a logistic regression of `default` on student status, balance and
# income, using only the training rows.

model <- glm(
  formula = default ~ student + balance + income,
  family = "binomial",
  data = train
)

Create Confusion Matrix

# Use the fitted model to predict the probability of default for each test row.
# The result keeps the name `predict` because the evaluation code that follows
# refers to it by that name.

predict <- predict(model, newdata = test, type = "response")

# Recode the observed outcome from "Yes"/"No" to 1/0, the numeric form passed
# to the InformationValue functions below.

test$default <- ifelse(test$default == "Yes", 1, 0)

# Find the cutoff probability that maximises accuracy.
# NOTE(review): the cutoff is tuned on the same test rows it is then evaluated
# on, so the reported metrics are likely optimistic -- consider tuning it on
# training or validation data instead.

optimal <- optimalCutoff(test$default, predict)[1]

# Build the confusion matrix at the optimal cutoff. Without `threshold`,
# confusionMatrix() falls back to its default of 0.5 and `optimal` is ignored.
# The call is namespaced because caret also exports a confusionMatrix(), so
# the unqualified name depends on package load order.

InformationValue::confusionMatrix(test$default, predict, threshold = optimal)
## NOTE: the output below was rendered with the default 0.5 threshold;
## re-run this chunk to refresh it for the optimal cutoff.
##      0  1
## 0 2912 64
## 1   21 39

Evaluate the confusion matrix

# Sensitivity: true positive rate, evaluated at the optimal cutoff so that it
# agrees with the misclassification error computed at the end of this chunk.
# Without `threshold`, the default of 0.5 would be used instead.
# Calls are namespaced because caret also exports sensitivity() and
# specificity(), so the unqualified names depend on package load order.

InformationValue::sensitivity(test$default, predict, threshold = optimal)
## NOTE: the value below was rendered with the default 0.5 threshold;
## re-run this chunk to refresh it.
## [1] 0.3786408
# Specificity: true negative rate, evaluated at the same optimal cutoff.

InformationValue::specificity(test$default, predict, threshold = optimal)
## NOTE: the value below was rendered with the default 0.5 threshold;
## re-run this chunk to refresh it.
## [1] 0.9928401
# Total misclassification error rate at the optimal cutoff.

misClassError(test$default, predict, threshold = optimal)
## [1] 0.027

From our calculation, the total misclassification error rate for this model is 2.7% at the optimal cutoff.