# Load the required libraries
library(ISLR)
library(caTools)
# Bring the `Default` data frame into the workspace
data("Default")
# (a) Logistic regression of default status on income and balance,
# estimated on every row of the data set
model <- glm(
  formula = default ~ income + balance,
  family = binomial,
  data = Default
)
# (b) Validation set approach
# Fix the RNG state before the first random split so that the numbers this
# script prints can be reproduced from run to run.
set.seed(1)
# Split the dataset into training and validation sets (SplitRatio = 0.7);
# rows flagged TRUE go to training, the remaining rows to validation
split <- sample.split(Default$default, SplitRatio = 0.7)
train <- subset(Default, split)
validation <- subset(Default, !split)
# Fit a logistic regression model using the training set only
model_train <- glm(default ~ income + balance, data = train, family = binomial)
# Obtain predicted probabilities for the validation set and classify an
# observation as a default when its predicted probability exceeds 0.5
validation$predicted_default <- predict(model_train, newdata = validation, type = "response")
validation$predicted_class <- ifelse(validation$predicted_default > 0.5, "Yes", "No")
# Compute the validation set error: the share of validation observations
# whose predicted class differs from the observed default status
validation_error <- mean(validation$predicted_class != validation$default)
cat("Validation set error:", validation_error, "\n")
# Output recorded from an earlier run (the exact value depends on the split):
## Validation set error: 0.02466667
# (c) Repeat the process three times, each with a fresh random split
num_splits <- 3
validation_errors <- numeric(num_splits)
# seq_len() is used instead of 1:num_splits so the loop body is skipped
# (rather than run for i = 1 and i = 0) if num_splits is ever set to 0
for (i in seq_len(num_splits)) {
  # Create a new random split
  split <- sample.split(Default$default, SplitRatio = 0.7)
  train <- subset(Default, split == TRUE)
  validation <- subset(Default, split == FALSE)
  # Fit logistic regression model using the training set
  model_train <- glm(default ~ income + balance, data = train, family = binomial)
  # Obtain predicted probabilities for the validation set and classify at 0.5
  validation$predicted_default <- predict(model_train, newdata = validation, type = "response")
  validation$predicted_class <- ifelse(validation$predicted_default > 0.5, "Yes", "No")
  # Compute the validation set error for this split
  validation_errors[i] <- mean(validation$predicted_class != validation$default)
}
# NOTE: `train` and `validation` are deliberately left as globals; after the
# loop they hold the last split, which the code further down reads.
cat("Validation set errors (three splits):", validation_errors, "\n")
# Output recorded from an earlier run (values depend on the random splits):
## Validation set errors (three splits): 0.028 0.02566667 0.026
# (d) Fit a logistic regression model with income, balance, and a dummy variable for student
# The model is fit on `train` only. Fitting on the full `Default` data would
# let the model see the validation rows it is scored on below, so the
# resulting number would not be a genuine hold-out error estimate.
model_student <- glm(default ~ income + balance + student, data = train, family = binomial)
# Obtain predicted probabilities for the validation set and classify at 0.5
validation$predicted_default_student <- predict(model_student, newdata = validation, type = "response")
validation$predicted_class_student <- ifelse(validation$predicted_default_student > 0.5, "Yes", "No")
# Compute the validation set error for the model with the dummy variable
validation_error_student <- mean(validation$predicted_class_student != validation$default)
cat("Validation set error (model with student dummy variable):", validation_error_student, "\n")
# Output recorded earlier, presumably with the model fit on all of `Default`
# (validation rows included); re-run to obtain the hold-out figure:
## Validation set error (model with student dummy variable): 0.02733333