Note: All of the data can be accessed through the ISLR2 package.
Hint: You will have to create the response variable yourself, using the variables contained in the Boston data set.
# Load required packages
library(ISLR2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
##
## Boston
library(e1071)
library(class)
# Load Boston data set
data("Boston")
# Create response variable: crime_rate_above_median
crime_rate_above_median <- ifelse(Boston$crim > median(Boston$crim), 1, 0)
# Add the response variable to the Boston data set
Boston$crime_rate_above_median <- crime_rate_above_median
# Logistic Regression
logistic_model <- glm(crime_rate_above_median ~ ., data = Boston, family = "binomial")
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logistic_model)
##
## Call:
## glm(formula = crime_rate_above_median ~ ., family = "binomial",
## data = Boston)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.584e-03 -2.000e-08 0.000e+00 2.000e-08 2.655e-03
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.437e+01 1.202e+05 0.000 1.000
## crim 1.083e+03 1.773e+04 0.061 0.951
## zn 2.194e+00 5.856e+01 0.037 0.970
## indus -2.510e+00 9.002e+02 -0.003 0.998
## chas 4.489e+00 1.014e+04 0.000 1.000
## nox -2.585e+02 1.458e+05 -0.002 0.999
## rm -3.953e+01 1.653e+03 -0.024 0.981
## age 3.437e-01 5.798e+01 0.006 0.995
## dis -1.742e+01 2.146e+03 -0.008 0.994
## rad -5.933e+00 2.642e+03 -0.002 0.998
## tax 1.639e-01 1.078e+02 0.002 0.999
## ptratio 5.525e+00 3.640e+03 0.002 0.999
## black 3.266e-02 1.208e+01 0.003 0.998
## lstat -1.687e+00 3.560e+02 -0.005 0.996
## medv 2.358e+00 5.382e+02 0.004 0.997
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7.0146e+02 on 505 degrees of freedom
## Residual deviance: 2.8371e-05 on 491 degrees of freedom
## AIC: 30
##
## Number of Fisher Scoring iterations: 25
We fit a logistic regression model with the glm function and family = "binomial", using every column of the augmented Boston data frame as a predictor. Because the response crime_rate_above_median was created by thresholding crim at its median, and crim itself is still included among the predictors, the two classes are perfectly separable. This is why glm warns that the algorithm did not converge and that fitted probabilities of 0 or 1 occurred, and why the residual deviance is essentially zero while every coefficient has an enormous standard error. The coefficients and p-values above should therefore be interpreted with caution.
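One way to avoid the separation problem is to drop crim from the formula. A minimal sketch of such a refit is given below; the resulting coefficients will of course differ from the output above.
# Refit the logistic regression without crim, the variable used to define the response
logistic_model2 <- glm(crime_rate_above_median ~ . - crim, data = Boston, family = "binomial")
summary(logistic_model2)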
# LDA
lda_model <- lda(crime_rate_above_median ~ ., data = Boston)
lda_model
## Call:
## lda(crime_rate_above_median ~ ., data = Boston)
##
## Prior probabilities of groups:
## 0 1
## 0.5 0.5
##
## Group means:
## crim zn indus chas nox rm age dis
## 0 0.0955715 21.525692 7.002292 0.05138340 0.4709711 6.394395 51.31028 5.091596
## 1 7.1314756 1.201581 15.271265 0.08695652 0.6384190 6.174874 85.83953 2.498489
## rad tax ptratio black lstat medv
## 0 4.158103 305.7431 17.90711 388.7061 9.419486 24.94941
## 1 14.940711 510.7312 19.00395 324.6420 15.886640 20.11621
##
## Coefficients of linear discriminants:
## LD1
## crim 0.0046376592
## zn -0.0056431194
## indus 0.0126159626
## chas -0.0592836851
## nox 8.1826206579
## rm 0.0874007870
## age 0.0112829040
## dis 0.0453643651
## rad 0.0699133176
## tax -0.0008444666
## ptratio 0.0513806507
## black -0.0009892799
## lstat 0.0143945059
## medv 0.0386990631
We fit an LDA model with the lda function from the MASS package, again using every column of the augmented Boston data as a predictor (including crim). The prior probabilities are 0.5 for each class, as expected for a median split, and the output reports the group means of each predictor and the coefficients of the single linear discriminant.
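To see how this fit classifies observations, we can compute in-sample predictions and a confusion matrix. The sketch below is illustrative; its output is not shown above.
# In-sample LDA predictions and confusion matrix
lda_pred <- predict(lda_model, Boston)$class
table(lda_pred, Boston$crime_rate_above_median)
mean(lda_pred == Boston$crime_rate_above_median)  # training accuracy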
# Naive Bayes
naive_bayes_model <- naiveBayes(as.factor(crime_rate_above_median) ~ ., data = Boston)
naive_bayes_model
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## 0 1
## 0.5 0.5
##
## Conditional probabilities:
## crim
## Y [,1] [,2]
## 0 0.0955715 0.06281773
## 1 7.1314756 11.10912294
##
## zn
## Y [,1] [,2]
## 0 21.525692 29.319808
## 1 1.201581 4.798611
##
## indus
## Y [,1] [,2]
## 0 7.002292 5.514454
## 1 15.271265 5.439010
##
## chas
## Y [,1] [,2]
## 0 0.05138340 0.2212161
## 1 0.08695652 0.2823299
##
## nox
## Y [,1] [,2]
## 0 0.4709711 0.05559789
## 1 0.6384190 0.09870365
##
## rm
## Y [,1] [,2]
## 0 6.394395 0.5556856
## 1 6.174874 0.8101381
##
## age
## Y [,1] [,2]
## 0 51.31028 25.88190
## 1 85.83953 17.87423
##
## dis
## Y [,1] [,2]
## 0 5.091596 2.081304
## 1 2.498489 1.085521
##
## rad
## Y [,1] [,2]
## 0 4.158103 1.659121
## 1 14.940711 9.529843
##
## tax
## Y [,1] [,2]
## 0 305.7431 87.4837
## 1 510.7312 167.8553
##
## ptratio
## Y [,1] [,2]
## 0 17.90711 1.811216
## 1 19.00395 2.346947
##
## black
## Y [,1] [,2]
## 0 388.7061 22.83774
## 1 324.6420 118.83084
##
## lstat
## Y [,1] [,2]
## 0 9.419486 4.923497
## 1 15.886640 7.546922
##
## medv
## Y [,1] [,2]
## 0 24.94941 7.232047
## 1 20.11621 10.270362
We fit a Naive Bayes model with the naiveBayes function from the e1071 package. Because all of the predictors are numeric, the "conditional probabilities" reported are in fact the conditional mean ([,1]) and standard deviation ([,2]) of each predictor within each class; these parameterize the Gaussian densities that the classifier combines to form posterior probabilities.
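As with LDA, the fitted model can be used to classify observations. The following sketch computes in-sample class predictions and posterior probabilities; its output is not shown above.
# In-sample Naive Bayes predictions and posterior probabilities
nb_pred <- predict(naive_bayes_model, Boston)
table(nb_pred, Boston$crime_rate_above_median)
head(predict(naive_bayes_model, Boston, type = "raw"))  # posterior class probabilities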
# KNN
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(Boston), 0.7*nrow(Boston)) # 70% for training
train_data <- Boston[train_indices, ]
test_data <- Boston[-train_indices, ]
knn_model <- knn(train_data[, -1], test_data[, -1], train_data$crime_rate_above_median, k = 5)
table(knn_model, test_data$crime_rate_above_median)
##
## knn_model 0 1
## 0 69 5
## 1 6 72
We split the Boston data into training (70%) and test (30%) sets and classify the test observations with the knn function from the class package, which uses Euclidean distance, choosing k = 5 neighbors. The confusion matrix compares the predictions with the true response on the test set (about 93% accuracy). Two caveats apply: train_data[, -1] drops crim but still leaves the response column crime_rate_above_median in the feature matrix, and the predictors are not standardized, so variables measured on large scales (such as tax) dominate the distance calculation. A corrected sketch follows.
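The sketch below excludes both crim and the response from the feature matrix and standardizes the predictors before running KNN; its results will differ from the table above.
# Standardize the predictors and exclude crim and the response column
features <- scale(Boston[, !(names(Boston) %in% c("crim", "crime_rate_above_median"))])
train_X <- features[train_indices, ]
test_X <- features[-train_indices, ]
knn_pred <- knn(train_X, test_X, train_data$crime_rate_above_median, k = 5)
table(knn_pred, test_data$crime_rate_above_median)
mean(knn_pred == test_data$crime_rate_above_median)  # test accuracy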
# Load required packages
#install.packages("leaps")
library(leaps)
# Load Boston data set
data("Boston")
# Create response variable
response <- Boston$crim
# Remove the response variable from the predictors
predictors <- Boston[, -1]
# Perform best subset selection
best_subset <- regsubsets(response ~ ., data = Boston, nvmax = ncol(predictors))
summary(best_subset)
## Subset selection object
## Call: regsubsets.formula(response ~ ., data = Boston, nvmax = ncol(predictors))
## 14 Variables (and intercept)
## Forced in Forced out
## crim FALSE FALSE
## zn FALSE FALSE
## indus FALSE FALSE
## chas FALSE FALSE
## nox FALSE FALSE
## rm FALSE FALSE
## age FALSE FALSE
## dis FALSE FALSE
## rad FALSE FALSE
## tax FALSE FALSE
## ptratio FALSE FALSE
## black FALSE FALSE
## lstat FALSE FALSE
## medv FALSE FALSE
## 1 subsets of each size up to 13
## Selection Algorithm: exhaustive
## crim zn indus chas nox rm age dis rad tax ptratio black lstat medv
## 1 ( 1 ) "*" " " " " " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " "*" " " " "
## 4 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " "*" " " "*"
## 5 ( 1 ) "*" " " " " " " "*" "*" " " " " " " " " " " "*" " " "*"
## 6 ( 1 ) "*" " " " " " " "*" "*" " " " " "*" " " " " "*" " " "*"
## 7 ( 1 ) "*" " " " " "*" "*" "*" " " " " "*" " " " " "*" " " "*"
## 8 ( 1 ) "*" " " " " "*" "*" "*" " " " " "*" "*" " " "*" " " "*"
## 9 ( 1 ) "*" "*" " " "*" "*" "*" " " " " "*" "*" " " "*" " " "*"
## 10 ( 1 ) "*" "*" " " "*" "*" "*" " " " " "*" "*" "*" "*" " " "*"
## 11 ( 1 ) "*" "*" " " "*" "*" "*" " " " " "*" "*" "*" "*" "*" "*"
## 12 ( 1 ) "*" "*" "*" "*" "*" "*" " " " " "*" "*" "*" "*" "*" "*"
## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " " "*" "*" "*" "*" "*" "*"
# Perform forward stepwise selection
forward_stepwise <- regsubsets(response ~ ., data = Boston, method = "forward")
summary(forward_stepwise)
## Subset selection object
## Call: regsubsets.formula(response ~ ., data = Boston, method = "forward")
## 14 Variables (and intercept)
## Forced in Forced out
## crim FALSE FALSE
## zn FALSE FALSE
## indus FALSE FALSE
## chas FALSE FALSE
## nox FALSE FALSE
## rm FALSE FALSE
## age FALSE FALSE
## dis FALSE FALSE
## rad FALSE FALSE
## tax FALSE FALSE
## ptratio FALSE FALSE
## black FALSE FALSE
## lstat FALSE FALSE
## medv FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: forward
## crim zn indus chas nox rm age dis rad tax ptratio black lstat medv
## 1 ( 1 ) "*" " " " " " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " "*" " " " "
## 4 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " "*" " " "*"
## 5 ( 1 ) "*" " " " " " " "*" "*" " " " " " " " " " " "*" " " "*"
## 6 ( 1 ) "*" " " " " " " "*" "*" " " " " "*" " " " " "*" " " "*"
## 7 ( 1 ) "*" " " " " "*" "*" "*" " " " " "*" " " " " "*" " " "*"
## 8 ( 1 ) "*" " " " " "*" "*" "*" " " " " "*" "*" " " "*" " " "*"
# Perform backward stepwise selection
backward_stepwise <- regsubsets(response ~ ., data = Boston, method = "backward")
summary(backward_stepwise)
## Subset selection object
## Call: regsubsets.formula(response ~ ., data = Boston, method = "backward")
## 14 Variables (and intercept)
## Forced in Forced out
## crim FALSE FALSE
## zn FALSE FALSE
## indus FALSE FALSE
## chas FALSE FALSE
## nox FALSE FALSE
## rm FALSE FALSE
## age FALSE FALSE
## dis FALSE FALSE
## rad FALSE FALSE
## tax FALSE FALSE
## ptratio FALSE FALSE
## black FALSE FALSE
## lstat FALSE FALSE
## medv FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: backward
## crim zn indus chas nox rm age dis rad tax ptratio black lstat medv
## 1 ( 1 ) "*" " " " " " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " "*" " " " "
## 4 ( 1 ) "*" " " " " " " "*" " " " " " " " " " " " " "*" " " "*"
## 5 ( 1 ) "*" " " " " " " "*" "*" " " " " " " " " " " "*" " " "*"
## 6 ( 1 ) "*" " " " " " " "*" "*" " " " " "*" " " " " "*" " " "*"
## 7 ( 1 ) "*" " " " " "*" "*" "*" " " " " "*" " " " " "*" " " "*"
## 8 ( 1 ) "*" " " " " "*" "*" "*" " " " " "*" "*" " " "*" " " "*"
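Note that response is simply a copy of Boston$crim, so the formula response ~ . with data = Boston keeps crim itself among the candidate predictors, which is why crim appears in every selected subset above. A sketch of the intended fits, regressing crim on the other 13 predictors, is:
# Subset selection with crim as the response and the remaining 13 variables as candidates
best_subset2 <- regsubsets(crim ~ ., data = Boston, nvmax = 13)
forward2 <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "forward")
backward2 <- regsubsets(crim ~ ., data = Boston, nvmax = 13, method = "backward")
summary(best_subset2)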
Which of the three models with k predictors has the smallest training RSS? For any given number of predictors k, best subset selection has the smallest training RSS (or ties for it), because it examines every possible model with k predictors, whereas forward and backward stepwise selection only search a restricted sequence of nested models.
Which of the three models with k predictors has the smallest test RSS? This cannot be determined from the training fits alone: the best-subset model has the lowest training RSS but may overfit, so any of the three could end up with the smallest test RSS. To compare them we would need to split the data into training and test sets (or use cross-validation) and compute the test RSS of each size-k model; the code above does not do this, but a sketch of one way to do it follows.
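The sketch below estimates the test MSE of each model size from a validation split. The helper predict_regsubsets is not part of the leaps package; it is defined here to rebuild predictions from the coefficients of a chosen subset.
# Validation-set estimate of test MSE for each subset size
set.seed(123)
train <- sample(c(TRUE, FALSE), nrow(Boston), replace = TRUE)
fit_train <- regsubsets(crim ~ ., data = Boston[train, ], nvmax = 13)

# Helper (not part of leaps): predict from a regsubsets fit for a given model size
predict_regsubsets <- function(object, newdata, id) {
  form <- as.formula(object$call[[2]])  # recover the model formula
  mat <- model.matrix(form, newdata)    # design matrix on the new data
  coefi <- coef(object, id = id)        # coefficients of the size-id model
  mat[, names(coefi)] %*% coefi
}

test_mse <- sapply(1:13, function(k) {
  pred <- predict_regsubsets(fit_train, Boston[!train, ], id = k)
  mean((Boston$crim[!train] - pred)^2)
})
which.min(test_mse)  # subset size with the smallest validation MSE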
True or False:
# Load required packages
library(ISLR2)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-6
# Load College data set
data("College")
# (a) Split the data set into a training set and a test set
set.seed(123) # For reproducibility
train_indices <- sample(1:nrow(College), 0.7 * nrow(College)) # 70% for training
train_data <- College[train_indices, ]
test_data <- College[-train_indices, ]
# (b) Fit a linear model using least squares on the training set and report the test error obtained
linear_model <- lm(Apps ~ ., data = train_data)
linear_predictions <- predict(linear_model, newdata = test_data)
linear_test_error <- mean((test_data$Apps - linear_predictions)^2)
linear_test_error
## [1] 1734841
# (c) Fit a ridge regression model on the training set, with λ chosen by cross-validation and report the test error obtained
ridge_model <- cv.glmnet(as.matrix(train_data[, -1]), train_data$Apps, alpha = 0, nfolds = 10)
ridge_predictions <- predict(ridge_model, newx = as.matrix(test_data[, -1]), s = "lambda.min")
ridge_test_error <- mean((test_data$Apps - ridge_predictions)^2)
ridge_test_error
## [1] 557372.2
# (d) Fit a lasso model on the training set, with λ chosen by cross-validation and report the test error obtained, along with the number of non-zero coefficient estimates
lasso_model <- cv.glmnet(as.matrix(train_data[, -1]), train_data$Apps, alpha = 1, nfolds = 10)
lasso_predictions <- predict(lasso_model, newx = as.matrix(test_data[, -1]), s = "lambda.min")
lasso_test_error <- mean((test_data$Apps - lasso_predictions)^2)
lasso_test_error
## [1] 19487.97
non_zero_coef <- sum(coef(lasso_model, s = "lambda.min") != 0)
non_zero_coef
## [1] 2
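A caveat for parts (c) and (d): as.matrix(train_data[, -1]) drops the Private indicator but keeps the response Apps in the predictor matrix, which very likely explains the tiny lasso test error and the fact that only two coefficients are non-zero. The sketch below builds a design matrix with model.matrix instead, encoding Private as a dummy variable and excluding Apps; the resulting test errors will differ from those reported above.
# Design matrices that encode Private as a dummy variable and exclude the response Apps
x_train <- model.matrix(Apps ~ ., data = train_data)[, -1]  # drop the intercept column
x_test <- model.matrix(Apps ~ ., data = test_data)[, -1]

ridge_cv <- cv.glmnet(x_train, train_data$Apps, alpha = 0, nfolds = 10)
mean((test_data$Apps - predict(ridge_cv, newx = x_test, s = "lambda.min"))^2)  # ridge test MSE

lasso_cv <- cv.glmnet(x_train, train_data$Apps, alpha = 1, nfolds = 10)
mean((test_data$Apps - predict(lasso_cv, newx = x_test, s = "lambda.min"))^2)  # lasso test MSE
sum(coef(lasso_cv, s = "lambda.min") != 0)  # non-zero coefficients (including intercept)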