# Q1
library(ISLR2)
library(elasticnet) # supplies the ridge/lasso fits used through caret in Q3
## Loading required package: lars
## Loaded lars 1.3
library(naivebayes)
## naivebayes 0.9.7 loaded
library(class) # provides knn() and knn.cv() for KNN
# Classification Models for Crime Rate Prediction
data(Boston)
# Create binary response variable for crime rate above or below median
Boston$CrimeAboveMedian <- as.factor(ifelse(Boston$crim > median(Boston$crim), 1, 0))
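# Sanity check (sketch): a median split should give a balanced factor, which
# the 0.5/0.5 prior probabilities reported by lda() below also confirm.
table(Boston$CrimeAboveMedian)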
# Logistic Regression (crim is retained as a predictor even though the
# response is derived from it, which causes the separation warnings below)
logistic_model <- glm(CrimeAboveMedian ~ ., data = Boston, family = binomial)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logistic_model)
##
## Call:
## glm(formula = CrimeAboveMedian ~ ., family = binomial, data = Boston)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.270e+02 2.030e+05 -0.002 0.999
## crim 1.056e+03 2.021e+04 0.052 0.958
## zn 2.251e+00 6.284e+01 0.036 0.971
## indus -3.859e+00 1.542e+03 -0.003 0.998
## chas -5.407e+00 1.089e+04 0.000 1.000
## nox 1.467e+02 2.190e+05 0.001 0.999
## rm -4.152e+01 1.990e+03 -0.021 0.983
## age 4.756e-01 8.017e+01 0.006 0.995
## dis -1.335e+01 2.827e+03 -0.005 0.996
## rad -4.353e+00 3.454e+03 -0.001 0.999
## tax -1.346e-01 1.581e+02 -0.001 0.999
## ptratio 1.464e+01 6.733e+03 0.002 0.998
## lstat -9.119e-01 5.204e+02 -0.002 0.999
## medv 3.491e+00 7.710e+02 0.005 0.996
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7.0146e+02 on 505 degrees of freedom
## Residual deviance: 2.8134e-05 on 492 degrees of freedom
## AIC: 28
##
## Number of Fisher Scoring iterations: 25
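# The warnings above signal perfect separation: crim is a predictor while the
# response is defined from crim. A sketch of a refit that drops crim (the
# accuracy value is illustrative, not output reproduced from above):
logistic_model2 <- glm(CrimeAboveMedian ~ . - crim, data = Boston, family = binomial)
logistic_pred <- ifelse(predict(logistic_model2, type = "response") > 0.5, "1", "0")
mean(logistic_pred == Boston$CrimeAboveMedian) # training accuracy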
# LDA
lda_model <- MASS::lda(CrimeAboveMedian ~ ., data = Boston)
lda_model
## Call:
## lda(CrimeAboveMedian ~ ., data = Boston)
##
## Prior probabilities of groups:
## 0 1
## 0.5 0.5
##
## Group means:
## crim zn indus chas nox rm age dis
## 0 0.0955715 21.525692 7.002292 0.05138340 0.4709711 6.394395 51.31028 5.091596
## 1 7.1314756 1.201581 15.271265 0.08695652 0.6384190 6.174874 85.83953 2.498489
## rad tax ptratio lstat medv
## 0 4.158103 305.7431 17.90711 9.419486 24.94941
## 1 14.940711 510.7312 19.00395 15.886640 20.11621
##
## Coefficients of linear discriminants:
## LD1
## crim 0.0057477432
## zn -0.0055783361
## indus 0.0133950314
## chas -0.0683284866
## nox 8.2352660572
## rm 0.1127191607
## age 0.0109751104
## dis 0.0431741184
## rad 0.0723695021
## tax -0.0008391622
## ptratio 0.0473594598
## lstat 0.0158822769
## medv 0.0361430310
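# Sketch: in-sample class predictions and a confusion matrix for the LDA fit
# (predict.lda with no newdata scores the training data).
lda_pred <- predict(lda_model)$class
table(Predicted = lda_pred, Actual = Boston$CrimeAboveMedian)
mean(lda_pred == Boston$CrimeAboveMedian) # training accuracy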
# Naive Bayes
naive_bayes_model <- naive_bayes(CrimeAboveMedian ~ ., data = Boston)
naive_bayes_model
##
## ================================== Naive Bayes ==================================
##
## Call:
## naive_bayes.formula(formula = CrimeAboveMedian ~ ., data = Boston)
##
## ---------------------------------------------------------------------------------
##
## Laplace smoothing: 0
##
## ---------------------------------------------------------------------------------
##
## A priori probabilities:
##
## 0 1
## 0.5 0.5
##
## ---------------------------------------------------------------------------------
##
## Tables:
##
## ---------------------------------------------------------------------------------
## ::: crim (Gaussian)
## ---------------------------------------------------------------------------------
##
## crim 0 1
## mean 0.09557150 7.13147561
## sd 0.06281773 11.10912294
##
## ---------------------------------------------------------------------------------
## ::: zn (Gaussian)
## ---------------------------------------------------------------------------------
##
## zn 0 1
## mean 21.525692 1.201581
## sd 29.319808 4.798611
##
## ---------------------------------------------------------------------------------
## ::: indus (Gaussian)
## ---------------------------------------------------------------------------------
##
## indus 0 1
## mean 7.002292 15.271265
## sd 5.514454 5.439010
##
## ---------------------------------------------------------------------------------
## ::: chas (Gaussian)
## ---------------------------------------------------------------------------------
##
## chas 0 1
## mean 0.05138340 0.08695652
## sd 0.22121612 0.28232985
##
## ---------------------------------------------------------------------------------
## ::: nox (Gaussian)
## ---------------------------------------------------------------------------------
##
## nox 0 1
## mean 0.47097115 0.63841897
## sd 0.05559789 0.09870365
##
## ---------------------------------------------------------------------------------
##
## # ... and 8 more tables
##
## ---------------------------------------------------------------------------------
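# Sketch: class predictions and training accuracy for the naive Bayes fit;
# the response column is dropped from newdata to avoid a feature-mismatch
# warning from predict.naive_bayes().
nb_pred <- predict(naive_bayes_model, newdata = Boston[, -14])
table(Predicted = nb_pred, Actual = Boston$CrimeAboveMedian)
mean(nb_pred == Boston$CrimeAboveMedian) # training accuracy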
# KNN with leave-one-out cross-validation (column 14 is the response factor)
knn_model <- knn.cv(train = Boston[, -14], cl = Boston$CrimeAboveMedian, k = 5)
knn_model
## [1] 0 0 0 0 0 0 1 0 1 0 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0
## [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [75] 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
## [223] 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
## [260] 1 1 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [371] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [408] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [445] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [482] 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## Levels: 0 1
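# Sketch: knn.cv() already performs leave-one-out cross-validation, so the
# LOOCV error rate follows directly from the returned labels.
mean(knn_model != Boston$CrimeAboveMedian) # LOOCV error rate
# Illustrative variant: drop crim (it defines the response) and standardize
# the predictors, then compare a few choices of k (results are data-dependent).
sapply(c(1, 3, 5, 10), function(k)
  mean(knn.cv(train = scale(Boston[, -c(1, 14)]),
              cl = Boston$CrimeAboveMedian, k = k) != Boston$CrimeAboveMedian))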
# Q2
#(a) Best subset selection has the smallest training RSS. For each size k it
# examines all models containing exactly k predictors, whereas forward and
# backward stepwise only examine a nested sequence of models, so they can at
# best tie it on training RSS.
#(b) Cannot be determined. Training RSS says nothing definitive about test
# RSS: best subset may overfit, so any of the three methods could produce the
# model with the smallest test RSS, depending on the data.
#(c) Evaluating each statement:
#i. True. Forward stepwise builds the (k + 1)-variable model by adding one
# predictor to its k-variable model, so the k-variable model is nested in it.
#ii. True. Backward stepwise obtains its k-variable model by deleting one
# predictor from its (k + 1)-variable model, so the k-variable model is a
# subset of it.
#iii. False. Forward and backward stepwise follow different search paths, so
# the predictors in the k-variable backward model need not all appear in the
# (k + 1)-variable forward model.
#iv. False. By the same reasoning, the k-variable forward model need not be
# contained in the (k + 1)-variable backward model.
#v. False. Best subset re-searches all candidate models at each size, so the
# best k-variable model can contain predictors absent from the best
# (k + 1)-variable model; there is no nesting guarantee (the sketch below
# illustrates this with regsubsets).
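# A sketch illustrating (c)(v) with the leaps package (assumed installed):
# summary()$which reports which variables enter the best model of each size,
# and the best k-variable model need not be nested in the best (k + 1)-variable one.
library(leaps)
best_fit <- regsubsets(crim ~ ., data = Boston[, 1:13], nvmax = 12)
summary(best_fit)$which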
# Q3
# Predicting Number of Applications Received
# Load required packages
library(ISLR2)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
# Load College dataset
data("College")
# Set seed for reproducibility
set.seed(123)
# Split the data into training and test sets
trainIndex <- createDataPartition(College$Apps, p = 0.7, list = FALSE)
trainData <- College[trainIndex, ]
testData <- College[-trainIndex, ]
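# Sketch: confirm the 70/30 split (College has 777 rows, so roughly 545/232).
nrow(trainData); nrow(testData)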
# Fit linear model on the training set
lm_model <- lm(Apps ~ ., data = trainData)
# Predict on the test set
lm_pred <- predict(lm_model, newdata = testData)
# Calculate test error for linear model (mean squared error)
lm_test_error <- mean((lm_pred - testData$Apps)^2)
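# For scale (sketch): the square root of the test MSE is the RMSE in units of
# applications received, here roughly sqrt(1882074) = 1372 applications.
sqrt(lm_test_error)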
# Fit ridge regression with cross-validation over the penalty
# (caret's "ridge" method tunes lambda via the elasticnet package)
ridge_model <- train(Apps ~ ., data = trainData, method = "ridge",
                     trControl = trainControl(method = "cv"),
                     tuneLength = 10)
# Predict on the test set
ridge_pred <- predict(ridge_model, newdata = testData)
# Calculate test error for ridge regression model (mean squared error)
ridge_test_error <- mean((ridge_pred - testData$Apps)^2)
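# Sketch: the cross-validated ridge penalty caret selected can be inspected
# via bestTune (the exact value depends on the resampling).
ridge_model$bestTune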
# Fit the lasso with cross-validation over the shrinkage fraction
# (caret's "lasso" method tunes the L1 fraction via elasticnet)
lasso_model <- train(Apps ~ ., data = trainData, method = "lasso",
                     trControl = trainControl(method = "cv"),
                     tuneLength = 10)
# Predict on the test set
lasso_pred <- predict(lasso_model, newdata = testData)
# Calculate test error for lasso model (mean squared error)
lasso_test_error <- mean((lasso_pred - testData$Apps)^2)
# Number of non-zero coefficient estimates in the lasso model (note: coef()
# does not return the fitted coefficients for the enet object in finalModel,
# which is why 0 is printed below; see the corrected sketch after the output)
num_non_zero <- sum(coef(lasso_model$finalModel) != 0)
# Print the test errors and the number of non-zero coefficients
print(paste("Linear Model Test Error:", lm_test_error))
## [1] "Linear Model Test Error: 1882073.83239865"
print(paste("Ridge Regression Test Error:", ridge_test_error))
## [1] "Ridge Regression Test Error: 1893106.5864917"
print(paste("Lasso Model Test Error:", lasso_test_error))
## [1] "Lasso Model Test Error: 2006127.86876507"
print(paste("Number of Non-Zero Coefficients in Lasso Model:", num_non_zero))
## [1] "Number of Non-Zero Coefficients in Lasso Model: 0"