# Install only the required packages that are not already available, so that
# re-running the script does not reinstall everything each time.
required_pkgs <- c("ISLR", "glmnet", "pls", "caret")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
# Attach the packages used by the rest of the script. Each call sits on its
# own line so that a trailing comment cannot swallow the following calls.
library(ISLR)
library(glmnet) # Ridge & Lasso
library(pls) # PCR & PLS
library(caret) # Train/test split
# Load the College data set shipped with the ISLR package into the workspace.
data("College", package = "ISLR")
# Reproducible 70/30 train/test split of College, partitioned on the response
# Apps. `list = FALSE` makes createDataPartition() return row indices that can
# be used directly for subsetting.
set.seed(321)
trainIndex <- createDataPartition(College$Apps, p = 0.7, list = FALSE)
train <- College[trainIndex, ]
test <- College[-trainIndex, ]
# Baseline: least-squares regression of Apps on all other columns, evaluated
# by mean squared error on the held-out test set.
lm_model <- lm(Apps ~ ., data = train)
lm_preds <- predict(lm_model, newdata = test)
lm_error <- mean((lm_preds - test$Apps)^2)
# glmnet needs a numeric predictor matrix rather than a formula. Build the
# design matrices from the same formula for both sets and drop the first
# column (the intercept column added by model.matrix()).
x_train <- model.matrix(Apps ~ ., data = train)[, -1]
x_test <- model.matrix(Apps ~ ., data = test)[, -1]
y_train <- train$Apps
# Ridge regression (alpha = 0): choose lambda by cross-validation, refit at
# the CV-minimising lambda, and compute the test mean squared error.
cv_ridge <- cv.glmnet(x_train, y_train, alpha = 0)
ridge_model <- glmnet(
  x_train, y_train,
  alpha = 0,
  lambda = cv_ridge$lambda.min
)
ridge_preds <- predict(ridge_model, newx = x_test)
ridge_error <- mean((ridge_preds - test$Apps)^2)
# Lasso regression (alpha = 1): choose lambda by cross-validation, refit at
# the CV-minimising lambda, and compute the test mean squared error.
cv_lasso <- cv.glmnet(x_train, y_train, alpha = 1)
lasso_model <- glmnet(
  x_train, y_train,
  alpha = 1,
  lambda = cv_lasso$lambda.min
)
lasso_preds <- predict(lasso_model, newx = x_test)
lasso_error <- mean((lasso_preds - test$Apps)^2)

# Number of non-zero entries in the fitted coefficient vector.
# NOTE(review): coef() on a glmnet fit includes the intercept row, so this
# count includes the intercept whenever it is non-zero -- confirm that is the
# intended definition.
lasso_nonzero <- sum(coef(lasso_model) != 0)
# Principal components regression with the number of components chosen by
# cross-validation.
# scale = TRUE standardises the predictors so that components are not
# dominated by the variables with the largest raw variance (glmnet
# standardises by default, so this keeps the comparison like-for-like).
pcr_model <- pcr(Apps ~ ., data = train, scale = TRUE, validation = "CV")

# summary() on a pls fit only prints, so the CV error has to be read from
# RMSEP(). Its first entry is the intercept-only model (0 components), which
# predict() cannot use as `ncomp`; drop it so that position i corresponds to
# i components.
pcr_cv_rmsep <- drop(RMSEP(pcr_model, estimate = "CV")$val)
pcr_ncomp <- unname(which.min(pcr_cv_rmsep[-1]))

# predict() returns a 3-d array (obs x response x ncomp); drop() flattens it.
pcr_preds <- drop(predict(pcr_model, newdata = test, ncomp = pcr_ncomp))
pcr_error <- mean((pcr_preds - test$Apps)^2)
# Partial least squares regression with the number of components chosen by
# cross-validation.
# scale = TRUE standardises the predictors before the components are
# extracted (glmnet standardises by default, so this keeps the comparison
# like-for-like).
pls_model <- plsr(Apps ~ ., data = train, scale = TRUE, validation = "CV")

# summary() on a pls fit only prints, so the CV error has to be read from
# RMSEP(). Its first entry is the intercept-only model (0 components), which
# predict() cannot use as `ncomp`; drop it so that position i corresponds to
# i components.
pls_cv_rmsep <- drop(RMSEP(pls_model, estimate = "CV")$val)
pls_ncomp <- unname(which.min(pls_cv_rmsep[-1]))

# predict() returns a 3-d array (obs x response x ncomp); drop() flattens it.
pls_preds <- drop(predict(pls_model, newdata = test, ncomp = pls_ncomp))
pls_error <- mean((pls_preds - test$Apps)^2)
# Collect the test mean squared error of every model in one table. The
# NonZeroLasso column is only meaningful for the lasso row and is NA
# elsewhere.
results <- data.frame(
  Model = c(
    "Linear Regression", "Ridge Regression", "Lasso Regression", "PCR", "PLS"
  ),
  TestError = c(lm_error, ridge_error, lasso_error, pcr_error, pls_error),
  NonZeroLasso = c(NA, NA, lasso_nonzero, NA, NA)
)
print(results)