library(ISLR2)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:ISLR2':
## 
##     Boston
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-9
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
## 
##     loadings
library(ggplot2)

data("Smarket")
Smarket$Direction <- ifelse(Smarket$Direction == "Up", 1, 0)  # Binary encoding
set.seed(1)
train_idx <- Smarket$Year < 2005
x <- model.matrix(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Smarket)[, -1]
y <- Smarket$Direction

x_train <- x[train_idx, ]
x_test <- x[!train_idx, ]
y_train <- y[train_idx]
y_test <- y[!train_idx]
# Linear Model
lm_fit <- glm(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
              data = Smarket, family = binomial, subset = train_idx)
probs <- predict(lm_fit, newdata = Smarket[!train_idx, ], type = "response")
preds_lm <- ifelse(probs > 0.5, 1, 0)
test_error_linear <- mean(preds_lm != y_test)

# Ridge Regression
grid <- 10^seq(10, -2, length = 100)
ridge_mod <- glmnet(x_train, y_train, alpha = 0, lambda = grid, family = "binomial")
cv_ridge <- cv.glmnet(x_train, y_train, alpha = 0, family = "binomial")
best_lambda_ridge <- cv_ridge$lambda.min
ridge_preds <- predict(ridge_mod, s = best_lambda_ridge, newx = x_test, type = "class")
test_error_ridge <- mean(ridge_preds != y_test)

# Lasso
lasso_mod <- glmnet(x_train, y_train, alpha = 1, lambda = grid, family = "binomial")
cv_lasso <- cv.glmnet(x_train, y_train, alpha = 1, family = "binomial")
best_lambda_lasso <- cv_lasso$lambda.min
lasso_preds <- predict(lasso_mod, s = best_lambda_lasso, newx = x_test, type = "class")
test_error_lasso <- mean(lasso_preds != y_test)

# PCR
pcr_fit <- pcr(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Smarket, 
               subset = train_idx, scale = TRUE, validation = "CV")
pcr_pred <- predict(pcr_fit, Smarket[!train_idx, ], ncomp = 3)
pcr_class <- ifelse(pcr_pred > 0.5, 1, 0)
test_error_pcr <- mean(pcr_class != y_test)

# PLS
pls_fit <- plsr(Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume, data = Smarket, 
                subset = train_idx, scale = TRUE, validation = "CV")
pls_pred <- predict(pls_fit, Smarket[!train_idx, ], ncomp = 3)
pls_class <- ifelse(pls_pred > 0.5, 1, 0)
test_error_pls <- mean(pls_class != y_test)
results <- data.frame(
  Method = c("Linear Model", "Ridge Regression", "Lasso", "PCR", "PLS"),
  TestError = c(test_error_linear, test_error_ridge, test_error_lasso, test_error_pcr, test_error_pls)
)
print(results)
##             Method TestError
## 1     Linear Model 0.5198413
## 2 Ridge Regression 0.4404762
## 3            Lasso 0.4404762
## 4              PCR 0.5476190
## 5              PLS 0.5198413
# Optional: Visualize
ggplot(results, aes(x = Method, y = TestError, fill = Method)) +
  geom_col() +
  theme_minimal() +
  labs(title = "Test Error Comparison of 5 Models", y = "Test Error Rate", x = "")