(c)Repeat (a) for non-linear methods relative to least squares.
library(ISLR)
library(caret)
library(tidyverse)
library(glmnet)
library(pls)
library(ggplot2)
library(scales)
library(ggthemes)
set.seed(1)
College=na.omit(College)
data("College")
smp_size <- floor(.7 * nrow(College))
set.seed(1)
train_ind <- sample(seq_len(nrow(College)), size = smp_size)
train <- College[train_ind, ]
test <- College[-train_ind, ]
set.seed(1)
lm.fit <- lm(Apps ~ ., data = train)
lm.pred <- predict(lm.fit, test)
lm.RMSE <- mean((lm.pred-test[, "Apps"])^2)
lm.RMSE
## [1] 1261630
set.seed(1)
train.matrix <- model.matrix(Apps ~ ., data = train)
test.matrix <- model.matrix(Apps ~ ., data = test)
grid = 4^ seq(4, -2, length = 100)
ridge <- cv.glmnet(train.matrix, train$Apps, alpha = 0, lambda = grid, thresh = 1e-12)
ridge.reg <- ridge$lambda.min
ridge.reg
## [1] 0.0625
plot(ridge)
set.seed(1)
ridge.pred <- predict(ridge, newx = test.matrix, s = ridge.reg)
ridge.RMSE <- mean((ridge.pred - test$Apps) ^2)
ridge.RMSE
## [1] 1261460
set.seed(1)
lasso.mod = cv.glmnet(train.matrix, train[, "Apps"], alpha=1, lambda=grid, thresh=1e-12)
lasso.bestlam = lasso.mod$lambda.min
lasso.bestlam
## [1] 0.0625
set.seed(1)
lasso.pred = predict(lasso.mod, newx=test.matrix, s=lasso.bestlam)
lasso.RMSE <- mean((lasso.pred - test[, "Apps"])^2)
lasso.RMSE
## [1] 1261414
model_matrix <- model.matrix(Apps~., data=College)
lasso.mod = glmnet(model_matrix, College[, "Apps"], alpha=1)
predict(lasso.mod, s=lasso.bestlam, type="coefficients")
## 19 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -471.39372069
## (Intercept) .
## PrivateYes -491.04485135
## Accept 1.57033288
## Enroll -0.75961467
## Top10perc 48.14698891
## Top25perc -12.84690694
## F.Undergrad 0.04149116
## P.Undergrad 0.04438973
## Outstate -0.08328388
## Room.Board 0.14943472
## Books 0.01532293
## Personal 0.02909954
## PhD -8.39597537
## Terminal -3.26800340
## S.F.Ratio 14.59298267
## perc.alumni -0.04404771
## Expend 0.07712632
## Grad.Rate 8.28950241
prc.model <- pcr(Apps ~ ., data = train, scale = TRUE, validation = "CV")
validationplot(prc.model, val.type = "MSE")
set.seed(1)
pcr.pred = predict(prc.model, test, ncomp=5)
pcr.RMSE <- mean((pcr.pred - test[, "Apps"])^2)
pcr.RMSE
## [1] 2057929
pls.fit=plsr(Apps~., data=train, scale=TRUE, validation ="CV")
validationplot(pls.fit, val.type="RMSEP")
set.seed(1)
pls.pred = predict(pls.fit, test, ncomp=5)
pls.RMSE <- mean((pls.pred - test[, "Apps"])^2)
pls.RMSE
## [1] 1316934
test.avg <- mean(test[, "Apps"])
lm.test.r2 <- 1 - mean((test[, "Apps"] - lm.pred)^2) / mean((test[, "Apps"] - test.avg)^2)
ridge.test.r2 <- 1 - mean((test[, "Apps"] - ridge.pred)^2)/ mean((test[, "Apps"] - test.avg)^2)
lasso.test.r2 <- 1 - mean((test[, "Apps"] - lasso.pred)^2) / mean((test[, "Apps"] - test.avg)^2)
pcr.test.r2 <- 1 - mean((test[, "Apps"] - pcr.pred)^2) / mean((test[, "Apps"] - test.avg)^2)
pls.test.r2 <- 1 - mean((test[, "Apps"] - pls.pred)^2) / mean((test[, "Apps"] - test.avg)^2)
r2 <- c(lm.test.r2, ridge.test.r2, lasso.test.r2, pcr.test.r2, pls.test.r2)
r2
## [1] 0.9134458 0.9134575 0.9134607 0.8588157 0.9096517
x <- data.frame("Models"=c("Linear", "Ridge", "Lasso","PCR","PLS"), "R2"=c(lm.test.r2, ridge.test.r2, lasso.test.r2, pcr.test.r2, pls.test.r2))
x
## Models R2
## 1 Linear 0.9134458
## 2 Ridge 0.9134575
## 3 Lasso 0.9134607
## 4 PCR 0.8588157
## 5 PLS 0.9096517
x$fraction = x$R2 / sum(x$R2)
ggplot(data=x, aes(x=" ", y=R2, group=R2, colour=R2, fill=R2)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0) +
facet_grid(.~ Models) + theme_bw()
library(MASS)
data("Boston")
Boston=na.omit(Boston)
size.11 <- floor(.7 * nrow(Boston))
set.seed(1)
train_index <- sample(seq_len(nrow(Boston)), size = size.11)
train <- Boston[train_index, ]
test <- Boston[-train_index, ]
Ridge Regression
set.seed(1)
xtrain <- model.matrix(crim ~ ., data = train)
xtest <- model.matrix(crim ~ ., data = test)
grid <- 4 ^ seq(4, -2, length = 100)
ridge.model <- cv.glmnet(xtrain, train[, "crim"], alpha = 0, lambda = grid, thresh = 1e-12)
ridge.11 <- ridge.model$lambda.min
ridge.p = predict(ridge.model, newx=xtest, s=ridge.11)
ridge.RMSE <- mean((ridge.p - test[, "crim"])^2)
ridge.RMSE
## [1] 59.63209
r2.11 <- mean(test[, "crim"])
ridge.r2.11 <- 1 - mean((test[, "crim"] - ridge.p)^2)/
mean((test[, "crim"] - r2.11)^2)
ridge.r2.11
## [1] 0.3670422
Lasso
set.seed(4)
lasso.model = cv.glmnet(xtrain, train[, "crim"], alpha=1, lambda=grid, thresh=1e-12)
lasso.11 = lasso.model$lambda.min
lasso.p = predict(lasso.model, newx=xtest, s=lasso.11)
lasso.RMSE <- mean((lasso.p - test[, "crim"])^2)
lasso.RMSE
## [1] 59.80344
lasso.r2.11 <- 1 - mean((test[, "crim"] - ridge.p)^2)/
mean((test[, "crim"] - r2.11)^2)
lasso.r2.11
## [1] 0.3670422
PLS
set.seed(4)
pls.model=plsr(crim~., data=train, scale=TRUE, validation ="CV")
validationplot(pls.model, val.type="RMSEP")
pls.p = predict(pls.model, test, ncomp=4)
pls.RMSE <- mean((pls.p - test[, "crim"])^2)
pls.RMSE
## [1] 61.02598
pls.test.r2 <- 1 - mean((test[, "crim"] - ridge.p)^2)/
mean((test[, "crim"] - r2.11)^2)
pls.test.r2
## [1] 0.3670422
PCR
set.seed(4)
pcr.model=pcr(crim~., data=train, scale=TRUE, validation ="CV")
pcr.p = predict(pcr.model, test, ncomp=4)
pcr.RMSE <- mean((pcr.p - test[, "crim"])^2)
pcr.RMSE
## [1] 63.96279
pcr.test.r2 <- 1 - mean((test[, "crim"] - ridge.p)^2)/
mean((test[, "crim"] - r2.11)^2)
pcr.test.r2
## [1] 0.3670422
errors <- c(ridge.RMSE,lasso.RMSE, pls.RMSE, pcr.RMSE )
names(errors) <- c("ridge", "lasso", "pls", "pcr")
par(las=2)
par(mar=c(5,8,4,2))
barplot(sort(errors, decreasing = T), horiz=TRUE)
print(sort(errors))
## ridge lasso pls pcr
## 59.63209 59.80344 61.02598 63.96279
Our best model is ridge.
Yes, the chosen model involved all of the features in the data set, I did not removed any features when running models.