require(MASS); require(tidyverse); require(ggthemes)
## Loading required package: MASS
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::select() masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: ggthemes
require(knitr); require(kableExtra); require(corrplot)
## Loading required package: knitr
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
## 
## Loading required package: corrplot
## corrplot 0.92 loaded
require(broom)
## Loading required package: broom
set.seed(1)
theme_set(theme_tufte(base_size = 14))
data('Boston')
Boston <- Boston %>%
    mutate(chas = factor(chas),
           crime_factor = factor(ifelse(crim > median(crim), 
                                              'High', 'Low'), 
                                       levels = c('High', 'Low')))
cor_test <- Boston %>%
    select(-chas, -crime_factor) %>%
    cor.mtest(conf.level = .95)

Boston %>%
    select(-chas, -crime_factor) %>%
    cor %>%
    corrplot(method = 'color', 
         order = 'hclust', addrect = 2,
         tl.col = 'black', addCoef.col = 'black', number.cex = 0.65,
         p.mat = cor_test$p, sig.level = .05)

scale_this <- function(x) {
    (x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}

table_1 <- Boston %>%
          dplyr::select(zn:crime_factor) %>%
          gather(Variable, value, -crime_factor, -chas) %>%
          group_by(Variable) %>%
          mutate(value = scale_this(value)) %>%
          group_by(Variable, crime_factor) %>%
          summarize(Q10 = quantile(value, .10),
                    Q25 = quantile(value, .25),
                    median = median(value), 
                    mean = mean(value), 
                    Q75 = quantile(value, .75),
                    Q90 = quantile(value, .90))
## `summarise()` has grouped output by 'Variable'. You can override using the
## `.groups` argument.
kable(table_1,
      digits = 3, format = 'html') %>%
    kable_styling(bootstrap_options = c('striped', 'hover', 'condensed')) %>%
    column_spec(1:2, bold = T) %>%
    scroll_box(height = '400px')
Variable crime_factor Q10 Q25 median mean Q75 Q90
age High -0.234 0.470 0.846 0.613 1.042 1.116
age Low -1.782 -1.317 -0.713 -0.613 0.118 0.750
black High -2.942 -0.298 0.292 -0.351 0.421 0.441
black Low 0.222 0.356 0.405 0.351 0.441 0.441
dis High -1.108 -0.970 -0.786 -0.616 -0.355 0.098
dis Low -0.631 -0.202 0.628 0.616 1.275 1.915
indus High -0.720 -0.180 1.015 0.603 1.015 1.231
indus Low -1.306 -1.132 -0.801 -0.603 -0.376 0.247
lstat High -0.914 -0.290 0.352 0.453 1.076 1.931
lstat Low -1.149 -0.964 -0.580 -0.453 -0.045 0.457
medv High -1.319 -0.971 -0.504 -0.263 0.051 0.995
medv Low -0.436 -0.243 0.062 0.263 0.649 1.345
nox High -0.412 -0.092 0.598 0.723 1.254 1.599
nox Low -1.256 -1.067 -0.912 -0.723 -0.343 -0.046
ptratio High -1.735 -0.026 0.806 0.253 0.806 1.175
ptratio Low -1.504 -0.857 -0.210 -0.253 0.298 0.806
rad High -0.637 -0.522 1.660 0.619 1.660 1.660
rad Low -0.867 -0.752 -0.637 -0.619 -0.522 -0.408
rm High -1.337 -0.751 -0.222 -0.156 0.265 1.221
rm Low -0.672 -0.429 0.036 0.156 0.630 1.229
tax High -0.618 -0.601 1.529 0.608 1.529 1.529
tax Low -1.105 -0.957 -0.719 -0.608 -0.375 -0.061
zn High -0.487 -0.487 -0.487 -0.436 -0.487 -0.487
zn Low -0.487 -0.487 -0.487 0.436 0.971 2.943
difference <- function(x) x[1] - x[2]

table_2 <- table_1 %>%
    group_by(Variable) %>%
    summarize(diff_Q10 = difference(Q10),
              diff_Q25 = difference(Q25),
              diff_med = difference(median),
              diff_mean = difference(mean),
              diff_Q75 = difference(Q75),
              diff_Q90 = difference(Q90))


kable(table_2,
      digits = 3, format = 'html') %>%
    kable_styling(bootstrap_options = c('striped', 'hover', 'condensed')) %>%
    column_spec(1, bold = T) %>%
    add_header_above(c(' ' = 1, 'Between-Groups Differences' = 6)) %>%
    scroll_box(height = '400px')
Between-Groups Differences
Variable diff_Q10 diff_Q25 diff_med diff_mean diff_Q75 diff_Q90
age 1.548 1.787 1.560 1.227 0.924 0.366
black -3.165 -0.654 -0.113 -0.702 -0.020 0.000
dis -0.477 -0.768 -1.414 -1.231 -1.630 -1.817
indus 0.586 0.952 1.816 1.205 1.391 0.984
lstat 0.235 0.674 0.933 0.906 1.122 1.474
medv -0.883 -0.728 -0.565 -0.526 -0.598 -0.350
nox 0.844 0.975 1.510 1.445 1.597 1.645
ptratio -0.231 0.831 1.016 0.507 0.508 0.370
rad 0.230 0.230 2.297 1.238 2.182 2.067
rm -0.666 -0.322 -0.258 -0.312 -0.364 -0.007
tax 0.487 0.356 2.249 1.216 1.905 1.590
zn 0.000 0.000 0.000 -0.871 -1.458 -3.430
table_3 <- table_2 %>%
    gather(Measure, value, -Variable) %>%
    group_by(Variable) %>%
    summarize(`Absolute Mean Differences` = abs(mean(value))) %>%
    arrange(desc(`Absolute Mean Differences`))
## Warning: attributes are not identical across measure variables; they will be
## dropped
kable(table_3,
      digits = 3, format = 'html') %>%
    kable_styling(bootstrap_options = c('striped', 'hover', 'condensed')) %>%
    column_spec(1, bold = T) %>%
    scroll_box(height = '400px')
Variable Absolute Mean Differences
rad 1.374
nox 1.336
tax 1.300
age 1.235
dis 1.223
indus 1.156
zn 0.960
lstat 0.890
black 0.776
medv 0.608
ptratio 0.500
rm 0.322
Boston %>%
    dplyr::select(zn:crime_factor) %>%
    gather(value_type, value, -crime_factor, -chas) %>%
    ggplot(aes(value_type, value, fill = crime_factor)) +
    geom_boxplot(alpha = 0.5) +
    facet_wrap(~value_type, scales = 'free') +
    scale_fill_discrete(name = 'Crime Rate') +
    theme(legend.position = 'top')

Boston %>%
    dplyr::select(crim, crime_factor, rad, nox, tax, age, dis, indus) %>%
    gather(Variable, value, -crim, -crime_factor) %>%
    mutate(Variable = str_to_title(Variable)) %>%
    ggplot(aes(value, crim)) +
    geom_point(aes(col = crime_factor)) +
    facet_wrap(~ Variable, scales = 'free') +
    geom_smooth(method = 'lm', formula = y ~ poly(x, 3), se = FALSE) +
    guides(col = FALSE) +
    labs(title = 'Scatterplots for each strong predictor')
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

set.seed(1)
train_size <- nrow(Boston) * 0.75
inTrain <- sample(1:nrow(Boston), size = train_size)
train <- Boston[inTrain,]
test <- Boston[-inTrain,]

log_reg <- glm(crime_factor ~ rad + nox + tax + 
                   age + dis + indus, data = train, family = binomial)

log_reg %>%
    tidy %>%
    kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 31.312 4.984 6.282 0.000
rad -0.602 0.132 -4.543 0.000
nox -53.440 9.090 -5.879 0.000
tax 0.008 0.003 2.903 0.004
age -0.018 0.011 -1.697 0.090
dis -0.544 0.179 -3.034 0.002
indus 0.095 0.049 1.950 0.051
pred <- predict(log_reg, test, type = 'response')
pred_value <- ifelse(pred >= 0.5, 'Low', 'High')
acc <- mean(pred_value == test$crime_factor)
    
table(pred_value, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 58 14
Low 6 49
a 0.84251968503937
log_reg <- glm(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3), 
               data = train, family = binomial)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
tidy(log_reg) %>%
    kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) -336.446 156.083 -2.156 0.031
poly(rad, 3)1 -13319.953 5336.905 -2.496 0.013
poly(rad, 3)2 -2181.427 867.504 -2.515 0.012
poly(rad, 3)3 -297.994 126.652 -2.353 0.019
poly(nox, 3)1 -1674.159 657.049 -2.548 0.011
poly(nox, 3)2 -1384.744 580.787 -2.384 0.017
poly(nox, 3)3 -512.519 215.952 -2.373 0.018
poly(tax, 3)1 3380.046 1285.347 2.630 0.009
poly(tax, 3)2 1213.841 456.842 2.657 0.008
poly(tax, 3)3 233.440 94.770 2.463 0.014
poly(age, 3)1 -3.601 8.031 -0.448 0.654
poly(age, 3)2 -12.508 5.997 -2.086 0.037
poly(age, 3)3 0.976 6.002 0.163 0.871
poly(dis, 3)1 -13.737 15.247 -0.901 0.368
poly(dis, 3)2 2.369 10.966 0.216 0.829
poly(dis, 3)3 -16.354 9.667 -1.692 0.091
pred <- predict(log_reg, test, type = 'response')
pred_value <- ifelse(pred >= 0.5, 'Low', 'High')
acc <- mean(pred_value == test$crime_factor)

table(pred_value, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 63 7
Low 1 56
a 0.937007874015748
lda_model <- lda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3), data = train)
pred <- predict(lda_model, test)
acc <- mean(pred$class == test$crime_factor)

table(pred$class, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 62 11
Low 2 52
a 0.897637795275591
lda_model <- lda(crime_factor ~ rad + nox + tax + age + dis, data = train)
pred <- predict(lda_model, test)
acc <- mean(pred$class == test$crime_factor)

table(pred$class, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 49 1
Low 15 62
a 0.874015748031496
qda_model <- qda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3), data = train)
pred <- predict(qda_model, test)
acc <- mean(pred$class == test$crime_factor)

table(pred$class, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 59 2
Low 5 61
a 0.94488188976378
qda_model <- qda(crime_factor ~ rad + nox + tax + age + dis, data = train)
pred <- predict(qda_model, test)
acc <- mean(pred$class == test$crime_factor) 

table(pred$class, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 52 3
Low 12 60
a 0.881889763779528
qda_model <- qda(crime_factor ~ poly(rad, 3) + poly(nox, 3) + 
                   poly(tax, 3) + poly(age, 3) + poly(dis, 3) + poly(indus, 3),
                 data = train)
pred <- predict(qda_model, test)
acc <- mean(pred$class == test$crime_factor)

table(pred$class, test$crime_factor) %>%
    kable(format = 'html') %>%
    kable_styling() %>%
    add_header_above(c('Predicted' = 1, 'Observed' = 2)) %>%
    column_spec(1, bold = T) %>%
    add_footnote(label = acc)
Predicted
Observed
High Low
High 57 1
Low 7 62
a 0.937007874015748
require(class)
## Loading required package: class
variables <- c('rad', 'nox', 'tax', 'age', 'dis', 'zn', 'indus')

x_train <- train[, variables]
y_train <- train$crime_factor
x_test <- test[, variables]
acc <- list()

for (i in 1:20) {
    knn_pred <- knn(train = x_train, test = x_test, cl = y_train, k = i)
    acc[as.character(i)] = mean(knn_pred == test$crime_factor)
}

acc <- unlist(acc)

data_frame(acc = acc) %>%
    mutate(k = row_number()) %>%
    ggplot(aes(k, acc)) +
    geom_col(aes(fill = k == which.max(acc))) +
    labs(x = 'K', y = 'Accuracy', title = 'KNN Accuracy for different values of K') +
    scale_x_continuous(breaks = 1:20) +
    scale_y_continuous(breaks = round(c(seq(0.90, 0.94, 0.01), max(acc)),
                                      digits = 3)) +
    geom_hline(yintercept = max(acc), lty = 2) +
    coord_cartesian(ylim = c(min(acc), max(acc))) +
    guides(fill = FALSE)
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

lambda = 3
y= 4
betas = seq(-10, 10, 0.2)
ridge = function(beta, y, lambda) (y-beta)^2 + lambda*beta^2
plot(betas, ridge(betas, y, lambda), pch = 20, xlab = "beta", ylab = "Ridge optimization")
est.beta= y/(1+ lambda)
points(est.beta, ridge(est.beta, y, lambda), col = "red", pch = 20, lwd=5)

lambda = 3
y= 4
betas = seq(-10, 10, 0.2)
lasso = function(beta, y, lambda) (y-beta)^2 + lambda*abs(beta)
plot(betas, lasso(betas, y, lambda), xlab="beta", main="Lasso Regression Optimization", pch=20)
est.beta= y- (lambda/2)
points(est.beta, lasso(est.beta, y, lambda), col = "red", pch = 20, lwd=5)

set.seed(1)
X = rnorm(100)
eps = rnorm(100)
b0 = 4
b1 = 2
b2 = 3
b3 = 2
Y = b0 + b1 * X + b2 * X^2 + b3 * X^3 + eps
library(leaps)
data.full = data.frame(Y, X)
regfit.full = regsubsets(Y~poly(X,10,raw=T), data=data.frame(Y,X), nvmax=10)
summary = summary(regfit.full)
par(mfrow=c(2,2))
plot(summary$cp, xlab ="Number of variables", ylab="C_p", type="l")
points(which.min(summary$cp), summary$cp[which.min(summary$cp)], col = "red", cex = 2, pch = 20)
plot(summary$bic, xlab ="Number of variables", ylab="BIC", type="l")
points(which.min(summary$bic), summary$bic[which.min(summary$bic)], col = "red", cex = 2, pch = 20)
plot(summary$adjr2, xlab ="Number of variables", ylab="Adjusted R^2", type="l")
points(which.max(summary$adjr2), summary$adjr2[which.max(summary$adjr2)], col = "red", cex = 2, pch = 20)

library(leaps)
data.full = data.frame(Y, X)
regfit.fwd = regsubsets(Y~poly(X,10,raw=T), data=data.frame(Y,X), nvmax=10, method="forward")
summary = summary(regfit.fwd)
par(mfrow=c(2,2))
plot(summary$cp, xlab ="Number of variables", ylab="C_p", type="l")
points(which.min(summary$cp), summary$cp[which.min(summary$cp)], col = "red", cex = 2, pch = 20)
plot(summary$bic, xlab ="Number of variables", ylab="BIC", type="l")
points(which.min(summary$bic), summary$bic[which.min(summary$bic)], col = "red", cex = 2, pch = 20)
plot(summary$adjr2, xlab ="Number of variables", ylab="Adjusted R^2", type="l")
points(which.max(summary$adjr2), summary$adjr2[which.max(summary$adjr2)], col = "red", cex = 2, pch = 20)

library(leaps)
data.full = data.frame(Y, X)
regfit.back = regsubsets(Y~poly(X,10,raw=T), data=data.frame(Y,X), nvmax=10, method="backward")
summary = summary(regfit.back)
par(mfrow=c(2,2))
plot(summary$cp, xlab ="Number of variables", ylab="C_p", type="l")
points(which.min(summary$cp), summary$cp[which.min(summary$cp)], col = "red", cex = 2, pch = 20)
plot(summary$bic, xlab ="Number of variables", ylab="BIC", type="l")
points(which.min(summary$bic), summary$bic[which.min(summary$bic)], col = "red", cex = 2, pch = 20)
plot(summary$adjr2, xlab ="Number of variables", ylab="Adjusted R^2", type="l")
points(which.max(summary$adjr2), summary$adjr2[which.max(summary$adjr2)], col = "red", cex = 2, pch = 20)

coef(regfit.full, which.max(summary(regfit.full)$adjr2))
##           (Intercept) poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 
##            4.07200775            2.38745596            2.84575641 
## poly(X, 10, raw = T)3 poly(X, 10, raw = T)5 
##            1.55797426            0.08072292
coef(regfit.fwd, which.max(summary(regfit.fwd)$adjr2))
##           (Intercept) poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 
##            4.07200775            2.38745596            2.84575641 
## poly(X, 10, raw = T)3 poly(X, 10, raw = T)5 
##            1.55797426            0.08072292
coef(regfit.back, which.max(summary(regfit.back)$adjr2))
##           (Intercept) poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 
##           4.079236362           2.231905828           2.833494180 
## poly(X, 10, raw = T)3 poly(X, 10, raw = T)9 
##           1.819555807           0.001290827
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-7
xmat = model.matrix(Y~poly(X,10,raw=T))[,-1]
lasso.mod = cv.glmnet(xmat, Y, alpha=1)
(lambda = lasso.mod$lambda.min)
## [1] 0.04924644
par(mfrow=c(1,1))
plot(lasso.mod)

predict(lasso.mod, s=lambda, type="coefficients")
## 11 x 1 sparse Matrix of class "dgCMatrix"
##                                 s1
## (Intercept)            4.160843693
## poly(X, 10, raw = T)1  2.203516697
## poly(X, 10, raw = T)2  2.650263654
## poly(X, 10, raw = T)3  1.765931597
## poly(X, 10, raw = T)4  0.040178155
## poly(X, 10, raw = T)5  0.021903165
## poly(X, 10, raw = T)6  .          
## poly(X, 10, raw = T)7  0.003685793
## poly(X, 10, raw = T)8  .          
## poly(X, 10, raw = T)9  .          
## poly(X, 10, raw = T)10 .
Y2 = 4 + 7*X^7 + eps
data.full2 = data.frame(y = Y2,x = X)
regfit.full2 = regsubsets(Y~poly(X,10,raw=T), data=data.full2, nvmax=10)
summary2 = summary(regfit.full2)
par(mfrow=c(2,2))
plot(summary2$cp, xlab ="Number of variables", ylab="C_p", type="l")
points(which.min(summary2$cp), summary2$cp[which.min(summary2$cp)], col = "red", cex = 2, pch = 20)
plot(summary2$bic, xlab ="Number of variables", ylab="BIC", type="l")
points(which.min(summary2$bic), summary2$bic[which.min(summary2$bic)], col = "red", cex = 2, pch = 20)
plot(summary2$adjr2, xlab ="Number of variables", ylab="Adjusted R^2", type="l")
points(which.max(summary2$adjr2), summary2$adjr2[which.max(summary2$adjr2)], col = "red", cex = 2, pch = 20)

coef(regfit.full2, which.min(summary2$cp))
##           (Intercept) poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 
##            4.07200775            2.38745596            2.84575641 
## poly(X, 10, raw = T)3 poly(X, 10, raw = T)5 
##            1.55797426            0.08072292
coef(regfit.full2, which.min(summary2$bic))
##           (Intercept) poly(X, 10, raw = T)1 poly(X, 10, raw = T)2 
##              4.061507              1.975280              2.876209 
## poly(X, 10, raw = T)3 
##              2.017639
coef(regfit.full2, which.min(summary2$adjr2))
##           (Intercept) poly(X, 10, raw = T)3 
##              6.437156              2.828270
xmat = model.matrix(Y2~poly(X,10,raw=T))[,-1]
lasso = cv.glmnet(xmat, Y2, alpha=1)
(lambda = lasso$lambda.min)
## [1] 12.36884
par(mfrow=c(1,1))
plot(lasso)

predict(lasso, s=lambda, type="coefficients")
## 11 x 1 sparse Matrix of class "dgCMatrix"
##                              s1
## (Intercept)            4.820215
## poly(X, 10, raw = T)1  .       
## poly(X, 10, raw = T)2  .       
## poly(X, 10, raw = T)3  .       
## poly(X, 10, raw = T)4  .       
## poly(X, 10, raw = T)5  .       
## poly(X, 10, raw = T)6  .       
## poly(X, 10, raw = T)7  6.796694
## poly(X, 10, raw = T)8  .       
## poly(X, 10, raw = T)9  .       
## poly(X, 10, raw = T)10 .
library(ISLR)
data(College)
set.seed(1)
trainid = sample(1:nrow(College), nrow(College)/2)
train = College[trainid,]
test = College[-trainid,]
lm.fit = lm(Apps~., data=train)
lm.pred = predict(lm.fit, test)
lm.err = mean((test$Apps - lm.pred)^2)
lm.err
## [1] 1135758
library(glmnet)
train.X = model.matrix(Apps ~ ., data = train)
train.Y = train[, "Apps"]
test.X = model.matrix(Apps ~ ., data = test)
test.Y = test[, "Apps"]
grid = 10 ^ seq(4, -2, length=100)
ridge.mod = glmnet(train.X, train.Y, alpha =0, lambda=grid, thresh = 1e-12)
lambda.best = ridge.mod$lambda.min
ridge.pred = predict(ridge.mod, newx= test.X, s=lambda.best)
(ridge.err = mean((test.Y - ridge.pred)^2))
## [1] 1164319
lasso.mod = glmnet(train.X, train.Y, alpha =1, lambda=grid, thresh = 1e-12)
lasso.cv = cv.glmnet(train.X, train.Y, alpha =1, lambda=grid, thresh = 1e-12)
lambda.best = lasso.cv$lambda.min
lasso.pred = predict(lasso.mod, newx= test.X, s=lambda.best)
(lasso.err = mean((test.Y - lasso.pred)^2))
## [1] 1135660
predict(lasso.mod, s = lambda.best, type="coefficients")
## 19 x 1 sparse Matrix of class "dgCMatrix"
##                        s1
## (Intercept) -7.900363e+02
## (Intercept)  .           
## PrivateYes  -3.070103e+02
## Accept       1.779328e+00
## Enroll      -1.469508e+00
## Top10perc    6.672214e+01
## Top25perc   -2.230442e+01
## F.Undergrad  9.258974e-02
## P.Undergrad  9.408838e-03
## Outstate    -1.083495e-01
## Room.Board   2.115147e-01
## Books        2.912105e-01
## Personal     6.120406e-03
## PhD         -1.547200e+01
## Terminal     6.409503e+00
## S.F.Ratio    2.282638e+01
## perc.alumni  1.130498e+00
## Expend       4.856697e-02
## Grad.Rate    7.488081e+00
library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:corrplot':
## 
##     corrplot
## The following object is masked from 'package:stats':
## 
##     loadings
pcr.fit = pcr(Apps~., data=train, scale=TRUE, validation="CV")
validationplot(pcr.fit, val.type="MSEP")

summary(pcr.fit)
## Data:    X dimension: 388 17 
##  Y dimension: 388 1
## Fit method: svdpc
## Number of components considered: 17
## 
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV            4288     4027     2351     2355     2046     1965     1906
## adjCV         4288     4031     2347     2353     2014     1955     1899
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV        1910     1913     1871      1799      1799      1802      1800
## adjCV     1903     1908     1866      1790      1792      1795      1793
##        14 comps  15 comps  16 comps  17 comps
## CV         1832      1728      1310      1222
## adjCV      1829      1702      1296      1212
## 
## TRAINING: % variance explained
##       1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X       32.20    57.78    65.31    70.99    76.37    81.27     84.8    87.85
## Apps    13.44    70.93    71.07    79.87    81.15    82.25     82.3    82.33
##       9 comps  10 comps  11 comps  12 comps  13 comps  14 comps  15 comps
## X       90.62     92.91     94.98     96.74     97.79     98.72     99.42
## Apps    83.38     84.76     84.80     84.84     85.11     85.14     90.55
##       16 comps  17 comps
## X        99.88    100.00
## Apps     93.42     93.89
pcr.pred = predict(pcr.fit, test, ncomp=10)
(pcr.err = mean((test$Apps - pcr.pred)^2))
## [1] 1723100
pls.fit = plsr(Apps~., data=train, scale=TRUE, validation="CV")
validationplot(pls.fit, val.type="MSEP")

pls.pred = predict(pls.fit, test, ncomp=10)
(pls.err = mean((test$Apps - pls.pred)^2))
## [1] 1131661
test.avg = mean(test$Apps)
lm.r2 =1 - lm.err/mean((test.avg - test$Apps)^2)
ridge.r2 =1 - ridge.err/mean((test.avg - test$Apps)^2)
lasso.r2 =1 - lasso.err/mean((test.avg - test$Apps)^2)
pcr.r2 =1 - pcr.err/mean((test.avg - test$Apps)^2)
pls.r2 =1 - pls.err/mean((test.avg - test$Apps)^2)
all.r2 = c(lm.r2, ridge.r2, lasso.r2, pcr.r2, pls.r2)
names(all.r2) = c("lm", "ridge", "lasso", "pcr", "pls")
barplot(all.r2 )