install.packages("boot", repos = "https://cran.r-project.org/web/packages/boot/boot.pdf")
## Warning: unable to access index for repository https://cran.r-project.org/web/packages/boot/boot.pdf/src/contrib:
## cannot open URL 'https://cran.r-project.org/web/packages/boot/boot.pdf/src/contrib/PACKAGES'
## Warning: package 'boot' is not available (for R version 3.6.1)
## Warning: unable to access index for repository https://cran.r-project.org/web/packages/boot/boot.pdf/bin/macosx/el-capitan/contrib/3.6:
## cannot open URL 'https://cran.r-project.org/web/packages/boot/boot.pdf/bin/macosx/el-capitan/contrib/3.6/PACKAGES'
library(boot)
install.packages("data.table", repos = "https://CRAN.R-project.org/package=data.table")
## Warning: unable to access index for repository https://CRAN.R-project.org/package=data.table/src/contrib:
## cannot open URL 'https://CRAN.R-project.org/package=data.table/src/contrib/PACKAGES'
## Warning: package 'data.table' is not available (for R version 3.6.1)
## Warning: unable to access index for repository https://CRAN.R-project.org/package=data.table/bin/macosx/el-capitan/contrib/3.6:
## cannot open URL 'https://CRAN.R-project.org/package=data.table/bin/macosx/el-capitan/contrib/3.6/PACKAGES'
library(data.table)
library(splines)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(ISLR)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
attach(Boston)
Boston=na.omit(Boston)
mod=lm(nox ~ poly(dis, 3), data = Boston)
summary(mod)
##
## Call:
## lm(formula = nox ~ poly(dis, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.121130 -0.040619 -0.009738 0.023385 0.194904
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.554695 0.002759 201.021 < 2e-16 ***
## poly(dis, 3)1 -2.003096 0.062071 -32.271 < 2e-16 ***
## poly(dis, 3)2 0.856330 0.062071 13.796 < 2e-16 ***
## poly(dis, 3)3 -0.318049 0.062071 -5.124 4.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06207 on 502 degrees of freedom
## Multiple R-squared: 0.7148, Adjusted R-squared: 0.7131
## F-statistic: 419.3 on 3 and 502 DF, p-value: < 2.2e-16
dislims <- range(Boston$dis)
dis.grid <- seq(from = dislims[1], to = dislims[2], by = 0.1)
preds <- predict(mod, list(dis = dis.grid))
plot(nox ~ dis, data = Boston, col = "darkgrey")
lines(dis.grid, preds, col = "red", lwd = 2)
rss <- rep(NA, 10)
for (i in 1:10) {
fit <- lm(nox ~ poly(dis, i), data = Boston)
rss[i] <- sum(fit$residuals^2)
}
plot(1:10, rss, xlab = "Degree", ylab = "RSS", type = "l")
deltas <- rep(NA, 10)
for (i in 1:10) {
fit <- glm(nox ~ poly(dis, i), data = Boston)
deltas[i] <- cv.glm(Boston, fit, K = 10)$delta[1]
}
plot(1:10, deltas, xlab = "Degree", ylab = "Test MSE", type = "l")
library(splines)
attr(bs(dis,df=4), "knots")
## 50%
## 3.20745
modspln = fit <- lm(nox ~ bs(dis, knots = c(4, 7, 11)), data = Boston)
summary(fit)
##
## Call:
## lm(formula = nox ~ bs(dis, knots = c(4, 7, 11)), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.124567 -0.040355 -0.008702 0.024740 0.192920
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.73926 0.01331 55.537 < 2e-16 ***
## bs(dis, knots = c(4, 7, 11))1 -0.08861 0.02504 -3.539 0.00044 ***
## bs(dis, knots = c(4, 7, 11))2 -0.31341 0.01680 -18.658 < 2e-16 ***
## bs(dis, knots = c(4, 7, 11))3 -0.26618 0.03147 -8.459 3.00e-16 ***
## bs(dis, knots = c(4, 7, 11))4 -0.39802 0.04647 -8.565 < 2e-16 ***
## bs(dis, knots = c(4, 7, 11))5 -0.25681 0.09001 -2.853 0.00451 **
## bs(dis, knots = c(4, 7, 11))6 -0.32926 0.06327 -5.204 2.85e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06185 on 499 degrees of freedom
## Multiple R-squared: 0.7185, Adjusted R-squared: 0.7151
## F-statistic: 212.3 on 6 and 499 DF, p-value: < 2.2e-16
pred <- predict(fit, list(dis = dis.grid))
plot(nox ~ dis, data = Boston, col = "darkgrey")
lines(dis.grid, preds, col = "red", lwd = 2)
rss <- rep(NA, 16)
for (i in 3:16) {
fit <- lm(nox ~ bs(dis, df = i), data = Boston)
rss[i] <- sum(fit$residuals^2)
}
plot(3:16, rss[-c(1, 2)], xlab = "Degrees of freedom", ylab = "RSS", type = "l")
cv <- rep(NA, 16)
for (i in 3:16) {
fit <- glm(nox ~ bs(dis, df = i), data = Boston)
cv[i] <- cv.glm(Boston, fit, K = 10)$delta[1]
}
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots =
## c(1.137, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots =
## c(1.137, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots =
## c(1.1296, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots =
## c(1.1296, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`50%` = 3.2157), Boundary.knots =
## c(1.1296, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`50%` = 3.2157), Boundary.knots =
## c(1.1296, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`50%` = 3.2157), Boundary.knots =
## c(1.137, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`50%` = 3.2157), Boundary.knots =
## c(1.137, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`33.33333%` = 2.38876666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`33.33333%` = 2.38876666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`33.33333%` = 2.40693333333333, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`33.33333%` = 2.40693333333333, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`25%` = 2.1114, `50%` = 3.2157, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`25%` = 2.1114, `50%` = 3.2157, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`25%` = 2.10035, `50%` =
## 3.2721, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`25%` = 2.10035, `50%` =
## 3.2721, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`20%` = 1.9512, `40%` = 2.548, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`20%` = 1.9512, `40%` = 2.548, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`20%` = 1.97036, `40%` =
## 2.62694, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`20%` = 1.97036, `40%` =
## 2.62694, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`16.66667%` = 1.82231666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`16.66667%` = 1.82231666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`16.66667%` = 1.8208, `33.33333%`
## = 2.3727, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`16.66667%` = 1.8208, `33.33333%`
## = 2.3727, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`14.28571%` = 1.79777142857143, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`14.28571%` = 1.79777142857143, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`14.28571%` = 1.77172857142857, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`14.28571%` = 1.77172857142857, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`12.5%` = 1.748425, `25%` =
## 2.10035, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`12.5%` = 1.748425, `25%` =
## 2.10035, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`12.5%` = 1.751575, `25%` =
## 2.1084, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`12.5%` = 1.751575, `25%` =
## 2.1084, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`11.11111%` = 1.73271111111111, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`11.11111%` = 1.73271111111111, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`11.11111%` = 1.68835555555556, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`11.11111%` = 1.68835555555556, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`10%` = 1.62728, `20%` =
## 1.96794, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`10%` = 1.62728, `20%` =
## 1.96794, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`10%` = 1.6424, `20%` =
## 1.96794, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`10%` = 1.6424, `20%` =
## 1.96794, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`9.090909%` = 1.60841818181818, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`9.090909%` = 1.60841818181818, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`9.090909%` = 1.61450909090909, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`9.090909%` = 1.61450909090909, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`8.333333%` = 1.5874, `16.66667%`
## = 1.83066666666667, : some 'x' values beyond boundary knots may cause ill-
## conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`8.333333%` = 1.5874, `16.66667%`
## = 1.83066666666667, : some 'x' values beyond boundary knots may cause ill-
## conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`8.333333%` = 1.60476666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`8.333333%` = 1.60476666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.692308%` = 1.57254615384615, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.692308%` = 1.57254615384615, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.692308%` = 1.58815384615385, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.692308%` = 1.58815384615385, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.142857%` = 1.56255714285714, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.142857%` = 1.56255714285714, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(`7.142857%` = 1.5299, `14.28571%`
## = 1.7996, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
## Warning in bs(dis, degree = 3L, knots = c(`7.142857%` = 1.5299, `14.28571%`
## = 1.7996, : some 'x' values beyond boundary knots may cause ill-conditioned
## bases
plot(3:16, cv[-c(1, 2)], xlab = "Degrees of freedom", ylab = "Test MSE", type = "l")
library(ISLR)
install.packages("leaps", repos = "https://CRAN.R-project.org/package=leaps")
## Warning: unable to access index for repository https://CRAN.R-project.org/package=leaps/src/contrib:
## cannot open URL 'https://CRAN.R-project.org/package=leaps/src/contrib/PACKAGES'
## Warning: package 'leaps' is not available (for R version 3.6.1)
## Warning: unable to access index for repository https://CRAN.R-project.org/package=leaps/bin/macosx/el-capitan/contrib/3.6:
## cannot open URL 'https://CRAN.R-project.org/package=leaps/bin/macosx/el-capitan/contrib/3.6/PACKAGES'
library(leaps)
train <- sample(1: nrow(College), nrow(College)/2)
test <- -train
fit <- regsubsets(Outstate ~ ., data = College, subset = train, method = 'forward')
fit.summary <- summary(fit)
fit.summary
## Subset selection object
## Call: regsubsets.formula(Outstate ~ ., data = College, subset = train,
## method = "forward")
## 17 Variables (and intercept)
## Forced in Forced out
## PrivateYes FALSE FALSE
## Apps FALSE FALSE
## Accept FALSE FALSE
## Enroll FALSE FALSE
## Top10perc FALSE FALSE
## Top25perc FALSE FALSE
## F.Undergrad FALSE FALSE
## P.Undergrad FALSE FALSE
## Room.Board FALSE FALSE
## Books FALSE FALSE
## Personal FALSE FALSE
## PhD FALSE FALSE
## Terminal FALSE FALSE
## S.F.Ratio FALSE FALSE
## perc.alumni FALSE FALSE
## Expend FALSE FALSE
## Grad.Rate FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: forward
## PrivateYes Apps Accept Enroll Top10perc Top25perc F.Undergrad
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " " " " " " "
## 4 ( 1 ) "*" " " " " " " " " " " " "
## 5 ( 1 ) "*" " " " " " " " " " " " "
## 6 ( 1 ) "*" " " " " " " " " "*" " "
## 7 ( 1 ) "*" " " " " " " " " "*" " "
## 8 ( 1 ) "*" " " "*" " " " " "*" " "
## P.Undergrad Room.Board Books Personal PhD Terminal S.F.Ratio
## 1 ( 1 ) " " " " " " " " " " " " " "
## 2 ( 1 ) " " " " " " " " " " " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " "*" " "
## 5 ( 1 ) " " "*" " " " " " " "*" " "
## 6 ( 1 ) " " "*" " " " " " " "*" " "
## 7 ( 1 ) " " "*" " " " " " " "*" " "
## 8 ( 1 ) " " "*" " " " " " " "*" " "
## perc.alumni Expend Grad.Rate
## 1 ( 1 ) " " "*" " "
## 2 ( 1 ) " " "*" " "
## 3 ( 1 ) " " "*" " "
## 4 ( 1 ) " " "*" " "
## 5 ( 1 ) " " "*" "*"
## 6 ( 1 ) " " "*" "*"
## 7 ( 1 ) "*" "*" "*"
## 8 ( 1 ) "*" "*" "*"
coef(fit, id = 6)
## (Intercept) PrivateYes Top25perc Room.Board Terminal
## -4682.0604917 3364.5887624 22.7444078 0.8485723 49.7396461
## Expend Grad.Rate
## 0.1988311 28.1908514
install.packages("gam", repos = "https://CRAN.R-project.org/package=gam")
## Warning: unable to access index for repository https://CRAN.R-project.org/package=gam/src/contrib:
## cannot open URL 'https://CRAN.R-project.org/package=gam/src/contrib/PACKAGES'
## Warning: package 'gam' is not available (for R version 3.6.1)
## Warning: unable to access index for repository https://CRAN.R-project.org/package=gam/bin/macosx/el-capitan/contrib/3.6:
## cannot open URL 'https://CRAN.R-project.org/package=gam/bin/macosx/el-capitan/contrib/3.6/PACKAGES'
library(gam)
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loaded gam 1.16.1
gam.mod <- gam(Outstate ~ Private + s(Room.Board, 5) + s(Terminal, 5) + s(perc.alumni, 5) + s(Expend, 5) + s(Grad.Rate, 5), data = College, subset = train)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
par(mfrow = c(2,3))
plot(gam.mod, se = TRUE, col = 'blue')
preds <- predict(gam.mod, College[test, ])
RSS <- sum((College[test, ]$Outstate - preds)^2)
TSS <- sum((College[test, ]$Outstate - mean(College[test, ]$Outstate)) ^ 2)
1 - (RSS / TSS)
## [1] 0.7737496
summary(gam.mod)
##
## Call: gam(formula = Outstate ~ Private + s(Room.Board, 5) + s(Terminal,
## 5) + s(perc.alumni, 5) + s(Expend, 5) + s(Grad.Rate, 5),
## data = College, subset = train)
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7544.60 -1071.49 -73.43 1258.11 4302.43
##
## (Dispersion Parameter for gaussian family taken to be 3207027)
##
## Null Deviance: 5856058281 on 387 degrees of freedom
## Residual Deviance: 1157736560 on 360.9999 degrees of freedom
## AIC: 6941.683
##
## Number of Local Scoring Iterations: 2
##
## Anova for Parametric Effects
## Df Sum Sq Mean Sq F value Pr(>F)
## Private 1 1568003917 1568003917 488.928 < 2.2e-16 ***
## s(Room.Board, 5) 1 1129020425 1129020425 352.046 < 2.2e-16 ***
## s(Terminal, 5) 1 411353760 411353760 128.266 < 2.2e-16 ***
## s(perc.alumni, 5) 1 153175654 153175654 47.763 2.188e-11 ***
## s(Expend, 5) 1 571017807 571017807 178.052 < 2.2e-16 ***
## s(Grad.Rate, 5) 1 58834288 58834288 18.345 2.365e-05 ***
## Residuals 361 1157736560 3207027
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Anova for Nonparametric Effects
## Npar Df Npar F Pr(F)
## (Intercept)
## Private
## s(Room.Board, 5) 4 2.8750 0.02288 *
## s(Terminal, 5) 4 1.7835 0.13151
## s(perc.alumni, 5) 4 0.5833 0.67494
## s(Expend, 5) 4 17.3817 4.701e-13 ***
## s(Grad.Rate, 5) 4 1.2876 0.27442
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1