# QUESTION 9 ## PART A:
library(MASS)
model <- lm(nox ~ poly(dis,3),data = Boston)
summary(model)
##
## Call:
## lm(formula = nox ~ poly(dis, 3), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.121130 -0.040619 -0.009738 0.023385 0.194904
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.554695 0.002759 201.021 < 2e-16 ***
## poly(dis, 3)1 -2.003096 0.062071 -32.271 < 2e-16 ***
## poly(dis, 3)2 0.856330 0.062071 13.796 < 2e-16 ***
## poly(dis, 3)3 -0.318049 0.062071 -5.124 4.27e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06207 on 502 degrees of freedom
## Multiple R-squared: 0.7148, Adjusted R-squared: 0.7131
## F-statistic: 419.3 on 3 and 502 DF, p-value: < 2.2e-16
dis.grid <- seq(min(Boston$dis), max(Boston$dis), by = 0.1)
pred <- predict(model, list(dis = dis.grid))
plot(nox ~ dis, data = Boston, col = "darkgrey")
lines(dis.grid, pred, col = "red", lwd = 2)
rss <- numeric(10)
for (degree in 1:10){
fit <- lm(nox ~poly(dis, degree), data = Boston)
rss[degree] <- sum(residuals(fit)^2)}
plot(1:10,rss, type = 'b', xlab = "Degree of Polynomial", ylab = "RSS")
library(boot)
set.seed(702)
cv.error <- rep(NA,10)
for (i in 1:10) {
poly.fit <- glm(nox ~ poly(dis, i), data=Boston)
cv.error[i] <- cv.glm(Boston, poly.fit, K=10)$delta[1]
}
plot(1:10, cv.error,xlab='Degree Polynomial',ylab='CV MSE', type= 'b')
points(x=which.min(cv.error), y=min(cv.error), col='red', pch=18)
axis(1, at = seq(1,10,by=1))
The optimal degree of the polynomial is that which minimizes test MSE. From the graph above, a 3rd-degree polynomial minimizes test MSE, therefore it is the optimal model.
library(splines)
fit <- lm(nox ~ bs(dis, df = 4), data = Boston)
summary(fit)
##
## Call:
## lm(formula = nox ~ bs(dis, df = 4), data = Boston)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.124622 -0.039259 -0.008514 0.020850 0.193891
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.73447 0.01460 50.306 < 2e-16 ***
## bs(dis, df = 4)1 -0.05810 0.02186 -2.658 0.00812 **
## bs(dis, df = 4)2 -0.46356 0.02366 -19.596 < 2e-16 ***
## bs(dis, df = 4)3 -0.19979 0.04311 -4.634 4.58e-06 ***
## bs(dis, df = 4)4 -0.38881 0.04551 -8.544 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06195 on 501 degrees of freedom
## Multiple R-squared: 0.7164, Adjusted R-squared: 0.7142
## F-statistic: 316.5 on 4 and 501 DF, p-value: < 2.2e-16
dis.grid <- seq(min(Boston$dis), max(Boston$dis), by = 0.1)
pred <- predict(model, list(dis = dis.grid))
plot(nox ~ dis, data = Boston, col = "darkgrey")
lines(dis.grid, pred, col = "red", lwd = 2)
Since there are 4 degrees of freedom and we are using a cubic polynomial, there will be one spline.
res <- c()
dof_range <- 3:20
for (dof in dof_range) {
fit <- lm(nox ~ bs(dis, df = dof), data = Boston)
res <- c(res, sum(fit$residuals ^ 2))
}
plot(dof_range, res, type = 'l', xlab = 'Degrees of Freedom', ylab = 'RSS')
As expected, as the degrees of freedom increases, the RSS continues to
decrease. There are some odd increases at 9 and 11 degrees of freedom
but overall the RSS decreases rapidly at lower degrees of freedom and
slowly at higher degrees of freedom values.
set.seed(702)
cv.spline.fun <- function(i) {
fit <- glm(nox ~ bs(dis, df=i), data=Boston)
cv.error <- cv.glm(Boston, fit, K=10)$delta[2]
}
cv.err <- sapply(3:20, cv.spline.fun)
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.1296,
## : some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.1296,
## : some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.137, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = numeric(0), Boundary.knots = c(1.137, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = 3.26745, Boundary.knots = c(1.1296, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = 3.26745, Boundary.knots = c(1.1296, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = 3.3633, Boundary.knots = c(1.1691, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = 3.3633, Boundary.knots = c(1.1691, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.38883333333333, 4.3549),
## Boundary.knots = c(1.137, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.38883333333333, 4.3549),
## Boundary.knots = c(1.137, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.38876666666667, 4.3257),
## Boundary.knots = c(1.1296, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.38876666666667, 4.3257),
## Boundary.knots = c(1.1296, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.0941, 3.2157, 5.16495),
## Boundary.knots = c(1.1296, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.0941, 3.2157, 5.16495),
## Boundary.knots = c(1.1296, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.08285, 3.2628, 5.21325),
## Boundary.knots = c(1.137, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(2.08285, 3.2628, 5.21325),
## Boundary.knots = c(1.137, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.9669, 2.5806, 3.7886, 5.5605: some
## 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.9669, 2.5806, 3.7886, 5.5605: some
## 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.8208, 2.354, 3.0923, 4.0999, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.8208, 2.354, 3.0923, 4.0999, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.86223333333333, 2.38883333333333, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.86223333333333, 2.38883333333333, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.77, 2.162, 2.7986, 3.665, 4.7075, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.77, 2.162, 2.7986, 3.665, 4.7075, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.79078571428571, 2.15565714285714, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.79078571428571, 2.15565714285714, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.7519375, 2.100525, 2.4895375, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.7519375, 2.100525, 2.4895375, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.733725, 2.08755, 2.5185, 3.2157, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.733725, 2.08755, 2.5185, 3.2157, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.66397777777778, 1.98665555555556, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.66397777777778, 1.98665555555556, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.66864444444444, 2.00204444444444, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.66864444444444, 2.00204444444444, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.66236, 1.9781, 2.29746, 2.7147, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.66236, 1.9781, 2.29746, 2.7147, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.61242, 1.92404, 2.19752, 2.53734, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.61242, 1.92404, 2.19752, 2.53734, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.59026363636364, 1.8653,
## 2.10717272727273, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.59026363636364, 1.8653,
## 2.10717272727273, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.59026363636364, 1.89532727272727, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.59026363636364, 1.89532727272727, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.58865, 1.83268333333333, 2.0771, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.58865, 1.83268333333333, 2.0771, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5894, 1.8172, 2.0218, 2.2625, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5894, 1.8172, 2.0218, 2.2625, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5523, 1.81097692307692, 2.0447, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5523, 1.81097692307692, 2.0447, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5311, 1.79078571428571,
## 1.98178571428571, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5311, 1.79078571428571,
## 1.98178571428571, : some 'x' values beyond boundary knots may cause
## ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.52647142857143, 1.79078571428571, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.52647142857143, 1.79078571428571, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.52136666666667, 1.75436666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.52136666666667, 1.75436666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.53053333333333, 1.76808666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.53053333333333, 1.76808666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5136, 1.7257, 1.87135, 2.0635, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5136, 1.7257, 1.87135, 2.0635, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5187, 1.748425, 1.910175, 2.1084, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.5187, 1.748425, 1.910175, 2.1084, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.51797647058824, 1.72697058823529, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.51797647058824, 1.72697058823529, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.47217777777778, 1.65225555555556, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.47217777777778, 1.65225555555556, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.51857777777778, 1.72676666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
## Warning in bs(dis, degree = 3L, knots = c(1.51857777777778, 1.72676666666667, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
plot(cv.err, type='b',xlab='Degrees of Freedom', ylab='CV MSE')
The optimal degrees of freedom for a regression spline on this data is where test MSE is minimized. This occurs at 8 degrees of freedom.