For you assignment, please create an R-markdown document where you load the auto data set, perform the polynomial fit and then plot the resulting 5 fold cross validation curve. Your output should show the characteristic U-shape illustrating the tradeoff between bias and variance.
#Initialize array for storing the cross validation error
cv.err5 <- array(1:8)
# Fit 1st order polynomial
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 1), data = autodf)
# Show all the results
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 1), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 1)
## 23.45 -129.08
##
## Degrees of Freedom: 391 Total (i.e. Null); 390 Residual
## Null Deviance: 23820
## Residual Deviance: 7157 AIC: 2257
# Store the cross validation error for plotting
cv.err5[1] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 2nd order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 2), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 2), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 2)1
## 23.45 -129.08
## poly(disp + hp + wt + acc, 2)2
## 24.55
##
## Degrees of Freedom: 391 Total (i.e. Null); 389 Residual
## Null Deviance: 23820
## Residual Deviance: 6554 AIC: 2225
cv.err5[2] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 3rd order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 3), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 3), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 3)1
## 23.4459 -129.0809
## poly(disp + hp + wt + acc, 3)2 poly(disp + hp + wt + acc, 3)3
## 24.5509 -0.6179
##
## Degrees of Freedom: 391 Total (i.e. Null); 388 Residual
## Null Deviance: 23820
## Residual Deviance: 6554 AIC: 2227
cv.err5[3] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 4th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 4), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 4), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 4)1
## 23.4459 -129.0809
## poly(disp + hp + wt + acc, 4)2 poly(disp + hp + wt + acc, 4)3
## 24.5509 -0.6179
## poly(disp + hp + wt + acc, 4)4
## -2.3957
##
## Degrees of Freedom: 391 Total (i.e. Null); 387 Residual
## Null Deviance: 23820
## Residual Deviance: 6548 AIC: 2228
cv.err5[4] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 5th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 5), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 5), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 5)1
## 23.4459 -129.0809
## poly(disp + hp + wt + acc, 5)2 poly(disp + hp + wt + acc, 5)3
## 24.5509 -0.6179
## poly(disp + hp + wt + acc, 5)4 poly(disp + hp + wt + acc, 5)5
## -2.3957 2.8573
##
## Degrees of Freedom: 391 Total (i.e. Null); 386 Residual
## Null Deviance: 23820
## Residual Deviance: 6540 AIC: 2230
cv.err5[5] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 6th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 6), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 6), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 6)1
## 23.4459 -129.0809
## poly(disp + hp + wt + acc, 6)2 poly(disp + hp + wt + acc, 6)3
## 24.5509 -0.6179
## poly(disp + hp + wt + acc, 6)4 poly(disp + hp + wt + acc, 6)5
## -2.3957 2.8573
## poly(disp + hp + wt + acc, 6)6
## -3.7582
##
## Degrees of Freedom: 391 Total (i.e. Null); 385 Residual
## Null Deviance: 23820
## Residual Deviance: 6526 AIC: 2231
cv.err5[6] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 7th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 7), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 7), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 7)1
## 23.4459 -129.0809
## poly(disp + hp + wt + acc, 7)2 poly(disp + hp + wt + acc, 7)3
## 24.5509 -0.6179
## poly(disp + hp + wt + acc, 7)4 poly(disp + hp + wt + acc, 7)5
## -2.3957 2.8573
## poly(disp + hp + wt + acc, 7)6 poly(disp + hp + wt + acc, 7)7
## -3.7582 6.9607
##
## Degrees of Freedom: 391 Total (i.e. Null); 384 Residual
## Null Deviance: 23820
## Residual Deviance: 6477 AIC: 2230
cv.err5[7] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Fit 8th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 8), data = autodf)
glm.fit##
## Call: glm(formula = mpg ~ poly(disp + hp + wt + acc, 8), data = autodf)
##
## Coefficients:
## (Intercept) poly(disp + hp + wt + acc, 8)1
## 23.4459 -129.0809
## poly(disp + hp + wt + acc, 8)2 poly(disp + hp + wt + acc, 8)3
## 24.5509 -0.6179
## poly(disp + hp + wt + acc, 8)4 poly(disp + hp + wt + acc, 8)5
## -2.3957 2.8573
## poly(disp + hp + wt + acc, 8)6 poly(disp + hp + wt + acc, 8)7
## -3.7582 6.9607
## poly(disp + hp + wt + acc, 8)8
## -1.3778
##
## Degrees of Freedom: 391 Total (i.e. Null); 383 Residual
## Null Deviance: 23820
## Residual Deviance: 6476 AIC: 2232
cv.err5[8] <- cv.glm(autodf, glm.fit, K=5)$delta[1]# Plot the U-shape bias vs. variance curve
degree <- 1:8
plot(degree, cv.err5, type='b')