Bias Variance Tradeoff

For you assignment, please create an R-markdown document where you load the auto data set, perform the polynomial fit and then plot the resulting 5 fold cross validation curve. Your output should show the characteristic U-shape illustrating the tradeoff between bias and variance.

#Initialize array for storing the cross validation error
cv.err5 <- array(1:8)

# Fit 1st order polynomial 
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 1), data = autodf)
# Show all the results
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 1), data = autodf)
## 
## Coefficients:
##                   (Intercept)  poly(disp + hp + wt + acc, 1)  
##                         23.45                        -129.08  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  390 Residual
## Null Deviance:       23820 
## Residual Deviance: 7157  AIC: 2257
# Store the cross validation error for plotting
cv.err5[1] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 2nd order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 2), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 2), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 2)1  
##                          23.45                         -129.08  
## poly(disp + hp + wt + acc, 2)2  
##                          24.55  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  389 Residual
## Null Deviance:       23820 
## Residual Deviance: 6554  AIC: 2225
cv.err5[2] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 3rd order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 3), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 3), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 3)1  
##                        23.4459                       -129.0809  
## poly(disp + hp + wt + acc, 3)2  poly(disp + hp + wt + acc, 3)3  
##                        24.5509                         -0.6179  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  388 Residual
## Null Deviance:       23820 
## Residual Deviance: 6554  AIC: 2227
cv.err5[3] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 4th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 4), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 4), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 4)1  
##                        23.4459                       -129.0809  
## poly(disp + hp + wt + acc, 4)2  poly(disp + hp + wt + acc, 4)3  
##                        24.5509                         -0.6179  
## poly(disp + hp + wt + acc, 4)4  
##                        -2.3957  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  387 Residual
## Null Deviance:       23820 
## Residual Deviance: 6548  AIC: 2228
cv.err5[4] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 5th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 5), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 5), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 5)1  
##                        23.4459                       -129.0809  
## poly(disp + hp + wt + acc, 5)2  poly(disp + hp + wt + acc, 5)3  
##                        24.5509                         -0.6179  
## poly(disp + hp + wt + acc, 5)4  poly(disp + hp + wt + acc, 5)5  
##                        -2.3957                          2.8573  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  386 Residual
## Null Deviance:       23820 
## Residual Deviance: 6540  AIC: 2230
cv.err5[5] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 6th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 6), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 6), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 6)1  
##                        23.4459                       -129.0809  
## poly(disp + hp + wt + acc, 6)2  poly(disp + hp + wt + acc, 6)3  
##                        24.5509                         -0.6179  
## poly(disp + hp + wt + acc, 6)4  poly(disp + hp + wt + acc, 6)5  
##                        -2.3957                          2.8573  
## poly(disp + hp + wt + acc, 6)6  
##                        -3.7582  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  385 Residual
## Null Deviance:       23820 
## Residual Deviance: 6526  AIC: 2231
cv.err5[6] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 7th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 7), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 7), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 7)1  
##                        23.4459                       -129.0809  
## poly(disp + hp + wt + acc, 7)2  poly(disp + hp + wt + acc, 7)3  
##                        24.5509                         -0.6179  
## poly(disp + hp + wt + acc, 7)4  poly(disp + hp + wt + acc, 7)5  
##                        -2.3957                          2.8573  
## poly(disp + hp + wt + acc, 7)6  poly(disp + hp + wt + acc, 7)7  
##                        -3.7582                          6.9607  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  384 Residual
## Null Deviance:       23820 
## Residual Deviance: 6477  AIC: 2230
cv.err5[7] <- cv.glm(autodf, glm.fit, K=5)$delta[1]

# Fit 8th order poly
glm.fit <- glm(mpg ~ poly(disp + hp + wt + acc, 8), data = autodf)
glm.fit
## 
## Call:  glm(formula = mpg ~ poly(disp + hp + wt + acc, 8), data = autodf)
## 
## Coefficients:
##                    (Intercept)  poly(disp + hp + wt + acc, 8)1  
##                        23.4459                       -129.0809  
## poly(disp + hp + wt + acc, 8)2  poly(disp + hp + wt + acc, 8)3  
##                        24.5509                         -0.6179  
## poly(disp + hp + wt + acc, 8)4  poly(disp + hp + wt + acc, 8)5  
##                        -2.3957                          2.8573  
## poly(disp + hp + wt + acc, 8)6  poly(disp + hp + wt + acc, 8)7  
##                        -3.7582                          6.9607  
## poly(disp + hp + wt + acc, 8)8  
##                        -1.3778  
## 
## Degrees of Freedom: 391 Total (i.e. Null);  383 Residual
## Null Deviance:       23820 
## Residual Deviance: 6476  AIC: 2232
cv.err5[8] <- cv.glm(autodf, glm.fit, K=5)$delta[1]
# Plot the U-shape bias vs. variance curve
degree <- 1:8
plot(degree, cv.err5, type='b')