# Load the auto data set. The file is read without a header row, so the
# column names are attached by hand afterwards.
auto <- read.table("./data/auto-mpg.data", header = FALSE)
names(auto) <- c(
  "displacement", "horsepower", "weight", "acceleration", "mpg"
)
# Preview the first rows of the data frame.
head(auto)
## displacement horsepower weight acceleration mpg
## 1 307 130 3504 12.0 18
## 2 350 165 3693 11.5 15
## 3 318 150 3436 11.0 18
## 4 304 150 3433 12.0 16
## 5 302 140 3449 10.5 17
## 6 429 198 4341 10.0 15
In k-fold cross-validation, the original sample is randomly partitioned into k equal-sized subsamples. Of the k subsamples, a single subsample is retained as the validation data for testing the model, and the remaining k − 1 subsamples are used as training data. The cross-validation process is then repeated k times (the folds), with each of the k subsamples used exactly once as the validation data. The k resulting error estimates are then averaged to give the final cross-validation MSE.
# Estimate the 5-fold cross-validation error for polynomial fits of
# degree 1 through 8.
# Fix the RNG seed so the random fold assignment is reproducible.
set.seed(7340)
# Preallocate one slot per polynomial degree instead of growing from NULL.
max.degree <- 8
cv.err5 <- numeric(max.degree)
# NOTE(review): the four predictors are summed *before* poly() is applied,
# so each model is a degree-i polynomial in one combined variable rather
# than a separate polynomial per predictor -- confirm this is intended.
for (i in seq_len(max.degree)) {
  glm.fit <- glm(
    mpg ~ poly(displacement + horsepower + weight + acceleration, i),
    data = auto
  )
  # cv.glm() is namespace-qualified so the script does not depend on the
  # boot package having been attached beforehand. The first element of
  # `delta` is stored as the error estimate for this degree.
  cv.err5[i] <- boot::cv.glm(auto, glm.fit, K = 5)$delta[1]
}
cv.err5
## [1] 18.52731 17.11173 17.04114 17.23299 17.13292 17.07718 17.08503 17.24576
# Plot the cross-validation MSE against polynomial degree.
# The x-axis is derived from the results vector itself rather than from a
# loop index left over in the workspace, so it always matches cv.err5.
degree <- seq_along(cv.err5)
plot(degree, cv.err5, type = "b")
# Leave-one-out cross-validation of the linear model on all four predictors.
# No K is passed to cv.glm(), so its default number of folds is used.
glm.fit.loocv <- glm(
  mpg ~ displacement + horsepower + weight + acceleration,
  data = auto
)
# cv.glm() is namespace-qualified so the script does not depend on the
# boot package having been attached beforehand.
cv.err <- boot::cv.glm(auto, glm.fit.loocv)$delta[1]
# LOOCV estimate of the MSE
cv.err
## [1] 18.33959