Lab 5: Cross-Validation and the Bootstrap

The Validation Set Approach

library(ISLR)
set.seed(1)
train = sample(392,196)
?sample
lm.fit = lm(mpg ~ horsepower, data = Auto, subset = train)
attach(Auto)
mean((mpg-predict(lm.fit, Auto))[-train]^2)
[1] 26.14142
lm.fit2 = lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
mean((mpg-predict(lm.fit2, Auto))[-train]^2)
[1] 19.82259
lm.fit3 = lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)
mean((mpg-predict(lm.fit3, Auto))[-train]^2)
[1] 19.78252
set.seed(2)
train = sample(392,196)
lm.fit = lm(mpg ~ horsepower, subset = train)
mean((mpg-predict(lm.fit, Auto))[-train]^2)
[1] 23.29559
lm.fit2 = lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
mean((mpg-predict(lm.fit2, Auto))[-train]^2)
[1] 18.90124
lm.fit3 = lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)
mean((mpg-predict(lm.fit3, Auto))[-train]^2)
[1] 19.2574

Leave-One-Out Cross-Validation

# ambos funcionan igual
glm.fit = glm(mpg ~ horsepower, data = Auto)
coef(glm.fit)
(Intercept)  horsepower 
 39.9358610  -0.1578447 
lm.fit = lm(mpg ~ horsepower, data = Auto)
coef(lm.fit)
(Intercept)  horsepower 
 39.9358610  -0.1578447 
library(boot)
glm.fit = glm(mpg ~ horsepower, data = Auto)
cv.err = cv.glm(Auto, glm.fit)
cv.err$delta
[1] 24.23151 24.23114
cv.error = rep(0,5)
for (i in 1:5) {
  glm.fit = glm(mpg~poly(horsepower, i), data = Auto)
  cv.error[i] = cv.glm(Auto, glm.fit)$delta[1]
}
cv.error
[1] 24.23151 19.24821 19.33498 19.42443 19.03321

k-Fold Cross-Validation

set.seed(17)
cv.error.10 = rep(0,10)
for (i in 1:10) {
  glm.fit = glm(mpg ~ poly(horsepower, i), data = Auto)
  cv.error.10[i] = cv.glm(Auto, glm.fit, K = 10)$delta[1]
}
cv.error.10
 [1] 24.20520 19.18924 19.30662 19.33799 18.87911
 [6] 19.02103 18.89609 19.71201 18.95140 19.50196

The Bootstrap

Estimating the Accuracy of a Statistic of Interest
alpha.fn = function(data, index) {
  X = data$X[index]
  Y = data$Y[index]
  return((var(Y) - cov(X,Y)) / (var(X) + var(Y) - 2 * cov(X,Y)))
}
alpha.fn(Portfolio, 1:100)
[1] 0.5758321
set.seed(1)
alpha.fn(Portfolio, sample(100, 100, replace = T))
[1] 0.5963833
boot(Portfolio, alpha.fn, R = 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Portfolio, statistic = alpha.fn, R = 1000)


Bootstrap Statistics :
     original        bias    std. error
t1* 0.5758321 -7.315422e-05  0.08861826
Estimating the Accuracy of a Linear Regression Model
boot.fn = function(data, index)
  return(coef(lm(mpg ~ horsepower, data = data, subset = index)))
boot.fn(Auto, 1:392)
(Intercept)  horsepower 
 39.9358610  -0.1578447 
set.seed(1)
boot.fn(Auto, sample(392, 392, replace = T))
(Intercept)  horsepower 
 38.7387134  -0.1481952 
boot.fn(Auto, sample(392, 392, replace = T))
(Intercept)  horsepower 
 40.0383086  -0.1596104 
boot(Auto, boot.fn, 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
      original      bias    std. error
t1* 39.9358610  0.02972191 0.860007896
t2* -0.1578447 -0.00030823 0.007404467
summary(lm(mpg ~ horsepower, data = Auto))$coef
              Estimate  Std. Error   t value
(Intercept) 39.9358610 0.717498656  55.65984
horsepower  -0.1578447 0.006445501 -24.48914
                 Pr(>|t|)
(Intercept) 1.220362e-187
horsepower   7.031989e-81
boot.fn = function(data, index)
  coefficients(lm(mpg ~ horsepower + I(horsepower^2), data = data, subset = index))
set.seed(1)
boot(Auto, boot.fn, 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
        original        bias     std. error
t1* 56.900099702  6.098115e-03 2.0944855842
t2* -0.466189630 -1.777108e-04 0.0334123802
t3*  0.001230536  1.324315e-06 0.0001208339
summary(lm(mpg~horsepower + I(horsepower^2), data = Auto))$coef
                    Estimate   Std. Error
(Intercept)     56.900099702 1.8004268063
horsepower      -0.466189630 0.0311246171
I(horsepower^2)  0.001230536 0.0001220759
                  t value      Pr(>|t|)
(Intercept)      31.60367 1.740911e-109
horsepower      -14.97816  2.289429e-40
I(horsepower^2)  10.08009  2.196340e-21
LS0tDQp0aXRsZTogIkxhYm9yYXRvcmlvIDUiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogIkx1aXMgSmltZW5leiINCi0tLQ0KDQojIyMgTGFiIDU6IENyb3NzLVZhbGlkYXRpb24gYW5kIHRoZSBCb290c3RyYXANCg0KIyMjIyBUaGUgVmFsaWRhdGlvbiBTZXQgQXBwcm9hY2gNCg0KYGBge3J9DQpsaWJyYXJ5KElTTFIpDQpzZXQuc2VlZCgxKQ0KdHJhaW4gPSBzYW1wbGUoMzkyLDE5NikNCiMgcmV2aXNhciBxdWUgaGFjZSBzYW1wbGUNCiMgP3NhbXBsZQ0KYGBgDQoNCmBgYHtyfQ0KbG0uZml0ID0gbG0obXBnIH4gaG9yc2Vwb3dlciwgZGF0YSA9IEF1dG8sIHN1YnNldCA9IHRyYWluKQ0KYGBgDQoNCmBgYHtyfQ0KYXR0YWNoKEF1dG8pDQptZWFuKChtcGctcHJlZGljdChsbS5maXQsIEF1dG8pKVstdHJhaW5dXjIpDQpgYGANCg0KYGBge3J9DQpsbS5maXQyID0gbG0obXBnIH4gcG9seShob3JzZXBvd2VyLCAyKSwgZGF0YSA9IEF1dG8sIHN1YnNldCA9IHRyYWluKQ0KbWVhbigobXBnLXByZWRpY3QobG0uZml0MiwgQXV0bykpWy10cmFpbl1eMikNCmxtLmZpdDMgPSBsbShtcGcgfiBwb2x5KGhvcnNlcG93ZXIsIDMpLCBkYXRhID0gQXV0bywgc3Vic2V0ID0gdHJhaW4pDQptZWFuKChtcGctcHJlZGljdChsbS5maXQzLCBBdXRvKSlbLXRyYWluXV4yKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMikNCnRyYWluID0gc2FtcGxlKDM5MiwxOTYpDQpsbS5maXQgPSBsbShtcGcgfiBob3JzZXBvd2VyLCBzdWJzZXQgPSB0cmFpbikNCm1lYW4oKG1wZy1wcmVkaWN0KGxtLmZpdCwgQXV0bykpWy10cmFpbl1eMikNCmxtLmZpdDIgPSBsbShtcGcgfiBwb2x5KGhvcnNlcG93ZXIsIDIpLCBkYXRhID0gQXV0bywgc3Vic2V0ID0gdHJhaW4pDQptZWFuKChtcGctcHJlZGljdChsbS5maXQyLCBBdXRvKSlbLXRyYWluXV4yKQ0KbG0uZml0MyA9IGxtKG1wZyB+IHBvbHkoaG9yc2Vwb3dlciwgMyksIGRhdGEgPSBBdXRvLCBzdWJzZXQgPSB0cmFpbikNCm1lYW4oKG1wZy1wcmVkaWN0KGxtLmZpdDMsIEF1dG8pKVstdHJhaW5dXjIpDQpgYGANCg0KIyMjIyBMZWF2ZS1PbmUtT3V0IENyb3NzLVZhbGlkYXRpb24NCg0KYGBge3J9DQojIGFtYm9zIGZ1bmNpb25hbiBpZ3VhbA0KZ2xtLmZpdCA9IGdsbShtcGcgfiBob3JzZXBvd2VyLCBkYXRhID0gQXV0bykNCmNvZWYoZ2xtLmZpdCkNCmxtLmZpdCA9IGxtKG1wZyB+IGhvcnNlcG93ZXIsIGRhdGEgPSBBdXRvKQ0KY29lZihsbS5maXQpDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KGJvb3QpDQpnbG0uZml0ID0gZ2xtKG1wZyB+IGhvcnNlcG93ZXIsIGRhdGEgPSBBdXRvKQ0KY3YuZXJyID0gY3YuZ2xtKEF1dG8sIGdsbS5maXQpDQpjdi5lcnIkZGVsdGENCmBgYA0KDQpgYGB7cn0NCmN2LmVycm9yID0gcmVwKDAsNSkNCmZvciAoaSBpbiAxOjUpIHsNCiAgZ2xtLmZpdCA9IGdsbShtcGd+cG9seShob3JzZXBvd2VyLCBpKSwgZGF0YSA9IEF1dG8pDQogIGN2LmVycm9yW2ldID0gY3YuZ2xtKEF1dG8sIGdsbS5maXQpJGRlbHRhWzFdDQp9DQpjdi5lcnJvcg0KYGBgDQoNCiMjIyMgay1Gb2xkIENyb3NzLVZhbGlkYXRpb24NCg0KYGBge3J9DQpzZXQuc2VlZCgxNykNCmN2LmVycm9yLjEwID0gcmVwKDAsMTApDQpmb3IgKGkgaW4gMToxMCkgew0KICBnbG0uZml0ID0gZ2xtKG1wZyB+IHBvbHkoaG9yc2Vwb3dlciwgaSksIGRhdGEgPSBBdXRvKQ0KICBjdi5lcnJvci4xMFtpXSA9IGN2LmdsbShBdXRvLCBnbG0uZml0LCBLID0gMTApJGRlbHRhWzFdDQp9DQpjdi5lcnJvci4xMA0KYGBgDQoNCiMjIyMgVGhlIEJvb3RzdHJhcA0KDQojIyMjIyBFc3RpbWF0aW5nIHRoZSBBY2N1cmFjeSBvZiBhIFN0YXRpc3RpYyBvZiBJbnRlcmVzdA0KDQpgYGB7cn0NCmFscGhhLmZuID0gZnVuY3Rpb24oZGF0YSwgaW5kZXgpIHsNCiAgWCA9IGRhdGEkWFtpbmRleF0NCiAgWSA9IGRhdGEkWVtpbmRleF0NCiAgcmV0dXJuKCh2YXIoWSkgLSBjb3YoWCxZKSkgLyAodmFyKFgpICsgdmFyKFkpIC0gMiAqIGNvdihYLFkpKSkNCn0NCmBgYA0KDQpgYGB7cn0NCmFscGhhLmZuKFBvcnRmb2xpbywgMToxMDApDQpgYGANCg0KYGBge3J9DQpzZXQuc2VlZCgxKQ0KYWxwaGEuZm4oUG9ydGZvbGlvLCBzYW1wbGUoMTAwLCAxMDAsIHJlcGxhY2UgPSBUKSkNCmBgYA0KDQpgYGB7cn0NCmJvb3QoUG9ydGZvbGlvLCBhbHBoYS5mbiwgUiA9IDEwMDApDQpgYGANCg0KIyMjIyMgRXN0aW1hdGluZyB0aGUgQWNjdXJhY3kgb2YgYSBMaW5lYXIgUmVncmVzc2lvbiBNb2RlbA0KDQpgYGB7cn0NCmJvb3QuZm4gPSBmdW5jdGlvbihkYXRhLCBpbmRleCkNCiAgcmV0dXJuKGNvZWYobG0obXBnIH4gaG9yc2Vwb3dlciwgZGF0YSA9IGRhdGEsIHN1YnNldCA9IGluZGV4KSkpDQpib290LmZuKEF1dG8sIDE6MzkyKQ0KYGBgDQoNCmBgYHtyfQ0Kc2V0LnNlZWQoMSkNCmJvb3QuZm4oQXV0bywgc2FtcGxlKDM5MiwgMzkyLCByZXBsYWNlID0gVCkpDQpib290LmZuKEF1dG8sIHNhbXBsZSgzOTIsIDM5MiwgcmVwbGFjZSA9IFQpKQ0KYGBgDQoNCmBgYHtyfQ0KYm9vdChBdXRvLCBib290LmZuLCAxMDAwKQ0KYGBgDQoNCmBgYHtyfQ0Kc3VtbWFyeShsbShtcGcgfiBob3JzZXBvd2VyLCBkYXRhID0gQXV0bykpJGNvZWYNCmBgYA0KDQpgYGB7cn0NCmJvb3QuZm4gPSBmdW5jdGlvbihkYXRhLCBpbmRleCkNCiAgY29lZmZpY2llbnRzKGxtKG1wZyB+IGhvcnNlcG93ZXIgKyBJKGhvcnNlcG93ZXJeMiksIGRhdGEgPSBkYXRhLCBzdWJzZXQgPSBpbmRleCkpDQpzZXQuc2VlZCgxKQ0KYm9vdChBdXRvLCBib290LmZuLCAxMDAwKQ0Kc3VtbWFyeShsbShtcGd+aG9yc2Vwb3dlciArIEkoaG9yc2Vwb3dlcl4yKSwgZGF0YSA9IEF1dG8pKSRjb2VmDQpgYGA=