Cross-Validation and the Bootstrap

The Validation Set Approach

# Validation-set approach: fit a linear model on a random subset of rows and
# score it on the rows that were left out.
library(ISLR)
set.seed(1)
train <- sample(392, 196)  # 196 of the row indices 1..392, without replacement
# The formula operator must be ASCII `~`; the look-alike U+223C does not parse
# in a UTF-8 R session.
lm.fit <- lm(mpg ~ horsepower, data = Auto, subset = train)
# attach() is retained so any statement that refers to Auto's columns by bare
# name keeps working.
attach(Auto)
# Mean squared error over the rows not in `train`.
mean((Auto$mpg - predict(lm.fit, Auto))[-train]^2)
[1] 26.14142
# Quadratic polynomial fit on the training rows, scored on the held-out rows.
# Uses ASCII `~` (U+223C is not an R operator) and names Auto$mpg explicitly.
lm.fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
mean((Auto$mpg - predict(lm.fit2, Auto))[-train]^2)
[1] 19.82259
# Cubic polynomial fit on the training rows, scored on the held-out rows.
# Uses ASCII `~` (U+223C is not an R operator) and names Auto$mpg explicitly.
lm.fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)
mean((Auto$mpg - predict(lm.fit3, Auto))[-train]^2)
[1] 19.78252
# Draw a second random training split (seed 2) and refit the linear model.
set.seed(2)
train <- sample(392, 196)
# data = Auto is passed explicitly rather than depending on an earlier
# attach(Auto); the formula uses ASCII `~`.
lm.fit <- lm(mpg ~ horsepower, data = Auto, subset = train)
# Mean squared error over the rows not in `train`.
mean((Auto$mpg - predict(lm.fit, Auto))[-train]^2)
[1] 23.29559
# Quadratic polynomial fit on the current training rows; held-out MSE.
# Uses ASCII `~` (U+223C is not an R operator) and names Auto$mpg explicitly.
lm.fit2 <- lm(mpg ~ poly(horsepower, 2), data = Auto, subset = train)
mean((Auto$mpg - predict(lm.fit2, Auto))[-train]^2)
[1] 18.90124
# Cubic polynomial fit on the current training rows; held-out MSE.
# Uses ASCII `~` (U+223C is not an R operator) and names Auto$mpg explicitly.
lm.fit3 <- lm(mpg ~ poly(horsepower, 3), data = Auto, subset = train)
mean((Auto$mpg - predict(lm.fit3, Auto))[-train]^2)
[1] 19.2574

Leave-One-Out Cross-Validation

# glm() called without a `family` argument; the printed coefficients are the
# same as those of the lm() fit of the same formula.
# The formula uses ASCII `~` (U+223C does not parse in a UTF-8 session).
glm.fit <- glm(mpg ~ horsepower, data = Auto)
coef(glm.fit)
(Intercept)  horsepower 
 39.9358610  -0.1578447 
# Ordinary least-squares fit of mpg on horsepower over all rows of Auto.
# The formula uses ASCII `~` (U+223C does not parse in a UTF-8 session).
lm.fit <- lm(mpg ~ horsepower, data = Auto)
coef(lm.fit)
(Intercept)  horsepower 
 39.9358610  -0.1578447 
library(boot)
# The formula uses ASCII `~` (U+223C does not parse in a UTF-8 session).
glm.fit <- glm(mpg ~ horsepower, data = Auto)
# cv.glm() is called without K, i.e. with its default number of folds.
cv.err <- cv.glm(Auto, glm.fit)
# delta holds the cross-validation estimates of prediction error.
cv.err$delta
[1] 24.23151 24.23114
# Cross-validation error (default K) for polynomial fits of degree 1 to 5.
cv.error <- rep(0, 5)
for (i in seq_along(cv.error)) {
  # ASCII `~` is required; U+223C does not parse in a UTF-8 session.
  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  # Keep the first component of delta for this degree.
  cv.error[i] <- cv.glm(Auto, glm.fit)$delta[1]
}
cv.error
[1] 24.23151 19.24821 19.33498 19.42443 19.03321

k-Fold Cross-Validation

# 10-fold cross-validation error for polynomial fits of degree 1 to 10.
# The seed is fixed because the fold assignment is random.
set.seed(17)
cv.error.10 <- rep(0, 10)
for (i in seq_along(cv.error.10)) {
  # ASCII `~` is required; U+223C does not parse in a UTF-8 session.
  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)
  # Keep the first component of delta for this degree.
  cv.error.10[i] <- cv.glm(Auto, glm.fit, K = 10)$delta[1]
}
cv.error.10
 [1] 24.20520 19.18924 19.30662 19.33799 18.87911 19.02103 18.89609 19.71201 18.95140 19.50196

The Bootstrap

Estimating the Accuracy of a Statistic of Interest

# Statistic for boot(): computed from the X and Y columns of `data`,
# restricted to the observations selected by `index`.
#   data  - object with components X and Y
#   index - positions of the observations to use
# Returns (var(Y) - cov(X, Y)) / (var(X) + var(Y) - 2 * cov(X, Y)).
alpha.fn <- function(data, index) {
  x_sel <- data$X[index]
  y_sel <- data$Y[index]
  var_x <- var(x_sel)
  var_y <- var(y_sel)
  cov_xy <- cov(x_sel, y_sel)
  (var_y - cov_xy) / (var_x + var_y - 2 * cov_xy)
}
# Evaluate alpha.fn on observations 1 through 100 of Portfolio.
alpha.fn(Portfolio, seq_len(100))
[1] 0.5758321
# One manual bootstrap replicate: 100 indices drawn from 1..100 with
# replacement. TRUE is spelled out because T is an ordinary, reassignable
# variable.
set.seed(1)
alpha.fn(Portfolio, sample(100, 100, replace = TRUE))
[1] 0.5963833
# Bootstrap alpha.fn over Portfolio with 1000 replicates.
boot(data = Portfolio, statistic = alpha.fn, R = 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Portfolio, statistic = alpha.fn, R = 1000)


Bootstrap Statistics :
     original        bias    std. error
t1* 0.5758321 -7.315422e-05  0.08861826

Estimating the Accuracy of a Linear Regression Model

# Statistic for boot(): coefficients of the linear regression of mpg on
# horsepower, fitted to the rows of `data` selected by `index`.
#   data  - data frame with columns mpg and horsepower
#   index - row selector handed to lm() as `subset`
# Returns the named coefficient vector of the fit.
boot.fn <- function(data, index) {
  # ASCII `~` is required; the look-alike U+223C does not parse in a UTF-8
  # R session.
  coef(lm(mpg ~ horsepower, data = data, subset = index))
}
# Evaluate boot.fn on rows 1 through 392 of Auto.
boot.fn(Auto, seq_len(392))
(Intercept)  horsepower 
 39.9358610  -0.1578447 
# One manual bootstrap replicate: 392 row indices drawn with replacement.
# TRUE is spelled out because T is an ordinary, reassignable variable.
set.seed(1)
boot.fn(Auto, sample(392, 392, replace = TRUE))
(Intercept)  horsepower 
 38.7387134  -0.1481952 
# Another manual bootstrap replicate, continuing the current random stream.
# TRUE is spelled out because T is an ordinary, reassignable variable.
boot.fn(Auto, sample(392, 392, replace = TRUE))
(Intercept)  horsepower 
 40.0383086  -0.1596104 
# Bootstrap boot.fn over Auto with 1000 replicates.
boot(data = Auto, statistic = boot.fn, R = 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
      original      bias    std. error
t1* 39.9358610  0.02972191 0.860007896
t2* -0.1578447 -0.00030823 0.007404467
# Coefficient table of the linear fit, for comparison with the bootstrap
# output. Uses ASCII `~` and the full component name instead of the
# partially matched `$coef`.
summary(lm(mpg ~ horsepower, data = Auto))$coefficients
              Estimate  Std. Error   t value      Pr(>|t|)
(Intercept) 39.9358610 0.717498656  55.65984 1.220362e-187
horsepower  -0.1578447 0.006445501 -24.48914  7.031989e-81
# Statistic for boot(): coefficients of the regression of mpg on horsepower
# and horsepower^2, fitted to the rows of `data` selected by `index`.
#   data  - data frame with columns mpg and horsepower
#   index - row selector handed to lm() as `subset`
# Returns the named coefficient vector of the fit.
boot.fn <- function(data, index) {
  # ASCII `~` is required; the look-alike U+223C does not parse in a UTF-8
  # R session. I() keeps `^` arithmetic inside the formula.
  coefficients(
    lm(mpg ~ horsepower + I(horsepower^2), data = data, subset = index)
  )
}
# Seeded bootstrap of boot.fn over Auto with 1000 replicates.
set.seed(1)
boot(data = Auto, statistic = boot.fn, R = 1000)

ORDINARY NONPARAMETRIC BOOTSTRAP


Call:
boot(data = Auto, statistic = boot.fn, R = 1000)


Bootstrap Statistics :
        original        bias     std. error
t1* 56.900099702  6.098115e-03 2.0944855842
t2* -0.466189630 -1.777108e-04 0.0334123802
t3*  0.001230536  1.324315e-06 0.0001208339
# Coefficient table of the quadratic fit, for comparison with the bootstrap
# output. Uses ASCII `~` and the full component name instead of the
# partially matched `$coef`.
summary(lm(mpg ~ horsepower + I(horsepower^2), data = Auto))$coefficients
                    Estimate   Std. Error   t value      Pr(>|t|)
(Intercept)     56.900099702 1.8004268063  31.60367 1.740911e-109
horsepower      -0.466189630 0.0311246171 -14.97816  2.289429e-40
I(horsepower^2)  0.001230536 0.0001220759  10.08009  2.196340e-21
LS0tDQp0aXRsZTogIkhvamEgZGUgVHJhYmFqbyAjIDUgLSBPc2NhciBQYWRpbGxhIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMjIENyb3NzLVZhbGlkYXRpb24gYW5kIHRoZSBCb290c3RyYXANCiMjIyMgVGhlIFZhbGlkYXRpb24gU2V0IEFwcHJvYWNoDQoNCmBgYHtyfQ0KbGlicmFyeSAoSVNMUikNCnNldC5zZWVkICgxKQ0KdHJhaW49c2FtcGxlICgzOTIgLDE5NikNCmxtLmZpdCA9bG0obXBn4oi8aG9yc2Vwb3dlciAsZGF0YT1BdXRvICxzdWJzZXQgPXRyYWluICkNCmF0dGFjaCAoQXV0bykNCm1lYW4oKG1wZyAtcHJlZGljdCAobG0uZml0ICxBdXRvKSlbLXRyYWluIF1eMikNCmxtLmZpdDI9bG0obXBn4oi8cG9seShob3JzZXBvd2VyICwyKSAsZGF0YT1BdXRvICxzdWJzZXQgPXRyYWluICkNCm1lYW4oKG1wZyAtcHJlZGljdCAobG0uZml0MiAsQXV0bykpWy10cmFpbiBdXjIpDQpsbS5maXQzPWxtKG1wZ+KIvHBvbHkoaG9yc2Vwb3dlciAsMykgLGRhdGE9QXV0byAsc3Vic2V0ID10cmFpbiApDQptZWFuKChtcGcgLXByZWRpY3QgKGxtLmZpdDMgLEF1dG8pKVstdHJhaW4gXV4yKQ0Kc2V0LnNlZWQgKDIpDQp0cmFpbj1zYW1wbGUgKDM5MiAsMTk2KQ0KbG0uZml0ID1sbShtcGfiiLxob3JzZXBvd2VyICxzdWJzZXQgPXRyYWluKQ0KbWVhbigobXBnIC1wcmVkaWN0IChsbS5maXQgLEF1dG8pKVstdHJhaW4gXV4yKQ0KbG0uZml0Mj1sbShtcGfiiLxwb2x5KGhvcnNlcG93ZXIgLDIpICxkYXRhPUF1dG8gLHN1YnNldCA9dHJhaW4gKQ0KbWVhbigobXBnIC1wcmVkaWN0IChsbS5maXQyICxBdXRvKSlbLXRyYWluIF1eMikNCmxtLmZpdDM9bG0obXBn4oi8cG9seShob3JzZXBvd2VyICwzKSAsZGF0YT1BdXRvICxzdWJzZXQgPXRyYWluICkNCm1lYW4oKG1wZyAtcHJlZGljdCAobG0uZml0MyAsQXV0bykpWy10cmFpbiBdXjIpDQpgYGANCg0KIyMjIyBMZWF2ZS1PbmUtT3V0IENyb3NzLVZhbGlkYXRpb24NCg0KYGBge3J9DQpnbG0uZml0PWdsbShtcGfiiLxob3JzZXBvd2VyICxkYXRhPUF1dG8pDQpjb2VmKGdsbS5maXQpDQpsbS5maXQgPWxtKG1wZ+KIvGhvcnNlcG93ZXIgLGRhdGE9QXV0bykNCmNvZWYobG0uZml0KQ0KbGlicmFyeSAoYm9vdCkNCmdsbS5maXQ9Z2xtKG1wZ+KIvGhvcnNlcG93ZXIgLGRhdGE9QXV0bykNCmN2LmVyciA9Y3YuZ2xtKEF1dG8gLGdsbS5maXQpDQpjdi5lcnIkZGVsdGENCmN2LmVycm9yPXJlcCAoMCw1KQ0KZm9yIChpIGluIDE6NSl7DQogZ2xtLmZpdD1nbG0obXBn4oi8cG9seShob3JzZXBvd2VyICxpKSxkYXRhPUF1dG8pDQogY3YuZXJyb3JbaV09Y3YuZ2xtIChBdXRvICxnbG0uZml0KSRkZWx0YSBbMV0NCn0NCmN2LmVycm9yDQpgYGANCiMjIyMgay1Gb2xkIENyb3NzLVZhbGlkYXRpb24NCg0KYGBge3J9DQpzZXQuc2VlZCAoMTcpDQpjdi5lcnJvci4xMD0gcmVwICgwICwxMCkNCmZvciAoaSBpbiAxOjEwKSB7DQogIGdsbS5maXQ9Z2xtKG1wZ+KIvHBvbHkoaG9yc2Vwb3dlciAsaSksZGF0YT1B
dXRvKQ0KICBjdi5lcnJvci4xMFtpXT1jdi5nbG0gKEF1dG8gLGdsbS5maXQgLEs9MTApICRkZWx0YSBbMV0NCn0NCmN2LmVycm9yLjEwDQpgYGANCg0KIyMjIyBUaGUgQm9vdHN0cmFwDQojIyMjIEVzdGltYXRpbmcgdGhlIEFjY3VyYWN5IG9mIGEgU3RhdGlzdGljIG9mIEludGVyZXN0DQpgYGB7cn0NCmFscGhhLmZuPWZ1bmN0aW9uIChkYXRhICxpbmRleCl7DQogIFg9ZGF0YSRYIFtpbmRleF0NCiAgWT1kYXRhJFkgW2luZGV4XQ0KICByZXR1cm4gKCh2YXIoWSktY292IChYLFkpKS8odmFyKFgpK3ZhcihZKSAtMiogY292KFgsWSkpKQ0KfQ0KYWxwaGEuZm4oUG9ydGZvbGlvICwxOjEwMCkNCnNldC5zZWVkICgxKQ0KYWxwaGEuZm4oUG9ydGZvbGlvICxzYW1wbGUgKDEwMCAsMTAwICwgcmVwbGFjZSA9VCkpDQpib290KFBvcnRmb2xpbyAsYWxwaGEuZm4sUj0xMDAwKQ0KYGBgDQoNCiMjIyMgRXN0aW1hdGluZyB0aGUgQWNjdXJhY3kgb2YgYSBMaW5lYXIgUmVncmVzc2lvbiBNb2RlbA0KDQpgYGB7cn0NCmJvb3QuZm49ZnVuY3Rpb24gKGRhdGEgLGluZGV4ICkNCiByZXR1cm4gKGNvZWYobG0obXBn4oi8aG9yc2Vwb3dlciAsZGF0YT1kYXRhICxzdWJzZXQgPWluZGV4KSkpDQpib290LmZuKEF1dG8gLDE6MzkyKQ0Kc2V0LnNlZWQgKDEpDQpib290LmZuKEF1dG8gLHNhbXBsZSAoMzkyICwzOTIgLCByZXBsYWNlID1UKSkNCmJvb3QuZm4oQXV0byAsc2FtcGxlICgzOTIgLDM5MiAsIHJlcGxhY2UgPVQpKQ0KYm9vdChBdXRvICxib290LmZuICwxMDAwKQ0Kc3VtbWFyeSAobG0obXBn4oi8aG9yc2Vwb3dlciAsZGF0YT1BdXRvKSkkY29lZg0KYm9vdC5mbj1mdW5jdGlvbiAoZGF0YSAsaW5kZXggKQ0KICBjb2VmZmljaWVudHMobG0obXBn4oi8aG9yc2Vwb3dlciArSSggaG9yc2Vwb3dlciBeMikgLGRhdGE9ZGF0YSAsc3Vic2V0ID1pbmRleCkpDQpzZXQuc2VlZCAoMSkNCmJvb3QoQXV0byAsYm9vdC5mbiAsMTAwMCkNCnN1bW1hcnkgKGxtKG1wZ+KIvGhvcnNlcG93ZXIgK0koaG9yc2Vwb3dlciBeMikgLGRhdGE9QXV0bykpJGNvZWYNCmBgYA0KDQoNCg==