A. Best Subset Selection
library(leaps)
## Warning: package 'leaps' was built under R version 3.3.3
regfit.full<-regsubsets(Salary~.,Hitters )
summary(regfit.full)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., Hitters)
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 5 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " " "
## 6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 7 ( 1 ) " " "*" " " " " " " "*" " " "*" "*" "*" " "
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) "*" " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " "
## 4 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 5 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 6 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 7 ( 1 ) " " " " " " "*" "*" " " " " " "
## 8 ( 1 ) " " "*" " " "*" "*" " " " " " "
- regsubsets() function performs best subset selection by identifying the best model that contains given number of charactes.
regfit.full=regsubsets(Salary~.,data=Hitters ,nvmax =19)
reg.summary=summary(regfit.full)
names(reg.summary)
## [1] "which" "rsq" "rss" "adjr2" "cp" "bic" "outmat" "obj"
reg.summary$rsq
## [1] 0.3214501 0.4252237 0.4514294 0.4754067 0.4908036 0.5087146 0.5141227
## [8] 0.5285569 0.5346124 0.5404950 0.5426153 0.5436302 0.5444570 0.5452164
## [15] 0.5454692 0.5457656 0.5459518 0.5460945 0.5461159
- We can conculde best overall model includes such variables.
par(mfrow=c(2,2))
plot(reg.summary$rss,xlab="Number of Variables",ylab="RSS", type="l")
plot(reg.summary$adjr2,xlab="Number of Variables", ylab=" Adjusted RSq",type="l")
which.max(reg.summary$adjr2) # returns 11
## [1] 11
points(11,reg.summary$adjr2[11], col ="red",cex =2, pch =20)

plot(reg.summary$cp,xlab="Number of Variables",ylab="Cp", type='l')
which.min(reg.summary$cp)
## [1] 10
points(10,reg.summary$cp[10],col="red",cex=2,pch=20)

which.min(reg.summary$bic)
## [1] 6
plot(reg.summary$bic,xlab="Number of Variables",ylab="BIC", type='l')
points(6,reg.summary$bic[6],col="red",cex=2,pch=20)

plot(regfit.full,scale="r2")

plot(regfit.full,scale="adjr2")

plot(regfit.full,scale="Cp")

plot(regfit.full,scale="bic")

coef(regfit.full,6)
## (Intercept) AtBat Hits Walks CRBI
## 91.5117981 -1.8685892 7.6043976 3.6976468 0.6430169
## DivisionW PutOuts
## -122.9515338 0.2643076
B. Forward and Backward Stepwise Selection
regfit.fwd<-regsubsets(Salary~., data=Hitters, nvmax=19, method="forward")
summary(regfit.fwd)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., data = Hitters, nvmax = 19, method = "forward")
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 19
## Selection Algorithm: forward
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 5 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " " "
## 6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 7 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 9 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 10 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 11 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 12 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 13 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 14 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" " " " " "*"
## 15 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" "*" " " "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" " " "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) "*" " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " "
## 4 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 5 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 6 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 7 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 8 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 9 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 10 ( 1 ) "*" "*" " " "*" "*" "*" " " " "
## 11 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 12 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 14 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
regfit.bwd<-regsubsets(Salary~., data=Hitters, nvmax=19, method="forward")
summary(regfit.bwd)
## Subset selection object
## Call: regsubsets.formula(Salary ~ ., data = Hitters, nvmax = 19, method = "forward")
## 19 Variables (and intercept)
## Forced in Forced out
## AtBat FALSE FALSE
## Hits FALSE FALSE
## HmRun FALSE FALSE
## Runs FALSE FALSE
## RBI FALSE FALSE
## Walks FALSE FALSE
## Years FALSE FALSE
## CAtBat FALSE FALSE
## CHits FALSE FALSE
## CHmRun FALSE FALSE
## CRuns FALSE FALSE
## CRBI FALSE FALSE
## CWalks FALSE FALSE
## LeagueN FALSE FALSE
## DivisionW FALSE FALSE
## PutOuts FALSE FALSE
## Assists FALSE FALSE
## Errors FALSE FALSE
## NewLeagueN FALSE FALSE
## 1 subsets of each size up to 19
## Selection Algorithm: forward
## AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns
## 1 ( 1 ) " " " " " " " " " " " " " " " " " " " " " "
## 2 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 3 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 4 ( 1 ) " " "*" " " " " " " " " " " " " " " " " " "
## 5 ( 1 ) "*" "*" " " " " " " " " " " " " " " " " " "
## 6 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 7 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " " "
## 8 ( 1 ) "*" "*" " " " " " " "*" " " " " " " " " "*"
## 9 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 10 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 11 ( 1 ) "*" "*" " " " " " " "*" " " "*" " " " " "*"
## 12 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 13 ( 1 ) "*" "*" " " "*" " " "*" " " "*" " " " " "*"
## 14 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" " " " " "*"
## 15 ( 1 ) "*" "*" "*" "*" " " "*" " " "*" "*" " " "*"
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" " " "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" " " "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
## CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
## 1 ( 1 ) "*" " " " " " " " " " " " " " "
## 2 ( 1 ) "*" " " " " " " " " " " " " " "
## 3 ( 1 ) "*" " " " " " " "*" " " " " " "
## 4 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 5 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 6 ( 1 ) "*" " " " " "*" "*" " " " " " "
## 7 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 8 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 9 ( 1 ) "*" "*" " " "*" "*" " " " " " "
## 10 ( 1 ) "*" "*" " " "*" "*" "*" " " " "
## 11 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 12 ( 1 ) "*" "*" "*" "*" "*" "*" " " " "
## 13 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 14 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 15 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 16 ( 1 ) "*" "*" "*" "*" "*" "*" "*" " "
## 17 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 18 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
## 19 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*"
coef(regfit.full,7)
## (Intercept) Hits Walks CAtBat CHits
## 79.4509472 1.2833513 3.2274264 -0.3752350 1.4957073
## CHmRun DivisionW PutOuts
## 1.4420538 -129.9866432 0.2366813
coef(regfit.fwd,7)
## (Intercept) AtBat Hits Walks CRBI
## 109.7873062 -1.9588851 7.4498772 4.9131401 0.8537622
## CWalks DivisionW PutOuts
## -0.3053070 -127.1223928 0.2533404
coef(regfit.bwd,7)
## (Intercept) AtBat Hits Walks CRBI
## 109.7873062 -1.9588851 7.4498772 4.9131401 0.8537622
## CWalks DivisionW PutOuts
## -0.3053070 -127.1223928 0.2533404
C. Choosing Among Models using validation set approach and cross-validation
1) The Validation Set Approach
library(ISLR)
set.seed(1)
train=sample(392, 196)
attach(Auto)
## The following objects are masked from Auto (pos = 4):
##
## acceleration, cylinders, displacement, horsepower, mpg, name,
## origin, weight, year
lm.fit<-lm(mpg~horsepower, subset=train)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
## [1] 26.14142
lm.fit2=lm(mpg~poly(horsepower ,2) ,data=Auto ,subset =train )
mean((mpg-predict(lm.fit2 ,Auto))[-train ]^2)
## [1] 19.82259
lm.fit3=lm(mpg~poly(horsepower ,3) ,data=Auto ,subset =train )
mean((mpg-predict(lm.fit3 ,Auto))[-train ]^2)
## [1] 19.78252
- We split 392 total amount of observations into two chunks(192 observatiosn for each)
set.seed(2)
train<-sample(392,196)
lm.fit<-lm(mpg~horsepower, subset=train)
mean((mpg-predict(lm.fit,Auto))[-train]^2)
## [1] 23.29559
lm.fit2=lm(mpg~poly(horsepower ,2) ,data=Auto ,subset =train )
mean((mpg-predict(lm.fit2 ,Auto))[-train ]^2)
## [1] 18.90124
lm.fit3=lm(mpg~poly(horsepower ,3) ,data=Auto ,subset =train )
mean((mpg-predict(lm.fit3 ,Auto))[-train ]^2)
## [1] 19.2574
- Choosing different set of training data would yield different results.
2) Leave-One-Out Cross- Validation
glm.fit<-glm(mpg~horsepower, data=Auto)
coef(glm.fit)
## (Intercept) horsepower
## 39.9358610 -0.1578447
lm.fit<-lm(mpg~horsepower, data=Auto)
coef(lm.fit)
## (Intercept) horsepower
## 39.9358610 -0.1578447
- glm() function without ‘family=binomial’ is perfectly identical with lm()
- glm() function is compatible with cv.glm() function which performs cross-validation
- cv.glm() is available with ‘boot’ package
library(boot)
glm.fit<-glm(mpg~horsepower, data=Auto)
cv.err<-cv.glm(Auto, glm.fit)
cv.err
## $call
## cv.glm(data = Auto, glmfit = glm.fit)
##
## $K
## [1] 392
##
## $delta
## [1] 24.23151 24.23114
##
## $seed
## [1] 403 196 241038711 -724551010 -982165160
## [6] -1828904773 1219649113 639708648 -249104362 -1532738671
## [11] -205493181 1178813266 334776292 619230759 -943223171
## [16] 845950836 574669658 1135228325 1324937663 -225314234
## [21] -259712816 1901458163 760603665 1517777472 -353409506
## [26] 333918441 1007243803 1823191594 -1096755892 2047555343
## [31] 36174405 -1271550980 -709741998 1365872589 667289991
## [36] -2093876114 2061812808 1112530507 -120106071 -226796808
## [41] 147205670 833570625 791874771 -1221193598 282619572
## [46] -321910217 -1307274963 1192534596 -516456598 -1233040427
## [51] -1928163985 1129724022 1863340960 -338181021 -448311039
## [56] -256091024 -2016258034 -559437703 1778395147 -1105262278
## [61] 166402108 -19181697 426083541 1074993388 1545622082
## [66] 1292039581 1261006871 -194927234 -601294024 -1892334565
## [71] -1536975495 -1797657656 -38799370 1610275377 239717475
## [76] -604261326 -536620668 845797319 1754618525 1214392852
## [81] -1429288774 586749189 -1730887969 587875174 -904748304
## [86] 672121363 -54498383 1480726112 1623135230 2031094665
## [91] -2039817029 -1814243190 -1606497428 -1824984849 -1596875995
## [96] -880638500 1951458226 1469794989 1610621543 -1477541490
## [101] -315013528 -1967721877 -2085860343 156693144 965051846
## [106] -932641503 1197057459 -1633947358 -309015788 -1219595369
## [111] -1463256179 -785873244 -421959542 -1062695179 -336916145
## [116] 999251542 -2076424960 -262791101 -952249375 860711760
## [121] 2101607854 166400857 -1620321941 225362138 1630562716
## [126] -1517587041 -1274951691 265766540 -1931087006 1167170365
## [131] -2085811273 503398494 -65790568 909314299 -1730128743
## [136] 268399912 374625238 -1757951919 892390147 1181977234
## [141] 493131812 -1682069785 -1504339139 -947600460 -1030189030
## [146] 1675588453 678678015 -1797865338 1334660112 -1283674445
## [151] -82739887 -1505405696 1111932894 941079593 1318247387
## [156] -609132310 -295719284 245051855 2080408965 976559676
## [161] 1455584018 875057165 361124679 1824041262 1956921480
## [166] 2124596619 950263145 -417919176 -157584922 -269556351
## [171] 2084387091 677148354 -586485388 1869613431 2130049645
## [176] -1030736636 1083969578 1863962005 1177604655 -313319626
## [181] -1012932768 1623303587 -1291219263 -269605200 -973897266
## [186] 402289977 -327653045 -1236116742 83024892 -1227580353
## [191] 1307051285 2097704492 -281709822 880299997 -911979817
## [196] 1624294846 -472252424 804961243 1277638201 77242248
## [201] -1010671818 347512177 -745572445 -1174651150 79554116
## [206] 920340743 883691741 -303516204 2077157626 1905365061
## [211] 1215936671 -766516698 1646868144 -1075389613 1488991857
## [216] -1584825440 914451262 -1652559287 7775995 -1859905846
## [221] 1414457388 -512053201 528011493 -855590500 1758151666
## [226] 496778861 -785667161 1294308814 -1863417048 488125074
## [231] 1278490912 -2038677044 921281248 404484290 -613405656
## [236] -1412032596 58366652 -322515982 -909986256 1505804196
## [241] -1465512520 -1523693734 -152006512 1641118588 -527245036
## [246] 1018198338 1470021888 -1891758660 -166731440 816011554
## [251] -1071916472 -1884475380 -2141170084 628476370 -304482816
## [256] -209914540 -973247256 1433092346 1704011472 -88847572
## [261] -1111912204 681318802 1950203232 1995049996 -352142112
## [266] -19226014 1800340648 1068629068 -1146415236 1722620210
## [271] 1051349616 1499976516 2096690136 -908721286 -1870100432
## [276] -223252548 1837474388 -791064510 -1980305760 503440060
## [281] 278547408 -799040158 2049662696 -2098938676 1559892476
## [286] 401391090 1461519488 1215711732 2117753480 465652538
## [291] -1788670640 1581624428 -2084002412 107348050 1291615712
## [296] -15092532 1774034592 -1379708542 1626781928 -1372914964
## [301] -1229085380 50352242 2115818096 -1332639196 -1541728200
## [306] 1949935898 -485931824 -1600845508 -836607084 -115148926
## [311] -951219264 -802585028 -1778278000 -800122846 -1441028856
## [316] 1886967948 -2133061860 -1466141550 1088673152 -1591570412
## [321] 1709315880 -265951686 1774973392 992754028 -1771736140
## [326] 741460498 1272719328 -2073695796 1350154464 -360575966
## [331] -1020734680 1430329868 607814460 1439473650 673624816
## [336] -930342332 1721134680 -1136879814 754839536 -2048417092
## [341] 711103764 -1422336830 -635491232 -752929412 1842204624
## [346] -2112134878 -476981592 1306720140 -1199999812 -1488123086
## [351] -1770289600 217404596 1421263944 -1870264902 -247698928
## [356] -1869415188 -97091244 -761236718 -930673248 -915866932
## [361] -1155319200 -2141052990 -546098392 1085725100 282357052
## [366] 2101099378 -730301136 517703460 1361823032 -1078728998
## [371] 1901264784 -1379681540 -2101163116 1778623554 1593718016
## [376] 909891644 2056453456 -1631849310 1674128200 1630019340
## [381] -7569700 630968146 1113634432 -245606316 -1759632408
## [386] 2041191162 -267942576 1396895532 -1593138316 862483346
## [391] -796917152 551139212 -490440864 1238053858 1840294824
## [396] -1928217396 828193276 503416754 -811133456 -2080655932
## [401] -1172813480 2016959866 282688944 356578236 -350773164
## [406] 1927860162 484069408 -897625924 -432780720 -370182174
## [411] 1213849192 -131133876 -2103951108 -1286535182 316466432
## [416] 1723224948 1535381512 1866535354 -937316912 -1812925460
## [421] -1613583468 -679760942 -1050605728 -1433263796 -1674153696
## [426] 2139692930 -1178417432 -1716832020 -449440196 -935341198
## [431] 693573232 1537647652 -499056840 1750759578 1745100496
## [436] 1747000508 -1303573868 611275394 1763614912 665081148
## [441] 1698478224 258487842 1096725000 471317772 497767068
## [446] 845497874 1964198272 1857821204 1164103208 1354232122
## [451] 461208528 1656421996 -439921868 -557509998 -1897257632
## [456] -600040116 -788043687 153732555 1152550460 2143549546
## [461] -1726037345 -1350267095 99627966 1641251500 444559357
## [466] -898730169 -1201080272 -1805233842 2073958779 -188195107
## [471] -1063397622 -2143556056 -371460463 1663524483 326623108
## [476] -1673095758 -1525679353 -951078575 92702934 -1575262796
## [481] -1370611259 -357486321 1650880392 -917750106 -250007981
## [486] 850890677 1246248210 -1337481504 -487523511 1169806715
## [491] -1484193716 -663021094 752178127 534494617 -2029419090
## [496] -978464068 -1231899859 915857623 -1606686432 1166208638
## [501] 1194301515 -943635891 73769082 681193656 242958945
## [506] -1190053869 -1052223948 54140994 19505239 1918653345
## [511] 2074563686 1421040740 -1478502507 2128543935 -1616027560
## [516] 1233806454 -2003129213 -1024410427 -711730718 -1301151664
## [521] -1379204743 642848427 9289244 1925110090 553294143
## [526] 1206542217 107249566 -1927418292 -1263929315 1676601767
## [531] 709094608 -1387123090 -259393253 -1302553987 1696768106
## [536] -1661039352 1636743601 -1212994013 421227300 -2059002926
## [541] 106965031 -1893965199 -89880394 -787140524 1144164581
## [546] 1036324079 1925177384 1886950534 3415731 327819285
## [551] -1308331406 -996761920 -924749015 1136206235 1037579884
## [556] 118159994 1472371631 -1674445127 -800886578 2145069596
## [561] -22077043 1983901687 1351316224 667928926 1819814443
## [566] 815064365 2087254426 1974450008 914243265 -439017101
## [571] 1855908500 103198498 1531525687 -1325183231 -30284282
## [576] -501318588 1454079349 -1147617057 -2050407368 1378542614
## [581] 1350960163 -553557019 600647170 -123696016 912235033
## [586] -2131741685 527537916 -313086166 325968223 -362366743
## [591] -5971970 98743276 -1054574787 -746165497 -1541627152
## [596] 503097998 241034427 1541255325 505019466 -1256266136
## [601] -1532392879 283654467 -1648799676 212181746 1583844039
## [606] 795519121 -933546474 -1945971596 -838709499 203685199
## [611] 181985096 -671140122 -1744881645 -1734942859 -1303097518
## [616] -427317344 -744529271 -1641019717 -1042348788 -1183692646
## [621] -1561760241 707701337 898156270 626447740 -1844980243
## [626] 1367433108
cv.err$delta
## [1] 24.23151 24.23114
- cv.glm(x=data, y=model)
- two vector numbers in ‘delta’ component contains cross-validation results. In above case the numbers correspond to LOOCV statistics(cross-validation estimate).
cv.error<-rep(0,5)
for (i in 1:5){
glm.fit<-glm(mpg~poly(horsepower, i), data=Auto)
cv.error[i]<-cv.glm(Auto, glm.fit)$delta[1]
}
cv.error
## [1] 24.23151 19.24821 19.33498 19.42443 19.03321
- MSE dropped dramatically as we applied squared predictor. However, there seems to be no need to use higher-order polynomials.
3) k-Fold Cross-Validation
set.seed(17)
cv.error.10<-rep(0,10)
for(i in 1:10){
glm.fit<-glm(mpg~poly(horsepower ,i),data=Auto)
cv.error.10[i]=cv.glm(Auto,glm.fit,K=10)$delta[1]
}
cv.error.10
## [1] 24.20520 19.18924 19.30662 19.33799 18.87911 19.02103 18.89609
## [8] 19.71201 18.95140 19.50196
- Function’cv.glm()‘can also be used to conduct k-Fold Cross Validation with argument ’K=i’(with large K). In above case k=10.
- Unlike LOOCV, delta values as a result of 10-Fold Cross Validation is different for each values. The first is the standard k-fold CV estimate. The second is a biascorrected version.