Cross Validation

Author : Arvind Saini

Data

cat("\014")

library(ISLR)
library(boot)
head(Auto)   #data used
##   mpg cylinders displacement horsepower weight acceleration year origin
## 1  18         8          307        130   3504         12.0   70      1
## 2  15         8          350        165   3693         11.5   70      1
## 3  18         8          318        150   3436         11.0   70      1
## 4  16         8          304        150   3433         12.0   70      1
## 5  17         8          302        140   3449         10.5   70      1
## 6  15         8          429        198   4341         10.0   70      1
##                        name
## 1 chevrolet chevelle malibu
## 2         buick skylark 320
## 3        plymouth satellite
## 4             amc rebel sst
## 5               ford torino
## 6          ford galaxie 500
?Auto #information about data
## starting httpd help server ... done

Leave one out cross validation (LOOCV) and 10 fold cross validation

glm.fit=glm(mpg~horsepower,data=Auto) #linear model
cv.glm(Auto,glm.fit) #left one observation and fits the data
## $call
## cv.glm(data = Auto, glmfit = glm.fit)
## 
## $K
## [1] 392
## 
## $delta
## [1] 24.23151 24.23114
## 
## $seed
##   [1]         403          10   -26898344 -1857319493 -1105222823
##   [6] -1698836248   992997654 -2015718767  1663622979   787528274
##  [11] -1552626460 -1435379929  -200649859   118761076  1340086362
##  [16] -1667506011 -1042431297  1542265158  -467783728 -1080120845
##  [21]  -315865839 -2138513088  2129547550   605611497  1554900763
##  [26]  2118586666  1476037196   126178831  2070133061 -1743215876
##  [31]   200038738  1773498061  -644069753  1005015406   538225480
##  [36]  -193401013 -1544505687 -2130736136 -1534282970  -684657599
##  [41]  1736381907 -1768941694 -1099605068  -921914569  2057982509
##  [46]  -283225276   627849322  -736042283  1420139631   -59861642
##  [51] -1482285408  -210330781  -684298239  1905957744   648925966
##  [56]   418819449 -1671263989  2076604986  1427648828   436444799
##  [61] -1633155627  -526156820  1928363330  -685243747  -448549097
##  [66] -1181430146  1755639352 -1297491685 -2075492743   781335240
##  [71]   762473206 -2002525391  -466406557  1761872178  -477595004
##  [76] -1561995577  1402150301   329709844 -1616177222   -31211003
##  [81]   -79096865  -207361434   721479152   451388179   727125169
##  [86]   993365856  1664814334  -961180535 -1256150085  1629488522
##  [91] -1922235284 -1954636305  -751397339  1627304668   741566642
##  [96]   489237421 -1464635545   241908878  -345589400  1111812459
## [101]  -404034807  -737002088  1560554694   228177441 -2092198221
## [106]  -712983518   957531668    73775767   316961933 -2128938588
## [111]   752739210  1760972789  1643593295  1387022678  -875367424
## [116] -1922728637   740290785  -512193968 -1782008146   311624281
## [121]  1122511467 -2106482726  1922883740  -933719393  -286899979
## [126]   950946188   657712226 -1678040003 -1141815625  -481204898
## [131] -2024395624  1519351803  -686867047   551501352   114738902
## [136]  -400813231     4929027  2129033618   -39942364   729475559
## [141]  1825064509 -1561956172 -1022957798   393399397  1825621247
## [146]  1731831174 -1615832304  1264000947   -46029743   660916224
## [151]  1388271838    89592617  1622085339   734764522  2120770956
## [156]  1567341263  -315423099 -1194449604   107079698   106743565
## [161] -1497357241   722142766 -1360072824 -1194349941 -2057356183
## [166]  -865951688 -2138216218  2052558977   977741843  -840679486
## [171]  -516175756  -537223561   489684845  -705658876   -86279382
## [176]   451076245  -849387729  2087577142  1031619168  1881981091
## [181]  -968174655 -1948639312  1808689870  -896837575 -1182179765
## [186]   625697786  1937084156   747946815 -1786999787  1763469100
## [191]  -919647230  1689808605  -478462505 -1526490946  1316866808
## [196]   229134555 -1986702535 -1806133624 -2003901898  1839184497
## [201] -1031749469  1073121778   954624324  -844316153   -46247971
## [206]  1160911060 -1648891910 -1261658299  1734986655   240812326
## [211]  1240920496  1810934355    29627761   930739360  1031492670
## [216]   359144265  -164804101 -2010763830  1928618796  -518780113
## [221] -1654992411 -1006183268 -2006779662  1668264301  -687341913
## [226] -1904332594  -988258776 -1016912341 -1874023351  -361964596
## [231]   127874272  -684950846   966279208  2076976044  1141838524
## [236]   212327922 -1398723024  -225884252  1612948920  -373225638
## [241]  -584857456   539157372  1687771924 -2100068030 -1388780800
## [246]    -7985220  2131632976  -646451934  1688409672   961783820
## [251]  -322434980  2026262994   -31641088   459536724  -899906328
## [256]  1417854202   187391696  1253319980 -1106130700 -1975333486
## [261]  -474640032  1038929932  1961981664 -2029180318    86042280
## [266] -2011134388   876569468   467461426   486340208  -824931516
## [271]  1780777944   135516026   518097456  1833233340  1650578004
## [276]   219841602   252412064 -1751237956  1889041872  -998946974
## [281]  -426489112   167367884  -208728580  -287589902 -1810950528
## [286]   416028148   -24333688 -1710842054    20250448   326761580
## [291]   435778964  -719141806  2066094560   979397836  1867917472
## [296]  -838891134 -1949247256  1304992492   844547388  -306199438
## [301] -1250816912  -389704668   181096504   300573466  -275749680
## [306]  -140831940    69148564 -1130489982  1550806464   577499708
## [311] -1859356272   912806434  -534639352   410870412  1724450588
## [316]  -784308078 -1230575232 -1293833708   613278504  -872063430
## [321]  -599141936  1610790252 -1372492876 -1809708014  -762504736
## [326]  -666903604 -1871312672 -1564344286  1845103912  1740123660
## [331] -2081588420 -1650621454  1766612720   830415428 -1517621672
## [336]   783450938   738058736   131690172 -1963447020   244659906
## [341]    -2492320 -1462888580   759610320  1594092834  1085900456
## [346]  -511075444 -1626968900   637945650    11352640  1370747060
## [351]  1912199752  -279261766  -438316528  1078884076   276694356
## [356] -1500826350  2093689248  -235587892 -1597923744 -1137882174
## [361]  -115217624   680373164  1076592444   188775794  -775950544
## [366]  -101501660   -40643784  1498828506    84482960 -2100000004
## [371]  -595143788   347922498 -1358622464 -1333142468 -1773808304
## [376]  -421211998 -2129820344   709827340  -738829604    88154450
## [381]  -714332032   240069716 -1670573080  -106435334   341946192
## [386]  1777956652  -471868044 -1729577070 -2143277984  -412924532
## [391]   600851296 -1338923550  1434588072 -2050467636  -816016388
## [396]  1493152178   330153456  -259874876   880609624   884297082
## [401]   318437296  -920724036  -424127404 -1671559230 -1377802208
## [406]  1758478524  -827849136  -974276638  1133598824  1446107212
## [411]  1205298428  -412788750  -176620288  1620366708 -1925693432
## [416] -1284222022 -1204628016    -8368148 -1929910892 -1394810414
## [421]   395794784    24697676 -1911967456  -900987006  1044479720
## [426]  -676615956 -1810692036 -2098643086 -1873916304     5946404
## [431]  1338560312 -2069447526 -1960394032 -2084712260  1736900244
## [436]  1245998722  -211910464  1033217852 -1484427120  1009384994
## [441]  -105190392  1935682828  -432456548 -1151421934  1958351744
## [446]    54339092   603258408 -1220625094   635432400 -1399965588
## [451]  -308591308 -1643918702   877657952  -621998260  1200871392
## [456]  -865989726  1688866108   909674858   190317471  2100510249
## [461] -1837211458  1944795564  -682023171 -1069161913   898707760
## [466]  1408219726  1220715131 -1679900195 -1035204598  2077350184
## [471]   640837521 -1367479933   215713412   920761522  1997161479
## [476]  1370558545  -812892202  -813494604 -1197953339 -2047959025
## [481] -1326518136 -1306439258  -127088301  1818426037 -2079698926
## [486] -1252285984   675959881  -913374597  -107001012  2089418458
## [491]   833589967  1395258009  1045733038   933510076 -1653373907
## [496]   741788631  1130881568  1410589566  -738349749  1083838285
## [501] -1067293318  1110005688  -622981791  2104705299    11188532
## [506]   414217026   572449623 -1178180959 -1175481498 -2074235548
## [511]  1907972245 -1894950465  -807793320 -1157434506  1833734531
## [516]   749786053 -1614739230  1275669840   311917689    82775979
## [521] -1021610724  1779794506  1160807999  -354542967 -1345208674
## [526]   874027852   290077469 -1377351001  2067038672   381462894
## [531] -1779379173 -1775927939   747895658  1934654984   297054897
## [536]  1278901539 -1916907996  -463690542  -859871961   -69046415
## [541] -1577964106   302323028  2129171429 -1686483473   965825832
## [546]   315090310  1463927219 -1191830251  -825491598  -723366976
## [551]   956324905 -2043991909  1780790636 -1883034246  1197170351
## [556]   900892089  -685430834 -1551612132   216528525  1960687351
## [561]  -690898432 -1506605986  -512587989   256386093 -1196854118
## [566]  -793692072 -1065652287  -126091149  1951742356  2033236002
## [571] -1565708489  2136027649  1269787398 -1968883900 -2072327563
## [576]  1080273887   -53088456  2066020630   661480739 -1177069851
## [581] -1077858558   983614832   465974553   978767115  1565150716
## [586] -2051447254  1004629599  -849113623 -1365242114  1752993516
## [591]  -439550403 -1900818937  2070340080 -1027939954 -1642584133
## [596]   880841117   720310090  -374901912 -1348278959  -969775549
## [601]  1614449476 -1415161870  1621079495 -1504011375    -9825514
## [606] -1741543564 -1205094907 -1823982001 -2120590776  -987853850
## [611]  2029528339  2081159797  -395327406  -283055456  1474918281
## [616]  -650378309   416318476   434856346  1814208783  1711951705
## [621]   286999022  -438637444 -1604675347   836915095   305231968
## [626]   189776244
loocv=function(fit){
  h=lm.influence(fit)$h
  mean((residuals(fit)/(1-h^2)))
}

#loocv 

loocv(glm.fit)
## [1] 0.0001626385
cv.error = rep(0,10)
degree = 1:10
for(d in degree){
  glm.fit=glm(mpg~poly(horsepower,d),data=Auto)
  cv.error[d]=loocv(glm.fit)
}
plot(degree,cv.error,type='b',xlab="Degree of fitted model",ylab="Cross validation Error",col="blue",ylim=c(0,24))
p=which.min(cv.error)
points(p,cv.error[p],pch=20,col="green")

#10 fold cross validation

cv.error10=rep(0,10)
for( d in degree){
  glm.fit=glm(mpg~poly(horsepower,d),data=Auto)
  cv.error10[d]=cv.glm(Auto,glm.fit,K=10)$delta[1]
}
lines(degree,cv.error10,type="b",col="red")
p1=which.min(cv.error10)
points(p1,cv.error10[p1],pch=20,col="yellow")
legend("topleft",legend=c("Loocv","10 fold CV"),col=c("blue","red"),pch=1)

plot(mpg~horsepower,data=Auto,xlab="Horsepower",ylab="Miles per gallon",main="Best fit line")
glm.fit1=glm(mpg~poly(horsepower,p),data=Auto)
glm.fit2=glm(mpg~poly(horsepower,p1),data=Auto)
points(Auto$horsepower,fitted(glm.fit1),col="blue",pch=20)
points(Auto$horsepower,fitted(glm.fit2),col="red",pch=20)
legend("topright",legend=c("LOOCV","10 fold"),col=c("blue","red"),pch=20)