# Load the relevant libraries
library(lars)
## Loaded lars 1.2
library(car)
## Loading required package: carData
library(leaps)
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.5
## Loading required package: Matrix
## Loaded glmnet 4.1-2
# Load the diabetes data
data(diabetes)
names(diabetes)
## [1] "x" "y" "x2"
data.all <- data.frame(cbind(diabetes$x, y = diabetes$y))
# fix(data.all)  # opens an interactive editor; skip when knitting non-interactively
summary(diabetes)
## (summary output for x and x2 truncated: x has 10 standardized columns; x2
##  has 64, namely the same 10 predictors plus squared terms and pairwise
##  interactions; every column of both is centered to mean 0)
## y
## Min. : 25.0
## 1st Qu.: 87.0
## Median :140.5
## Mean :152.1
## 3rd Qu.:211.5
## Max. :346.0
head(diabetes)
## x.age x.sex x.bmi x.map x.tc x.ldl
## 1 0.038075906 0.050680119 0.061696207 0.021872355 -0.044223498 -0.034820763
## 2 -0.001882017 -0.044641637 -0.051474061 -0.026327835 -0.008448724 -0.019163340
## 3 0.085298906 0.050680119 0.044451213 -0.005670611 -0.045599451 -0.034194466
## 4 -0.089062939 -0.044641637 -0.011595015 -0.036656447 0.012190569 0.024990593
## 5 0.005383060 -0.044641637 -0.036384692 0.021872355 0.003934852 0.015596140
## 6 -0.092695478 -0.044641637 -0.040695940 -0.019442093 -0.068990650 -0.079287844
## x.hdl x.tch x.ltg x.glu y x2.age
## 1 -0.043400846 -0.002592262 0.019908421 -0.017646125 151 0.0380759064
## 2 0.074411564 -0.039493383 -0.068329744 -0.092204050 75 -0.0018820165
## 3 -0.032355932 -0.002592262 0.002863771 -0.025930339 141 0.0852989063
## 4 -0.036037570 0.034308859 0.022692023 -0.009361911 206 -0.0890629394
## 5 0.008142084 -0.002592262 -0.031991445 -0.046640874 135 0.0053830604
## 6 0.041276824 -0.076394504 -0.041180385 -0.096346157 97 -0.0926954778
## (remaining output truncated: the x2 columns repeat the 10 standardized
##  predictors and add their squared terms and pairwise interactions for the
##  same six rows)
str(diabetes)
## 'data.frame': 442 obs. of 3 variables:
## $ x : 'AsIs' num [1:442, 1:10] 0.03808 -0.00188 0.0853 -0.08906 0.00538 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:10] "age" "sex" "bmi" "map" ...
## $ y : num 151 75 141 206 135 97 138 63 110 310 ...
## $ x2: 'AsIs' num [1:442, 1:64] 0.03808 -0.00188 0.0853 -0.08906 0.00538 ...
## ..- attr(*, ".Names")= chr [1:28288] "age" "age" "age" "age" ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:442] "1" "2" "3" "4" ...
## .. ..$ : chr [1:64] "age" "sex" "bmi" "map" ...
# Partition the patients into two groups: training (75%) and test (25%)
n <- dim(data.all)[1]
set.seed(1306)
test <- sample(n, round(n/4))
data.train <- data.all[-test,]
data.test <- data.all[test,]
x <- model.matrix(y ~ ., data = data.all)[,-1]
x.train <- x[-test,] # define training predictor matrix
x.test <- x[test,] # define test predictor matrix
y <- data.all$y # define response variable
y.train <- y[-test] # define training response variable
y.test <- y[test] # define test response variable
n.train <- dim(data.train)[1] # training sample size = 332
n.test <- dim(data.test)[1] # test sample size = 110
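# Sanity check (not in the original script): the two subsets should partition
# all n observations
c(train = n.train, test = n.test, total = n.train + n.test)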
pairs(data.train)
pairs(data.test)
# Plot Histograms to check for normality of predictor variables
par(mfrow = c(3, 3))
hist(data.train$age)
hist(data.train$bmi)
hist(data.train$map)
hist(data.train$tc)
hist(data.train$ldl)
hist(data.train$hdl)
hist(data.train$tch)
hist(data.train$ltg)
hist(data.train$glu)
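# Equivalent loop (a sketch, not the original code): the nine histograms can
# also be drawn programmatically
num.vars <- c("age", "bmi", "map", "tc", "ldl", "hdl", "tch", "ltg", "glu")
par(mfrow = c(3, 3))
for (v in num.vars) hist(data.train[[v]], main = v, xlab = v)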
# Model 1: least squares regression using all ten predictors
model1_lm <- lm(y ~ ., data = data.train)
summary(model1_lm)
##
## Call:
## lm(formula = y ~ ., data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -159.368 -36.505 -0.458 37.795 154.880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 153.696 2.973 51.695 < 2e-16 ***
## age -23.148 68.290 -0.339 0.734851
## sex -196.185 72.304 -2.713 0.007020 **
## bmi 483.485 78.488 6.160 2.18e-09 ***
## map 275.781 77.479 3.559 0.000428 ***
## tc -671.283 478.034 -1.404 0.161209
## ldl 334.467 386.787 0.865 0.387832
## hdl 17.355 238.209 0.073 0.941964
## tch 182.955 178.239 1.026 0.305450
## ltg 765.822 196.163 3.904 0.000115 ***
## glu 90.875 77.665 1.170 0.242833
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53.99 on 321 degrees of freedom
## Multiple R-squared: 0.539, Adjusted R-squared: 0.5246
## F-statistic: 37.53 on 10 and 321 DF, p-value: < 2.2e-16
coef(model1_lm)
## (Intercept) age sex bmi map tc
## 153.69607 -23.14847 -196.18527 483.48520 275.78107 -671.28288
## ldl hdl tch ltg glu
## 334.46662 17.35544 182.95461 765.82162 90.87482
par(mfrow = c(2, 2))
plot(model1_lm)
mean((data.test$y - predict(model1_lm, data.test))^2)
## [1] 3095.459
sd((data.test$y - predict(model1_lm, data.test))^2)/sqrt(n.test)
## [1] 368.9687
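# Hypothetical helper (not in the original script): the test MSE and its
# standard error are recomputed for every model below, so a small function
# would tidy up the repetition
mse_se <- function(obs, pred) {
  sq.err <- (obs - pred)^2
  c(mse = mean(sq.err), se = sd(sq.err)/sqrt(length(sq.err)))
}
# e.g. mse_se(data.test$y, predict(model1_lm, data.test))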
# Model 2: best subset selection using BIC to choose the number of predictors
model2.bic <- regsubsets(y ~ ., data = data.train, nvmax = 10)
summary(model2.bic)
## Subset selection object
## Call: regsubsets.formula(y ~ ., data = data.train, nvmax = 10)
## 10 Variables (and intercept)
## Forced in Forced out
## age FALSE FALSE
## sex FALSE FALSE
## bmi FALSE FALSE
## map FALSE FALSE
## tc FALSE FALSE
## ldl FALSE FALSE
## hdl FALSE FALSE
## tch FALSE FALSE
## ltg FALSE FALSE
## glu FALSE FALSE
## 1 subsets of each size up to 10
## Selection Algorithm: exhaustive
## age sex bmi map tc ldl hdl tch ltg glu
## 1 ( 1 ) " " " " " " " " " " " " " " " " "*" " "
## 2 ( 1 ) " " " " "*" " " " " " " " " " " "*" " "
## 3 ( 1 ) " " " " "*" " " " " " " "*" " " "*" " "
## 4 ( 1 ) " " " " "*" "*" " " " " "*" " " "*" " "
## 5 ( 1 ) " " "*" "*" "*" " " " " "*" " " "*" " "
## 6 ( 1 ) " " "*" "*" "*" "*" " " " " "*" "*" " "
## 7 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*" " "
## 8 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*" "*"
## 9 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" "*"
## 10 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
plot(model2.bic, scale = "bic", main = "Predictors vs. BIC")
summary(model2.bic)$bic
## [1] -135.9897 -203.4646 -208.4680 -211.5524 -214.2140 -213.3876 -209.2048
## [8] -204.7159 -199.0280 -193.2283
summary(model2.bic)$bic[6]
## [1] -213.3876
plot(summary(model2.bic)$bic, xlab = "Number of Predictors", ylab = "BIC",
     type = "l", main = "Subset Selection (BIC)")
which.min(summary(model2.bic)$bic)
## [1] 5
# BIC is minimized at 5 predictors, but the 6-predictor model is within one
# BIC unit of the minimum and is the model carried forward below
points(6, summary(model2.bic)$bic[6], col = "Blue", cex = 2, pch = 20)
coef(model2.bic, 6)
## (Intercept) sex bmi map tc tch
## 153.5119 -181.6819 518.2526 286.6225 -364.5528 311.4993
## ltg
## 642.0848
# leaps provides no predict() method for regsubsets objects, so define one
predict.regsubsets <- function(object, newdata, id, ...) {
  form <- as.formula(object$call[[2]])  # recover the model formula
  mat <- model.matrix(form, newdata)    # design matrix for the new data
  cof <- coef(object, id = id)          # coefficients of the id-predictor model
  xvars <- names(cof)
  mat[, xvars] %*% cof
}
mean((data.test$y - predict(model2.bic, data.test, id = 6))^2)
## [1] 3105.225
sd((data.test$y - predict(model2.bic, data.test, id = 6))^2)/sqrt(n.test)
## [1] 377.0068
lm.bic <- lm(y ~ sex + bmi + map + tc + tch + ltg, data = data.train)
summary(lm.bic)
##
## Call:
## lm(formula = y ~ sex + bmi + map + tc + tch + ltg, data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.572 -38.015 -1.183 37.079 159.384
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 153.512 2.964 51.797 < 2e-16 ***
## sex -181.682 70.410 -2.580 0.010308 *
## bmi 518.253 75.285 6.884 3.01e-11 ***
## map 286.622 75.528 3.795 0.000176 ***
## tc -364.553 79.490 -4.586 6.45e-06 ***
## tch 311.499 89.338 3.487 0.000556 ***
## ltg 642.085 87.275 7.357 1.55e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53.9 on 325 degrees of freedom
## Multiple R-squared: 0.5347, Adjusted R-squared: 0.5261
## F-statistic: 62.25 on 6 and 325 DF, p-value: < 2.2e-16
coef(lm.bic)
## (Intercept) sex bmi map tc tch
## 153.5119 -181.6819 518.2526 286.6225 -364.5528 311.4993
## ltg
## 642.0848
confint(lm.bic)
## 2.5 % 97.5 %
## (Intercept) 147.6813 159.3424
## sex -320.1984 -43.1654
## bmi 370.1453 666.3600
## map 138.0363 435.2087
## tc -520.9318 -208.1737
## tch 135.7454 487.2533
## ltg 470.3891 813.7804
plot(lm.bic)
mean((data.test$y - predict(lm.bic, data.test))^2)
## [1] 3105.225
# Standard Error
sd((data.test$y - predict(lm.bic, data.test))^2)/sqrt(n.test)
## [1] 377.0068
# Model 3: best subset selection using 10-fold cross-validation to choose the
# number of predictors
k <- 10
set.seed(1306)
folds <- sample(1:k, nrow(data.train), replace = TRUE)
cv.errors <- matrix(NA, k, 10, dimnames = list(NULL, paste(1:10)))
for (i in 1:k) {
  model3.cv <- regsubsets(y ~ ., data = data.train[folds != i, ], nvmax = 10)
  for (j in 1:10) {
    # test MSE of the best j-predictor model on the held-out fold
    pred <- predict(model3.cv, data.train[folds == i, ], id = j)
    cv.errors[i, j] <- mean((data.train$y[folds == i] - pred)^2)
  }
}
cv.errors
## 1 2 3 4 5 6 7 8
## [1,] 5228.294 3594.910 3803.587 3597.286 3534.969 3372.797 3448.036 3391.736
## [2,] 5250.708 3668.933 3676.897 3502.654 3376.652 3259.932 3291.237 3226.720
## [3,] 4199.806 3016.784 2948.537 3147.238 3004.054 3048.936 3005.914 2998.588
## [4,] 3507.689 2290.441 2482.850 2509.730 2794.157 2589.080 2520.665 2485.886
## [5,] 5244.321 4454.775 4566.870 4293.455 4024.717 4254.735 4273.060 4203.878
## [6,] 4247.804 2423.008 2876.691 2697.154 2625.302 2590.656 2628.224 2543.420
## [7,] 3753.740 2833.985 2735.999 2823.691 2727.169 2517.440 2453.937 2372.566
## [8,] 2777.160 2239.657 2175.244 2236.359 2223.217 2118.511 2103.730 2040.523
## [9,] 4064.103 2901.854 2855.802 2725.318 2630.517 2583.725 2518.280 2471.556
## [10,] 5552.186 4091.603 4022.215 3973.552 3741.940 3834.594 3847.948 3791.971
## 9 10
## [1,] 3397.635 3395.524
## [2,] 3258.746 3270.604
## [3,] 3002.759 3010.564
## [4,] 2507.241 2501.681
## [5,] 4209.845 4209.801
## [6,] 2577.724 2571.787
## [7,] 2397.621 2401.064
## [8,] 2112.749 2123.715
## [9,] 2471.827 2472.689
## [10,] 3838.924 3841.359
mean.cv.errors <- apply(cv.errors, 2, mean)
mean.cv.errors
## 1 2 3 4 5 6 7 8
## 4382.581 3151.595 3214.469 3150.644 3068.269 3017.041 3009.103 2952.684
## 9 10
## 2977.507 2979.879
which.min(mean.cv.errors)
## 8
## 8
# CV error is minimized at 8 predictors, but the 6-predictor model performs
# almost as well and is the model examined below
mean.cv.errors[6]
## 6
## 3017.041
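# Sketch (not part of the original analysis): the one-standard-error rule is a
# common alternative to taking the raw CV minimum at 8 predictors
se.cv <- apply(cv.errors, 2, sd)/sqrt(k)
cutoff <- min(mean.cv.errors) + se.cv[which.min(mean.cv.errors)]
which(mean.cv.errors <= cutoff)[1]  # smallest model size within one SE of the best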
# Plot mean CV error and RMSE against the number of predictors
par(mfrow = c(1, 2))
plot(mean.cv.errors, type = "b", xlab = "Predictors", ylab = "Mean CV Errors",
     main = "Subset Selection (10-fold CV)")
points(6, mean.cv.errors[6], col = "Blue", cex = 2, pch = 20)
rmse.cv <- sqrt(mean.cv.errors)
rmse.cv[6]
## 6
## 54.9276
plot(rmse.cv, pch = 19, type = "b", xlab = "Predictors", ylab = "RMSE CV",
     main = "Subset Selection (10-fold CV)")
points(6, rmse.cv[6], col = "Blue", cex = 2, pch = 20)
reg.best <- regsubsets(y ~ ., data = data.train, nvmax = 10)
coef(reg.best, 6)
## (Intercept) sex bmi map tc tch
## 153.5119 -181.6819 518.2526 286.6225 -364.5528 311.4993
## ltg
## 642.0848
mean((data.test$y - predict(reg.best, data.test, id = 6))^2)
## [1] 3105.225
# Standard Error
sd((data.test$y - predict(reg.best, data.test, id = 6))^2)/sqrt(n.test)
## [1] 377.0068
## [1] 377.0068
# Ridge regression with lambda chosen by 10-fold cross-validation
par(mfrow = c(1, 2))
grid <- 10^seq(10, -2, length = 100)
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = grid, thresh = 1e-12)
plot(ridge.mod, xvar = "lambda", label = TRUE)
set.seed(1306)
cv.out <- cv.glmnet(x.train, y.train, alpha = 0)
plot(cv.out)
bestlam <- cv.out$lambda.min
bestlam
## [1] 4.683924
log(bestlam)
## [1] 1.544136
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = bestlam)
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x.test)
mean((ridge.pred - y.test)^2)
## [1] 3116.206
sd((ridge.pred - y.test)^2)/sqrt(n.test)
## [1] 370.474
largelam <- cv.out$lambda.1se
largelam
## [1] 39.8018
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = largelam)
ridge.pred <- predict(ridge.mod, s = largelam, newx = x.test)
mean((ridge.pred - y.test)^2)
## [1] 3193.515
sd((ridge.pred - y.test)^2)/sqrt(n.test)
## [1] 360.6616
# The estimated ridge coefficients at the one-SE lambda
predict(ridge.mod, type = "coefficients", s = largelam)[1:11,]
## (Intercept) age sex bmi map tc
## 153.598868 5.114496 -108.998500 358.394874 223.296309 -23.185565
## ldl hdl tch ltg glu
## -75.828598 -191.086026 135.304071 366.491450 130.818328
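# Alternative (a sketch, not the original approach): glmnet's documentation
# advises against refitting at a single lambda; predict() and coef() work
# directly on the cv.glmnet object, and the same applies to the lasso fits below
ridge.pred.1se <- predict(cv.out, newx = x.test, s = "lambda.1se")
mean((ridge.pred.1se - y.test)^2)  # should closely match the refit value above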
# Lasso model using 10-fold cross-validation to select lambda: both the
# CV-minimizing value and the largest value within one SE of the minimum
par(mfrow = c(1,2))
grid <- 10^seq(10, -2, length = 100)
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = grid, thresh = 1e-12)
plot(lasso.mod, xvar = "lambda", label = TRUE)
set.seed(1306)
cv.out <- cv.glmnet(x.train, y.train, alpha = 1)
plot(cv.out)
bestlam <- cv.out$lambda.min
bestlam
## [1] 0.3081709
log(bestlam)
## [1] -1.177101
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = bestlam)
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x.test)
mean((lasso.pred - y.test)^2)
## [1] 3113.142
sd((lasso.pred - y.test)^2)/sqrt(n.test)
## [1] 371.7285
largelam <- cv.out$lambda.1se
largelam
## [1] 5.512097
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = largelam)
lasso.pred <- predict(lasso.mod, s = largelam, newx = x.test)
mean((lasso.pred - y.test)^2)
## [1] 3243.075
sd((lasso.pred - y.test)^2)/sqrt(n.test)
## [1] 385.5315
# The estimated lasso coefficients at the one-SE lambda (nonzero entries only)
lasso.coef <- predict(lasso.mod, type = "coefficients", s = largelam)[1:11,]
lasso.coef[lasso.coef != 0]
## (Intercept) bmi map hdl ltg
## 153.4433 482.0603 149.1608 -168.9400 510.6780
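# For reference, the test-set results reported above, collected in one table
# (values copied from the output above)
results <- data.frame(
  model = c("OLS (10 predictors)", "Best subset, BIC (6)",
            "Best subset, 10-fold CV (6)", "Ridge (lambda.min)",
            "Ridge (lambda.1se)", "Lasso (lambda.min)", "Lasso (lambda.1se)"),
  test.mse = c(3095.459, 3105.225, 3105.225, 3116.206, 3193.515, 3113.142, 3243.075),
  std.err = c(368.9687, 377.0068, 377.0068, 370.4740, 360.6616, 371.7285, 385.5315))
results[order(results$test.mse), ]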