# Load the relevant libraries
library(lars)
## Loaded lars 1.2
library(car)
## Loading required package: carData
library(leaps)
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.5
## Loading required package: Matrix
## Loaded glmnet 4.1-2
# Load the diabetes data
data(diabetes)
names(diabetes)
## [1] "x" "y" "x2"
data.all <- data.frame(cbind(diabetes$x, y = diabetes$y))
# fix(data.all)  # opens an interactive editor; skip when knitting non-interactively
summary(diabetes)
## (summary output for x and x2 truncated: x has 10 standardized columns; x2
##  has 64, namely the same 10 predictors plus squared terms and pairwise
##  interactions; every column of both is centered to mean 0)
## y
## Min. : 25.0
## 1st Qu.: 87.0
## Median :140.5
## Mean :152.1
## 3rd Qu.:211.5
## Max. :346.0
head(diabetes)
## x.age x.sex x.bmi x.map x.tc x.ldl
## 1 0.038075906 0.050680119 0.061696207 0.021872355 -0.044223498 -0.034820763
## 2 -0.001882017 -0.044641637 -0.051474061 -0.026327835 -0.008448724 -0.019163340
## 3 0.085298906 0.050680119 0.044451213 -0.005670611 -0.045599451 -0.034194466
## 4 -0.089062939 -0.044641637 -0.011595015 -0.036656447 0.012190569 0.024990593
## 5 0.005383060 -0.044641637 -0.036384692 0.021872355 0.003934852 0.015596140
## 6 -0.092695478 -0.044641637 -0.040695940 -0.019442093 -0.068990650 -0.079287844
## x.hdl x.tch x.ltg x.glu y x2.age
## 1 -0.043400846 -0.002592262 0.019908421 -0.017646125 151 0.0380759064
## 2 0.074411564 -0.039493383 -0.068329744 -0.092204050 75 -0.0018820165
## 3 -0.032355932 -0.002592262 0.002863771 -0.025930339 141 0.0852989063
## 4 -0.036037570 0.034308859 0.022692023 -0.009361911 206 -0.0890629394
## 5 0.008142084 -0.002592262 -0.031991445 -0.046640874 135 0.0053830604
## 6 0.041276824 -0.076394504 -0.041180385 -0.096346157 97 -0.0926954778
## (remaining output truncated: the x2 columns repeat the 10 standardized
##  predictors and add their squared terms and pairwise interactions for the
##  same six rows)
str(diabetes)
## 'data.frame': 442 obs. of 3 variables:
## $ x : 'AsIs' num [1:442, 1:10] 0.03808 -0.00188 0.0853 -0.08906 0.00538 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:10] "age" "sex" "bmi" "map" ...
## $ y : num 151 75 141 206 135 97 138 63 110 310 ...
## $ x2: 'AsIs' num [1:442, 1:64] 0.03808 -0.00188 0.0853 -0.08906 0.00538 ...
## ..- attr(*, ".Names")= chr [1:28288] "age" "age" "age" "age" ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:442] "1" "2" "3" "4" ...
## .. ..$ : chr [1:64] "age" "sex" "bmi" "map" ...
# Partition the patients into two groups: training (75%) and test (25%)
n <- dim(data.all)[1]
set.seed(1306)
test <- sample(n, round(n/4))
data.train <- data.all[-test,]
data.test <- data.all[test,]
x <- model.matrix(y ~ ., data = data.all)[,-1]
x.train <- x[-test,] # define training predictor matrix
x.test <- x[test,] # define test predictor matrix
y <- data.all$y # define response variable
y.train <- y[-test] # define training response variable
y.test <- y[test] # define test response variable
n.train <- dim(data.train)[1] # training sample size = 332
n.test <- dim(data.test)[1] # test sample size = 110
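# Sanity check (not in the original script): the two subsets should partition
# all n observations
c(train = n.train, test = n.test, total = n.train + n.test)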
pairs(data.train)
pairs(data.test)
# Plot Histograms to check for normality of predictor variables
par(mfrow = c(3, 3))
hist(data.train$age)
hist(data.train$bmi)
hist(data.train$map)
hist(data.train$tc)
hist(data.train$ldl)
hist(data.train$hdl)
hist(data.train$tch)
hist(data.train$ltg)
hist(data.train$glu)
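# Equivalent loop (a sketch, not the original code): the nine histograms can
# also be drawn programmatically
num.vars <- c("age", "bmi", "map", "tc", "ldl", "hdl", "tch", "ltg", "glu")
par(mfrow = c(3, 3))
for (v in num.vars) hist(data.train[[v]], main = v, xlab = v)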
# Model 1: least squares regression using all ten predictors
model1_lm <- lm(y ~ ., data = data.train)
summary(model1_lm)
##
## Call:
## lm(formula = y ~ ., data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -159.368 -36.505 -0.458 37.795 154.880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 153.696 2.973 51.695 < 2e-16 ***
## age -23.148 68.290 -0.339 0.734851
## sex -196.185 72.304 -2.713 0.007020 **
## bmi 483.485 78.488 6.160 2.18e-09 ***
## map 275.781 77.479 3.559 0.000428 ***
## tc -671.283 478.034 -1.404 0.161209
## ldl 334.467 386.787 0.865 0.387832
## hdl 17.355 238.209 0.073 0.941964
## tch 182.955 178.239 1.026 0.305450
## ltg 765.822 196.163 3.904 0.000115 ***
## glu 90.875 77.665 1.170 0.242833
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53.99 on 321 degrees of freedom
## Multiple R-squared: 0.539, Adjusted R-squared: 0.5246
## F-statistic: 37.53 on 10 and 321 DF, p-value: < 2.2e-16
coef(model1_lm)
## (Intercept) age sex bmi map tc
## 153.69607 -23.14847 -196.18527 483.48520 275.78107 -671.28288
## ldl hdl tch ltg glu
## 334.46662 17.35544 182.95461 765.82162 90.87482
par(mfrow = c(2, 2))
plot(model1_lm)
mean((data.test$y - predict(model1_lm, data.test))^2)
## [1] 3095.459
sd((data.test$y - predict(model1_lm, data.test))^2)/sqrt(n.test)
## [1] 368.9687
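# Hypothetical helper (not in the original script): the test MSE and its
# standard error are recomputed for every model below, so a small function
# would tidy up the repetition
mse_se <- function(obs, pred) {
  sq.err <- (obs - pred)^2
  c(mse = mean(sq.err), se = sd(sq.err)/sqrt(length(sq.err)))
}
# e.g. mse_se(data.test$y, predict(model1_lm, data.test))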
# Model 2: best subset selection using BIC to choose the number of predictors
model2.bic <- regsubsets(y ~ ., data = data.train, nvmax = 10)
summary(model2.bic)
## Subset selection object
## Call: regsubsets.formula(y ~ ., data = data.train, nvmax = 10)
## 10 Variables (and intercept)
## Forced in Forced out
## age FALSE FALSE
## sex FALSE FALSE
## bmi FALSE FALSE
## map FALSE FALSE
## tc FALSE FALSE
## ldl FALSE FALSE
## hdl FALSE FALSE
## tch FALSE FALSE
## ltg FALSE FALSE
## glu FALSE FALSE
## 1 subsets of each size up to 10
## Selection Algorithm: exhaustive
## age sex bmi map tc ldl hdl tch ltg glu
## 1 ( 1 ) " " " " " " " " " " " " " " " " "*" " "
## 2 ( 1 ) " " " " "*" " " " " " " " " " " "*" " "
## 3 ( 1 ) " " " " "*" " " " " " " "*" " " "*" " "
## 4 ( 1 ) " " " " "*" "*" " " " " "*" " " "*" " "
## 5 ( 1 ) " " "*" "*" "*" " " " " "*" " " "*" " "
## 6 ( 1 ) " " "*" "*" "*" "*" " " " " "*" "*" " "
## 7 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*" " "
## 8 ( 1 ) " " "*" "*" "*" "*" "*" " " "*" "*" "*"
## 9 ( 1 ) "*" "*" "*" "*" "*" "*" " " "*" "*" "*"
## 10 ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
plot(model2.bic, scale = "bic", main = "Predictors vs. BIC")
summary(model2.bic)$bic
## [1] -135.9897 -203.4646 -208.4680 -211.5524 -214.2140 -213.3876 -209.2048
## [8] -204.7159 -199.0280 -193.2283
summary(model2.bic)$bic[6]
## [1] -213.3876
plot(summary(model2.bic)$bic, xlab = "Number of Predictors", ylab = "BIC",
     type = "l", main = "Subset Selection (BIC)")
which.min(summary(model2.bic)$bic)
## [1] 5
# BIC is minimized at 5 predictors, but the 6-predictor model is within one
# BIC unit of the minimum and is the model carried forward below
points(6, summary(model2.bic)$bic[6], col = "Blue", cex = 2, pch = 20)
coef(model2.bic, 6)
## (Intercept) sex bmi map tc tch
## 153.5119 -181.6819 518.2526 286.6225 -364.5528 311.4993
## ltg
## 642.0848
# leaps provides no predict() method for regsubsets objects, so define one
predict.regsubsets <- function(object, newdata, id, ...) {
  form <- as.formula(object$call[[2]])  # recover the model formula
  mat <- model.matrix(form, newdata)    # design matrix for the new data
  cof <- coef(object, id = id)          # coefficients of the id-predictor model
  xvars <- names(cof)
  mat[, xvars] %*% cof
}
mean((data.test$y - predict(model2.bic, data.test, id = 6))^2)
## [1] 3105.225
sd((data.test$y - predict(model2.bic, data.test, id = 6))^2)/sqrt(n.test)
## [1] 377.0068
lm.bic <- lm(y ~ sex + bmi + map + tc + tch + ltg, data = data.train)
summary(lm.bic)
##
## Call:
## lm(formula = y ~ sex + bmi + map + tc + tch + ltg, data = data.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -157.572 -38.015 -1.183 37.079 159.384
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 153.512 2.964 51.797 < 2e-16 ***
## sex -181.682 70.410 -2.580 0.010308 *
## bmi 518.253 75.285 6.884 3.01e-11 ***
## map 286.622 75.528 3.795 0.000176 ***
## tc -364.553 79.490 -4.586 6.45e-06 ***
## tch 311.499 89.338 3.487 0.000556 ***
## ltg 642.085 87.275 7.357 1.55e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 53.9 on 325 degrees of freedom
## Multiple R-squared: 0.5347, Adjusted R-squared: 0.5261
## F-statistic: 62.25 on 6 and 325 DF, p-value: < 2.2e-16
coef(lm.bic)
## (Intercept) sex bmi map tc tch
## 153.5119 -181.6819 518.2526 286.6225 -364.5528 311.4993
## ltg
## 642.0848
confint(lm.bic)
## 2.5 % 97.5 %
## (Intercept) 147.6813 159.3424
## sex -320.1984 -43.1654
## bmi 370.1453 666.3600
## map 138.0363 435.2087
## tc -520.9318 -208.1737
## tch 135.7454 487.2533
## ltg 470.3891 813.7804
plot(lm.bic)
mean((data.test$y - predict(lm.bic, data.test))^2)
## [1] 3105.225
# Standard Error
sd((data.test$y - predict(lm.bic, data.test))^2)/sqrt(n.test)
## [1] 377.0068
# Model 3: best subset selection using 10-fold cross-validation to choose the
# number of predictors
k <- 10
set.seed(1306)
folds <- sample(1:k, nrow(data.train), replace = TRUE)
cv.errors <- matrix(NA, k, 10, dimnames = list(NULL, paste(1:10)))
for (i in 1:k) {
  model3.cv <- regsubsets(y ~ ., data = data.train[folds != i, ], nvmax = 10)
  for (j in 1:10) {
    # test MSE of the best j-predictor model on the held-out fold
    pred <- predict(model3.cv, data.train[folds == i, ], id = j)
    cv.errors[i, j] <- mean((data.train$y[folds == i] - pred)^2)
  }
}
cv.errors
## 1 2 3 4 5 6 7 8
## [1,] 5228.294 3594.910 3803.587 3597.286 3534.969 3372.797 3448.036 3391.736
## [2,] 5250.708 3668.933 3676.897 3502.654 3376.652 3259.932 3291.237 3226.720
## [3,] 4199.806 3016.784 2948.537 3147.238 3004.054 3048.936 3005.914 2998.588
## [4,] 3507.689 2290.441 2482.850 2509.730 2794.157 2589.080 2520.665 2485.886
## [5,] 5244.321 4454.775 4566.870 4293.455 4024.717 4254.735 4273.060 4203.878
## [6,] 4247.804 2423.008 2876.691 2697.154 2625.302 2590.656 2628.224 2543.420
## [7,] 3753.740 2833.985 2735.999 2823.691 2727.169 2517.440 2453.937 2372.566
## [8,] 2777.160 2239.657 2175.244 2236.359 2223.217 2118.511 2103.730 2040.523
## [9,] 4064.103 2901.854 2855.802 2725.318 2630.517 2583.725 2518.280 2471.556
## [10,] 5552.186 4091.603 4022.215 3973.552 3741.940 3834.594 3847.948 3791.971
## 9 10
## [1,] 3397.635 3395.524
## [2,] 3258.746 3270.604
## [3,] 3002.759 3010.564
## [4,] 2507.241 2501.681
## [5,] 4209.845 4209.801
## [6,] 2577.724 2571.787
## [7,] 2397.621 2401.064
## [8,] 2112.749 2123.715
## [9,] 2471.827 2472.689
## [10,] 3838.924 3841.359
mean.cv.errors <- apply(cv.errors, 2, mean)
mean.cv.errors
## 1 2 3 4 5 6 7 8
## 4382.581 3151.595 3214.469 3150.644 3068.269 3017.041 3009.103 2952.684
## 9 10
## 2977.507 2979.879
which.min(mean.cv.errors)
## 8
## 8
# CV error is minimized at 8 predictors, but the 6-predictor model performs
# almost as well and is the model examined below
mean.cv.errors[6]
## 6
## 3017.041
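# Sketch (not part of the original analysis): the one-standard-error rule is a
# common alternative to taking the raw CV minimum at 8 predictors
se.cv <- apply(cv.errors, 2, sd)/sqrt(k)
cutoff <- min(mean.cv.errors) + se.cv[which.min(mean.cv.errors)]
which(mean.cv.errors <= cutoff)[1]  # smallest model size within one SE of the best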
# Plot mean CV error and RMSE against the number of predictors
par(mfrow = c(1, 2))
plot(mean.cv.errors, type = "b", xlab = "Predictors", ylab = "Mean CV Errors",
     main = "Subset Selection (10-fold CV)")
points(6, mean.cv.errors[6], col = "Blue", cex = 2, pch = 20)
rmse.cv <- sqrt(mean.cv.errors)
rmse.cv[6]
## 6
## 54.9276
plot(rmse.cv, pch = 19, type = "b", xlab = "Predictors", ylab = "RMSE CV",
     main = "Subset Selection (10-fold CV)")
points(6, rmse.cv[6], col = "Blue", cex = 2, pch = 20)
reg.best <- regsubsets(y ~ ., data = data.train, nvmax = 10)
coef(reg.best, 6)
## (Intercept) sex bmi map tc tch
## 153.5119 -181.6819 518.2526 286.6225 -364.5528 311.4993
## ltg
## 642.0848
mean((data.test$y - predict(reg.best, data.test, id = 6))^2)
## [1] 3105.225
# Standard Error
sd((data.test$y - predict(reg.best, data.test, id = 6))^2)/sqrt(n.test)
## [1] 377.0068
## [1] 377.0068
# Ridge regression with lambda chosen by 10-fold cross-validation
par(mfrow = c(1, 2))
grid <- 10^seq(10, -2, length = 100)
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = grid, thresh = 1e-12)
plot(ridge.mod, xvar = "lambda", label = TRUE)
set.seed(1306)
cv.out <- cv.glmnet(x.train, y.train, alpha = 0)
plot(cv.out)
bestlam <- cv.out$lambda.min
bestlam
## [1] 4.683924
log(bestlam)
## [1] 1.544136
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = bestlam)
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x.test)
mean((ridge.pred - y.test)^2)
## [1] 3116.206
sd((ridge.pred - y.test)^2)/sqrt(n.test)
## [1] 370.474
largelam <- cv.out$lambda.1se
largelam
## [1] 39.8018
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = largelam)
ridge.pred <- predict(ridge.mod, s = largelam, newx = x.test)
mean((ridge.pred - y.test)^2)
## [1] 3193.515
sd((ridge.pred - y.test)^2)/sqrt(n.test)
## [1] 360.6616
# The estimated ridge coefficients at the one-SE lambda
predict(ridge.mod, type = "coefficients", s = largelam)[1:11,]
## (Intercept) age sex bmi map tc
## 153.598868 5.114496 -108.998500 358.394874 223.296309 -23.185565
## ldl hdl tch ltg glu
## -75.828598 -191.086026 135.304071 366.491450 130.818328
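# Alternative (a sketch, not the original approach): glmnet's documentation
# advises against refitting at a single lambda; predict() and coef() work
# directly on the cv.glmnet object, and the same applies to the lasso fits below
ridge.pred.1se <- predict(cv.out, newx = x.test, s = "lambda.1se")
mean((ridge.pred.1se - y.test)^2)  # should closely match the refit value above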
# Lasso model using 10-fold cross-validation to select lambda: both the
# CV-minimizing value and the largest value within one SE of the minimum
par(mfrow = c(1,2))
grid <- 10^seq(10, -2, length = 100)
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = grid, thresh = 1e-12)
plot(lasso.mod, xvar = "lambda", label = TRUE)
set.seed(1306)
cv.out <- cv.glmnet(x.train, y.train, alpha = 1)
plot(cv.out)
bestlam <- cv.out$lambda.min
bestlam
## [1] 0.3081709
log(bestlam)
## [1] -1.177101
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = bestlam)
lasso.pred <- predict(lasso.mod, s = bestlam, newx = x.test)
mean((lasso.pred - y.test)^2)
## [1] 3113.142
sd((lasso.pred - y.test)^2)/sqrt(n.test)
## [1] 371.7285
largelam <- cv.out$lambda.1se
largelam
## [1] 5.512097
lasso.mod <- glmnet(x.train, y.train, alpha = 1, lambda = largelam)
lasso.pred <- predict(lasso.mod, s = largelam, newx = x.test)
mean((lasso.pred - y.test)^2)
## [1] 3243.075
sd((lasso.pred - y.test)^2)/sqrt(n.test)
## [1] 385.5315
# The estimated lasso coefficients at the one-SE lambda (nonzero entries only)
lasso.coef <- predict(lasso.mod, type = "coefficients", s = largelam)[1:11,]
lasso.coef[lasso.coef != 0]
## (Intercept) bmi map hdl ltg
## 153.4433 482.0603 149.1608 -168.9400 510.6780
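# For reference, the test-set results reported above, collected in one table
# (values copied from the output above)
results <- data.frame(
  model = c("OLS (10 predictors)", "Best subset, BIC (6)",
            "Best subset, 10-fold CV (6)", "Ridge (lambda.min)",
            "Ridge (lambda.1se)", "Lasso (lambda.min)", "Lasso (lambda.1se)"),
  test.mse = c(3095.459, 3105.225, 3105.225, 3116.206, 3193.515, 3113.142, 3243.075),
  std.err = c(368.9687, 377.0068, 377.0068, 370.4740, 360.6616, 371.7285, 385.5315))
results[order(results$test.mse), ]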