#Librarys

library(ggpubr)
## 载入需要的程辑包:ggplot2
library(psych)
## 
## 载入程辑包:'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(scales)
## 
## 载入程辑包:'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
library(ggplot2)
library(stringr)
library(tidyr)
library(dplyr)
## 
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## 载入程辑包:'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(caret)
## 载入需要的程辑包:lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## 载入程辑包:'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(reshape2)
## 
## 载入程辑包:'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(corrplot)
## corrplot 0.90 loaded
library(glmnet)
## 载入需要的程辑包:Matrix
## 
## 载入程辑包:'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-2
library(car)
## 载入需要的程辑包:carData
## 
## 载入程辑包:'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
library(faraway)
## 
## 载入程辑包:'faraway'
## The following objects are masked from 'package:car':
## 
##     logit, vif
## The following object is masked from 'package:lattice':
## 
##     melanoma
## The following object is masked from 'package:psych':
## 
##     logit
library(lars)
## Loaded lars 1.2
## 
## 载入程辑包:'lars'
## The following object is masked from 'package:psych':
## 
##     error.bars
library(leaps)

Loading Dataset and base line review

data(diabetes)
names(diabetes)
## [1] "x"  "y"  "x2"
summary(diabetes)
##          x.age                  x.sex                  x.bmi                  x.map                  x.tc                   x.ldl                  x.hdl                  x.tch                  x.ltg                  x.glu        
##  Min.   :-0.10722563    Min.   :-0.04464164    Min.   :-0.09027530    Min.   :-0.11239960    Min.   :-0.12678067    Min.   :-0.11561307    Min.   :-0.10230705    Min.   :-0.07639450    Min.   :-0.12609739    Min.   :-0.13776723  
##  1st Qu.:-0.03729927    1st Qu.:-0.04464164    1st Qu.:-0.03422907    1st Qu.:-0.03665645    1st Qu.:-0.03424784    1st Qu.:-0.03035840    1st Qu.:-0.03511716    1st Qu.:-0.03949338    1st Qu.:-0.03324879    1st Qu.:-0.03317903  
##  Median : 0.00538306    Median :-0.04464164    Median :-0.00728377    Median :-0.00567061    Median :-0.00432087    Median :-0.00381907    Median :-0.00658447    Median :-0.00259226    Median :-0.00194763    Median :-0.00107770  
##  Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000  
##  3rd Qu.: 0.03807591    3rd Qu.: 0.05068012    3rd Qu.: 0.03124802    3rd Qu.: 0.03564384    3rd Qu.: 0.02835801    3rd Qu.: 0.02984439    3rd Qu.: 0.02931150    3rd Qu.: 0.03430886    3rd Qu.: 0.03243323    3rd Qu.: 0.02791705  
##  Max.   : 0.11072668    Max.   : 0.05068012    Max.   : 0.17055523    Max.   : 0.13204422    Max.   : 0.15391371    Max.   : 0.19878799    Max.   : 0.18117906    Max.   : 0.18523444    Max.   : 0.13359898    Max.   : 0.13561183  
##        y        
##  Min.   : 25.0  
##  1st Qu.: 87.0  
##  Median :140.5  
##  Mean   :152.1  
##  3rd Qu.:211.5  
##  Max.   :346.0  
##         x2.age                 x2.sex                 x2.bmi                 x2.map                  x2.tc                 x2.ldl                 x2.hdl                 x2.tch                 x2.ltg                 x2.glu                x2.age^2               x2.bmi^2               x2.map^2                x2.tc^2               x2.ldl^2               x2.hdl^2               x2.tch^2               x2.ltg^2               x2.glu^2              x2.age:sex             x2.age:bmi             x2.age:map              x2.age:tc             x2.age:ldl             x2.age:hdl             x2.age:tch             x2.age:ltg             x2.age:glu             x2.sex:bmi             x2.sex:map              x2.sex:tc             x2.sex:ldl             x2.sex:hdl             x2.sex:tch             x2.sex:ltg             x2.sex:glu             x2.bmi:map              x2.bmi:tc             x2.bmi:ldl             x2.bmi:hdl             x2.bmi:tch             x2.bmi:ltg             x2.bmi:glu              x2.map:tc             x2.map:ldl             x2.map:hdl             x2.map:tch             x2.map:ltg             x2.map:glu              x2.tc:ldl              x2.tc:hdl              x2.tc:tch              x2.tc:ltg              x2.tc:glu             x2.ldl:hdl             x2.ldl:tch             x2.ldl:ltg             x2.ldl:glu             x2.hdl:tch             x2.hdl:ltg             x2.hdl:glu             x2.tch:ltg             x2.tch:glu             x2.ltg:glu      
##  Min.   :-0.10722563    Min.   :-0.04464164    Min.   :-0.09027530    Min.   :-0.11239960    Min.   :-0.12678067    Min.   :-0.11561307    Min.   :-0.10230705    Min.   :-0.07639450    Min.   :-0.12609739    Min.   :-0.13776723    Min.   :-0.04130027    Min.   :-0.0329757     Min.   :-0.03936938    Min.   :-0.03194631    Min.   :-0.0296067     Min.   :-0.0276538     Min.   :-0.0305374     Min.   :-0.03493543    Min.   :-0.03190221    Min.   :-0.12069533    Min.   :-0.17481185    Min.   :-0.16642937    Min.   :-0.13734607    Min.   :-0.15547687    Min.   :-0.18004384    Min.   :-0.20128867    Min.   :-0.16005090    Min.   :-0.11977803    Min.   :-0.15657307    Min.   :-0.14043607    Min.   :-0.13612974    Min.   :-0.12969053    Min.   :-0.16598162    Min.   :-0.15998969    Min.   :-0.14294792    Min.   :-0.14044727    Min.   :-0.16726677    Min.   :-0.29320707    Min.   :-0.28657696    Min.   :-0.26831607    Min.   :-0.13181795    Min.   :-0.18509273    Min.   :-0.15439177    Min.   :-0.20360756    Min.   :-0.20386193    Min.   :-0.2827677     Min.   :-0.18515432    Min.   :-0.14589249    Min.   :-0.14339331    Min.   :-0.0476094     Min.   :-0.2108426     Min.   :-0.1223554     Min.   :-0.10489687    Min.   :-0.12053039    Min.   :-0.25647092    Min.   :-0.1115079     Min.   :-0.18259411    Min.   :-0.15173440    Min.   :-0.23489009    Min.   :-0.25468513    Min.   :-0.22325464    Min.   :-0.1607454     Min.   :-0.1289188     Min.   :-0.0921654   
##  1st Qu.:-0.03729927    1st Qu.:-0.04464164    1st Qu.:-0.03422907    1st Qu.:-0.03665645    1st Qu.:-0.03424784    1st Qu.:-0.03035840    1st Qu.:-0.03511716    1st Qu.:-0.03949338    1st Qu.:-0.03324879    1st Qu.:-0.03317903    1st Qu.:-0.03651112    1st Qu.:-0.0292889     1st Qu.:-0.03401562    1st Qu.:-0.02934795    1st Qu.:-0.0262817     1st Qu.:-0.0247218     1st Qu.:-0.0304485     1st Qu.:-0.02986741    1st Qu.:-0.02934588    1st Qu.:-0.03428520    1st Qu.:-0.02303091    1st Qu.:-0.02236911    1st Qu.:-0.02011722    1st Qu.:-0.01969935    1st Qu.:-0.02294425    1st Qu.:-0.01676616    1st Qu.:-0.02344173    1st Qu.:-0.02225231    1st Qu.:-0.03590179    1st Qu.:-0.03318366    1st Qu.:-0.03105452    1st Qu.:-0.03128987    1st Qu.:-0.03080430    1st Qu.:-0.01954799    1st Qu.:-0.03265781    1st Qu.:-0.02938596    1st Qu.:-0.02258631    1st Qu.:-0.02087785    1st Qu.:-0.02276205    1st Qu.:-0.01884810    1st Qu.:-0.02350512    1st Qu.:-0.02456719    1st Qu.:-0.02439003    1st Qu.:-0.01970707    1st Qu.:-0.01982609    1st Qu.:-0.0192732     1st Qu.:-0.01691159    1st Qu.:-0.02415299    1st Qu.:-0.02303765    1st Qu.:-0.0274244     1st Qu.:-0.0180310     1st Qu.:-0.0222594     1st Qu.:-0.02676351    1st Qu.:-0.02152349    1st Qu.:-0.01503075    1st Qu.:-0.0236963     1st Qu.:-0.02119392    1st Qu.:-0.02093422    1st Qu.:-0.01562203    1st Qu.:-0.01711673    1st Qu.:-0.01350453    1st Qu.:-0.0272451     1st Qu.:-0.0206135     1st Qu.:-0.0236237   
##  Median : 0.00538306    Median :-0.04464164    Median :-0.00728377    Median :-0.00567061    Median :-0.00432087    Median :-0.00381907    Median :-0.00658447    Median :-0.00259226    Median :-0.00194763    Median :-0.00107770    Median :-0.01485516    Median :-0.0164496     Median :-0.01726140    Median :-0.01717975    Median :-0.0176079     Median :-0.0148615     Median :-0.0094855     Median :-0.01786576    Median :-0.01915998    Median : 0.00136543    Median :-0.00685379    Median :-0.00756265    Median :-0.00884089    Median :-0.00798291    Median : 0.00180070    Median :-0.00866341    Median :-0.00887376    Median :-0.01073070    Median :-0.00221276    Median : 0.00417820    Median :-0.00148989    Median :-0.00017770    Median : 0.00002594    Median : 0.00219364    Median : 0.00754782    Median :-0.00227439    Median :-0.01089069    Median :-0.00840677    Median :-0.00859344    Median : 0.01174772    Median :-0.01702018    Median :-0.01221064    Median :-0.01442884    Median :-0.00724496    Median :-0.00621985    Median : 0.0045702     Median :-0.00990562    Median :-0.01015312    Median :-0.01186424    Median :-0.0168761     Median :-0.0027086     Median :-0.0164679     Median :-0.01498494    Median :-0.01168339    Median : 0.00639820    Median :-0.0147706     Median :-0.00866058    Median :-0.00954984    Median : 0.01714849    Median : 0.00896297    Median : 0.00855963    Median :-0.0133519     Median :-0.0156527     Median :-0.0135282   
##  Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.0000000     Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.0000000     Mean   : 0.0000000     Mean   : 0.0000000     Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.0000000     Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.0000000     Mean   : 0.0000000     Mean   : 0.0000000     Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.0000000     Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.00000000    Mean   : 0.0000000     Mean   : 0.0000000     Mean   : 0.0000000   
##  3rd Qu.: 0.03807591    3rd Qu.: 0.05068012    3rd Qu.: 0.03124802    3rd Qu.: 0.03564384    3rd Qu.: 0.02835801    3rd Qu.: 0.02984439    3rd Qu.: 0.02931150    3rd Qu.: 0.03430886    3rd Qu.: 0.03243323    3rd Qu.: 0.02791705    3rd Qu.: 0.01672836    3rd Qu.: 0.0139233     3rd Qu.: 0.01779132    3rd Qu.: 0.00791023    3rd Qu.: 0.0070868     3rd Qu.: 0.0078884     3rd Qu.:-0.0094855     3rd Qu.: 0.01376233    3rd Qu.: 0.01064199    3rd Qu.: 0.03680242    3rd Qu.: 0.01893124    3rd Qu.: 0.02300988    3rd Qu.: 0.01754428    3rd Qu.: 0.01474536    3rd Qu.: 0.01907762    3rd Qu.: 0.02237682    3rd Qu.: 0.01915676    3rd Qu.: 0.01413948    3rd Qu.: 0.03430558    3rd Qu.: 0.03699564    3rd Qu.: 0.03187045    3rd Qu.: 0.03106063    3rd Qu.: 0.03398203    3rd Qu.: 0.02240213    3rd Qu.: 0.03184408    3rd Qu.: 0.03070174    3rd Qu.: 0.01556867    3rd Qu.: 0.01788553    3rd Qu.: 0.01527434    3rd Qu.: 0.02570039    3rd Qu.: 0.02031243    3rd Qu.: 0.02204776    3rd Qu.: 0.01792226    3rd Qu.: 0.01946680    3rd Qu.: 0.02202658    3rd Qu.: 0.0196832     3rd Qu.: 0.02163021    3rd Qu.: 0.02198648    3rd Qu.: 0.01187946    3rd Qu.: 0.0084616     3rd Qu.: 0.0151485     3rd Qu.: 0.0112680     3rd Qu.: 0.01643742    3rd Qu.: 0.01281487    3rd Qu.: 0.01997273    3rd Qu.: 0.0102309     3rd Qu.: 0.02066205    3rd Qu.: 0.01211330    3rd Qu.: 0.03130439    3rd Qu.: 0.02251006    3rd Qu.: 0.02208954    3rd Qu.: 0.0150755     3rd Qu.: 0.0176564     3rd Qu.: 0.0130771   
##  Max.   : 0.11072668    Max.   : 0.05068012    Max.   : 0.17055523    Max.   : 0.13204422    Max.   : 0.15391371    Max.   : 0.19878799    Max.   : 0.18117906    Max.   : 0.18523444    Max.   : 0.13359898    Max.   : 0.13561183    Max.   : 0.18275738    Max.   : 0.3910172     Max.   : 0.26403396    Max.   : 0.30255981    Max.   : 0.4875149     Max.   : 0.3736758     Max.   : 0.4326122     Max.   : 0.24068220    Max.   : 0.23584893    Max.   : 0.11161385    Max.   : 0.17025839    Max.   : 0.22761247    Max.   : 0.19942629    Max.   : 0.20611884    Max.   : 0.20632788    Max.   : 0.28412558    Max.   : 0.18709678    Max.   : 0.18436868    Max.   : 0.17914612    Max.   : 0.10740703    Max.   : 0.16156578    Max.   : 0.20613521    Max.   : 0.17479136    Max.   : 0.19124232    Max.   : 0.13661431    Max.   : 0.13780206    Max.   : 0.22848303    Max.   : 0.22698282    Max.   : 0.23206883    Max.   : 0.14647735    Max.   : 0.27645966    Max.   : 0.21889208    Max.   : 0.22460968    Max.   : 0.18974692    Max.   : 0.19160847    Max.   : 0.3898674     Max.   : 0.22948768    Max.   : 0.18713993    Max.   : 0.27935949    Max.   : 0.3976457     Max.   : 0.3189508     Max.   : 0.4691532     Max.   : 0.22312522    Max.   : 0.23737028    Max.   : 0.16077293    Max.   : 0.5551291     Max.   : 0.20338090    Max.   : 0.29903226    Max.   : 0.08044499    Max.   : 0.16306655    Max.   : 0.20990518    Max.   : 0.3758453     Max.   : 0.3181041     Max.   : 0.3381838
head(diabetes)
##          x.age        x.sex        x.bmi        x.map         x.tc        x.ldl
## 1  0.038075906  0.050680119  0.061696207  0.021872355 -0.044223498 -0.034820763
## 2 -0.001882017 -0.044641637 -0.051474061 -0.026327835 -0.008448724 -0.019163340
## 3  0.085298906  0.050680119  0.044451213 -0.005670611 -0.045599451 -0.034194466
## 4 -0.089062939 -0.044641637 -0.011595015 -0.036656447  0.012190569  0.024990593
## 5  0.005383060 -0.044641637 -0.036384692  0.021872355  0.003934852  0.015596140
## 6 -0.092695478 -0.044641637 -0.040695940 -0.019442093 -0.068990650 -0.079287844
##          x.hdl        x.tch        x.ltg        x.glu   y        x2.age
## 1 -0.043400846 -0.002592262  0.019908421 -0.017646125 151  0.0380759064
## 2  0.074411564 -0.039493383 -0.068329744 -0.092204050  75 -0.0018820165
## 3 -0.032355932 -0.002592262  0.002863771 -0.025930339 141  0.0852989063
## 4 -0.036037570  0.034308859  0.022692023 -0.009361911 206 -0.0890629394
## 5  0.008142084 -0.002592262 -0.031991445 -0.046640874 135  0.0053830604
## 6  0.041276824 -0.076394504 -0.041180385 -0.096346157  97 -0.0926954778
##          x2.sex        x2.bmi        x2.map         x2.tc        x2.ldl
## 1  0.0506801187  0.0616962065  0.0218723550 -0.0442234984 -0.0348207628
## 2 -0.0446416365 -0.0514740612 -0.0263278347 -0.0084487241 -0.0191633397
## 3  0.0506801187  0.0444512133 -0.0056706106 -0.0455994513 -0.0341944659
## 4 -0.0446416365 -0.0115950145 -0.0366564468  0.0121905688  0.0249905934
## 5 -0.0446416365 -0.0363846922  0.0218723550  0.0039348516  0.0155961395
## 6 -0.0446416365 -0.0406959405 -0.0194420933 -0.0689906499 -0.0792878444
##          x2.hdl        x2.tch        x2.ltg        x2.glu      x2.age^2
## 1 -0.0434008457 -0.0025922620  0.0199084209 -0.0176461252 -0.0148551625
## 2  0.0744115641 -0.0394933829 -0.0683297436 -0.0922040496 -0.0412915429
## 3 -0.0323559322 -0.0025922620  0.0028637705 -0.0259303390  0.0916434391
## 4 -0.0360375700  0.0343088589  0.0226920226 -0.0093619113  0.1036403301
## 5  0.0081420836 -0.0025922620 -0.0319914449 -0.0466408736 -0.0408265979
## 6  0.0412768238 -0.0763945038 -0.0411803852 -0.0963461565  0.1157092549
##        x2.bmi^2      x2.map^2       x2.tc^2      x2.ldl^2      x2.hdl^2
## 1  0.0225045739 -0.0310446765 -0.0043311197 -0.0137399243 -0.0046314248
## 2  0.0056427733 -0.0273076609 -0.0309389016 -0.0248010319  0.0400365241
## 3 -0.0041764214 -0.0388099038 -0.0025859366 -0.0143055611 -0.0148614560
## 4 -0.0310170859 -0.0159874156 -0.0298483890 -0.0214340114 -0.0117828858
## 5 -0.0136807215 -0.0310446765 -0.0317282077 -0.0264236384 -0.0268506700
## 6 -0.0088370145 -0.0327918526  0.0352626533  0.0526602897 -0.0068304035
##        x2.tch^2      x2.ltg^2      x2.glu^2    x2.age:sex    x2.age:bmi
## 1 -0.0304484629 -0.0288162192 -0.0275255618  0.0328649758  0.0405716741
## 2 -0.0094854824  0.0371612444  0.0880219609 -0.0066099928 -0.0067648038
## 3 -0.0304484629 -0.0348099250 -0.0224326123  0.0840517453  0.0708891293
## 4 -0.0146503349 -0.0269850701 -0.0306820952  0.0766292449  0.0129034054
## 5 -0.0304484629 -0.0191324512 -0.0012284185 -0.0135465959 -0.0129173209
## 6  0.0482386067 -0.0087497144  0.0990402558  0.0800975464  0.0704832806
##      x2.age:map     x2.age:tc    x2.age:ldl    x2.age:hdl    x2.age:tch
## 1  0.0016606410 -0.0465532511 -0.0382447104 -0.0345115069 -0.0121122609
## 2 -0.0159342243 -0.0117287997 -0.0096555406  0.0006995477 -0.0083689963
## 3 -0.0279128687 -0.0917442739 -0.0716415163 -0.0602920920 -0.0147605279
## 4  0.0562904032 -0.0342989420 -0.0571356258  0.0786805432 -0.0760817036
## 5 -0.0144024121 -0.0116206046 -0.0086502423  0.0049801694 -0.0102788452
## 6  0.0234365273  0.1189685396  0.1438718189 -0.0851146968  0.1432199339
##      x2.age:ltg    x2.age:glu    x2.sex:bmi    x2.sex:map     x2.sex:tc
## 1  0.0030648916 -0.0302775066  0.0621030123  0.0122820371 -0.0485722318
## 2 -0.0102016795 -0.0113801440  0.0445181909  0.0137392710  0.0062226212
## 3 -0.0077635174 -0.0646990887  0.0435615292 -0.0181579597 -0.0500315236
## 4 -0.0555091413  0.0033785934  0.0067497817  0.0237941848 -0.0130586592
## 5 -0.0165418441 -0.0208710562  0.0302274415 -0.0331836602 -0.0053461470
## 6  0.0675437504  0.1843686827  0.0343105127  0.0070359951  0.0627810439
##      x2.sex:ldl    x2.sex:hdl    x2.sex:tch    x2.sex:ltg    x2.sex:glu
## 1 -0.0441240238 -0.0308042981 -0.0195479919  0.0142268228 -0.0293859648
## 2  0.0112617659 -0.0565675488  0.0224021267  0.0575880019  0.0784642525
## 3 -0.0434530875 -0.0179545693 -0.0195479919 -0.0041216840 -0.0384231554
## 4 -0.0304033769  0.0566194244 -0.0505546014 -0.0287218500 -0.0011399371
## 5 -0.0215384529  0.0113446351 -0.0140762373  0.0231308241  0.0346819482
## 6  0.0679972794 -0.0226114568  0.0588804908  0.0318440815  0.0824444620
##      x2.bmi:map     x2.bmi:tc    x2.bmi:ldl    x2.bmi:hdl    x2.bmi:tch
## 1  0.0090008988 -0.0717180778 -0.0603176794 -0.0413575386 -0.0240342004
## 2  0.0091148702 -0.0028355367  0.0087097314 -0.0671553344  0.0240456899
## 3 -0.0226918084 -0.0564432291 -0.0464818380 -0.0136167525 -0.0230540270
## 4 -0.0092925186 -0.0153834201 -0.0193921008  0.0279274131 -0.0292499537
## 5 -0.0334523099 -0.0154230197 -0.0255070025  0.0119441348 -0.0184594644
## 6 -0.0020460266  0.0488321410  0.0580412284 -0.0190229456  0.0476394964
##      x2.bmi:ltg    x2.bmi:glu     x2.map:tc    x2.map:ldl    x2.map:hdl
## 1  0.0048261936 -0.0410278367 -0.0327209536 -0.0257474212 -0.0112074291
## 2  0.0552994440  0.0806093115 -0.0070399803  0.0018462404 -0.0319794277
## 3 -0.0194513972 -0.0423606977 -0.0062598621 -0.0049233843  0.0120934525
## 4 -0.0280603703 -0.0160690142 -0.0214874364 -0.0291135216  0.0354925517
## 5  0.0034088636  0.0170453175 -0.0099837004 -0.0017149265  0.0119825521
## 6  0.0146962189  0.0634061398  0.0171122204  0.0244459437 -0.0081883427
##      x2.map:tch    x2.map:ltg    x2.map:glu     x2.tc:ldl     x2.tc:hdl
## 1 -0.0138251131 -0.0101938361 -0.0257784633 -0.0068689647  0.0398230899
## 2  0.0098745200  0.0203696547  0.0313619644 -0.0262353123 -0.0164622948
## 3 -0.0122818754 -0.0203183065 -0.0149534838 -0.0065969775  0.0300168640
## 4 -0.0397827564 -0.0385992765 -0.0109701272 -0.0242291837 -0.0122792654
## 5 -0.0138251131 -0.0356386917 -0.0386583542 -0.0276482686 -0.0018670731
## 6  0.0195036026 -0.0020081382  0.0201031994  0.0483666130 -0.0654803859
##       x2.tc:tch     x2.tc:ltg     x2.tc:glu    x2.ldl:hdl    x2.ldl:tch
## 1 -0.0193030537 -0.0428915072  0.0009629700  0.0423549304 -0.0220378305
## 2 -0.0155012002 -0.0123430995  0.0009326831 -0.0212564243 -0.0115642516
## 3 -0.0192411418 -0.0271777643  0.0098716066  0.0335869646 -0.0220633407
## 4 -0.0140331587 -0.0186440420 -0.0188580962 -0.0098784247 -0.0099839514
## 5 -0.0214699727 -0.0270791696 -0.0203958711  0.0123759491 -0.0240914053
## 6  0.0701909475  0.0350969957  0.1309600895 -0.0612519836  0.0717191451
##      x2.ldl:ltg    x2.ldl:glu    x2.hdl:tch    x2.hdl:ltg    x2.hdl:glu
## 1 -0.0311245646 -0.0009221095  0.0334936252  0.0008521487  0.0311502576
## 2  0.0129733755  0.0237834359 -0.0238146613 -0.0945055990 -0.1403775894
## 3 -0.0180161691  0.0049134553  0.0329558784  0.0182807986  0.0327952439
## 4 -0.0033727555 -0.0191092832  0.0081586184  0.0018977323  0.0215138966
## 5 -0.0268464903 -0.0296874121  0.0309841399  0.0144891320  0.0053856590
## 6  0.0560369456  0.1496630223 -0.0278444433 -0.0180308997 -0.0755127518
##      x2.tch:ltg    x2.tch:glu    x2.ltg:glu
## 1 -0.0281911757 -0.0176581553 -0.0277936831
## 2  0.0252977155  0.0530335390  0.1040132768
## 3 -0.0273318271 -0.0172359590 -0.0223037368
## 4 -0.0120454909 -0.0248722040 -0.0250419367
## 5 -0.0255745144 -0.0161804685  0.0087351987
## 6  0.0339989574  0.1261465729  0.0577886517
str(diabetes)
## 'data.frame':    442 obs. of  3 variables:
##  $ x : 'AsIs' num [1:442, 1:10] 0.03808 -0.00188 0.0853 -0.08906 0.00538 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:10] "age" "sex" "bmi" "map" ...
##  $ y : num  151 75 141 206 135 97 138 63 110 310 ...
##  $ x2: 'AsIs' num [1:442, 1:64] 0.03808 -0.00188 0.0853 -0.08906 0.00538 ...
##   ..- attr(*, ".Names")= chr [1:28288] "age" "age" "age" "age" ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:442] "1" "2" "3" "4" ...
##   .. ..$ : chr [1:64] "age" "sex" "bmi" "map" ...
###Load the diabetes data
data.all <- data.frame(cbind(diabetes$x, y = diabetes$y)) 

Partition the patients into two groups: training (75%) and test (25%)

n <- dim(data.all)[1]
set.seed(1306)
test <- sample(n, round(n/4))  
data.train <- data.all[-test,]
data.test <- data.all[test,]
x <- model.matrix(y ~ ., data = data.all)[,-1]
x.train <- x[-test,]
x.test <- x[test,]
y <- data.all$y 
y.train <- y[-test]             
y.test <- y[test]                              
n.train <- dim(data.train)[1]
n.test <- dim(data.test)[1]                      

Baseline visualizations

pairs(data.train)

pairs(data.test)

par(mfrow = c(3, 3))
hist(data.train$age)
hist(data.train$bmi)
hist(data.train$map)
hist(data.train$tc)
hist(data.train$ldl)
hist(data.train$hdl)
hist(data.train$tch)
hist(data.train$ltg)
hist(data.train$glu)

1. LM Least squares regression model using all ten predictors (R function lm).

model1_lm=lm(y~.,data=data.train) 
summary(model1_lm) 
## 
## Call:
## lm(formula = y ~ ., data = data.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -159.368  -36.505   -0.458   37.795  154.880 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  153.696      2.973  51.695  < 2e-16 ***
## age          -23.148     68.290  -0.339 0.734851    
## sex         -196.185     72.304  -2.713 0.007020 ** 
## bmi          483.485     78.488   6.160 2.18e-09 ***
## map          275.781     77.479   3.559 0.000428 ***
## tc          -671.283    478.034  -1.404 0.161209    
## ldl          334.467    386.787   0.865 0.387832    
## hdl           17.355    238.209   0.073 0.941964    
## tch          182.955    178.239   1.026 0.305450    
## ltg          765.822    196.163   3.904 0.000115 ***
## glu           90.875     77.665   1.170 0.242833    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 53.99 on 321 degrees of freedom
## Multiple R-squared:  0.539,  Adjusted R-squared:  0.5246 
## F-statistic: 37.53 on 10 and 321 DF,  p-value: < 2.2e-16
coef(model1_lm)
## (Intercept)         age         sex         bmi         map          tc 
##   153.69607   -23.14847  -196.18527   483.48520   275.78107  -671.28288 
##         ldl         hdl         tch         ltg         glu 
##   334.46662    17.35544   182.95461   765.82162    90.87482
confint(model1_lm)
##                   2.5 %     97.5 %
## (Intercept)   147.84675  159.54538
## age          -157.50044  111.20349
## sex          -338.43386  -53.93668
## bmi           329.06876  637.90164
## map           123.34962  428.21252
## tc          -1611.75845  269.19269
## ldl          -426.49030 1095.42354
## hdl          -451.29188  486.00276
## tch          -167.70988  533.61909
## ltg           379.89472 1151.74852
## glu           -61.92163  243.67128

Plot Model 1

par(mfrow=c(2, 2)) 
plot(model1_lm) 

vif(model1_lm)
##       age       sex       bmi       map        tc       ldl       hdl       tch 
##  1.197840  1.343089  1.667054  1.555690 57.151344 38.934355 15.664544  8.518278 
##       ltg       glu 
##  9.774073  1.562445
cor(data.train)
##             age         sex         bmi        map         tc        ldl
## age  1.00000000  0.21886259  0.16659326  0.3014599 0.21759841  0.1660094
## sex  0.21886259  1.00000000  0.08830411  0.2698654 0.09801807  0.2084060
## bmi  0.16659326  0.08830411  1.00000000  0.4558217 0.27937575  0.2775069
## map  0.30145991  0.26986543  0.45582166  1.0000000 0.28665534  0.2181423
## tc   0.21759841  0.09801807  0.27937575  0.2866553 1.00000000  0.8880786
## ldl  0.16600941  0.20840602  0.27750690  0.2181423 0.88807860  1.0000000
## hdl -0.05531316 -0.36781547 -0.36603905 -0.1566471 0.03601248 -0.2352209
## tch  0.16204989  0.36355202  0.42364192  0.2705984 0.54034932  0.6707657
## ltg  0.25138579  0.15098630  0.47293381  0.4174045 0.51378367  0.3055235
## glu  0.31084781  0.22957664  0.44870585  0.4203983 0.33233111  0.2834238
## y    0.15849219  0.07277159  0.59729622  0.4438919 0.21478302  0.1709790
##             hdl        tch        ltg        glu           y
## age -0.05531316  0.1620499  0.2513858  0.3108478  0.15849219
## sex -0.36781547  0.3635520  0.1509863  0.2295766  0.07277159
## bmi -0.36603905  0.4236419  0.4729338  0.4487059  0.59729622
## map -0.15664712  0.2705984  0.4174045  0.4203983  0.44389187
## tc   0.03601248  0.5403493  0.5137837  0.3323311  0.21478302
## ldl -0.23522092  0.6707657  0.3055235  0.2834238  0.17097899
## hdl  1.00000000 -0.7455099 -0.3764126 -0.2663546 -0.41723696
## tch -0.74550986  1.0000000  0.5919089  0.4024291  0.44885326
## ltg -0.37641258  0.5919089  1.0000000  0.4771046  0.59908719
## glu -0.26635459  0.4024291  0.4771046  1.0000000  0.41738188
## y   -0.41723696  0.4488533  0.5990872  0.4173819  1.00000000

Prediction

predict(model1_lm, data.test)
##        25       288         7        38       265       272       340       101 
## 163.62897 142.91710  77.33420 160.45504 125.93328 166.23665 147.45370 167.03646 
##        97       108       188       326       126        71       210       127 
## 214.68038 153.93364  64.63265 224.35435 201.50822  71.84893 175.85535  56.31939 
##       396       166       290       237       227       435         4       392 
## 172.88497  78.26133 199.07764 183.84059  82.89065 126.46703 171.15459  54.74453 
##       330       280       245       145        77       412       372       220 
## 109.39623 119.59316 141.99563 181.81284 194.97344 148.09177 206.97203 149.63755 
##       114       313       169       229        18       328       110       170 
## 213.75230 121.38601 234.75668 117.96485 183.51502 214.80629 148.76265 234.58248 
##       231       102       106       296       182       216       175       357 
## 170.15084 105.27112 123.48579 158.05581  86.62758 264.54974 175.10598  94.68731 
##       329       118        67        39        28       285        11       218 
## 190.75701 240.17867 157.87725 240.99057 179.30842 164.91879 100.00363 214.51649 
##       292       271        22       196       319       406       369       122 
## 198.78316 191.76904  87.70122 171.91286 157.36529 285.51229 210.19033 199.20308 
##       364       405        21        40       409        62       358       154 
## 174.93078 192.58460 125.30798 144.76574 203.87292 194.11399 207.42593 121.17675 
##       283       186       341       318       244       360        23       130 
## 134.39622 198.83578 160.86432 171.45595 103.01428 178.58320 109.62876 215.11748 
##        13        58       256       221       282       200        52       427 
## 111.39611  81.63310 119.03771  52.84278 111.13153 182.07604 171.53184 167.82711 
##        44       203       109        99       334       184       374       322 
##  76.11001 166.53713 238.83321 125.57034 189.34431 161.85634 142.93638 288.26141 
##        45        15       152       260       158       211       441       267 
## 225.59204  96.48352 142.18825 137.43862 129.70752 115.42896 212.99109  33.38471 
##       202        86        32       159       361       348 
##  72.53322 157.52903  71.71308  87.14277 196.35830 111.79902
predict(model1_lm, data.test, interval = "prediction")
##           fit        lwr      upr
## 25  163.62897  56.391941 270.8660
## 288 142.91710  34.461566 251.3726
## 7    77.33420 -30.079598 184.7480
## 38  160.45504  53.203228 267.7068
## 265 125.93328  18.059917 233.8066
## 272 166.23665  59.171156 273.3021
## 340 147.45370  39.694626 255.2128
## 101 167.03646  59.888827 274.1841
## 97  214.68038 107.569158 321.7916
## 108 153.93364  45.514929 262.3524
## 188  64.63265 -43.330096 172.5954
## 326 224.35435 115.959702 332.7490
## 126 201.50822  93.426840 309.5896
## 71   71.84893 -36.079533 179.7774
## 210 175.85535  67.924209 283.7865
## 127  56.31939 -51.897990 164.5368
## 396 172.88497  65.498788 280.2712
## 166  78.26133 -28.923713 185.4464
## 290 199.07764  90.619740 307.5355
## 237 183.84059  76.628414 291.0528
## 227  82.89065 -24.926872 190.7082
## 435 126.46703  19.173762 233.7603
## 4   171.15459  63.652033 278.6571
## 392  54.74453 -52.915083 162.4041
## 330 109.39623   1.903660 216.8888
## 280 119.59316  12.560337 226.6260
## 245 141.99563  33.901158 250.0901
## 145 181.81284  74.536067 289.0896
## 77  194.97344  86.132162 303.8147
## 412 148.09177  40.052533 256.1310
## 372 206.97203  95.940558 318.0035
## 220 149.63755  41.942590 257.3325
## 114 213.75230 106.371681 321.1329
## 313 121.38601  14.303556 228.4685
## 169 234.75668 126.380547 343.1328
## 229 117.96485  11.063146 224.8666
## 18  183.51502  76.179978 290.8501
## 328 214.80629 106.454549 323.1580
## 110 148.76265  40.566436 256.9589
## 170 234.58248 120.583398 348.5816
## 231 170.15084  59.838198 280.4635
## 102 105.27112  -2.435532 212.9778
## 106 123.48579  16.408945 230.5626
## 296 158.05581  49.737480 266.3741
## 182  86.62758 -21.018342 194.2735
## 216 264.54974 156.238218 372.8613
## 175 175.10598  66.721557 283.4904
## 357  94.68731 -12.523379 201.8980
## 329 190.75701  83.048218 298.4658
## 118 240.17867 130.310997 350.0463
## 67  157.87725  50.288580 265.4659
## 39  240.99057 133.286049 348.6951
## 28  179.30842  71.698668 286.9182
## 285 164.91879  56.967853 272.8697
## 11  100.00363  -9.096660 209.1039
## 218 214.51649 106.554512 322.4785
## 292 198.78316  89.838211 307.7281
## 271 191.76904  84.172297 299.3658
## 22   87.70122 -20.211861 195.6143
## 196 171.91286  65.084469 278.7413
## 319 157.36529  49.851791 264.8788
## 406 285.51229 175.279095 395.7455
## 369 210.19033 102.304195 318.0765
## 122 199.20308  91.900345 306.5058
## 364 174.93078  67.422707 282.4389
## 405 192.58460  84.998927 300.1703
## 21  125.30798  17.868863 232.7471
## 40  144.76574  36.964380 252.5671
## 409 203.87292  94.569808 313.1760
## 62  194.11399  85.626602 302.6014
## 358 207.42593  99.103428 315.7484
## 154 121.17675  13.574553 228.7790
## 283 134.39622  26.377758 242.4147
## 186 198.83578  90.725715 306.9458
## 341 160.86432  51.489638 270.2390
## 318 171.45595  63.115985 279.7959
## 244 103.01428  -4.076069 210.1046
## 360 178.58320  71.597934 285.5685
## 23  109.62876   1.939484 217.3180
## 130 215.11748 106.980645 323.2543
## 13  111.39611   4.655386 218.1368
## 58   81.63310 -25.784392 189.0506
## 256 119.03771  11.905291 226.1701
## 221  52.84278 -55.837026 161.5226
## 282 111.13153   2.322481 219.9406
## 200 182.07604  73.532259 290.6198
## 52  171.53184  64.295121 278.7686
## 427 167.82711  60.024876 275.6293
## 44   76.11001 -32.808430 185.0284
## 203 166.53713  57.091173 275.9831
## 109 238.83321 131.233701 346.4327
## 99  125.57034  17.976161 233.1645
## 334 189.34431  82.344792 296.3438
## 184 161.85634  54.327698 269.3850
## 374 142.93638  35.683868 250.1889
## 322 288.26141 177.725480 398.7973
## 45  225.59204 117.695513 333.4886
## 15   96.48352 -11.098604 204.0656
## 152 142.18825  35.473470 248.9030
## 260 137.43862  29.531099 245.3461
## 158 129.70752  22.626302 236.7887
## 211 115.42896   7.715716 223.1422
## 441 212.99109 105.858355 320.1238
## 267  33.38471 -75.940919 142.7103
## 202  72.53322 -35.001094 180.0675
## 86  157.52903  49.074462 265.9836
## 32   71.71308 -35.437413 178.8636
## 159  87.14277 -20.061216 194.3468
## 361 196.35830  89.439446 303.2772
## 348 111.79902   4.146579 219.4515
predict(model1_lm, data.test, interval = "confidence")
##           fit        lwr       upr
## 25  163.62897 148.842539 178.41541
## 288 142.91710 120.974878 164.85933
## 7    77.33420  61.316057  93.35233
## 38  160.45504 145.561811 175.34826
## 265 125.93328 107.078021 144.78854
## 272 166.23665 152.750466 179.72283
## 340 147.45370 129.263719 165.64368
## 101 167.03646 152.912968 181.15995
## 97  214.68038 200.835853 228.52490
## 108 153.93364 132.174147 175.69314
## 188  64.63265  45.272583  83.99271
## 326 224.35435 202.715096 245.99360
## 126 201.50822 181.497145 221.51930
## 71   71.84893  52.680963  91.01690
## 210 175.85535 156.672312 195.03839
## 127  56.31939  35.586342  77.05244
## 396 172.88497 157.053040 188.71690
## 166  78.26133  63.856786  92.66587
## 290 199.07764 177.123752 221.03152
## 237 183.84059 169.235499 198.44568
## 227  82.89065  64.357562 101.42373
## 435 126.46703 111.278122 141.65594
## 4   171.15459 154.551649 187.75753
## 392  54.74453  37.153353  72.33570
## 330 109.39623  92.858055 125.93441
## 280 119.59316 106.368838 132.81748
## 245 141.99563 121.913977 162.07728
## 145 181.81284 166.740890 196.88479
## 77  194.97344 171.197795 218.74908
## 412 148.09177 128.309585 167.87396
## 372 206.97203 174.617048 239.32700
## 220 149.63755 131.831329 167.44376
## 114 213.75230 197.958123 229.54648
## 313 121.38601 107.765786 135.00624
## 169 234.75668 213.210339 256.30303
## 229 117.96485 105.847419 130.08228
## 18  183.51502 168.033783 198.99625
## 328 214.80629 193.383014 236.22956
## 110 148.76265 128.140363 169.38494
## 170 234.58248 193.174167 275.99079
## 231 170.15084 140.355931 199.94575
## 102 105.27112  87.394323 123.14791
## 106 123.48579 109.909724 137.06187
## 296 158.05581 136.802130 179.30950
## 182  86.62758  69.120390 104.13476
## 216 264.54974 243.330795 285.76869
## 175 175.10598 153.518003 196.69395
## 357  94.68731  80.093117 109.28151
## 329 190.75701 172.867293 208.64673
## 118 240.17867 212.075942 268.28140
## 67  157.87725 140.725610 175.02888
## 39  240.99057 223.126579 258.85457
## 28  179.30842 162.025006 196.59183
## 285 164.91879 145.624668 184.21292
## 11  100.00363  75.069090 124.93817
## 218 214.51649 195.160693 233.87229
## 292 198.78316 174.537343 223.02898
## 271 191.76904 174.566804 208.97128
## 22   87.70122  68.620060 106.78238
## 196 171.91286 160.460213 183.36551
## 319 157.36529 140.691625 174.03896
## 406 285.51229 256.012875 315.01171
## 369 210.19033 191.262174 229.11848
## 122 199.20308 183.947439 214.45872
## 364 174.93078 158.292139 191.56943
## 405 192.58460 175.451754 209.71744
## 21  125.30798 109.120892 141.49507
## 40  144.76574 126.326897 163.20458
## 409 203.87292 178.065414 229.68042
## 62  194.11399 172.014851 216.21314
## 358 207.42593 186.151004 228.70086
## 154 121.17675 103.940408 138.41310
## 283 134.39622 114.727799 154.06465
## 186 198.83578 178.670370 219.00119
## 341 160.86432 134.755354 186.97328
## 318 171.45595 150.092281 192.81963
## 244 103.01428  89.332157 116.69640
## 360 178.58320 165.749450 191.41695
## 23  109.62876  91.856932 127.40059
## 130 215.11748 194.809046 235.42591
## 13  111.39611 100.792363 121.99985
## 58   81.63310  65.590204  97.67599
## 256 119.03771 105.030083 133.04534
## 221  52.84278  29.817625  75.86793
## 282 111.13153  87.503849 134.75922
## 200 182.07604 159.701712 204.45037
## 52  171.53184 156.747655 186.31603
## 427 167.82711 149.383178 186.27103
## 44   76.11001  51.983602 100.23641
## 203 166.53713 140.131164 192.94309
## 109 238.83321 221.613705 256.05271
## 99  125.57034 108.384138 142.75654
## 334 189.34431 176.392327 202.29628
## 184 161.85634 145.085320 178.62737
## 374 142.93638 128.038065 157.83470
## 322 288.26141 257.650142 318.87268
## 45  225.59204 206.604714 244.57937
## 15   96.48352  79.372980 113.59405
## 152 142.18825 131.848936 152.52756
## 260 137.43862 118.388906 156.48834
## 158 129.70752 116.097063 143.31797
## 211 115.42896  97.512460 133.34546
## 441 212.99109 198.981032 227.00116
## 267  33.38471   7.481991  59.28743
## 202  72.53322  55.725861  89.34059
## 86  157.52903 135.591578 179.46649
## 32   71.71308  57.567911  85.85825
## 159  87.14277  72.597929 101.68761
## 361 196.35830 184.090487 208.62611
## 348 111.79902  94.251779 129.34626
mean((data.test$y-predict(model1_lm,data.test))^2) 
## [1] 3095.459
# Standard Error
sd((data.test$y-predict(model1_lm,data.test))^2)/sqrt(n.test) 
## [1] 368.9687

2. Apply best subset selection using BIC to select the number of predictors (R function regsubsets in package leaps).

model2_bic=regsubsets(y ~ ., data = data.train, nvmax = 10)
summary(model2_bic)
## Subset selection object
## Call: regsubsets.formula(y ~ ., data = data.train, nvmax = 10)
## 10 Variables  (and intercept)
##     Forced in Forced out
## age     FALSE      FALSE
## sex     FALSE      FALSE
## bmi     FALSE      FALSE
## map     FALSE      FALSE
## tc      FALSE      FALSE
## ldl     FALSE      FALSE
## hdl     FALSE      FALSE
## tch     FALSE      FALSE
## ltg     FALSE      FALSE
## glu     FALSE      FALSE
## 1 subsets of each size up to 10
## Selection Algorithm: exhaustive
##           age sex bmi map tc  ldl hdl tch ltg glu
## 1  ( 1 )  " " " " " " " " " " " " " " " " "*" " "
## 2  ( 1 )  " " " " "*" " " " " " " " " " " "*" " "
## 3  ( 1 )  " " " " "*" " " " " " " "*" " " "*" " "
## 4  ( 1 )  " " " " "*" "*" " " " " "*" " " "*" " "
## 5  ( 1 )  " " "*" "*" "*" " " " " "*" " " "*" " "
## 6  ( 1 )  " " "*" "*" "*" "*" " " " " "*" "*" " "
## 7  ( 1 )  " " "*" "*" "*" "*" "*" " " "*" "*" " "
## 8  ( 1 )  " " "*" "*" "*" "*" "*" " " "*" "*" "*"
## 9  ( 1 )  "*" "*" "*" "*" "*" "*" " " "*" "*" "*"
## 10  ( 1 ) "*" "*" "*" "*" "*" "*" "*" "*" "*" "*"
plot(model2_bic, scale = "bic", main = "Predictors vs. BIC")

summary(model2_bic)$bic
##  [1] -135.9897 -203.4646 -208.4680 -211.5524 -214.2140 -213.3876 -209.2048
##  [8] -204.7159 -199.0280 -193.2283
summary(model2_bic)$bic[6]
## [1] -213.3876

Plot Model 2

plot(summary(model2_bic)$bic, xlab = "Number of Predictors", ylab = "BIC", type = "l",
     main = "Subset Selection (BIC)")
which.min(summary(model2_bic)$bic)
## [1] 5
points(6, summary(model2_bic)$bic[6], col = "Blue", cex = 2, pch = 20)

coef(model2_bic, 6)
## (Intercept)         sex         bmi         map          tc         tch 
##    153.5119   -181.6819    518.2526    286.6225   -364.5528    311.4993 
##         ltg 
##    642.0848

Predict Model 2

predict.regsubsets=function(object,newdata,id,...){
  form=as.formula(object$call[[2]])
  mat=model.matrix(form,newdata)
  cof=coef(object,id=id)
  xvars=names(cof)
  mat[,xvars]%*%cof}

mean((data.test$y-predict(model2_bic,data.test,id=6))^2) 
## [1] 3105.225
# Standard Error
sd((data.test$y-predict(model2_bic,data.test,id=6))^2)/sqrt(n.test) 
## [1] 377.0068

Summary Model 2

lm_bic <- lm(y ~ sex + bmi + map + tc + tch + ltg, data = data.train)
summary(lm_bic)
## 
## Call:
## lm(formula = y ~ sex + bmi + map + tc + tch + ltg, data = data.train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -157.572  -38.015   -1.183   37.079  159.384 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  153.512      2.964  51.797  < 2e-16 ***
## sex         -181.682     70.410  -2.580 0.010308 *  
## bmi          518.253     75.285   6.884 3.01e-11 ***
## map          286.622     75.528   3.795 0.000176 ***
## tc          -364.553     79.490  -4.586 6.45e-06 ***
## tch          311.499     89.338   3.487 0.000556 ***
## ltg          642.085     87.275   7.357 1.55e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 53.9 on 325 degrees of freedom
## Multiple R-squared:  0.5347, Adjusted R-squared:  0.5261 
## F-statistic: 62.25 on 6 and 325 DF,  p-value: < 2.2e-16
coef(lm_bic)
## (Intercept)         sex         bmi         map          tc         tch 
##    153.5119   -181.6819    518.2526    286.6225   -364.5528    311.4993 
##         ltg 
##    642.0848
confint(lm_bic)
##                 2.5 %    97.5 %
## (Intercept)  147.6813  159.3424
## sex         -320.1984  -43.1654
## bmi          370.1453  666.3600
## map          138.0363  435.2087
## tc          -520.9318 -208.1737
## tch          135.7454  487.2533
## ltg          470.3891  813.7804
plot(lm_bic) 

mean((data.test$y - predict(lm_bic, data.test))^2) 
## [1] 3105.225
# Standard Error
sd((data.test$y - predict(lm_bic, data.test))^2)/sqrt(n.test) 
## [1] 377.0068

3. Apply best subset selection using 10-fold cross-validation to select the number of predictors (R function regsubsets in package leaps). [Use a random number seed of 1306 before entering the command: folds <- sample(1:k, nrow(data.train), replace = TRUE).]

k=10
set.seed(1306)
folds=sample(1:k, nrow(data.train), replace = TRUE)
cv.errors=matrix(NA, k, 10, dimnames = list(NULL, paste(1:10)))
for (i in 1:k) {
model3.cv=regsubsets(y~.,data=data.train[folds!=i,],nvmax= 10)
for(j in 1:10){
pred=predict(model3.cv,data.train[folds == i,],id = j)
cv.errors[i, j] = mean((data.train$y[folds == i]-pred)^2) }
}

Error Check

cv.errors
##              1        2        3        4        5        6        7        8
##  [1,] 5228.294 3594.910 3803.587 3597.286 3534.969 3372.797 3448.036 3391.736
##  [2,] 5250.708 3668.933 3676.897 3502.654 3376.652 3259.932 3291.237 3226.720
##  [3,] 4199.806 3016.784 2948.537 3147.238 3004.054 3048.936 3005.914 2998.588
##  [4,] 3507.689 2290.441 2482.850 2509.730 2794.157 2589.080 2520.665 2485.886
##  [5,] 5244.321 4454.775 4566.870 4293.455 4024.717 4254.735 4273.060 4203.878
##  [6,] 4247.804 2423.008 2876.691 2697.154 2625.302 2590.656 2628.224 2543.420
##  [7,] 3753.740 2833.985 2735.999 2823.691 2727.169 2517.440 2453.937 2372.566
##  [8,] 2777.160 2239.657 2175.244 2236.359 2223.217 2118.511 2103.730 2040.523
##  [9,] 4064.103 2901.854 2855.802 2725.318 2630.517 2583.725 2518.280 2471.556
## [10,] 5552.186 4091.603 4022.215 3973.552 3741.940 3834.594 3847.948 3791.971
##              9       10
##  [1,] 3397.635 3395.524
##  [2,] 3258.746 3270.604
##  [3,] 3002.759 3010.564
##  [4,] 2507.241 2501.681
##  [5,] 4209.845 4209.801
##  [6,] 2577.724 2571.787
##  [7,] 2397.621 2401.064
##  [8,] 2112.749 2123.715
##  [9,] 2471.827 2472.689
## [10,] 3838.924 3841.359
mean.cv.errors=apply(cv.errors, 2, mean)
mean.cv.errors
##        1        2        3        4        5        6        7        8 
## 4382.581 3151.595 3214.469 3150.644 3068.269 3017.041 3009.103 2952.684 
##        9       10 
## 2977.507 2979.879
which.min(mean.cv.errors)
## 8 
## 8
mean.cv.errors[6]
##        6 
## 3017.041

Plot Model 3

par(mfrow = c(1,2))
plot(mean.cv.errors, type = "b", xlab = "Predictors", ylab = "Mean CV Errors",
     main = "Subset Selection (10-fold CV)")
points(6, mean.cv.errors[6], col = "Blue", cex = 2, pch =20)

rmse.cv = sqrt(mean.cv.errors)
rmse.cv[6]
##       6 
## 54.9276
plot(rmse.cv, pch = 19, type = "b", xlab = "Predictors", ylab = "RMSE CV",
     main = "Subset Selection (10-fold CV)")
points(6, rmse.cv[6], col = "Blue", cex = 2, pch =20)

reg.best <- regsubsets(y ~ .,data =data.train,nvmax =10)
coef(reg.best, 6)
## (Intercept)         sex         bmi         map          tc         tch 
##    153.5119   -181.6819    518.2526    286.6225   -364.5528    311.4993 
##         ltg 
##    642.0848

Prediction

mean((data.test$y-predict(reg.best, data.test, id=6))^2) 
## [1] 3105.225
# Standard Error
sd((data.test$y-predict(reg.best, data.test, id=6))^2)/sqrt(n.test) 
## [1] 377.0068

4. Ridge regression modeling using 10-fold cross-validation to select the largest value of λ such that the cross-validation error is within 1 standard error of the minimum (R functions glmnet and cv.glmnet in package glmnet). [Use a random number seed of 1306 immediately before entering the command: cv.out <- cv.glmnet(x.train, y.train, alpha = 0).]

par(mfrow = c(1,2))
grid = 10^seq(10, -2, length = 100)
model4.ridge <- glmnet(x.train, y.train, alpha = 0, lambda = grid, thresh = 1e-12)
plot(model4.ridge, xvar = "lambda", label = TRUE)
set.seed(1306)
cv.out=cv.glmnet(x.train, y.train, alpha = 0)
plot(cv.out)

Prediction

bestlam=cv.out$lambda.min
bestlam 
## [1] 4.683924
log(bestlam)
## [1] 1.544136
model4.ridge=glmnet(x.train, y.train, alpha = 0, lambda = bestlam)
ridge.pred=predict(model4.ridge, s = bestlam, newx = x.test)

mean((ridge.pred - y.test)^2) 
## [1] 3116.206
# Standard Error
sd((ridge.pred - y.test)^2)/sqrt(n.test)
## [1] 370.474
largelam <- cv.out$lambda.1se
largelam
## [1] 39.8018
ridge.mod <- glmnet(x.train, y.train, alpha = 0, lambda = largelam)
ridge.pred <- predict(ridge.mod, s = largelam, newx = x.test)
mean((ridge.pred - y.test)^2)
## [1] 3193.515
sd((ridge.pred - y.test)^2)/sqrt(n.test)
## [1] 360.6616

#5. Lasso model using 10-fold cross-validation to select the largest value of λ such that the crossvalidation error is within 1 standard error of the minimum (R functions glmnet and cv.glmnet in package glmnet). [Use a random number seed of 1306 immediately before entering the command: cv.out <- cv.glmnet(x.train, y.train, alpha = 1).]

grid <- 10^seq(10, -2, length = 100)
model5.lasso <- glmnet(x.train, y.train, alpha = 1, lambda = grid, thresh = 1e-12)
plot(model5.lasso, xvar = "lambda", label = TRUE)

set.seed(1306)
cv.out <- cv.glmnet(x.train, y.train, alpha = 1)
plot(cv.out)

bestlam=cv.out$lambda.min
bestlam 
## [1] 0.3081709
log(bestlam)
## [1] -1.177101
model5.lasso=glmnet(x.train,y.train,alpha =1,lambda =bestlam)
lasso.pred=predict(model5.lasso,s=bestlam,newx=x.test)

mean((lasso.pred-y.test)^2) 
## [1] 3113.142
# Standard Error
sd((lasso.pred-y.test)^2)/sqrt(n.test)
## [1] 371.7285