dim(Hitters)
[1] 263  20
sum(is.na(Hitters))
[1] 0

So now we have zero rows with missing data in Hitters. We're going to start building a set of models. First we'll find the best model for each number of variables.

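The fitting call itself is not shown above; judging from the call echoed by summary() below, it was produced with the leaps package along these lines (a sketch, not necessarily the original code):

library(leaps)
regfit.full=regsubsets(Salary~., Hitters)
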
summary(regfit.full)
Subset selection object
Call: regsubsets.formula(Salary ~ ., Hitters)
19 Variables  (and intercept)
           Forced in Forced out
AtBat          FALSE      FALSE
Hits           FALSE      FALSE
HmRun          FALSE      FALSE
Runs           FALSE      FALSE
RBI            FALSE      FALSE
Walks          FALSE      FALSE
Years          FALSE      FALSE
CAtBat         FALSE      FALSE
CHits          FALSE      FALSE
CHmRun         FALSE      FALSE
CRuns          FALSE      FALSE
CRBI           FALSE      FALSE
CWalks         FALSE      FALSE
LeagueN        FALSE      FALSE
DivisionW      FALSE      FALSE
PutOuts        FALSE      FALSE
Assists        FALSE      FALSE
Errors         FALSE      FALSE
NewLeagueN     FALSE      FALSE
1 subsets of each size up to 8
Selection Algorithm: exhaustive
         AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
1  ( 1 ) " "   " "  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       " "     " "     " "    " "       
2  ( 1 ) " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       " "     " "     " "    " "       
3  ( 1 ) " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       "*"     " "     " "    " "       
4  ( 1 ) " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
5  ( 1 ) "*"   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
6  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
7  ( 1 ) " "   "*"  " "   " "  " " "*"   " "   "*"    "*"   "*"    " "   " "  " "    " "     "*"       "*"     " "     " "    " "       
8  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   " "    " "   "*"    "*"   " "  "*"    " "     "*"       "*"     " "     " "    " "       

Note that regsubsets() only reports models with up to eight variables by default; we will set nvmax = 19 so that it considers models with up to all 19 predictors.

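A sketch of the refit with nvmax = 19, matching the call echoed below:

regfit.full=regsubsets(Salary~., data=Hitters, nvmax=19)
reg.summary=summary(regfit.full)
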
reg.summary
Subset selection object
Call: regsubsets.formula(Salary ~ ., data = Hitters, nvmax = 19)
19 Variables  (and intercept)
           Forced in Forced out
AtBat          FALSE      FALSE
Hits           FALSE      FALSE
HmRun          FALSE      FALSE
Runs           FALSE      FALSE
RBI            FALSE      FALSE
Walks          FALSE      FALSE
Years          FALSE      FALSE
CAtBat         FALSE      FALSE
CHits          FALSE      FALSE
CHmRun         FALSE      FALSE
CRuns          FALSE      FALSE
CRBI           FALSE      FALSE
CWalks         FALSE      FALSE
LeagueN        FALSE      FALSE
DivisionW      FALSE      FALSE
PutOuts        FALSE      FALSE
Assists        FALSE      FALSE
Errors         FALSE      FALSE
NewLeagueN     FALSE      FALSE
1 subsets of each size up to 19
Selection Algorithm: exhaustive
          AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
1  ( 1 )  " "   " "  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       " "     " "     " "    " "       
2  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       " "     " "     " "    " "       
3  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     " "       "*"     " "     " "    " "       
4  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
5  ( 1 )  "*"   "*"  " "   " "  " " " "   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
6  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    " "   "*"  " "    " "     "*"       "*"     " "     " "    " "       
7  ( 1 )  " "   "*"  " "   " "  " " "*"   " "   "*"    "*"   "*"    " "   " "  " "    " "     "*"       "*"     " "     " "    " "       
8  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   "*"    "*"   " "  "*"    " "     "*"       "*"     " "     " "    " "       
9  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     " "     " "    " "       
10  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     "*"     " "    " "       
11  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     " "    " "       
12  ( 1 ) "*"   "*"  " "   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     " "    " "       
13  ( 1 ) "*"   "*"  " "   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
14  ( 1 ) "*"   "*"  "*"   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
15  ( 1 ) "*"   "*"  "*"   "*"  " " "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
16  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
17  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       
18  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   "*"   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       
19  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   "*"   "*"    "*"   "*"    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       

Next we'll look at the R-squared value for each of the 19 models. Note that R-squared increases as the number of variables increases.

reg.summary$rsq
 [1] 0.3214501 0.4252237 0.4514294 0.4754067 0.4908036 0.5087146 0.5141227 0.5285569 0.5346124 0.5404950 0.5426153 0.5436302 0.5444570 0.5452164 0.5454692 0.5457656 0.5459518
[18] 0.5460945 0.5461159

Let's plot the RSS, adjusted R-squared, Cp, and BIC for all of the models to help us decide which one to select. RSS goes first.

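The RSS plot is not reproduced here; a minimal sketch of the call that produces it:

plot(reg.summary$rss, xlab="Number of variables", ylab="RSS", type="l")
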
Next we'll look at adjusted R-squared.

plot(reg.summary$adjr2, xlab="Number of variables", ylab="Adj R2", type="l")
which.max(reg.summary$adjr2)
[1] 11
points(11,reg.summary$adjr2[11], col="red",cex=2,pch=20)

The red dot shows the maximum value of adjusted R-squared. In a similar manner, we plot Mallows' Cp and BIC, and determine which model minimizes each using which.min().

Third, we'll look at Mallows' Cp.

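A sketch of the Cp plot and its minimum, mirroring the adjusted R-squared code above:

plot(reg.summary$cp, xlab="Number of variables", ylab="Cp", type="l")
which.min(reg.summary$cp)
points(which.min(reg.summary$cp), reg.summary$cp[which.min(reg.summary$cp)], col="red", cex=2, pch=20)
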
Fourth, let's do the same analysis using BIC.

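And the same pattern for BIC:

plot(reg.summary$bic, xlab="Number of variables", ylab="BIC", type="l")
which.min(reg.summary$bic)
points(which.min(reg.summary$bic), reg.summary$bic[which.min(reg.summary$bic)], col="red", cex=2, pch=20)
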
Now let's use the plot method for regsubsets objects to display the selected variables for the best model of each size. Our goal is to find the best model. First we'll look at R-squared.

Now let’s look at adjusted r-squared

Third we'll look at Mallows' Cp

Fourth we’ll look at BIC

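The four plots are not reproduced here; they come from the plot method for regsubsets objects, with a different scale argument for each statistic:

plot(regfit.full, scale="r2")
plot(regfit.full, scale="adjr2")
plot(regfit.full, scale="Cp")
plot(regfit.full, scale="bic")
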
Clearly the lowest BIC is around -150, but several models come close. The model that minimizes BIC has six variables. Let's see the variables:

coef(regfit.full, 6)

 (Intercept)        AtBat         Hits        Walks         CRBI    DivisionW      PutOuts 
  91.5117981   -1.8685892    7.6043976    3.6976468    0.6430169 -122.9515338    0.2643076 

6.5.2 Forward and Backward Stepwise Selection

library(glmnet)

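First, forward stepwise selection. The fit is not shown above; a minimal sketch:

regfit.fwd=regsubsets(Salary~., data=Hitters, nvmax=19, method="forward")
summary(regfit.fwd)
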
Now let’s do the same thing using backward stepwise selection

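The fitting call is again omitted; matching the call echoed below:

regfit.bwd=regsubsets(Salary~., data=Hitters, nvmax=19, method="backward")
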
summary(regfit.bwd)
Subset selection object
Call: regsubsets.formula(Salary ~ ., data = Hitters, nvmax = 19, method = "backward")
19 Variables  (and intercept)
           Forced in Forced out
AtBat          FALSE      FALSE
Hits           FALSE      FALSE
HmRun          FALSE      FALSE
Runs           FALSE      FALSE
RBI            FALSE      FALSE
Walks          FALSE      FALSE
Years          FALSE      FALSE
CAtBat         FALSE      FALSE
CHits          FALSE      FALSE
CHmRun         FALSE      FALSE
CRuns          FALSE      FALSE
CRBI           FALSE      FALSE
CWalks         FALSE      FALSE
LeagueN        FALSE      FALSE
DivisionW      FALSE      FALSE
PutOuts        FALSE      FALSE
Assists        FALSE      FALSE
Errors         FALSE      FALSE
NewLeagueN     FALSE      FALSE
1 subsets of each size up to 19
Selection Algorithm: backward
          AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
1  ( 1 )  " "   " "  " "   " "  " " " "   " "   " "    " "   " "    "*"   " "  " "    " "     " "       " "     " "     " "    " "       
2  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    "*"   " "  " "    " "     " "       " "     " "     " "    " "       
3  ( 1 )  " "   "*"  " "   " "  " " " "   " "   " "    " "   " "    "*"   " "  " "    " "     " "       "*"     " "     " "    " "       
4  ( 1 )  "*"   "*"  " "   " "  " " " "   " "   " "    " "   " "    "*"   " "  " "    " "     " "       "*"     " "     " "    " "       
5  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    "*"   " "  " "    " "     " "       "*"     " "     " "    " "       
6  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    "*"   " "  " "    " "     "*"       "*"     " "     " "    " "       
7  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    "*"   " "  "*"    " "     "*"       "*"     " "     " "    " "       
8  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   " "    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     " "     " "    " "       
9  ( 1 )  "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     " "     " "    " "       
10  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    " "     "*"       "*"     "*"     " "    " "       
11  ( 1 ) "*"   "*"  " "   " "  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     " "    " "       
12  ( 1 ) "*"   "*"  " "   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     " "    " "       
13  ( 1 ) "*"   "*"  " "   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
14  ( 1 ) "*"   "*"  "*"   "*"  " " "*"   " "   "*"    " "   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
15  ( 1 ) "*"   "*"  "*"   "*"  " " "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
16  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    " "       
17  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   " "   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       
18  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   "*"   "*"    "*"   " "    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       
19  ( 1 ) "*"   "*"  "*"   "*"  "*" "*"   "*"   "*"    "*"   "*"    "*"   "*"  "*"    "*"     "*"       "*"     "*"     "*"    "*"       

Note that the one-variable through six-variable models are identical. Let's look at the seven-variable models from best subset and backward stepwise selection:

coef(regfit.full, 7)
 (Intercept)         Hits        Walks       CAtBat        CHits       CHmRun    DivisionW      PutOuts 
  79.4509472    1.2833513    3.2274264   -0.3752350    1.4957073    1.4420538 -129.9866432    0.2366813 
coef(regfit.bwd, 7)
 (Intercept)        AtBat         Hits        Walks        CRuns       CWalks    DivisionW      PutOuts 
 105.6487488   -1.9762838    6.7574914    6.0558691    1.1293095   -0.7163346 -116.1692169    0.3028847 

6.5.3 Choosing Among Models Using the Validation Set Approach and Cross-Validation

We will begin by splitting the data into a test set and a training set.

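The split itself is not shown. One way to produce logical train/test indicators like those below (the ISLR approach; the seed is an assumption) is:

set.seed(1)
train=sample(c(TRUE,FALSE), nrow(Hitters), rep=TRUE)
test=(!train)
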
train
  [1]  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
 [29] FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
 [57]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE
 [85] FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE
[113]  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[141] FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
[169] FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE
[197]  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
[225] FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
[253]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
test
  [1] FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
 [29]  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
 [57] FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE
 [85]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE
[113] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[141]  TRUE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE
[169]  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE
[197] FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE
[225]  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE
[253] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE

Now we apply regsubsets to the training set in order to perform best subset selection.

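Matching the call echoed below:

regfit.best=regsubsets(Salary~., data=Hitters[train,], nvmax=19)
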
regfit.best
Subset selection object
Call: regsubsets.formula(Salary ~ ., data = Hitters[train, ], nvmax = 19)
19 Variables  (and intercept)
           Forced in Forced out
AtBat          FALSE      FALSE
Hits           FALSE      FALSE
HmRun          FALSE      FALSE
Runs           FALSE      FALSE
RBI            FALSE      FALSE
Walks          FALSE      FALSE
Years          FALSE      FALSE
CAtBat         FALSE      FALSE
CHits          FALSE      FALSE
CHmRun         FALSE      FALSE
CRuns          FALSE      FALSE
CRBI           FALSE      FALSE
CWalks         FALSE      FALSE
LeagueN        FALSE      FALSE
DivisionW      FALSE      FALSE
PutOuts        FALSE      FALSE
Assists        FALSE      FALSE
Errors         FALSE      FALSE
NewLeagueN     FALSE      FALSE
1 subsets of each size up to 19
Selection Algorithm: exhaustive
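
Next we build a model matrix from the test observations, which we can use to compute validation-set predictions. The original call is not shown; a sketch:

test.mat=model.matrix(Salary~., data=Hitters[test,])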
test.mat
                   (Intercept) AtBat Hits HmRun Runs RBI Walks Years CAtBat CHits CHmRun CRuns CRBI CWalks LeagueN DivisionW PutOuts Assists Errors NewLeagueN
-Andre Dawson                1   496  141    20   65  78    37    11   5628  1575    225   828  838    354       1         0     200      11      3          1
-Andres Galarraga            1   321   87    10   39  42    30     2    396   101     12    48   46     33       1         0     805      40      4          1
-Al Newman                   1   185   37     1   23   8    21     2    214    42      1    30    9     24       1         0      76     127      7          0
-Argenis Salazar             1   298   73     0   24  24     7     3    509   108      0    41   37     12       0         1     121     283      9          0
-Andres Thomas               1   323   81     6   26  32     8     2    341    86      6    32   34      8       1         1     143     290     19          1
-Andre Thornton              1   401   92    17   49  66    65    13   5206  1332    253   784  890    866       0         0       0       0      0          0
-Alan Wiggins                1   239   60     0   30  11    22     6   1941   510      4   309  103    207       0         0     121     151      6          0
-Buddy Bell                  1   568  158    20   89  75    73    15   8068  2273    177  1045  993    732       1         1     105     290     10          1
-Bruce Bochy                 1   127   32     8   16  22    14     8    727   180     24    67   82     56       1         1     202      22      2          1
-Barry Bonds                 1   413   92    16   72  48    65     1    413    92     16    72   48     65       1         0     280       9      5          1
-Bob Brenly                  1   472  116    16   60  62    74     6   1924   489     67   242  251    240       1         1     518      55      3          1
-Bill Buckner                1   629  168    18   73 102    40    18   8424  2464    164  1008 1072    402       0         0    1067     157     14          0
-Bob Dernier                 1   324   73     4   32  18    22     7   1931   491     13   291  108    180       1         0     222       3      3          1
-Bob Kearney                 1   204   49     6   23  25    12     7   1309   308     27   126  132     66       0         1     419      46      5          0
-BillyJo Robidoux            1   181   41     1   15  21    33     2    232    50      4    20   29     45       0         0     326      29      5          0
-Chris Brown                 1   416  132     7   57  49    33     3    932   273     24   113  121     80       1         1      73     177     18          1
-Carmen Castillo             1   205   57     8   34  32     9     5    756   192     32   117  107     51       0         0      58       4      4          0
-Chili Davis                 1   526  146    13   71  70    84     6   2648   715     77   352  342    289       1         1     303       9      9          1
-Curt Ford                   1   214   53     2   30  29    23     2    226    59      2    32   32     27       1         0     109       7      3          1
-Chet Lemon                  1   403  101    12   45  53    39    12   5150  1429    166   747  666    526       0         0     316       6      5          0
-Candy Maldonado             1   405  102    18   49  85    20     6    950   231     29    99  138     64       1         1     161      10      3          1
-Carmelo Martinez            1   244   58     9   28  25    35     4   1335   333     49   164  179    194       1         1     142      14      2          1
-Craig Reynolds              1   313   78     6   32  41    12    12   3742   968     35   409  321    170       1         1     106     206      7          1
-Cal Ripken                  1   627  177    25   98  81    70     6   3210   927    133   529  472    313       0         0     240     482     13          0
-Cory Snyder                 1   416  113    24   58  69    16     1    416   113     24    58   69     16       0         0     203      70     10          0
-Dave Anderson               1   216   53     1   31  15    22     4    926   210      9   118   69    114       1         1      73     152     11          1
-Don Baylor                  1   585  139    31   93  94    62    17   7546  1982    315  1141 1179    727       0         0       0       0      0          0
-Darnell Coles               1   521  142    20   67  86    45     4    815   205     22    99  103     78       0         0     107     242     23          0
-Dan Gladden                 1   351   97     4   55  29    39     4   1258   353     16   196  110    117       1         1     226       7      3          0
-Dave Henderson              1   388  103    15   59  47    39     6   2174   555     80   285  274    186       0         1     182       9      4          0
-Davey Lopes                 1   255   70     7   49  35    43    15   6311  1661    154  1019  608    820       1         0      51      54      8          1
-Dave Parker                 1   637  174    31   89 116    56    14   6727  2024    247   978 1093    495       1         1     278       9      9          1
-Dick Schofield              1   458  114    13   67  57    48     4   1350   298     28   160  123    122       0         1     246     389     18          0
-Darryl Strawberry           1   475  123    27   76  93    72     4   1810   471    108   292  343    267       1         0     226      10      6          1
-Danny Tartabull             1   511  138    25   76  96    61     3    592   164     28    87  110     71       0         1     157       7      8          0
-Eddie Milner                1   424  110    15   70  47    36     7   2130   544     38   335  174    258       1         1     292       6      3          1
-Eddie Murray                1   495  151    17   61  84    78    10   5624  1679    275   884 1015    709       0         0    1045      88     13          0
-Frank White                 1   566  154    22   76  84    43    14   6100  1583    131   743  693    300       0         1     316     439     10          0
-George Bell                 1   641  198    31  101 108    41     5   2129   610     92   297  319    117       0         0     269      17     10          0
-George Brett                1   441  128    16   70  73    80    14   6675  2095    209  1072 1050    695       0         1      97     218     16          0
-Glenn Davis                 1   574  152    31   91 101    64     3    985   260     53   148  173     95       1         1    1253     111     11          1
-Greg Gagne                  1   472  118    12   63  54    30     4    793   187     14   102   80     50       0         1     228     377     26          0
-Gary Pettis                 1   539  139     5   93  58    69     5   1469   369     12   247  126    198       0         1     462       9      7          0
-Gary Redus                  1   340   84    11   62  33    47     5   1516   376     42   284  141    219       1         0     185       8      4          0
-Garry Templeton             1   510  126     2   42  44    35    11   5562  1578     44   703  519    256       1         1     207     358     20          1
-Greg Walker                 1   282   78    13   37  51    29     5   1649   453     73   211  280    138       0         1     670      57      5          0
-Harold Baines               1   570  169    21   72  88    38     7   3754  1077    140   492  589    263       0         1     295      15      5          0
-Hubie Brooks                1   306  104    14   50  58    25     7   2954   822     55   313  377    187       1         0     116     222     15          1
-Howard Johnson              1   220   54    10   30  39    31     5   1185   299     40   145  154    128       1         0      50     136     20          1
-Harry Spilman               1   143   39     5   18  30    15     9    639   151     16    80   97     61       1         1     138      15      1          1
 [ reached getOption("max.print") -- omitted 79 rows ]
attr(,"assign")
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
attr(,"contrasts")
attr(,"contrasts")$League
[1] "contr.treatment"

attr(,"contrasts")$Division
[1] "contr.treatment"

attr(,"contrasts")$NewLeague
[1] "contr.treatment"
val.errors
 [1] -349656.2 -399578.0 -423796.6 -458003.5 -465179.9 -479617.1 -476801.7 -461133.8 -474254.5 -477127.3 -484379.6 -488721.9 -490440.3 -488990.6 -492407.4 -495777.8 -495932.4
[18] -496521.5 -496502.3
which.min(val.errors)
[1] 18

Let's create a function to do what we did above, since regsubsets() does not provide its own predict() method.

predict.regsubsets=function(object, newdata, id, ...){
  # Extract the formula used in the original regsubsets() call
  form=as.formula(object$call[[2]])
  # Build the model matrix for the new data
  mat=model.matrix(form, newdata)
  # Coefficients of the best model with 'id' variables
  coefi=coef(object, id=id)
  xvars=names(coefi)
  # Return the predictions
  mat[,xvars]%*%coefi
}

Let's look at the best 10-variable model on the full data set. Note that these 10 variables differ from those selected on the training set.

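The refit on the full data set is not shown above; presumably it was:

regfit.best=regsubsets(Salary~., data=Hitters, nvmax=19)
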
coef(regfit.best,10)
 (Intercept)        AtBat         Hits        Walks       CAtBat        CRuns         CRBI       CWalks    DivisionW      PutOuts      Assists 
 162.5354420   -2.1686501    6.9180175    5.7732246   -0.1300798    1.4082490    0.7743122   -0.8308264 -112.3800575    0.2973726    0.2831680 
mean.cv.errors
        1         2         3         4         5         6         7         8         9        10        11        12        13        14        15        16        17 
-372757.8 -394983.0 -417675.6 -429044.2 -420025.2 -427329.5 -432711.9 -427515.5 -433256.4 -423672.7 -420272.3 -424385.0 -432931.9 -432409.7 -429792.3 -429673.3 -431672.6 
       18        19 
-431249.2 -431678.3 

reg.best=regsubsets(Salary~., data=Hitters, nvmax=19)
coef(reg.best, 9)
  (Intercept)         AtBat          Hits         Walks        CAtBat         CRuns          CRBI        CWalks     DivisionW       PutOuts 
 146.24960033   -1.93676754    6.65672102    5.55204413   -0.09953904    1.25067124    0.66176849   -0.77798498 -115.34950146    0.27773062 

6.6 Lab 2: Ridge Regression and the Lasso

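glmnet() works with a numeric predictor matrix x and a response vector y rather than a formula. The setup is not shown above; following the ISLR lab it would be:

x=model.matrix(Salary~., Hitters)[,-1]
y=Hitters$Salary
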
y
  [1]  475.000  480.000  500.000   91.500  750.000   70.000  100.000   75.000 1100.000  517.143  512.500  550.000  700.000  240.000  775.000  175.000  135.000  100.000  115.000
 [20]  600.000  776.667  765.000  708.333  750.000  625.000  900.000  110.000  612.500  300.000  850.000   90.000   67.500  180.000  305.000  215.000  247.500  815.000  875.000
 [39]   70.000 1200.000  675.000  415.000  340.000  416.667 1350.000   90.000  275.000  230.000  225.000  950.000   75.000  105.000  320.000  850.000  535.000  933.333  850.000
 [58]  210.000  325.000  275.000  450.000 1975.000 1900.000  600.000 1041.667  110.000  260.000  475.000  431.500 1220.000   70.000  145.000  595.000 1861.460  300.000  490.000
 [77] 2460.000  375.000  750.000 1175.000   70.000 1500.000  385.000 1925.571  215.000  900.000  155.000  700.000  535.000  362.500  733.333  200.000  400.000  400.000  737.500
 [96]  500.000  600.000  662.500  950.000  750.000  297.500  325.000   87.500  175.000   90.000 1237.500  430.000  100.000  165.000  250.000 1300.000  773.333 1008.333  275.000
[115]  775.000  850.000  365.000   95.000  110.000  100.000  277.500   80.000  600.000  200.000  657.000   75.000 2412.500  250.000  155.000  640.000  300.000  110.000  825.000
[134]  195.000  450.000  630.000   86.500 1300.000 1000.000 1800.000 1310.000  737.500  625.000  125.000 1043.333  725.000  300.000  365.000   75.000 1183.333  202.500  225.000
[153]  525.000  265.000  787.500  800.000  587.500  145.000  420.000   75.000  575.000  780.000   90.000  150.000  700.000  550.000  650.000   68.000  100.000  670.000  175.000
[172]  137.000 2127.333  875.000  120.000  140.000  210.000  800.000  240.000  350.000  175.000  200.000 1940.000  700.000  750.000  450.000  172.000 1260.000  750.000  190.000
[191]  580.000  130.000  450.000  300.000  250.000 1050.000  215.000  400.000  560.000 1670.000  487.500  425.000  500.000  250.000  400.000  450.000  750.000   70.000  875.000
[210]  190.000  191.000  740.000  250.000  140.000   97.500  740.000  140.000  341.667 1000.000  100.000   90.000  200.000  135.000  155.000  475.000 1450.000  150.000  105.000
[229]  350.000   90.000  530.000  341.667  940.000  350.000  326.667  250.000  740.000  425.000  925.000  185.000  920.000  286.667  245.000  235.000 1150.000  160.000  425.000
[248]  900.000  500.000  277.500  750.000  160.000 1300.000  525.000  550.000 1600.000  120.000  165.000  700.000  875.000  385.000  960.000 1000.000

6.6.1 Ridge Regression

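The fit echoed below uses a grid of 100 lambda values running from 10^10 down to 10^-2; a sketch of the code that produces it, matching the call shown:

grid=10^seq(10, -2, length=100)
ridge.mod=glmnet(x, y, alpha=0, lambda=grid)
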
ridge.mod

Call:  glmnet(x = x, y = y, alpha = 0, lambda = grid) 

       Df      %Dev    Lambda
  [1,] 19 2.757e-07 1.000e+10
  [2,] 19 3.645e-07 7.565e+09
  [3,] 19 4.818e-07 5.722e+09
  [4,] 19 6.369e-07 4.329e+09
  [5,] 19 8.420e-07 3.275e+09
  [6,] 19 1.113e-06 2.477e+09
  [7,] 19 1.471e-06 1.874e+09
  [8,] 19 1.945e-06 1.417e+09
  [9,] 19 2.571e-06 1.072e+09
 [10,] 19 3.399e-06 8.111e+08
 [11,] 19 4.493e-06 6.136e+08
 [12,] 19 5.940e-06 4.642e+08
 [13,] 19 7.852e-06 3.511e+08
 [14,] 19 1.038e-05 2.656e+08
 [15,] 19 1.372e-05 2.009e+08
 [16,] 19 1.814e-05 1.520e+08
 [17,] 19 2.398e-05 1.150e+08
 [18,] 19 3.170e-05 8.697e+07
 [19,] 19 4.190e-05 6.579e+07
 [20,] 19 5.539e-05 4.977e+07
 [21,] 19 7.322e-05 3.765e+07
 [22,] 19 9.679e-05 2.848e+07
 [23,] 19 1.279e-04 2.154e+07
 [24,] 19 1.691e-04 1.630e+07
 [25,] 19 2.236e-04 1.233e+07
 [26,] 19 2.955e-04 9.326e+06
 [27,] 19 3.906e-04 7.055e+06
 [28,] 19 5.162e-04 5.337e+06
 [29,] 19 6.822e-04 4.037e+06
 [30,] 19 9.015e-04 3.054e+06
 [31,] 19 1.191e-03 2.310e+06
 [32,] 19 1.574e-03 1.748e+06
 [33,] 19 2.079e-03 1.322e+06
 [34,] 19 2.745e-03 1.000e+06
 [35,] 19 3.623e-03 7.565e+05
 [36,] 19 4.781e-03 5.722e+05
 [37,] 19 6.304e-03 4.329e+05
 [38,] 19 8.306e-03 3.275e+05
 [39,] 19 1.093e-02 2.477e+05
 [40,] 19 1.435e-02 1.874e+05
 [41,] 19 1.881e-02 1.417e+05
 [42,] 19 2.461e-02 1.072e+05
 [43,] 19 3.208e-02 8.111e+04
 [44,] 19 4.165e-02 6.136e+04
 [45,] 19 5.378e-02 4.642e+04
 [46,] 19 6.897e-02 3.511e+04
 [47,] 19 8.769e-02 2.656e+04
 [48,] 19 1.103e-01 2.009e+04
 [49,] 19 1.370e-01 1.520e+04
 [50,] 19 1.675e-01 1.150e+04
 [51,] 19 2.011e-01 8.697e+03
 [52,] 19 2.369e-01 6.579e+03
 [53,] 19 2.731e-01 4.977e+03
 [54,] 19 3.081e-01 3.765e+03
 [55,] 19 3.404e-01 2.848e+03
 [56,] 19 3.688e-01 2.154e+03
 [57,] 19 3.928e-01 1.630e+03
 [58,] 19 4.125e-01 1.233e+03
 [59,] 19 4.284e-01 9.326e+02
 [60,] 19 4.412e-01 7.055e+02
 [61,] 19 4.516e-01 5.337e+02
 [62,] 19 4.601e-01 4.037e+02
 [63,] 19 4.673e-01 3.054e+02
 [64,] 19 4.737e-01 2.310e+02
 [65,] 19 4.795e-01 1.748e+02
 [66,] 19 4.850e-01 1.322e+02
 [67,] 19 4.904e-01 1.000e+02
 [68,] 19 4.957e-01 7.565e+01
 [69,] 19 5.011e-01 5.722e+01
 [70,] 19 5.066e-01 4.329e+01
 [71,] 19 5.119e-01 3.275e+01
 [72,] 19 5.171e-01 2.477e+01
 [73,] 19 5.219e-01 1.874e+01
 [74,] 19 5.263e-01 1.417e+01
 [75,] 19 5.303e-01 1.072e+01
 [76,] 19 5.336e-01 8.111e+00
 [77,] 19 5.365e-01 6.136e+00
 [78,] 19 5.388e-01 4.642e+00
 [79,] 19 5.407e-01 3.511e+00
 [80,] 19 5.421e-01 2.656e+00
 [81,] 19 5.432e-01 2.009e+00
 [82,] 19 5.441e-01 1.520e+00
 [83,] 19 5.447e-01 1.150e+00
 [84,] 19 5.451e-01 8.697e-01
 [85,] 19 5.455e-01 6.579e-01
 [86,] 19 5.457e-01 4.977e-01
 [87,] 19 5.458e-01 3.765e-01
 [88,] 19 5.459e-01 2.848e-01
 [89,] 19 5.460e-01 2.154e-01
 [90,] 19 5.460e-01 1.630e-01
 [91,] 19 5.460e-01 1.233e-01
 [92,] 19 5.460e-01 9.326e-02
 [93,] 19 5.461e-01 7.055e-02
 [94,] 19 5.461e-01 5.337e-02
 [95,] 19 5.461e-01 4.037e-02
 [96,] 19 5.461e-01 3.054e-02
 [97,] 19 5.461e-01 2.310e-02
 [98,] 19 5.461e-01 1.748e-02
 [99,] 19 5.461e-01 1.322e-02
[100,] 19 5.461e-01 1.000e-02
coef(ridge.mod)
20 x 100 sparse Matrix of class "dgCMatrix"
   [[ suppressing 81 column names ‘s0’, ‘s1’, ‘s2’ ... ]]
                                                                                                                                                                       
(Intercept) 5.359257e+02 5.359256e+02 5.359256e+02 5.359254e+02 5.359253e+02 5.359251e+02 5.359249e+02 5.359246e+02 5.359241e+02 5.359236e+02 5.359228e+02 5.359218e+02
AtBat       5.443467e-08 7.195940e-08 9.512609e-08 1.257511e-07 1.662355e-07 2.197535e-07 2.905011e-07 3.840251e-07 5.076583e-07 6.710939e-07 8.871458e-07 1.172753e-06
Hits        1.974589e-07 2.610289e-07 3.450649e-07 4.561554e-07 6.030105e-07 7.971441e-07 1.053777e-06 1.393031e-06 1.841504e-06 2.434358e-06 3.218075e-06 4.254101e-06
HmRun       7.956523e-07 1.051805e-06 1.390424e-06 1.838059e-06 2.429805e-06 3.212059e-06 4.246151e-06 5.613159e-06 7.420260e-06 9.809139e-06 1.296709e-05 1.714170e-05
Runs        3.339178e-07 4.414196e-07 5.835307e-07 7.713931e-07 1.019736e-06 1.348031e-06 1.782017e-06 2.355720e-06 3.114121e-06 4.116682e-06 5.442006e-06 7.194001e-06
RBI         3.527222e-07 4.662778e-07 6.163918e-07 8.148335e-07 1.077162e-06 1.423944e-06 1.882370e-06 2.488380e-06 3.289490e-06 4.348509e-06 5.748467e-06 7.599123e-06
                                                                                                                                                                       
(Intercept) 5.359205e+02 5.359188e+02 5.359165e+02 5.359135e+02 5.359095e+02 5.359042e+02 5.358972e+02 5.358880e+02 5.358758e+02 5.358597e+02 5.358383e+02 5.358102e+02
AtBat       1.550308e-06 2.049411e-06 2.709192e-06 3.581378e-06 4.734346e-06 6.258482e-06 8.273267e-06 1.093664e-05 1.445735e-05 1.911136e-05 2.526337e-05 3.339542e-05
Hits        5.623662e-06 7.434134e-06 9.827459e-06 1.299127e-05 1.717361e-05 2.270236e-05 3.001092e-05 3.967221e-05 5.244352e-05 6.932585e-05 9.164225e-05 1.211414e-04
HmRun       2.266028e-05 2.995547e-05 3.959923e-05 5.234760e-05 6.920001e-05 9.147759e-05 1.209267e-04 1.598556e-04 2.113157e-04 2.793399e-04 3.692586e-04 4.881167e-04
Runs        9.510029e-06 1.257167e-05 1.661895e-05 2.196918e-05 2.904181e-05 3.839128e-05 5.075051e-05 6.708833e-05 8.868531e-05 1.172341e-04 1.549720e-04 2.048558e-04
RBI         1.004557e-05 1.327962e-05 1.755482e-05 2.320633e-05 3.067722e-05 4.055316e-05 5.360832e-05 7.086606e-05 9.367905e-05 1.238352e-04 1.636976e-04 2.163893e-04
                                                                                                                                                                       
(Intercept) 5.357729e+02 5.357237e+02 5.356586e+02 5.355726e+02 5.354590e+02 5.353088e+02 5.351104e+02 5.348483e+02 5.345021e+02 5.340450e+02 5.334417e+02 5.326458e+02
AtBat       4.414457e-05 5.835267e-05 7.713205e-05 1.019523e-04 1.347543e-04 1.781013e-04 2.353767e-04 3.110444e-04 4.109909e-04 5.429714e-04 7.171926e-04 9.470681e-04
Hits        1.601343e-04 2.116751e-04 2.797992e-04 3.698382e-04 4.888348e-04 6.460892e-04 8.538795e-04 1.128408e-03 1.491040e-03 1.969937e-03 2.602166e-03 3.436467e-03
HmRun       6.452240e-04 8.528825e-04 1.127346e-03 1.490084e-03 1.969454e-03 2.602891e-03 3.439801e-03 4.545349e-03 6.005426e-03 7.933134e-03 1.047721e-02 1.383295e-02
Runs        2.707932e-04 3.579481e-04 4.731433e-04 6.253924e-04 8.266006e-04 1.092488e-03 1.443804e-03 1.907924e-03 2.520942e-03 3.330403e-03 4.398874e-03 5.808559e-03
RBI         2.860379e-04 3.780973e-04 4.997733e-04 6.605853e-04 8.731052e-04 1.153932e-03 1.524974e-03 2.015129e-03 2.662489e-03 3.517224e-03 4.645322e-03 6.133445e-03
                                                                                                                                                                       
(Intercept) 5.315966e+02 5.302145e+02 5.283962e+02 5.260283e+02 522.91172696 5.188425e+02 5.135499e+02 5.067007e+02 497.89432739 486.66538760 472.49837844 454.86126015
AtBat       1.250193e-03 1.649587e-03 2.175269e-03 2.856183e-03   0.00375537 4.929271e-03 6.455825e-03 8.430885e-03   0.01096941   0.01420455   0.01828293   0.02335416
Hits        4.536802e-03 5.986922e-03 7.896145e-03 1.038009e-02   0.01365619 1.793929e-02 2.351966e-02 3.075783e-02   0.04009239   0.05204227   0.06719837   0.08619852
HmRun       1.825616e-02 2.408100e-02 3.174215e-02 4.166863e-02   0.05475351 7.181148e-02 9.395171e-02 1.225246e-01   0.15912611   0.20556391   0.26375808   0.33554396
Runs        7.667251e-03 1.011593e-02 1.333834e-02 1.753936e-02   0.02306905 3.029402e-02 3.969965e-02 5.188629e-02   0.06757994   0.08763193   0.11299845   0.14468833
RBI         8.095158e-03 1.067886e-02 1.407774e-02 1.850770e-02   0.02433453 3.194175e-02 4.183469e-02 5.463507e-02   0.07108903   0.09206175   0.11850817   0.15140705
                                                                                                                                                                            
(Intercept) 433.26825885 407.35605020 377.00656988 342.45146856 304.36285975 263.84665456 222.37830885 181.62066405 143.2209084 108.6007107 78.7760218 54.3251995 35.4632735
AtBat         0.02954756   0.03695718   0.04558973   0.05533408   0.06592341   0.07692026   0.08772909   0.09763889   0.1058832   0.1115181  0.1139551  0.1121111  0.1052545
Hits          0.10967001   0.13818034   0.17210886   0.21156280   0.25628237   0.30566272   0.35881027   0.41475429   0.4726795   0.5318660  0.5929157  0.6562241  0.7236362
HmRun         0.42231272   0.52462998   0.64153683   0.77000823   0.90440096   1.03656335   1.15605327   1.25121946   1.3103300   1.3223521  1.2805113  1.1798091  1.0201569
Runs          0.18366163   0.23070152   0.28620519   0.34999568   0.42114043   0.49795797   0.57807538   0.65873313   0.7371023   0.8107935  0.8778978  0.9376971  0.9897020
RBI           0.19164195   0.23984146   0.29614905   0.36000544   0.42995694   0.50367426   0.57804567   0.64954348   0.7146205   0.7705096  0.8147962  0.8471855  0.8675975
                                                                                                                                                                             
(Intercept) 22.07163454 13.69965470 10.08979010 10.368086610 14.26002563 21.2474898 30.6934829 42.0855152 54.9738422 68.6872524 82.6164012 96.3405543 109.2829120 121.0394128
AtBat        0.09238344  0.07214526  0.04333856  0.003859899 -0.04801371 -0.1143526 -0.1969965 -0.2970743 -0.4148060 -0.5489118 -0.6964360 -0.8532130  -1.0135354  -1.1712169
Hits         0.79744297  0.88059238  0.97661396  1.090128700  1.22574333  1.3888568  1.5858862  1.8230585  2.1053049  2.4364278  2.8163931  3.2390939   3.6940734   4.1655533
HmRun        0.80505438  0.54367980  0.24336135 -0.074578917 -0.39844659 -0.7097177 -0.9847328 -1.2028780 -1.3482833 -1.4028069 -1.3562347 -1.2134681  -0.9796829  -0.6702762
Runs         1.03411394  1.07217873  1.10318741  1.129710071  1.15012799  1.1635394  1.1682319  1.1599670  1.1328125  1.0800738  0.9943610  0.8703921   0.7058863   0.5030149
RBI          0.87728221  0.87912033  0.87391232  0.866429174  0.85667724  0.8455760  0.8329787  0.8163507  0.7921941  0.7564450  0.7049849  0.6365634   0.5510051   0.4507415
                                                                                            
(Intercept) 131.3383871 140.02704637 147.0439149 152.52893912 156.6073700 159.6160967 ......
AtBat        -1.3203039  -1.45573912  -1.5737685  -1.67284639  -1.7526436  -1.8153438 ......
Hits          4.6348424   5.08339645   5.4958805   5.86106113   6.1739859   6.4337591 ......
HmRun        -0.3059515   0.09220561   0.5084434   0.92327754   1.3285278   1.7038461 ......
Runs          0.2686396   0.01299834  -0.2529044  -0.51634440  -0.7689372  -0.9993212 ......
RBI           0.3400248   0.22342867   0.1039164  -0.01416825  -0.1297830  -0.2372834 ......

 ..............................
 ........suppressing columns and rows in show(); maybe adjust 'options(max.print= *, width = *)'
 ..............................
   [[ suppressing 81 column names ‘s0’, ‘s1’, ‘s2’ ... ]]
                                                                                                                                                                                  
LeagueN    -5.800263e-07 -7.667601e-07 -1.013611e-06 -1.339933e-06 -1.771310e-06 -2.341563e-06 -3.095401e-06 -4.091926e-06 -5.409262e-06 -7.150687e-06 -9.452719e-06 -1.249582e-05
DivisionW  -7.807263e-06 -1.032074e-05 -1.364341e-05 -1.803579e-05 -2.384225e-05 -3.151805e-05 -4.166500e-05 -5.507866e-05 -7.281073e-05 -9.625147e-05 -1.272387e-04 -1.682020e-04
PutOuts     2.180288e-08  2.882212e-08  3.810115e-08  5.036747e-08  6.658282e-08  8.801855e-08  1.163553e-07  1.538148e-07  2.033341e-07  2.687955e-07  3.553316e-07  4.697270e-07
Assists     3.561198e-09  4.707694e-09  6.223294e-09  8.226828e-09  1.087538e-08  1.437661e-08  1.900503e-08  2.512352e-08  3.321180e-08  4.390401e-08  5.803847e-08  7.672336e-08
Errors     -1.660460e-08 -2.195031e-08 -2.901702e-08 -3.835881e-08 -5.070811e-08 -6.703317e-08 -8.861396e-08 -1.171425e-07 -1.548557e-07 -2.047105e-07 -2.706157e-07 -3.577390e-07
NewLeagueN -1.152288e-07 -1.523253e-07 -2.013646e-07 -2.661912e-07 -3.518874e-07 -4.651715e-07 -6.149243e-07 -8.128848e-07 -1.074570e-06 -1.420491e-06 -1.877758e-06 -2.482203e-06
                                                                                                                                                                                  
LeagueN    -1.651853e-05 -2.183616e-05 -2.886548e-05 -3.815735e-05 -5.043982e-05 -6.667507e-05 -8.813458e-05 -1.164983e-04 -1.539858e-04 -2.035284e-04 -2.689971e-04 -3.555015e-04
DivisionW  -2.223530e-04 -2.939372e-04 -3.885671e-04 -5.136619e-04 -6.790292e-04 -8.976337e-04 -1.186614e-03 -1.568625e-03 -2.073614e-03 -2.741169e-03 -3.623616e-03 -4.790124e-03
PutOuts     6.209506e-07  8.208588e-07  1.085125e-06  1.434467e-06  1.896273e-06  2.506748e-06  3.313751e-06  4.380543e-06  5.790751e-06  7.654909e-06  1.011913e-05  1.337652e-05
Assists     1.014236e-07  1.340758e-07  1.772398e-07  2.342998e-07  3.097292e-07  4.094413e-07  5.412531e-07  7.154972e-07  9.458322e-07  1.250312e-06  1.652799e-06  2.184833e-06
Errors     -4.729116e-07 -6.251642e-07 -8.264353e-07 -1.092508e-06 -1.444246e-06 -1.909236e-06 -2.523944e-06 -3.336588e-06 -4.410917e-06 -5.831225e-06 -7.708976e-06 -1.019158e-05
NewLeagueN -3.281181e-06 -4.337273e-06 -5.733176e-06 -7.578145e-06 -1.001651e-05 -1.323886e-05 -1.749685e-05 -2.312257e-05 -3.055403e-05 -4.036852e-05 -5.332619e-05 -7.042660e-05
                                                                                                                                                                                  
LeagueN    -4.697827e-04 -6.207293e-04 -8.200508e-04 -1.083156e-03 -1.430291e-03 -1.888006e-03 -2.491020e-03 -3.284581e-03 -4.327356e-03 -0.0056949163 -7.483703e-03 -9.815198e-03
DivisionW  -6.332117e-03 -8.370433e-03 -1.106478e-02 -1.462621e-02 -1.933364e-02 -2.555557e-02 -3.377884e-02 -4.464643e-02 -5.900738e-02 -0.0779823252 -1.030497e-01 -1.361588e-01
PutOuts     1.768232e-05  2.337385e-05  3.089686e-05  4.084036e-05  5.398248e-05  7.135106e-05  9.430339e-05  1.246312e-04  1.646987e-04  0.0002176234  2.875133e-04  3.797757e-04
Assists     2.888099e-06  3.817685e-06  5.046383e-06  6.670373e-06  8.816708e-06  1.165319e-05  1.540137e-05  2.035366e-05  2.689578e-05  0.0000355362  4.694455e-05  6.200169e-05
Errors     -1.347401e-05 -1.781420e-05 -2.355343e-05 -3.114342e-05 -4.118231e-05 -5.446250e-05 -7.203451e-05 -9.529233e-05 -1.260879e-04 -0.0001668854 -2.209705e-04 -2.927359e-04
NewLeagueN -9.298190e-05 -1.227106e-04 -1.618562e-04 -2.133355e-04 -2.809184e-04 -3.694389e-04 -4.850258e-04 -6.353256e-04 -8.296525e-04 -0.0010789371 -1.395226e-03 -1.790270e-03
                                                                                                                                                                                
LeagueN    -0.0128395721 -0.0167373242 -0.0217159907 -0.0280180222 -0.0358218645 -0.0452607965 -0.0562455973 -0.0682475006 -0.0799080742 -0.0884029052 -0.088455415 -0.070900107
DivisionW  -0.1798772840 -0.2375839591 -0.3137181754 -0.4140944699 -0.5463385944 -0.7203748178 -0.9490941869 -1.2491433121 -1.6418704123 -2.1544156505 -2.820910377 -3.683711643
PutOuts     0.0005015182  0.0006620666  0.0008736282  0.0011521526  0.0015183197  0.0019989047  0.0026282664  0.0034501145  0.0045194097  0.0059042365  0.007687359  0.009967040
Assists     0.0000818644  0.0001080486  0.0001425350  0.0001879291  0.0002475408  0.0003256905  0.0004278733  0.0005610195  0.0007337315  0.0009564578  0.001241514  0.001602813
Errors     -0.0003880746 -0.0005149282 -0.0006840601 -0.0009099844 -0.0012130834 -0.0016213247 -0.0021741835 -0.0029279995 -0.0039644255 -0.0054036985 -0.007425602 -0.010302495
NewLeagueN -0.0022723780 -0.0028400432 -0.0034697857 -0.0040981520 -0.0045696803 -0.0045833369 -0.0035662636 -0.0004803376  0.0064935679  0.0204481161  0.046482957  0.092729187
                                                                                                                                                                         
LeagueN    -0.020774094  0.085028114  0.279935865   0.60905693   1.128214010   1.899723621   2.983666664   4.426228883   6.24738811   8.43097213  10.9331798  13.68370191
DivisionW  -4.794557569 -6.215440973 -8.019029703 -10.28826070 -13.114735551 -16.595066317 -20.824312663 -25.884977165 -31.83095172 -38.66712420 -46.3270024 -54.65877750
PutOuts     0.012856678  0.016482577  0.020980083   0.02648785   0.033141498   0.041066380   0.050370740   0.061135827   0.07340191   0.08715144   0.1022656   0.11852289
Assists     0.002055224  0.002612988  0.003287919   0.00408675   0.005009084   0.006047853   0.007193656   0.008446062   0.00983132   0.01145044   0.0134326   0.01606037
Errors     -0.014448993 -0.020502690 -0.029430225  -0.04267625  -0.062329257  -0.091286854  -0.133333823  -0.193078245  -0.27563868  -0.38560521  -0.5276597  -0.70358655
NewLeagueN  0.171640916  0.301433531  0.507144682   0.82061611   1.278131419   1.914781859   2.754632704   3.798020915   5.00844788   6.30348942   7.5577690   8.61181213
                                                                                                                                                                         
LeagueN     16.61465820  19.67455371  22.84517646  26.12460529  29.54380999   33.09896528  3.676542e+01   40.47940462   44.1320135   47.5927880   50.7360881   53.4619888
DivisionW  -63.42474502 -72.32155123 -81.01744688 -89.20563810 -96.63293786 -103.14119518 -1.086578e+02 -113.18373495 -116.7759307 -119.5254674 -121.5339873 -122.9054511
PutOuts      0.13556792   0.15293830   0.17009772   0.18653652   0.20176898    0.21549186  2.275307e-01    0.23784120    0.2465061    0.2536944    0.2595876    0.2643809
Assists      0.01962612   0.02448004   0.03096459   0.03934585   0.04976394    0.06229233  7.689081e-02    0.09336288    0.1114676    0.1309505    0.1513785    0.1722917
Errors      -0.91405980  -1.15688766  -1.42769526  -1.71914339  -2.02403835   -2.33138423 -2.630586e+00   -2.91182205   -3.1655027   -3.3836914   -3.5612567   -3.6953814
NewLeagueN   9.29582644   9.45368575   8.96604057   7.76059860   5.82733116    3.20887052  6.796074e-03   -3.62280233   -7.4842126  -11.3667064  -15.0581359  -18.3782371
                                                                                                                                      
LeagueN      55.7235091   57.5193908   58.8944048   59.9223467   60.6853879   61.2550034   61.6945220   62.0427219   62.3317136 ......
DivisionW  -123.7434565 -124.1385803 -124.1754513 -123.9329566 -123.4841070 -122.8933487 -122.2263138 -121.5286522 -120.8579562 ......
PutOuts       0.2682829    0.2714489    0.2740185    0.2761046    0.2777901    0.2791275    0.2801691    0.2809457    0.2815135 ......
Assists       0.1933099    0.2139257    0.2337346    0.2524191    0.2697458    0.2855605    0.2997947    0.3124435    0.3234769 ......
Errors       -3.7865596   -3.8379446   -3.8551650   -3.8455713   -3.8170608   -3.7767606   -3.7313945   -3.6852362   -3.6425729 ......
NewLeagueN  -21.2158131  -23.5048569  -25.2442911  -26.4819571  -27.2930738  -27.7553037  -27.9647155  -27.9849755  -27.9076182 ......
ridge.mod$lambda[50]
[1] 11497.57
coef(ridge.mod)[,50]
  (Intercept)         AtBat          Hits         HmRun          Runs           RBI         Walks         Years        CAtBat         CHits        CHmRun         CRuns 
407.356050200   0.036957182   0.138180344   0.524629976   0.230701523   0.239841459   0.289618741   1.107702929   0.003131815   0.011653637   0.087545670   0.023379882 
         CRBI        CWalks       LeagueN     DivisionW       PutOuts       Assists        Errors    NewLeagueN 
  0.024138320   0.025015421   0.085028114  -6.215440973   0.016482577   0.002612988  -0.020502690   0.301433531 
sqrt(sum(coef(ridge.mod)[-1,50]^2))
[1] 6.360612
ridge.mod$lambda[60]
[1] 705.4802
sqrt(sum(coef(ridge.mod)[-1,60]^2))
[1] 57.11001
predict(ridge.mod, s=50, type="coefficients")[1:20,]
  (Intercept)         AtBat          Hits         HmRun          Runs           RBI         Walks         Years        CAtBat         CHits        CHmRun         CRuns 
 4.876610e+01 -3.580999e-01  1.969359e+00 -1.278248e+00  1.145892e+00  8.038292e-01  2.716186e+00 -6.218319e+00  5.447837e-03  1.064895e-01  6.244860e-01  2.214985e-01 
         CRBI        CWalks       LeagueN     DivisionW       PutOuts       Assists        Errors    NewLeagueN 
 2.186914e-01 -1.500245e-01  4.592589e+01 -1.182011e+02  2.502322e-01  1.215665e-01 -3.278600e+00 -9.496680e+00 
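
Next we split the observations into a training set and a test set so that we can estimate the test error of ridge regression. A sketch assuming the ISLR approach (the seed is an assumption):

set.seed(1)
train=sample(1:nrow(x), nrow(x)/2)
test=(-train)
y.test=y[test]
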
train
  [1] 208 185  44  17 196 161 261  16  28  97  43  76  49  65  46 119 191   7 130 215  91  12  34  78  38  32  53  54  31 230  77 118 158  23 255 242 211 153  22 111 103  84
 [43] 220  39 179  15  87 235  42 181 154  57 105  18  74 202 245 137 250  83 203 173 192 163 156 236 151 194  58 240 157 218  70  85  30 110 182 262  33 100  71 124 251 190
 [85] 216  66 129  86 228 206  73 169 214 145  14 149  79  19  55 138 249  96 135 175 112 224 263  69  88 143  36 241  64  51 177  27 193  63  82  95 140 186 152 159 116 256
[127] 132  21  20 187  68
y.test
  [1]  475.000  480.000  500.000   91.500  750.000   70.000   75.000 1100.000  517.143  512.500  700.000  750.000  625.000  900.000  300.000  215.000  815.000 1200.000  675.000
 [20] 1350.000  275.000  230.000  950.000  105.000  933.333  325.000  275.000  450.000 1975.000  260.000  145.000  300.000 1175.000   70.000  535.000  362.500  200.000  400.000
 [39]  400.000  662.500  950.000  297.500  325.000  175.000 1237.500  430.000  100.000  165.000 1008.333  275.000  775.000  365.000  100.000  277.500   80.000  600.000  657.000
 [58]   75.000 2412.500  250.000  300.000  825.000  195.000  630.000 1000.000 1310.000  737.500  125.000  725.000  300.000  365.000 1183.333  787.500   75.000  780.000  150.000
 [77]  700.000  550.000  650.000   68.000  670.000  175.000  137.000  875.000  140.000  800.000  350.000 1940.000  700.000 1260.000  750.000  250.000  215.000  400.000  560.000
 [96] 1670.000  487.500  250.000  400.000  750.000  875.000  190.000  740.000  250.000  140.000 1000.000   90.000  200.000  135.000  475.000 1450.000  150.000  350.000  530.000
[115]  341.667  940.000  350.000  740.000  425.000  925.000  245.000  235.000  160.000  425.000  900.000  160.000 1300.000  525.000  120.000  165.000  700.000  875.000
ridge.mod

Call:  glmnet(x = x[train, ], y = y[train], alpha = 0, lambda = grid,      thresh = 1e-12) 

       Df      %Dev    Lambda
  [1,] 19 3.189e-07 1.000e+10
  [2,] 19 4.216e-07 7.565e+09
  [3,] 19 5.573e-07 5.722e+09
  [4,] 19 7.368e-07 4.329e+09
  [5,] 19 9.740e-07 3.275e+09
  [6,] 19 1.288e-06 2.477e+09
  [7,] 19 1.702e-06 1.874e+09
  [8,] 19 2.250e-06 1.417e+09
  [9,] 19 2.974e-06 1.072e+09
 [10,] 19 3.932e-06 8.111e+08
 [11,] 19 5.198e-06 6.136e+08
 [12,] 19 6.871e-06 4.642e+08
 [13,] 19 9.083e-06 3.511e+08
 [14,] 19 1.201e-05 2.656e+08
 [15,] 19 1.587e-05 2.009e+08
 [16,] 19 2.098e-05 1.520e+08
 [17,] 19 2.774e-05 1.150e+08
 [18,] 19 3.667e-05 8.697e+07
 [19,] 19 4.847e-05 6.579e+07
 [20,] 19 6.407e-05 4.977e+07
 [21,] 19 8.470e-05 3.765e+07
 [22,] 19 1.120e-04 2.848e+07
 [23,] 19 1.480e-04 2.154e+07
 [24,] 19 1.956e-04 1.630e+07
 [25,] 19 2.586e-04 1.233e+07
 [26,] 19 3.418e-04 9.326e+06
 [27,] 19 4.517e-04 7.055e+06
 [28,] 19 5.970e-04 5.337e+06
 [29,] 19 7.889e-04 4.037e+06
 [30,] 19 1.042e-03 3.054e+06
 [31,] 19 1.377e-03 2.310e+06
 [32,] 19 1.819e-03 1.748e+06
 [33,] 19 2.402e-03 1.322e+06
 [34,] 19 3.172e-03 1.000e+06
 [35,] 19 4.185e-03 7.565e+05
 [36,] 19 5.520e-03 5.722e+05
 [37,] 19 7.274e-03 4.329e+05
 [38,] 19 9.577e-03 3.275e+05
 [39,] 19 1.259e-02 2.477e+05
 [40,] 19 1.653e-02 1.874e+05
 [41,] 19 2.165e-02 1.417e+05
 [42,] 19 2.827e-02 1.072e+05
 [43,] 19 3.679e-02 8.111e+04
 [44,] 19 4.763e-02 6.136e+04
 [45,] 19 6.130e-02 4.642e+04
 [46,] 19 7.828e-02 3.511e+04
 [47,] 19 9.901e-02 2.656e+04
 [48,] 19 1.237e-01 2.009e+04
 [49,] 19 1.525e-01 1.520e+04
 [50,] 19 1.848e-01 1.150e+04
 [51,] 19 2.198e-01 8.697e+03
 [52,] 19 2.561e-01 6.579e+03
 [53,] 19 2.922e-01 4.977e+03
 [54,] 19 3.262e-01 3.765e+03
 [55,] 19 3.570e-01 2.848e+03
 [56,] 19 3.838e-01 2.154e+03
 [57,] 19 4.064e-01 1.630e+03
 [58,] 19 4.251e-01 1.233e+03
 [59,] 19 4.405e-01 9.326e+02
 [60,] 19 4.533e-01 7.055e+02
 [61,] 19 4.642e-01 5.337e+02
 [62,] 19 4.737e-01 4.037e+02
 [63,] 19 4.822e-01 3.054e+02
 [64,] 19 4.901e-01 2.310e+02
 [65,] 19 4.978e-01 1.748e+02
 [66,] 19 5.054e-01 1.322e+02
 [67,] 19 5.132e-01 1.000e+02
 [68,] 19 5.212e-01 7.565e+01
 [69,] 19 5.294e-01 5.722e+01
 [70,] 19 5.378e-01 4.329e+01
 [71,] 19 5.463e-01 3.275e+01
 [72,] 19 5.548e-01 2.477e+01
 [73,] 19 5.633e-01 1.874e+01
 [74,] 19 5.716e-01 1.417e+01
 [75,] 19 5.796e-01 1.072e+01
 [76,] 19 5.873e-01 8.111e+00
 [77,] 19 5.944e-01 6.136e+00
 [78,] 19 6.008e-01 4.642e+00
 [79,] 19 6.064e-01 3.511e+00
 [80,] 19 6.111e-01 2.656e+00
 [81,] 19 6.149e-01 2.009e+00
 [82,] 19 6.179e-01 1.520e+00
 [83,] 19 6.201e-01 1.150e+00
 [84,] 19 6.217e-01 8.697e-01
 [85,] 19 6.228e-01 6.579e-01
 [86,] 19 6.235e-01 4.977e-01
 [87,] 19 6.240e-01 3.765e-01
 [88,] 19 6.243e-01 2.848e-01
 [89,] 19 6.246e-01 2.154e-01
 [90,] 19 6.247e-01 1.630e-01
 [91,] 19 6.248e-01 1.233e-01
 [92,] 19 6.248e-01 9.326e-02
 [93,] 19 6.248e-01 7.055e-02
 [94,] 19 6.249e-01 5.337e-02
 [95,] 19 6.249e-01 4.037e-02
 [96,] 19 6.249e-01 3.054e-02
 [97,] 19 6.249e-01 2.310e-02
 [98,] 19 6.249e-01 1.748e-02
 [99,] 19 6.249e-01 1.322e-02
[100,] 19 6.249e-01 1.000e-02
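
The predictions below evaluate the training-set ridge fit on the held-out test observations at a single value of lambda; ISLR uses lambda = 4 here, which is consistent with the test MSE of 101036.8 reported further down. A sketch:

ridge.pred=predict(ridge.mod, s=4, newx=x[test,])
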
ridge.pred
                           1
-Alan Ashby        336.53551
-Andre Dawson     1059.21367
-Alfredo Griffin   425.89084
-Argenis Salazar    33.84849
-Andres Thomas     157.92724
-Andre Thornton    655.28160
-Alan Trammell     913.86802
-Andy VanSlyke     610.52244
-Alan Wiggins      434.28132
-Bill Almon        423.26981
-Barry Bonds       570.36543
-Bobby Bonilla     410.94536
-Bob Brenly        671.37930
-Bo Diaz           436.37279
-Brian Downing     794.39888
-Billy Hatcher     116.88686
-Brook Jacoby      567.72532
-Bill Madlock      787.45952
-BillyJo Robidoux  434.00991
-Bill Schroeder    217.64094
-Chris Brown       328.17030
-Carmen Castillo   231.08101
-Carlton Fisk      633.98649
-Curt Ford         327.65834
-Carney Lansford   645.13196
-Chet Lemon        796.40031
-Cory Snyder       399.74235
-Chris Speier      404.01744
-Curt Wilkerson     84.00033
-Dave Anderson     173.55689
-Don Baylor        954.85178
-Daryl Boston      183.80043
-Darrell Evans    1207.15869
-Dwight Evans     1284.34990
-Damaso Garcia     571.38914
-Davey Lopes       984.13866
-Don Mattingly    1330.55823
-Dick Schofield    368.25201
-Danny Tartabull   481.32167
-Dave Winfield    1284.45009
-Eddie Milner      396.85512
-Ed Romero         110.91895
-Frank White       839.56290
-Glenn Braggs      213.22657
-George Brett     1292.64861
-Gary Carter      1175.66167
-Glenn Davis       963.01302
-Gary Gaetti       665.60382
-Greg Gagne        177.69003
-George Hendrick   642.96866
-Glenn Hubbard     617.24628
-Garth Iorg        305.28398
-Greg Walker       565.99678
-Harold Baines     652.25495
-Harold Reynolds    55.98904
-John Cangelosi    293.37588
-Jose Canseco      536.48392
-Jack Clark        910.21443
-Jim Dwyer         243.98296
-Jim Gantner       476.46424
-Jack Howell       190.74086
-John Kruk         460.41761
-Jeffrey Leonard   252.81864
-Jim Morrison      616.28985
-Jerry Mumphrey    656.69934
-Johnny Ray        937.54471
-Jim Rice         1391.50937
-Jerry Royster     409.81846
-John Russell      521.94946
-John Shelby       206.84447
-Jim Sundberg      504.25006
-Joel Youngblood   332.01159
-Kal Daniels       311.81177
-Kirk Gibson       842.97983
-Ken Griffey      1068.73624
-Kent Hrbek       1073.02311
-Keith Moreland    499.70502
-Ken Oberkfell     608.48368
-Ken Phelps        533.03409
-Kirby Puckett     599.91797
-Larry Herndon     582.55908
-Lloyd Moseby      784.41130
-Lance Parrish     846.70527
-Larry Parrish     759.54613
-Mike Aldrete      386.49208
-Mariano Duncan    248.00900
-Mike Easler       539.70223
-Mike LaValliere   483.13137
-Mike Pagliarulo   548.66076
-Mark Salas        245.70700
-Mickey Tettleton  417.35298
-Milt Thompson     348.81624
-Marvell Wynne     315.95582
-Mike Young        451.29494
-Oddibe McDowell   388.43780
-Pete Incaviglia   412.74710
-Pete Rose        1975.63578
-Pat Tabler        645.18825
-Rick Burleson     585.48615
-Randy Bush        262.29037
-Rick Cerone       250.12064
-Ron Cey           715.76119
-Rob Deer          707.07058
-Rick Dempsey      355.96389
-Ron Kittle        291.87275
-Rick Leach        207.61161
-Rafael Ramirez    239.76641
-Rafael Santana    325.48295
-Ruben Sierra      240.30953
-Roy Smalley       580.17050
-Robby Thompson    341.36676
-Rob Wilfong       165.81407
-Robin Yount      1286.55844
-Steve Balboni     701.73628
-Steve Buechele    255.59910
-Scott Fletcher    291.94484
-Steve Jeltz       453.42790
-Spike Owen        223.91880
-Tony Bernazard    791.59388
-Tom Brunansky     475.47776
-Terry Harper      211.88225
-Terry Pendleton   281.48929
-Tony Phillips     490.45950
-Terry Puhl        513.66793
-Tim Teufel        463.16100
-Vince Coleman     302.79695
-Von Hayes        1209.48512
-Wally Backman     493.92569
-Wally Joyner      874.89298
-Willie Randolph  1221.01325
-Willie Upshaw    1036.08429
-Willie Wilson     743.22265
mean((ridge.pred-y.test)^2)
[1] 101036.8
mean((ridge.pred-y.test)^2)
[1] 193253.1
ridge.pred
                           1
-Alan Ashby        235.63818
-Andre Dawson     1201.14455
-Alfredo Griffin   424.70452
-Argenis Salazar    43.91310
-Andres Thomas     212.42390
-Andre Thornton    570.09883
-Alan Trammell     896.40889
-Andy VanSlyke     623.03648
-Alan Wiggins      498.82999
-Bill Almon        406.52579
-Barry Bonds       466.17981
-Bobby Bonilla     373.48436
-Bob Brenly        631.93056
-Bo Diaz           432.10907
-Brian Downing     712.98638
-Billy Hatcher      68.42194
-Brook Jacoby      573.17053
-Bill Madlock      884.57812
-BillyJo Robidoux  452.73010
-Bill Schroeder    143.76679
-Chris Brown       365.47945
-Carmen Castillo   244.60545
-Carlton Fisk      755.42706
-Curt Ford         312.84071
-Carney Lansford   690.96741
-Chet Lemon        818.19405
-Cory Snyder       380.02113
-Chris Speier       96.16379
-Curt Wilkerson     67.00053
-Dave Anderson     172.24260
-Don Baylor        984.16456
-Daryl Boston      194.01637
-Darrell Evans    1016.35792
-Dwight Evans     1306.65751
-Damaso Garcia     572.71185
-Davey Lopes      1109.86743
-Don Mattingly    1411.46999
-Dick Schofield    321.62810
-Danny Tartabull   493.60568
-Dave Winfield    1351.07626
-Eddie Milner      376.17031
-Ed Romero          34.03658
-Frank White       883.46905
-Glenn Braggs      212.22902
-George Brett     1556.84427
-Gary Carter      1051.71050
-Glenn Davis       956.21852
-Gary Gaetti       675.16823
-Greg Gagne        169.47098
-George Hendrick   642.28258
-Glenn Hubbard     600.48620
-Garth Iorg        295.53586
-Greg Walker       598.71886
-Harold Baines     668.10910
-Harold Reynolds    14.17725
-John Cangelosi    213.79200
-Jose Canseco      507.42094
-Jack Clark        959.16559
-Jim Dwyer         305.04379
-Jim Gantner       410.46689
-Jack Howell       221.01240
-John Kruk         537.90210
-Jeffrey Leonard   246.36762
-Jim Morrison      621.48108
-Jerry Mumphrey    706.13227
-Johnny Ray        968.99215
-Jim Rice         1548.58531
-Jerry Royster     520.07312
-John Russell      520.45394
-John Shelby       146.32708
-Jim Sundberg      200.83746
-Joel Youngblood   350.64455
-Kal Daniels       353.62629
-Kirk Gibson       836.31788
-Ken Griffey      1292.55358
-Kent Hrbek       1059.37186
-Keith Moreland    377.57472
-Ken Oberkfell     546.96602
-Ken Phelps        531.28401
-Kirby Puckett     565.87775
-Larry Herndon     554.61809
-Lloyd Moseby      723.91380
-Lance Parrish     818.11686
-Larry Parrish     740.20328
-Mike Aldrete      410.92263
-Mariano Duncan    237.91611
-Mike Easler       526.73999
-Mike LaValliere   508.06510
-Mike Pagliarulo   515.51065
-Mark Salas        266.16238
-Mickey Tettleton  438.40079
-Milt Thompson     306.13339
-Marvell Wynne     320.46602
-Mike Young        449.64916
-Oddibe McDowell   263.22719
-Pete Incaviglia   374.29057
-Pete Rose        2290.18341
-Pat Tabler        657.56305
-Rick Burleson     624.15442
-Randy Bush        243.51696
-Rick Cerone       171.05247
-Ron Cey           556.39043
-Rob Deer          666.16574
-Rick Dempsey      172.33219
-Ron Kittle        271.61996
-Rick Leach        193.82457
-Rafael Ramirez    142.64372
-Rafael Santana    240.69143
-Ruben Sierra      232.13735
-Roy Smalley       460.97883
-Robby Thompson    313.26347
-Rob Wilfong       164.63301
-Robin Yount      1445.27061
-Steve Balboni     675.98907
-Steve Buechele    233.60031
-Scott Fletcher    272.89817
-Steve Jeltz       421.40294
-Spike Owen        130.33139
-Tony Bernazard    791.48509
-Tom Brunansky     399.09881
-Terry Harper      222.99446
-Terry Pendleton   204.86587
-Tony Phillips     461.06113
-Terry Puhl        585.07449
-Tim Teufel        443.77564
-Vince Coleman     115.59456
-Von Hayes        1197.33277
-Wally Backman     520.84748
-Wally Joyner      907.19053
-Willie Randolph  1341.06580
-Willie Upshaw     976.36746
-Willie Wilson     941.79263
mean((ridge.pred-y.test)^2)
[1] 114783.1
predict(ridge.mod, s=0, exact=T, type="coefficients")[1:20]
 [1] 299.42883596  -2.54014665   8.36611719  11.64400720  -9.09877719   2.44152119   9.23403909 -22.93584442  -0.18160843  -0.11561496  -1.33836534   3.32817777   0.07511771
[14]  -1.07828647  59.76529059 -98.85996590   0.34086400   0.34165605  -0.64205839  -0.67606314

Find the best lambda by using cross-validation

set.seed(1)
cv.out=cv.glmnet(x[train,], y[train],alpha=0)
plot(cv.out)
bestlam=cv.out$lambda.min
bestlam
[1] 211.7416
ridge.pred=predict(ridge.mod, s=bestlam, newx=x[test,])
mean((ridge.pred-y.test)^2)
[1] 96015.51
out=glmnet(x,y,alpha=0)
predict(out,type="coefficients", s=bestlam)[1:20,]
 (Intercept)        AtBat         Hits        HmRun         Runs          RBI        Walks        Years       CAtBat        CHits       CHmRun        CRuns         CRBI 
  9.88487157   0.03143991   1.00882875   0.13927624   1.11320781   0.87318990   1.80410229   0.13074381   0.01113978   0.06489843   0.45158546   0.12900049   0.13737712 
      CWalks      LeagueN    DivisionW      PutOuts      Assists       Errors   NewLeagueN 
  0.02908572  27.18227535 -91.63411299   0.19149252   0.04254536  -1.81244470   7.21208390 
---
title: "Chapter 6 Model Selection"
output: html_notebook
---


```{r}
library(ISLR)
names(Hitters)
Hitters
```
```{r}
dim(Hitters)
```
```{r}
sum(is.na(Hitters))
Hitters=na.omit(Hitters)
dim(Hitters)
sum(is.na(Hitters))
    
```

So now we have zero rows with missing data in Hitters.
We're going to start building a set of models.
First we'll find the best model for each number of variables.

```{r}
library(leaps)
regfit.full=regsubsets(Salary~., Hitters)
summary(regfit.full)

```

Note that regsubsets only reports models of up to 8 variables by default; we'll set nvmax=19 so it considers models containing up to all 19 variables.

```{r}
regfit.full=regsubsets(Salary~., data=Hitters, nvmax = 19)
regfit.full
reg.summary=summary(regfit.full)
reg.summary
names(reg.summary)
```


Next we'll look at the R-squared value for each of the 19 models. Note that R-squared increases as the number of variables increases.
```{r}
reg.summary$rsq
```

Let's plot the RSS, adjusted R2, Cp, and BIC for all the models to help us decide which to select.
RSS comes first.

```{r}
plot(reg.summary$rss, xlab="Number of Variables", ylab="RSS", type="l")

```


Next we'll look at adjusted R-squared
```{r}
plot(reg.summary$adjr2, xlab="Number of variables", ylab="Adj R2", type="l")
best.adjr2=which.max(reg.summary$adjr2)
best.adjr2
points(best.adjr2, reg.summary$adjr2[best.adjr2], col="red", cex=2, pch=20)
```

The red dot shows the maximum value of adjusted R2.
In a similar manner, plot Mallows' Cp and the BIC statistics, and use which.min() to find the model size with the smallest value of each.

Third we'll look at Mallows' Cp
```{r}
plot(reg.summary$cp, xlab="Number of Variables", ylab="Cp", type="l")
best.cp=which.min(reg.summary$cp)
best.cp
points(best.cp, reg.summary$cp[best.cp], col="red", cex=2, pch=20)
```

Fourth let's do the analysis using BIC
```{r}
best.bic=which.min(reg.summary$bic) # it is 6
best.bic
plot(reg.summary$bic, xlab="Number of variables", ylab="BIC", type="l")
points(best.bic, reg.summary$bic[best.bic], col="red", cex=2, pch=20)
```
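
To compare the four criteria at a glance, we can also lay them out in a single 2x2 panel. This is a convenience sketch reusing the reg.summary object computed above:

```{r}
par(mfrow=c(2,2))
plot(reg.summary$rss, xlab="Number of Variables", ylab="RSS", type="l")
plot(reg.summary$adjr2, xlab="Number of Variables", ylab="Adj R2", type="l")
plot(reg.summary$cp, xlab="Number of Variables", ylab="Cp", type="l")
plot(reg.summary$bic, xlab="Number of Variables", ylab="BIC", type="l")
par(mfrow=c(1,1))
```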

Now let's use the built-in plot method for regsubsets to display which variables are selected at each model size, ranked by each criterion. Our goal is to find the best model. First we'll look at r-squared.

```{r}
plot(regfit.full, scale="r2")
```

Now let's look at adjusted r-squared
```{r}
plot(regfit.full, scale="adjr2")
```

Third we'll look at Mallow's Cp
```{r}
plot(regfit.full, scale = "Cp")
```

Fourth we'll look at BIC
```{r}
plot(regfit.full, scale = "bic")
```



The lowest BIC is around -150, though several model sizes come close to it. The model with the minimum BIC has six variables. Let's see its coefficients:

```{r}
coef(regfit.full,6)
```

#### 6.5.2 Forward and Backward Stepwise Selection ####

```{r}
# regsubsets (from the leaps package loaded above) also performs forward stepwise selection
regfit.fwd=regsubsets(Salary~., data=Hitters, nvmax=19, method="forward")
summary(regfit.fwd)
```

Now let's do the same thing using backward stepwise selection

```{r}
regfit.bwd=regsubsets(Salary~., data=Hitters, nvmax=19, method="backward")
summary(regfit.bwd)
```

Note that the best one-variable through six-variable models are identical for best subset and forward stepwise selection, but the seven-variable models differ across methods. Let's compare them: best subset and backward selection are shown next, and forward selection follows in the chunk after those.

```{r}
coef(regfit.full, 7)
```

```{r}
coef(regfit.bwd, 7)
```
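
For completeness, here is the seven-variable model chosen by forward stepwise selection (a small addition; regfit.fwd was fit earlier in this section):

```{r}
coef(regfit.fwd, 7)
```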

#### 6.5.3 Choosing Among Models Using the Validation Set Approach and Cross-Validation ####

We will begin by splitting the data into a test set and a training set.
```{r}
set.seed(1)
train=sample(c(TRUE,FALSE), nrow(Hitters), replace=TRUE)
train
```

```{r}
test=(!train)
test
```

Now we apply regsubsets to the training set in order to perform best subset selection.

```{r}
regfit.best=regsubsets(Salary~.,data=Hitters[train,], nvmax=19)
regfit.best
```

```{r}
test.mat=model.matrix(Salary~., data=Hitters[test,]) # design matrix (with dummy variables) for the test observations
test.mat
```

```{r}
val.errors=rep(NA,19)
for (i in 1:19){
  coefi=coef(regfit.best, id=i)
  pred=test.mat[,names(coefi)]%*%coefi
  val.errors[i]=mean((Hitters$Salary[test]-pred)^2) # mean squared prediction error on the test set
}
coefi
pred
val.errors
```

```{r}
which.min(val.errors) # the book reports 10; the earlier discrepancy came from a misplaced parenthesis in the error formula (fixed above), and results can also shift because sample() changed in R 3.6.0
```
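
Once we know the best model size on the validation set, we can pull its coefficients from the training fit (a short sketch; which.min(val.errors) supplies the size):

```{r}
best.size=which.min(val.errors)
coef(regfit.best, best.size)
```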

Create a predict() method for regsubsets objects that does what we did above
```{r}
predict.regsubsets=function(object, newdata, id, ...){
  form=as.formula(object$call[[2]])  # recover the model formula from the regsubsets call
  mat=model.matrix(form, newdata)    # build the design matrix for the new data
  coefi=coef(object, id=id)          # coefficients of the id-variable model
  xvars=names(coefi)
  mat[,xvars]%*%coefi                # predictions
}
```
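
As a quick check (a sketch, assuming the objects above are still in the workspace), the new predict method should reproduce the validation error we computed by hand, for example for the 10-variable model:

```{r}
pred10=predict.regsubsets(regfit.best, Hitters[test,], id=10)
mean((Hitters$Salary[test]-pred10)^2)
```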


Now refit best subset selection on the full data set and look at its 10-variable model. Note that these 10 variables can differ from the 10 chosen on the training set alone.
```{r}
regfit.best=regsubsets(Salary~., data=Hitters, nvmax=19)
coef(regfit.best,10)
```

```{r}
k=10 # number of folds
set.seed(1)
folds=sample(1:k, nrow(Hitters), replace=TRUE) # assign each observation to a fold
cv.errors=matrix(NA, k, 19, dimnames=list(NULL, paste(1:19)))
cv.errors
for(j in 1:k){
  best.fit=regsubsets(Salary~., data=Hitters[folds!=j,], nvmax=19)
  for (i in 1:19){
    pred=predict(best.fit, Hitters[folds==j,], id=i) # dispatches to our predict.regsubsets method
    cv.errors[j,i]=mean((Hitters$Salary[folds==j]-pred)^2)
  }
}

cv.errors
mean.cv.errors=apply(cv.errors, 2, mean)
mean.cv.errors
which.min(mean.cv.errors) # model size with the smallest cross-validation error (a 9-variable model in the author's run)
```

```{r}
par(mfrow=c(1,1))
plot(mean.cv.errors, type="b")
```
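
We can mark the minimum on the same plot (a small sketch reusing mean.cv.errors from above):

```{r}
plot(mean.cv.errors, type="b")
points(which.min(mean.cv.errors), min(mean.cv.errors), col="red", cex=2, pch=20)
```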

```{r}
reg.best=regsubsets(Salary~., data=Hitters, nvmax=19)
coef(reg.best, 9) # coefficients for the model size selected by cross-validation (9 in the author's run)
```

#### 6.6 Lab 2: Ridge Regression and the Lasso ####

```{r}
x=model.matrix(Salary~., Hitters)[,-1] # design matrix: factors become dummy variables; drop the intercept column
y=Hitters$Salary
x
y
```

#### 6.6.1 Ridge Regression ####

```{r}
library(glmnet)
grid=10^seq(10,-2,length=100) # lambda grid from 10^10 down to 10^-2
grid
ridge.mod=glmnet(x,y,alpha=0, lambda=grid) # alpha=0 gives ridge regression; glmnet standardizes the predictors by default
ridge.mod
```
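
To see how the coefficient estimates shrink as lambda decreases, glmnet's plot method can put log(lambda) on the x-axis (a quick sketch using the ridge.mod object just fit):

```{r}
plot(ridge.mod, xvar="lambda", label=TRUE)
```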

```{r}
coef(ridge.mod)
dim(coef(ridge.mod))
```

```{r}
ridge.mod$lambda[50]
```

```{r}
coef(ridge.mod)[,50]
```

```{r}
sqrt(sum(coef(ridge.mod)[-1,50]^2))
```

```{r}
ridge.mod$lambda[60]
```

```{r}
sqrt(sum(coef(ridge.mod)[-1,60]^2))
```

```{r}
predict(ridge.mod, s=50, type="coefficients")[1:20,]
```

```{r}
set.seed(1)
train=sample(1:nrow(x), nrow(x)/2)
train

```

```{r}
test=(-train)
test
y.test=y[test]
y.test
```

```{r}
ridge.mod=glmnet(x[train,], y[train],alpha=0, lambda=grid, thresh=1e-12)
ridge.mod
```

```{r}
ridge.pred=predict(ridge.mod, s=4, newx = x[test,])
ridge.pred
```

```{r}
mean((ridge.pred-y.test)^2)
```
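
For context, compare this with the test MSE of a null model that ignores the predictors and simply predicts the mean training-set salary for every test observation (a sketch using the same train/test split); a ridge fit with a very large lambda, as in the next chunk, should come out close to this value:

```{r}
mean((mean(y[train])-y.test)^2)
```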

```{r}
ridge.pred=predict(ridge.mod,s=1e10, newx=x[test,])
mean((ridge.pred-y.test)^2)
```

```{r}
ridge.pred=predict(ridge.mod, s=0, newx = x[test,], exact=T) # note: newer glmnet versions may also require x= and y= arguments when exact=T
ridge.pred
```

```{r}
mean((ridge.pred-y.test)^2)
```

```{r}
lm(y~x, subset=train) # unpenalized least squares on the training data, for comparison
predict(ridge.mod, s=0, exact=T, type="coefficients")[1:20] # ridge with lambda=0 should approximate the least squares fit
```
Find the best lambda by using cross-validation

```{r}
set.seed(1)
cv.out=cv.glmnet(x[train,], y[train],alpha=0)
plot(cv.out)
bestlam=cv.out$lambda.min
bestlam
```
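
Besides lambda.min, cv.glmnet also stores a more conservative choice: the largest lambda whose cross-validation error is within one standard error of the minimum (a small sketch; cv.out comes from the chunk above):

```{r}
cv.out$lambda.1se
```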

```{r}
ridge.pred=predict(ridge.mod, s=bestlam, newx=x[test,])
mean((ridge.pred-y.test)^2)

```

```{r}
out=glmnet(x,y,alpha=0)
predict(out,type="coefficients", s=bestlam)[1:20,]
```
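
Finally, note that none of the 19 ridge coefficients are exactly zero: ridge regression shrinks the estimates toward zero but does not perform variable selection. A quick check (a sketch; predict() interpolates the coefficient path at s=bestlam):

```{r}
sum(predict(out, type="coefficients", s=bestlam)[1:20,] == 0)
```

If we wanted some coefficients set exactly to zero, the lasso (alpha=1) would be the next step.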

