# Fetch data
qb_stats <- read.csv("../data/qb_stats.csv")

# Columns of qb_stats used as predictors of the rating
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[, predictors]

# Response variable, kept as a one-column data frame so that cbind() below
# carries the column name "rating" into the combined data set
rating <- qb_stats["rating"]

# Clean data set: bind response and predictors, drop every row containing an
# NA, then center and scale all columns (the response included)
data.scaled.no_combine.for_rating <- data.frame(
  scale(na.omit(cbind(rating, college_stats)))
)

# Full linear model: rating regressed on every predictor column
lm.scaled.no_combine.rating <- lm(
  formula = rating ~ .,
  data = data.scaled.no_combine.for_rating
)

# Stepwise search (terms may be dropped or re-added) for the lowest-AIC model.
# stepAIC() is provided by the MASS package, which must already be attached.
step_reg.scaled.no_combine.rating <- stepAIC(
  lm.scaled.no_combine.rating,
  direction = "both"
)
## Start:  AIC=-21.26
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS    AIC
## - c_numyrs     1      0.01 196 -23.26
## - c_pct        1      0.02 196 -23.24
## - c_avg_tds    1      0.03 196 -23.22
## - c_avg_yds    1      0.18 196 -23.04
## - c_rate       1      0.19 196 -23.03
## - height       1      1.08 197 -21.96
## - c_avg_inter  1      1.24 197 -21.76
## - c_avg_cmpp   1      1.28 197 -21.72
## - c_avg_att    1      1.52 197 -21.43
## <none>                     196 -21.26
## - weight       1      3.70 200 -18.83
## - age          1     14.74 210  -6.06
## 
## Step:  AIC=-23.26
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS    AIC
## - c_pct        1      0.02 196 -25.23
## - c_avg_tds    1      0.03 196 -25.22
## - c_avg_yds    1      0.18 196 -25.04
## - c_rate       1      0.19 196 -25.03
## - height       1      1.07 197 -23.96
## - c_avg_inter  1      1.27 197 -23.72
## - c_avg_cmpp   1      1.36 197 -23.62
## - c_avg_att    1      1.65 198 -23.26
## <none>                     196 -23.26
## + c_numyrs     1      0.01 196 -21.26
## - weight       1      3.74 200 -20.77
## - age          1     14.76 211  -8.03
## 
## Step:  AIC=-25.23
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS    AIC
## - c_avg_tds    1      0.02 196 -27.21
## - c_avg_yds    1      0.17 196 -27.03
## - c_rate       1      0.28 196 -26.89
## - height       1      1.07 197 -25.95
## - c_avg_inter  1      1.30 197 -25.67
## <none>                     196 -25.23
## - c_avg_att    1      2.05 198 -24.77
## - c_avg_cmpp   1      3.26 199 -23.32
## + c_pct        1      0.02 196 -23.26
## + c_numyrs     1      0.00 196 -23.24
## - weight       1      3.80 200 -22.68
## - age          1     15.32 211  -9.38
## 
## Step:  AIC=-27.21
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_avg_inter + 
##     c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## - c_rate       1      0.26 196 -28.9
## - c_avg_yds    1      0.30 196 -28.9
## - height       1      1.09 197 -27.9
## - c_avg_inter  1      1.35 197 -27.6
## <none>                     196 -27.2
## - c_avg_att    1      2.06 198 -26.7
## - c_avg_cmpp   1      3.24 199 -25.3
## + c_avg_tds    1      0.02 196 -25.2
## + c_pct        1      0.01 196 -25.2
## + c_numyrs     1      0.00 196 -25.2
## - weight       1      3.79 200 -24.7
## - age          1     15.56 211 -11.1
## 
## Step:  AIC=-28.89
## rating ~ height + weight + age + c_avg_cmpp + c_avg_inter + c_avg_yds + 
##     c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## - c_avg_yds    1      0.08 196 -30.8
## - height       1      1.10 197 -29.6
## - c_avg_inter  1      1.15 197 -29.5
## <none>                     196 -28.9
## - c_avg_att    1      1.88 198 -28.6
## - c_avg_cmpp   1      3.01 199 -27.3
## + c_rate       1      0.26 196 -27.2
## + c_pct        1      0.11 196 -27.0
## + c_numyrs     1      0.02 196 -26.9
## + c_avg_tds    1      0.00 196 -26.9
## - weight       1      3.75 200 -26.4
## - age          1     15.77 212 -12.6
## 
## Step:  AIC=-30.8
## rating ~ height + weight + age + c_avg_cmpp + c_avg_inter + c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## - height       1      1.13 197 -31.4
## - c_avg_inter  1      1.15 197 -31.4
## <none>                     196 -30.8
## - c_avg_att    1      1.80 198 -30.6
## + c_pct        1      0.10 196 -28.9
## + c_avg_yds    1      0.08 196 -28.9
## + c_avg_tds    1      0.04 196 -28.9
## + c_rate       1      0.04 196 -28.9
## + c_numyrs     1      0.02 196 -28.8
## - weight       1      3.79 200 -28.3
## - c_avg_cmpp   1      4.43 201 -27.5
## - age          1     15.72 212 -14.5
## 
## Step:  AIC=-31.43
## rating ~ weight + age + c_avg_cmpp + c_avg_inter + c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## - c_avg_inter  1      1.39 199 -31.8
## - c_avg_att    1      1.55 199 -31.6
## <none>                     197 -31.4
## + height       1      1.13 196 -30.8
## - weight       1      2.70 200 -30.2
## + c_pct        1      0.12 197 -29.6
## + c_avg_yds    1      0.11 197 -29.6
## + c_avg_tds    1      0.08 197 -29.5
## + c_rate       1      0.03 197 -29.5
## + c_numyrs     1      0.00 197 -29.4
## - c_avg_cmpp   1      4.08 201 -28.6
## - age          1     15.07 212 -16.0
## 
## Step:  AIC=-31.77
## rating ~ weight + age + c_avg_cmpp + c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## <none>                     199 -31.8
## + c_avg_inter  1      1.39 197 -31.4
## + height       1      1.37 197 -31.4
## + c_avg_tds    1      0.18 198 -30.0
## + c_pct        1      0.16 198 -30.0
## + c_avg_yds    1      0.11 199 -29.9
## + c_numyrs     1      0.04 199 -29.8
## + c_rate       1      0.00 199 -29.8
## - weight       1      3.92 203 -29.1
## - c_avg_att    1      6.88 206 -25.7
## - c_avg_cmpp   1      9.76 208 -22.4
## - age          1     14.79 214 -16.8
# Print the coefficient table and overall fit statistics of the model chosen
# by the stepwise search
summary(step_reg.scaled.no_combine.rating)
## 
## Call:
## lm(formula = rating ~ weight + age + c_avg_cmpp + c_avg_att, 
##     data = data.scaled.no_combine.for_rating)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.925 -0.581  0.002  0.591  2.632 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.34e-16   6.01e-02    0.00  1.00000    
## weight       1.39e-01   6.49e-02    2.14  0.03334 *  
## age          2.57e-01   6.19e-02    4.16  4.6e-05 ***
## c_avg_cmpp   1.12e+00   3.33e-01    3.38  0.00086 ***
## c_avg_att   -9.35e-01   3.30e-01   -2.84  0.00498 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.925 on 232 degrees of freedom
## Multiple R-squared: 0.158,   Adjusted R-squared: 0.144 
## F-statistic: 10.9 on 4 and 232 DF,  p-value: 4.17e-08
# Draw the diagnostic plots for the stepwise-selected model (the rendered
# report shows four figures for this call)
plot(step_reg.scaled.no_combine.rating)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Subset search over the same predictors, recording up to 10 candidate
# models for each subset size
leaps.scaled.no_combine.rating <- regsubsets(
  rating ~ .,
  data = data.scaled.no_combine.for_rating,
  nbest = 10
)

# Plot R-squared for the recorded subsets. legend = FALSE suppresses the
# legend: by default subsets() positions it from a mouse click, which is not
# available when the report is rendered non-interactively and aborts the
# call with "invalid coordinate lengths".
subsets(leaps.scaled.no_combine.rating, statistic = "rsq", legend = FALSE)
## Error: invalid coordinate lengths

plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally because the name of that first
# argument appears to differ between DAAG releases (`df` in older ones,
# `data` in newer ones -- confirm against the installed version); `form.lm`
# and `m` are named explicitly.
cv.lm(
  data.scaled.no_combine.for_rating,
  form.lm = step_reg.scaled.no_combine.rating,
  m = 5
)
## Analysis of Variance Table
## 
## Response: rating
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## weight       1    7.1    7.11    8.30  0.0043 ** 
## age          1   15.0   14.98   17.49 4.1e-05 ***
## c_avg_cmpp   1    8.3    8.32    9.71  0.0021 ** 
## c_avg_att    1    6.9    6.88    8.04  0.0050 ** 
## Residuals  232  198.7    0.86                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 47 
##                  3     8     15    18      19      20     21    23     26
## Predicted    1.128 0.223  0.781 0.264  0.0254  0.0511  0.366 0.102 0.5188
## cvpred       1.151 0.265  0.887 0.277  0.1058  0.0738  0.339 0.162 0.5290
## rating       0.228 0.808  0.369 0.421 -0.4874 -0.7258 -0.175 1.479 0.5627
## CV residual -0.923 0.543 -0.518 0.144 -0.5932 -0.7996 -0.514 1.317 0.0337
##                  35    45     54      56      69      71     72     73
## Predicted   -0.0108 0.116  1.028 -0.1036 -0.0344 -0.0353 0.0678  0.817
## cvpred      -0.0686 0.148  1.050 -0.0638  0.0448 -0.0177 0.1279  0.805
## rating       0.1084 0.883  0.637 -0.5321 -0.5023  0.5106 1.2628  0.399
## CV residual  0.1770 0.735 -0.413 -0.4684 -0.5471  0.5283 1.1349 -0.406
##                 76     79      81      82     91      96     115      118
## Predicted    0.112 -0.236 -0.3134  0.0724 -0.151  0.0922 -0.0784 -0.00859
## cvpred       0.132 -0.207 -0.2787  0.0804 -0.152  0.1541 -0.0440 -0.01998
## rating       0.101  0.466 -0.0778 -1.2471 -2.275  0.0860 -1.0311  0.16797
## CV residual -0.031  0.673  0.2009 -1.3275 -2.123 -0.0680 -0.9872  0.18795
##               121    122     124    131    132    133     135    140
## Predicted   0.357 -0.417 -0.0957 -0.179 -0.313 -0.543 -0.3955  0.193
## cvpred      0.331 -0.417 -0.0351 -0.115 -0.270 -0.479 -0.3866  0.262
## rating      1.084 -0.048 -0.1746 -0.316 -0.167 -0.130 -0.4651 -0.458
## CV residual 0.753  0.369 -0.1395 -0.201  0.103  0.349 -0.0785 -0.719
##                150   155    164    176   183    187     194    206    215
## Predicted   -0.113 0.443 0.2015 -0.647 0.240 -0.307 -0.0337 -0.220 -0.251
## cvpred      -0.109 0.457 0.1874 -0.559 0.199 -0.270  0.0315 -0.166 -0.213
## rating       1.762 1.777 0.2797 -0.480 1.010 -1.202  1.1585 -1.746 -3.176
## CV residual  1.870 1.320 0.0923  0.079 0.811 -0.932  1.1270 -1.580 -2.963
##                224    229    236    237    238
## Predicted   -0.600 -0.337 -0.371 -0.398 -0.485
## cvpred      -0.514 -0.261 -0.277 -0.306 -0.384
## rating      -1.255 -1.627 -2.133 -1.739  0.622
## CV residual -0.740 -1.366 -1.857 -1.433  1.006
## 
## Sum of squares = 44.9    Mean square = 0.95    n = 47 
## 
## fold 2 
## Observations in test set: 48 
##                24     31      33     36     37     39     41     42    59
## Predicted   0.115 0.0580 -0.2433  0.165 -0.100  0.129  0.564  0.518 0.177
## cvpred      0.145 0.0664 -0.2815  0.194 -0.104  0.157  0.669  0.603 0.205
## rating      1.352 1.3820  0.0935 -0.644 -2.230 -0.212  0.332  0.198 0.466
## CV residual 1.207 1.3155  0.3750 -0.838 -2.126 -0.369 -0.337 -0.405 0.261
##                 62     74    85     88       89     99    103    108
## Predicted   -0.722  0.870 0.199  0.221 -0.00323  1.025 -0.162 -0.124
## cvpred      -0.820  0.945 0.213  0.274  0.00163  1.100 -0.190 -0.123
## rating      -0.532  0.511 1.159  0.123  1.03937 -0.048  0.972 -0.741
## CV residual  0.288 -0.434 0.946 -0.150  1.03774 -1.148  1.162 -0.618
##                 110    111    116    119     126     128    138   139
## Predicted   -0.2935 -0.402  0.118  0.332 -0.0447 -0.0418 0.0165 0.121
## cvpred      -0.3272 -0.442  0.159  0.355 -0.0503 -0.0295 0.0315 0.125
## rating      -0.3385 -0.190 -0.845  0.220  0.1903 -1.0311 1.0021 0.995
## CV residual -0.0113  0.253 -1.004 -0.135  0.2406 -1.0016 0.9707 0.870
##                146    149     152    153    159    166    170     173
## Predicted    0.390 -0.322  0.0553 -0.315 -0.404  0.108 -0.373 -0.0354
## cvpred       0.454 -0.351  0.1012 -0.340 -0.458  0.148 -0.404 -0.0476
## rating      -1.195 -1.441 -0.0182 -0.972  0.682 -1.091  0.287  1.5011
## CV residual -1.649 -1.090 -0.1194 -0.632  1.139 -1.238  0.691  1.5488
##                175     178    191    192    198    202     209    210
## Predicted   -0.447 -0.5908 -0.230 -0.455  0.207 -0.501  0.0649 0.0336
## cvpred      -0.495 -0.6652 -0.275 -0.512  0.216 -0.551  0.0622 0.0299
## rating       1.360 -0.0182 -0.376 -0.763 -0.324 -0.830 -1.0609 1.1809
## CV residual  1.855  0.6470 -0.101 -0.251 -0.539 -0.279 -1.1231 1.1509
##                212     214    216    218    233    234    240
## Predicted   -0.168 -0.3788 -0.496  0.255 -0.755 -0.126 -0.414
## cvpred      -0.191 -0.4209 -0.557  0.257 -0.851 -0.151 -0.452
## rating      -0.376 -0.3310 -0.830 -1.590 -1.337  1.382 -0.122
## CV residual -0.185  0.0899 -0.273 -1.847 -0.485  1.533  0.330
## 
## Sum of squares = 41.3    Mean square = 0.86    n = 48 
## 
## fold 3 
## Observations in test set: 48 
##                   2      4     5       6       7     14     17        46
## Predicted    0.6601 0.1029 0.312 -0.0582  0.0317 0.5721 -0.216  0.071830
## cvpred       0.5636 0.0197 0.255 -0.1280 -0.1868 0.4538 -0.321  0.000727
## rating       0.5180 0.4882 2.142  0.3318  1.1139 0.5180 -0.636 -0.733224
## CV residual -0.0456 0.4685 1.886  0.4598  1.3006 0.0642 -0.315 -0.733952
##                 47    51      55     60     66    67     70     77     78
## Predicted   -0.121 0.320  0.1104 0.0719 -0.409 0.220  0.572 -0.108 0.1902
## cvpred      -0.121 0.267  0.0663 0.0735 -0.506 0.188  0.498 -0.133 0.0746
## rating       0.250 0.578 -1.0535 0.2052  0.898 2.380 -0.242 -0.487 0.6148
## CV residual  0.371 0.311 -1.1198 0.1317  1.404 2.192 -0.740 -0.355 0.5402
##                80     86      90     100     102   112    114    141
## Predicted   0.478 0.5985  0.5593  0.0943  0.7636 0.397 -0.302 -0.140
## cvpred      0.505 0.7278  0.6078 -0.0107  0.8112 0.345 -0.367 -0.110
## rating      0.943 0.8234  0.0637  0.1084  0.7415 1.092  1.173 -0.338
## CV residual 0.437 0.0956 -0.5441  0.1191 -0.0697 0.747  1.540 -0.228
##                 144     156    157      158    160    163    165    167
## Predicted   0.04902 -0.0821 0.1787 -0.20546 0.0908 -0.164  0.330  0.315
## cvpred      0.00721 -0.1327 0.1573 -0.17590 0.0822 -0.247  0.404  0.319
## rating      1.02448  0.2871 0.1829 -0.16718 0.5478  1.970 -0.204 -0.547
## CV residual 1.01726  0.4199 0.0256  0.00871 0.4656  2.217 -0.609 -0.866
##                171     174    182     184     190    199    201     203
## Predicted   -0.112 -0.0303 -0.691  0.0409 -0.0769 -0.561 -0.298 -0.0724
## cvpred      -0.103  0.0439 -0.742  0.0164 -0.0317 -0.574 -0.299 -0.0996
## rating      -0.793  0.8308 -0.450 -1.1875 -1.2918 -1.768 -0.636 -0.0704
## CV residual -0.690  0.7869  0.292 -1.2040 -1.2601 -1.194 -0.337  0.0292
##                 208    211    219    226    232    235    239
## Predicted   -0.2544 -0.500 -0.714 -0.145 -0.759 -0.122 -0.749
## cvpred      -0.2964 -0.634 -0.762 -0.214 -0.826 -0.106 -0.828
## rating      -0.0629 -1.389 -1.493  1.092  0.384 -0.897  0.183
## CV residual  0.2334 -0.754 -0.731  1.306  1.210 -0.791  1.011
## 
## Sum of squares = 38.3    Mean square = 0.8    n = 48 
## 
## fold 4 
## Observations in test set: 47 
##                 9     13     25    27     29      34    43       44     48
## Predicted   0.288  0.601 -0.245 0.378  0.574  0.0102 0.345  0.00425 -0.185
## cvpred      0.273  0.750 -0.231 0.433  0.700  0.1044 0.371  0.15217 -0.073
## rating      1.307 -0.219  0.801 1.337  0.228  0.0637 0.518 -1.02369 -1.068
## CV residual 1.035 -0.969  1.032 0.904 -0.472 -0.0407 0.147 -1.17586 -0.995
##                50     53     64    65     68     75      97   101     106
## Predicted   0.568 -0.251 0.8815 0.143 -0.170  0.736 -0.0686 0.717  0.0215
## cvpred      0.654 -0.192 1.0712 0.129 -0.115  0.925  0.0350 0.699  0.0719
## rating      1.307  0.339 1.1660 0.548 -0.718 -0.562 -0.8375 0.838 -0.7034
## CV residual 0.654  0.532 0.0948 0.419 -0.604 -1.487 -0.8725 0.139 -0.7754
##                107     113    117      123     129    130     134     137
## Predicted   -0.164 -0.0655  0.362  0.00152  0.0149  0.276 -0.2489 -0.0949
## cvpred      -0.112 -0.0446  0.468 -0.01084  0.0891  0.367 -0.1932 -0.1014
## rating       0.548 -0.2193 -1.627  0.24245 -0.1895 -1.545 -0.1746 -1.5376
## CV residual  0.659 -0.1747 -2.095  0.25329 -0.2786 -1.912  0.0186 -1.4362
##                147    148    154    161    169     177    180    181
## Predicted   -0.177  0.476 -0.176 -0.123 -0.632 -0.3078 -0.297  0.177
## cvpred      -0.199  0.471 -0.226 -0.134 -0.667 -0.2729 -0.301  0.167
## rating      -0.122 -0.301 -1.195 -0.435 -0.532 -0.2491  0.295 -0.592
## CV residual  0.076 -0.772 -0.969 -0.301  0.135  0.0238  0.596 -0.759
##                185    189    193    195   197   200    205    213    220
## Predicted   -0.414 -0.751 -0.263 -0.453 0.283 0.104 -0.174 -0.753 -0.359
## cvpred      -0.296 -0.894 -0.249 -0.477 0.331 0.133 -0.163 -0.864 -0.391
## rating      -1.202  1.881 -0.763 -0.532 1.397 0.473 -0.912 -1.783 -0.726
## CV residual -0.906  2.775 -0.514 -0.055 1.066 0.340 -0.749 -0.919 -0.334
##                222    225    227    230
## Predicted   -1.004 -0.358 -0.564 -0.646
## cvpred      -0.916 -0.400 -0.616 -0.703
## rating      -1.135 -0.271 -1.955  1.635
## CV residual -0.220  0.129 -1.338  2.338
## 
## Sum of squares = 43.5    Mean square = 0.93    n = 47 
## 
## fold 5 
## Observations in test set: 47 
##                  1      10     11    12     16    22    28     30    32
## Predicted   0.1334  0.1190  0.509 0.682  0.253 0.538 0.288 -0.264 0.792
## cvpred      0.0527  0.0875  0.417 0.476  0.169 0.403 0.134 -0.346 0.632
## rating      2.2683  0.0414  0.250 0.861 -0.830 1.807 1.300  0.965 1.315
## CV residual 2.2155 -0.0461 -0.167 0.384 -0.999 1.403 1.166  1.311 0.683
##                38    40     49    52     57     58      61    83     84
## Predicted   0.752 0.967 -0.196 0.691 0.1667  0.515  0.1563 0.905  0.515
## cvpred      0.553 0.713 -0.125 0.539 0.0187  0.415  0.0738 0.720  0.417
## rating      1.672 2.119 -2.141 2.127 0.8234  0.257 -0.1523 2.119  0.272
## CV residual 1.119 1.407 -2.016 1.588 0.8047 -0.157 -0.2261 1.399 -0.145
##                 87     92      93     94       95    98     104     105
## Predicted    0.448  0.200 0.04260 -0.294  0.39320 0.403  0.0367 -0.1492
## cvpred       0.336  0.124 0.00972 -0.290  0.31420 0.333  0.0232 -0.1779
## rating      -0.048 -1.783 0.42120  0.265 -0.00333 1.479 -0.5694  0.0488
## CV residual -0.384 -1.908 0.41148  0.555 -0.31753 1.146 -0.5925  0.2267
##                 109      120   125   127      136      142     143    145
## Predicted   -0.2046 -0.02386 0.313 0.229 -0.23485 -0.09370 -0.0994 -0.299
## cvpred      -0.2054 -0.00736 0.241 0.147 -0.21680 -0.08832 -0.0632 -0.300
## rating      -0.2342 -0.56192 0.920 1.382 -0.21932 -0.08526  0.2499 -0.249
## CV residual -0.0288 -0.55456 0.679 1.235 -0.00252  0.00306  0.3131  0.051
##                151    162    168     172    179    186    188      196
## Predicted   -0.120 -0.323 -0.189 -0.1175 -0.694 -0.353 -0.311 -0.62023
## cvpred      -0.263 -0.307 -0.168 -0.0985 -0.564 -0.266 -0.241 -0.60096
## rating      -0.659 -0.770  0.771 -0.7258 -1.024 -0.689 -1.500 -0.60661
## CV residual -0.396 -0.463  0.940 -0.6273 -0.459 -0.423 -1.259 -0.00565
##                207    217    221     223    228
## Predicted   -0.526 -0.396 -0.125 -0.6205 -0.168
## cvpred      -0.475 -0.337 -0.107 -0.4925 -0.128
## rating      -0.703 -1.917  0.838 -0.0629  1.181
## CV residual -0.228 -1.580  0.945  0.4295  1.309
## 
## Sum of squares = 41.6    Mean square = 0.88    n = 47 
## 
## Overall (Sum over all 47 folds) 
##    ms 
## 0.884