# Load the quarterback statistics table
qb_stats <- read.csv("../data/qb_stats.csv")

# Columns used as predictors (referred to in this analysis as the college
# predictors)
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[, predictors]

# Response variable; single-bracket indexing keeps it a one-column data frame,
# so the column is still called "rating" after cbind()
rating <- qb_stats["rating"]

# Clean data set: drop every row containing an NA, add 0.1 to all values, then
# take natural logs (the 0.1 shift presumably guards against log(0) -- confirm)
data.log.no_combine.for_rating <- data.frame(
  log(na.omit(cbind(rating, college_stats)) + 0.1)
)

# Full linear model: log rating regressed on every log predictor
lm.log.no_combine.rating <- lm(
  formula = rating ~ .,
  data = data.log.no_combine.for_rating
)

# Stepwise AIC search in both directions, starting from the full model
step_reg.log.no_combine.rating <- stepAIC(
  lm.log.no_combine.rating,
  direction = "both"
)
## Start:  AIC=-776.9
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
## 
##               Df Sum of Sq  RSS  AIC
## - c_numyrs     1     0.000 8.07 -779
## - c_avg_inter  1     0.010 8.08 -779
## - c_avg_tds    1     0.057 8.13 -777
## <none>                     8.07 -777
## - c_avg_yds    1     0.079 8.15 -777
## - c_rate       1     0.082 8.16 -777
## - height       1     0.102 8.18 -776
## - weight       1     0.176 8.25 -774
## - c_avg_att    1     0.188 8.26 -773
## - c_avg_cmpp   1     0.219 8.29 -773
## - c_pct        1     0.220 8.29 -773
## - age          1     0.562 8.64 -763
## 
## Step:  AIC=-778.9
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_inter  1     0.010 8.08 -781
## - c_avg_tds    1     0.059 8.13 -779
## <none>                     8.07 -779
## - c_avg_yds    1     0.080 8.15 -779
## - c_rate       1     0.084 8.16 -778
## - height       1     0.103 8.18 -778
## + c_numyrs     1     0.000 8.07 -777
## - weight       1     0.180 8.25 -776
## - c_avg_att    1     0.193 8.27 -775
## - c_avg_cmpp   1     0.227 8.30 -774
## - c_pct        1     0.230 8.30 -774
## - age          1     0.562 8.64 -765
## 
## Step:  AIC=-780.6
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_tds    1     0.053 8.14 -781
## <none>                     8.08 -781
## - height       1     0.100 8.18 -780
## - c_avg_yds    1     0.105 8.19 -780
## + c_avg_inter  1     0.010 8.07 -779
## - c_rate       1     0.128 8.21 -779
## + c_numyrs     1     0.000 8.08 -779
## - weight       1     0.170 8.25 -778
## - c_avg_att    1     0.183 8.27 -777
## - c_avg_cmpp   1     0.218 8.30 -776
## - c_pct        1     0.224 8.31 -776
## - age          1     0.555 8.64 -767
## 
## Step:  AIC=-781.1
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_yds + c_avg_att
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_yds    1     0.061 8.20 -781
## <none>                     8.14 -781
## - c_rate       1     0.078 8.21 -781
## + c_avg_tds    1     0.053 8.08 -781
## - height       1     0.101 8.24 -780
## + c_avg_inter  1     0.004 8.13 -779
## + c_numyrs     1     0.001 8.14 -779
## - weight       1     0.176 8.31 -778
## - c_avg_att    1     0.209 8.35 -777
## - c_pct        1     0.231 8.37 -776
## - c_avg_cmpp   1     0.235 8.37 -776
## - age          1     0.599 8.74 -766
## 
## Step:  AIC=-781.3
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_att
## 
##               Df Sum of Sq  RSS  AIC
## - c_rate       1     0.017 8.21 -783
## <none>                     8.20 -781
## + c_avg_yds    1     0.061 8.14 -781
## + c_avg_inter  1     0.040 8.16 -780
## - height       1     0.101 8.30 -780
## + c_avg_tds    1     0.008 8.19 -780
## + c_numyrs     1     0.000 8.20 -779
## - weight       1     0.195 8.39 -778
## - c_pct        1     0.382 8.58 -773
## - c_avg_cmpp   1     0.399 8.60 -772
## - c_avg_att    1     0.400 8.60 -772
## - age          1     0.594 8.79 -767
## 
## Step:  AIC=-782.8
## rating ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_att
## 
##               Df Sum of Sq  RSS  AIC
## <none>                     8.21 -783
## + c_avg_inter  1     0.056 8.16 -782
## - height       1     0.105 8.32 -782
## + c_rate       1     0.017 8.20 -781
## + c_avg_tds    1     0.002 8.21 -781
## + c_numyrs     1     0.000 8.21 -781
## + c_avg_yds    1     0.000 8.21 -781
## - weight       1     0.199 8.41 -779
## - c_pct        1     0.379 8.59 -774
## - c_avg_cmpp   1     0.403 8.62 -773
## - c_avg_att    1     0.403 8.62 -773
## - age          1     0.578 8.79 -769
# Coefficient table and fit statistics for the model chosen by stepAIC
summary(step_reg.log.no_combine.rating)
## 
## Call:
## lm(formula = rating ~ height + weight + age + c_avg_cmpp + c_pct + 
##     c_avg_att, data = data.log.no_combine.for_rating)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7778 -0.1042  0.0189  0.1203  0.4547 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   55.804     16.441    3.39  0.00081 ***
## height        -1.292      0.753   -1.72  0.08763 .  
## weight         0.618      0.262    2.36  0.01900 *  
## age            0.583      0.145    4.02  7.8e-05 ***
## c_avg_cmpp    11.557      3.442    3.36  0.00092 ***
## c_pct        -11.080      3.399   -3.26  0.00128 ** 
## c_avg_att    -11.535      3.433   -3.36  0.00091 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.189 on 230 degrees of freedom
## Multiple R-squared: 0.166,   Adjusted R-squared: 0.144 
## F-statistic: 7.64 on 6 and 230 DF,  p-value: 1.72e-07
# Draw the plot() output for the selected lm fit (the lm diagnostic plots)
plot(step_reg.log.no_combine.rating)

## [figures: plot of chunk unnamed-chunk-1 (x4)]

# Best-subsets search over the same predictors, keeping the 10 best models of
# each size (all other regsubsets settings are left at their defaults)
leaps.log.no_combine.rating <- regsubsets(
  rating ~ .,
  data = data.log.no_combine.for_rating,
  nbest = 10
)

# Plot R-squared against subset size.
# legend = FALSE: by default subsets() places the legend with a mouse click,
# which is not available when this script is rendered non-interactively; the
# recorded run aborted here with "invalid coordinate lengths".
# NOTE(review): with the legend suppressed, subsets() should return the table
# of predictor abbreviations, which then prints here -- confirm against the
# installed car version.
subsets(leaps.log.no_combine.rating, statistic = "rsq", legend = FALSE)
## Error: invalid coordinate lengths

## [figure: plot of chunk unnamed-chunk-1]

# 5 fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally rather than as `df =`:
# NOTE(review): the first parameter of cv.lm() appears to be named `df` in
# older DAAG releases and `data` in newer ones; a positional argument works
# with either signature -- confirm against the installed DAAG version.
cv.lm(data.log.no_combine.for_rating, step_reg.log.no_combine.rating, m = 5)
## Analysis of Variance Table
## 
## Response: rating
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## height       1   0.06   0.056    1.56 0.21249    
## weight       1   0.23   0.232    6.51 0.01139 *  
## age          1   0.61   0.615   17.21 4.7e-05 ***
## c_avg_cmpp   1   0.11   0.111    3.10 0.07961 .  
## c_pct        1   0.22   0.220    6.15 0.01386 *  
## c_avg_att    1   0.40   0.403   11.29 0.00091 ***
## Residuals  230   8.21   0.036                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

## [figure: plot of chunk unnamed-chunk-1]

## 
## fold 1 
## Observations in test set: 47 
##                  3     8     15     18     19      20      21   23     26
## Predicted    4.424 4.267 4.3074 4.2788  4.280  4.1761  4.2835 4.31 4.3010
## cvpred       4.405 4.273 4.2989 4.2848  4.291  4.1812  4.2866 4.30 4.3082
## rating       4.286 4.388 4.3121 4.3215  4.145  4.0927  4.2092 4.49 4.3464
## CV residual -0.119 0.115 0.0133 0.0367 -0.146 -0.0886 -0.0774 0.19 0.0382
##                   35    45       54      56      69    71    72     73
## Predicted    4.25829 4.253  4.37238  4.1984  4.2284 4.217 4.255  4.432
## cvpred       4.27027 4.253  4.36688  4.2009  4.2325 4.222 4.258  4.431
## rating       4.26409 4.401  4.35927  4.1352  4.1415 4.337 4.461  4.317
## CV residual -0.00618 0.148 -0.00761 -0.0657 -0.0909 0.115 0.203 -0.113
##                 76     79     81     82     91       96    115    118
## Predicted   4.2547 4.2172 4.1965  4.244  4.175 4.255052  4.224 4.2564
## cvpred      4.2586 4.2346 4.2116  4.253  4.193 4.259285  4.238 4.2626
## rating      4.2627 4.3294 4.2283  3.968  3.666 4.259859  4.022 4.2753
## CV residual 0.0041 0.0948 0.0167 -0.284 -0.527 0.000574 -0.216 0.0126
##               121    122      124     131    132    133     135    140
## Predicted   4.277 4.1707  4.21338 4.16006 4.1768 4.1047  4.1970  4.258
## cvpred      4.285 4.1998  4.21438 4.17316 4.1925 4.1213  4.2153  4.254
## rating      4.433 4.2341  4.20916 4.18052 4.2106 4.2180  4.1495  4.151
## CV residual 0.148 0.0343 -0.00522 0.00736 0.0181 0.0967 -0.0659 -0.103
##               150  155    164    176   183    187   194    206    215
## Predicted   4.191 4.27 4.2834 4.0645 4.246  4.154 4.116  4.207  4.074
## cvpred      4.194 4.26 4.2855 4.0949 4.246  4.161 4.117  4.214  4.104
## rating      4.536 4.54 4.2959 4.1463 4.421  3.980 4.445  3.833  3.296
## CV residual 0.341 0.28 0.0105 0.0514 0.175 -0.181 0.328 -0.381 -0.808
##                224    229    236    237   238
## Predicted    4.064  4.095  4.171  4.140 4.152
## cvpred       4.100  4.116  4.193  4.157 4.174
## rating       3.967  3.867  3.714  3.835 4.357
## CV residual -0.133 -0.249 -0.479 -0.322 0.183
## 
## Sum of squares = 2.28    Mean square = 0.05    n = 47 
## 
## fold 2 
## Observations in test set: 48 
##                24    31    33     36     37      39    41      42    59
## Predicted   4.203 4.241 4.060  4.289  4.212  4.2430 4.260  4.2834 4.290
## cvpred      4.203 4.241 4.015  4.297  4.212  4.2466 4.269  4.2955 4.297
## rating      4.475 4.480 4.261  4.111  3.681  4.2017 4.305  4.2808 4.329
## CV residual 0.273 0.238 0.246 -0.186 -0.531 -0.0449 0.036 -0.0146 0.032
##                 62      74    85     88    89     99   103    108    110
## Predicted   4.0889  4.4152 4.276 4.2210 4.257  4.420 4.224  4.240 4.1554
## cvpred      4.0660  4.4332 4.281 4.2234 4.261  4.443 4.218  4.245 4.1425
## rating      4.1352  4.3373 4.445 4.2669 4.426  4.234 4.415  4.089 4.1759
## CV residual 0.0691 -0.0959 0.164 0.0435 0.165 -0.209 0.197 -0.156 0.0335
##                111    116    119    126    128   138   139    146    149
## Predicted   4.1445  4.211 4.2644 4.2286  4.220 4.202 4.284  4.314  4.193
## cvpred      4.1354  4.215 4.2678 4.2292  4.224 4.203 4.292  4.327  4.192
## rating      4.2062  4.066 4.2850 4.2794  4.022 4.420 4.419  3.982  3.918
## CV residual 0.0708 -0.149 0.0172 0.0502 -0.202 0.217 0.126 -0.346 -0.274
##                152    153   159    166   170   173   175   178     191
## Predicted   4.1692  4.172 4.189  4.239 4.152 4.229 4.143 4.121 4.17680
## cvpred      4.1714  4.169 4.182  4.246 4.148 4.228 4.135 4.108 4.16543
## rating      4.2399  4.036 4.367  4.007 4.297 4.498 4.476 4.240 4.16821
## CV residual 0.0685 -0.133 0.184 -0.239 0.149 0.269 0.342 0.132 0.00279
##                  192     198     202    209   210     212     214    216
## Predicted    4.10546  4.2660  4.1167  4.235 4.220  4.2445  4.1944  4.142
## cvpred       4.08746  4.2693  4.1052  4.237 4.218  4.2469  4.1946  4.126
## rating       4.08429  4.1790  4.0690  4.015 4.449  4.1682  4.1775  4.069
## CV residual -0.00316 -0.0903 -0.0361 -0.222 0.231 -0.0786 -0.0172 -0.057
##                218     233   234    240
## Predicted    4.285  4.0202 4.199 4.1288
## cvpred       4.284  3.9883 4.188 4.1209
## rating       3.877  3.9455 4.480 4.2195
## CV residual -0.406 -0.0428 0.291 0.0986
## 
## Sum of squares = 1.74    Mean square = 0.04    n = 48 
## 
## fold 3 
## Observations in test set: 48 
##                  2      4     5      6     7     14      17     46     47
## Predicted   4.3126 4.2599 4.273 4.2379 4.270 4.3226  4.2104  4.221 4.2347
## cvpred      4.2940 4.2449 4.263 4.2231 4.225 4.3004  4.1873  4.208 4.2308
## rating      4.3386 4.3334 4.589 4.3054 4.438 4.3386  4.1125  4.091 4.2905
## CV residual 0.0446 0.0885 0.326 0.0823 0.213 0.0382 -0.0747 -0.117 0.0596
##                 51     55     60    66    67      70      77     78    80
## Predicted   4.2684  4.223 4.2718 4.196 4.273  4.3062  4.2296 4.2997 4.300
## cvpred      4.2598  4.216 4.2658 4.170 4.266  4.2924  4.2229 4.2722 4.306
## rating      4.3490  4.016 4.2822 4.403 4.621  4.1957  4.1447 4.3554 4.410
## CV residual 0.0892 -0.199 0.0164 0.233 0.355 -0.0967 -0.0781 0.0833 0.104
##                 86      90    100    102  112   114     141   144  156
## Predicted   4.2957  4.3186 4.2417 4.3420 4.27 4.205  4.2292 4.252 4.20
## cvpred      4.3168  4.3230 4.2220 4.3413 4.26 4.190  4.2294 4.242 4.19
## rating      4.3907  4.2556 4.2641 4.3770 4.43 4.447  4.1759 4.424 4.30
## CV residual 0.0739 -0.0674 0.0421 0.0357 0.17 0.257 -0.0534 0.182 0.11
##               157    158  160   163    165   167     171   174   182
## Predicted   4.148 4.1914 4.23 4.172  4.314  4.31  4.1526 4.209 4.034
## cvpred      4.132 4.1955 4.22 4.157  4.325  4.30  4.1621 4.221 4.025
## rating      4.278 4.2106 4.34 4.565  4.203  4.13  4.0775 4.392 4.153
## CV residual 0.146 0.0151 0.12 0.409 -0.122 -0.17 -0.0845 0.171 0.128
##                184    190    199     201     203   208    211     219
## Predicted    4.208  4.225  4.133  4.1555 4.22847 4.128  4.080  4.0064
## cvpred       4.207  4.231  4.130  4.1554 4.22239 4.123  4.056  3.9972
## rating       3.983  3.957  3.826  4.1125 4.22975 4.231  3.932  3.9040
## CV residual -0.224 -0.274 -0.304 -0.0429 0.00736 0.109 -0.124 -0.0932
##               226   232    235   239
## Predicted   4.223 4.107  4.232 4.044
## cvpred      4.207 4.088  4.233 4.025
## rating      4.434 4.315  4.054 4.278
## CV residual 0.227 0.227 -0.179 0.253
## 
## Sum of squares = 1.36    Mean square = 0.03    n = 48 
## 
## fold 4 
## Observations in test set: 47 
##                 9     13    25    27      29        34     43     44
## Predicted   4.250  4.318  4.49 4.280  4.2889  4.236143 4.2909  4.188
## cvpred      4.265  4.345  5.70 4.308  4.3238  4.255755 4.3200  4.202
## rating      4.468  4.200  4.39 4.473  4.2863  4.255613 4.3386  4.024
## CV residual 0.203 -0.144 -1.31 0.165 -0.0374 -0.000143 0.0186 -0.178
##                 48    50    53     64     65     68     75      97    101
## Predicted    4.205 4.348 4.158 4.3691 4.2684  4.189  4.374  4.1716 4.3604
## cvpred       4.199 4.359 4.160 4.3843 4.2707  4.202  4.408  4.1645 4.3748
## rating       4.013 4.468 4.307 4.4462 4.3438  4.094  4.129  4.0673 4.3932
## CV residual -0.187 0.109 0.146 0.0618 0.0731 -0.108 -0.279 -0.0972 0.0184
##                106   107       113    117    123     129    130     134
## Predicted    4.216 4.232  4.203780  4.303 4.2391  4.2921  4.282 4.17889
## cvpred       4.222 4.218  4.200963  4.333 4.2338  4.2904  4.293 4.19968
## rating       4.098 4.344  4.200205  3.867 4.2891  4.2062  3.890 4.20916
## CV residual -0.124 0.125 -0.000758 -0.466 0.0553 -0.0842 -0.403 0.00948
##                137   147    148   154     161    169    177   180    181
## Predicted    4.198 4.190  4.324  4.17  4.1870 4.1066 4.1193 4.130  4.295
## cvpred       4.211 4.205  4.336  4.33  4.1968 4.1011 4.0997 4.127  4.274
## rating       3.892 4.220  4.184  3.98  4.1558 4.1352 4.1942 4.299  4.122
## CV residual -0.319 0.014 -0.153 -0.35 -0.0411 0.0341 0.0945 0.172 -0.152
##                185   189     193      195   197   200    205    213
## Predicted    4.147 4.098  4.1746  4.13258 4.309 4.272  4.200  4.053
## cvpred       4.163 4.058  4.1554  4.13717 4.313 4.289  4.198  4.005
## rating       3.980 4.553  4.0843  4.13517 4.482 4.331  4.050  3.822
## CV residual -0.183 0.495 -0.0711 -0.00201 0.169 0.042 -0.148 -0.183
##                 220     222    225    227   230
## Predicted    4.1659 4.00079 4.1754  4.107 4.067
## cvpred       4.1582 3.99064 4.1521  4.099 4.056
## rating       4.0927 3.99636 4.1897  3.770 4.517
## CV residual -0.0656 0.00573 0.0376 -0.329 0.461
## 
## Sum of squares = 3.45    Mean square = 0.07    n = 47 
## 
## fold 5 
## Observations in test set: 47 
##                 1      10      11     12     16   22    28    30    32
## Predicted   4.295  4.2772  4.3689 4.3374  4.251 4.33 4.335 4.151 4.358
## cvpred      4.284  4.2680  4.3492 4.3124  4.247 4.31 4.311 4.140 4.337
## rating      4.606  4.2513  4.2905 4.3969  4.069 4.54 4.467 4.414 4.469
## CV residual 0.322 -0.0166 -0.0588 0.0845 -0.178 0.23 0.156 0.274 0.133
##                38    40     49    52   57        58      61    83     84
## Predicted   4.309 4.427  4.174 4.275 4.28  4.306948  4.2853 4.380 4.2573
## cvpred      4.291 4.387  4.196 4.265 4.25  4.292791  4.2635 4.349 4.2586
## rating      4.523 4.586  3.711 4.587 4.39  4.291828  4.2136 4.586 4.2946
## CV residual 0.232 0.199 -0.485 0.322 0.14 -0.000963 -0.0499 0.237 0.0359
##                 87     92     93    94      95    98     104    105    109
## Predicted    4.341  4.291 4.2839 4.181  4.3193 4.331  4.2334 4.2170  4.213
## cvpred       4.315  4.275 4.2708 4.181  4.3066 4.324  4.2228 4.1987  4.211
## rating       4.234  3.822 4.3215 4.293  4.2428 4.494  4.1271 4.2528  4.197
## CV residual -0.081 -0.453 0.0507 0.113 -0.0638 0.171 -0.0957 0.0541 -0.014
##                 120    125   127     136    142    143    145     151
## Predicted    4.2140 4.3212 4.263 4.19818 4.1809 4.2084 4.1752  4.2130
## cvpred       4.2150 4.3090 4.242 4.19718 4.1880 4.2173 4.1750  4.1978
## rating       4.1287 4.4067 4.480 4.20020 4.2268 4.2905 4.1942  4.1076
## CV residual -0.0863 0.0977 0.237 0.00302 0.0389 0.0731 0.0192 -0.0902
##                 162   168    172     179     186    188    196     207
## Predicted    4.1741 4.204  4.236  4.0401  4.1667  4.200 4.1055  4.1231
## cvpred       4.1691 4.214  4.234  4.0681  4.1806  4.210 4.1108  4.1310
## rating       4.0826 4.382  4.093  4.0236  4.1010  3.902 4.1190  4.0977
## CV residual -0.0864 0.168 -0.142 -0.0445 -0.0796 -0.308 0.0082 -0.0333
##                217   221   223   228
## Predicted    4.132 4.155 4.096 4.199
## cvpred       4.138 4.154 4.120 4.211
## rating       3.782 4.393 4.231 4.449
## CV residual -0.356 0.239 0.112 0.238
## 
## Sum of squares = 1.6    Mean square = 0.03    n = 47 
## 
## Overall (Sum over all 47 folds) 
##     ms 
## 0.0441