# Load the quarterback data set (path is relative to the working directory).
qb_stats <- read.csv("../data/qb_stats.csv")

# Predictor columns used to model the response.
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[, predictors]

# Set the response variable. Single-bracket indexing keeps it as a
# one-column data frame, so cbind() below carries the "yds" column name
# into the modelling data.
yds <- qb_stats["yds"]

# Generate the clean data set: drop every row containing a missing value,
# then log-transform all columns (response and predictors alike).
# The 0.1 offset keeps zero values finite: log(0 + 0.1) instead of -Inf.
data.log.no_combine.for_yds <- data.frame(
  log(na.omit(cbind(yds, college_stats)) + 0.1)
)

# Full linear model: yds regressed on every remaining column.
lm.log.no_combine.yds <- lm(
  formula = yds ~ .,
  data = data.log.no_combine.for_yds
)

# Stepwise AIC selection starting from the full model; direction = "both"
# lets a term be dropped and later re-added.
# NOTE(review): stepAIC() is provided by MASS, and no library(MASS) call is
# visible in this section -- confirm it is attached earlier in the file.
step_reg.log.no_combine.yds <- stepAIC(
  lm.log.no_combine.yds,
  direction = "both"
)
## Start:  AIC=-123.2
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_numyrs     1     0.004 129 -125
## - c_avg_tds    1     0.009 129 -125
## - height       1     0.018 129 -125
## - c_rate       1     0.039 129 -125
## - c_avg_inter  1     0.075 129 -125
## - c_avg_yds    1     0.307 129 -125
## - age          1     0.807 130 -124
## <none>                     129 -123
## - c_pct        1     1.154 130 -123
## - c_avg_cmpp   1     1.257 130 -123
## - c_avg_att    1     1.414 130 -123
## - weight       1     1.860 131 -122
## 
## Step:  AIC=-125.2
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_avg_tds    1     0.007 129 -127
## - height       1     0.020 129 -127
## - c_rate       1     0.035 129 -127
## - c_avg_inter  1     0.091 129 -127
## - c_avg_yds    1     0.304 129 -127
## - age          1     0.810 130 -126
## <none>                     129 -125
## - c_pct        1     1.267 130 -125
## - c_avg_cmpp   1     1.362 130 -125
## - c_avg_att    1     1.513 131 -124
## - weight       1     1.886 131 -124
## + c_numyrs     1     0.004 129 -123
## 
## Step:  AIC=-127.2
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - height       1     0.020 129 -129
## - c_rate       1     0.052 129 -129
## - c_avg_inter  1     0.213 129 -129
## - c_avg_yds    1     0.518 130 -128
## - age          1     0.805 130 -128
## <none>                     129 -127
## - c_pct        1     1.367 130 -127
## - c_avg_cmpp   1     1.418 130 -127
## - c_avg_att    1     1.528 131 -126
## - weight       1     1.943 131 -126
## + c_avg_tds    1     0.007 129 -125
## + c_numyrs     1     0.002 129 -125
## 
## Step:  AIC=-129.2
## yds ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_rate       1      0.05 129 -131
## - c_avg_inter  1      0.22 129 -131
## - c_avg_yds    1      0.52 130 -130
## - age          1      0.83 130 -130
## <none>                     129 -129
## - c_pct        1      1.44 131 -128
## - c_avg_cmpp   1      1.50 131 -128
## - c_avg_att    1      1.61 131 -128
## + height       1      0.02 129 -127
## + c_avg_tds    1      0.01 129 -127
## + c_numyrs     1      0.00 129 -127
## - weight       1      3.46 133 -125
## 
## Step:  AIC=-131.1
## yds ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_yds + 
##     c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_avg_inter  1      0.71 130 -132
## - age          1      0.90 130 -131
## <none>                     129 -131
## - c_pct        1      1.43 131 -130
## - c_avg_cmpp   1      1.47 131 -130
## - c_avg_att    1      1.56 131 -130
## - c_avg_yds    1      1.92 131 -130
## + c_rate       1      0.05 129 -129
## + c_avg_tds    1      0.02 129 -129
## + height       1      0.02 129 -129
## + c_numyrs     1      0.00 129 -129
## - weight       1      3.56 133 -127
## 
## Step:  AIC=-131.7
## yds ~ weight + age + c_avg_cmpp + c_pct + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - age          1     0.854 131 -132
## - c_pct        1     0.854 131 -132
## - c_avg_cmpp   1     0.882 131 -132
## - c_avg_att    1     0.956 131 -132
## <none>                     130 -132
## + c_avg_inter  1     0.713 129 -131
## + c_rate       1     0.548 129 -131
## - c_avg_yds    1     1.644 132 -131
## + c_avg_tds    1     0.100 130 -130
## + height       1     0.038 130 -130
## + c_numyrs     1     0.033 130 -130
## - weight       1     2.986 133 -128
## 
## Step:  AIC=-132.2
## yds ~ weight + c_avg_cmpp + c_pct + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_pct        1     0.774 132 -133
## - c_avg_cmpp   1     0.813 132 -133
## - c_avg_att    1     0.881 132 -133
## <none>                     131 -132
## + age          1     0.854 130 -132
## - c_avg_yds    1     1.419 132 -132
## + c_avg_inter  1     0.664 130 -131
## + c_rate       1     0.650 130 -131
## + c_avg_tds    1     0.188 131 -130
## + height       1     0.076 131 -130
## + c_numyrs     1     0.048 131 -130
## - weight       1     2.467 133 -130
## 
## Step:  AIC=-132.8
## yds ~ weight + c_avg_cmpp + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_avg_cmpp   1      0.29 132 -134
## - c_avg_yds    1      1.08 133 -133
## <none>                     132 -133
## + c_pct        1      0.77 131 -132
## + age          1      0.77 131 -132
## + c_rate       1      0.24 131 -131
## + height       1      0.14 131 -131
## + c_avg_inter  1      0.13 131 -131
## + c_numyrs     1      0.12 131 -131
## + c_avg_tds    1      0.05 132 -131
## - c_avg_att    1      2.38 134 -130
## - weight       1      3.42 135 -129
## 
## Step:  AIC=-134.2
## yds ~ weight + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## <none>                     132 -134
## + age          1      0.92 131 -134
## + c_avg_cmpp   1      0.29 132 -133
## + c_pct        1      0.25 132 -133
## + height       1      0.20 132 -133
## + c_numyrs     1      0.09 132 -132
## + c_avg_tds    1      0.07 132 -132
## + c_avg_inter  1      0.06 132 -132
## + c_rate       1      0.00 132 -132
## - c_avg_yds    1      2.70 134 -131
## - c_avg_att    1      2.75 135 -131
## - weight       1      4.00 136 -129
# Print the coefficient table and overall fit statistics of the model
# chosen by the stepwise search.
summary(step_reg.log.no_combine.yds)
## 
## Call:
## lm(formula = yds ~ weight + c_avg_yds + c_avg_att, data = data.log.no_combine.for_yds)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.590 -0.213  0.071  0.315  0.904 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   -4.320      4.010   -1.08   0.2825   
## weight         2.022      0.757    2.67   0.0081 **
## c_avg_yds      0.733      0.334    2.19   0.0292 * 
## c_avg_att     -0.820      0.370   -2.22   0.0277 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.749 on 235 degrees of freedom
## Multiple R-squared: 0.0512,  Adjusted R-squared: 0.0391 
## F-statistic: 4.23 on 3 and 235 DF,  p-value: 0.00621
# Draw the diagnostic plots for the stepwise-selected model.
plot(step_reg.log.no_combine.yds)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over the same response and predictors, recording the
# ten best models of each size (nbest = 10).
leaps.log.no_combine.yds <- regsubsets(
  yds ~ .,
  data = data.log.no_combine.for_yds,
  nbest = 10
)

# Plot R-squared against subset size. The legend is switched off because
# the default legend placement waits for a mouse click; when the document
# is rendered non-interactively no click is available and the call aborts
# with "invalid coordinate lengths" (the error recorded for the previous
# run). With legend = FALSE the plot is drawn and the table of abbreviated
# variable names is returned instead.
subsets(leaps.log.no_combine.yds, statistic = "rsq", legend = FALSE)
## Error: invalid coordinate lengths

plot of chunk unnamed-chunk-1

# Cross-validate the stepwise-selected model on the log-transformed data,
# splitting the observations into m = 5 folds.
# NOTE(review): the first argument is passed as `df`; newer DAAG releases
# appear to name it `data` -- confirm against the installed version.
cv.lm(df = data.log.no_combine.for_yds, step_reg.log.no_combine.yds, m = 5)  # 5 fold cross-validation
## Analysis of Variance Table
## 
## Response: yds
##            Df Sum Sq Mean Sq F value Pr(>F)   
## weight      1    4.3    4.35    7.76 0.0058 **
## c_avg_yds   1    0.0    0.01    0.01 0.9173   
## c_avg_att   1    2.8    2.75    4.91 0.0277 * 
## Residuals 235  131.8    0.56                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 47 
##                 3      12    15     20    22     43     44      45     49
## Predicted   7.672  7.6035  7.49  7.678 7.658  7.725  7.647 7.62322  7.656
## cvpred      7.659  7.5903  7.48  7.661 7.644  7.713  7.633 7.61295  7.647
## yds         8.127  7.5782  7.36  7.526 8.304  7.168  7.425 7.61830  6.774
## CV residual 0.468 -0.0121 -0.12 -0.136 0.659 -0.545 -0.208 0.00535 -0.873
##                57     58     61   63     70    71    75   78    80    94
## Predicted   7.512 7.8633  7.674 7.68  7.657 7.626 7.811 7.89 7.488 7.532
## cvpred      7.502 7.8446  7.663 7.67  7.642 7.616 7.795 7.87 7.481 7.523
## yds         8.077 7.9442  7.412 8.05  7.349 8.251 7.983 8.12 7.662 7.698
## CV residual 0.575 0.0996 -0.251 0.38 -0.293 0.635 0.189 0.25 0.181 0.175
##                 97   100  105   106    108    116   122    135    139
## Predicted    7.495 7.755 7.63 7.740  7.628  7.508 7.478  7.227 7.6425
## cvpred       7.488 7.741 7.62 7.725  7.620  7.497 7.470  7.226 7.6439
## yds          7.213 7.842 7.92 7.928  7.413  7.005 7.853  7.048 7.6587
## CV residual -0.275 0.101 0.30 0.203 -0.207 -0.492 0.384 -0.178 0.0149
##               143   147   148   160    167   173    175    179   180
## Predicted   7.841 7.356 7.618 7.512  7.543 7.559 7.4750  7.461 7.354
## cvpred      7.835 7.353 7.606 7.506  7.543 7.553 7.4715  7.459 7.356
## yds         8.321 7.911 7.733 8.113  7.026 8.071 7.4928  7.303 7.738
## CV residual 0.485 0.558 0.126 0.607 -0.518 0.518 0.0213 -0.157 0.382
##                183    188     197   200     202   206    215    218   220
## Predicted    7.554  7.584 7.59419 7.392  7.3750  7.43  7.269  7.534 7.113
## cvpred       7.544  7.579 7.59031 7.388  7.3670  7.43  7.274  7.531 7.119
## yds          6.716  7.365 7.59945 7.592  7.3186  7.10  6.790  7.234 7.354
## CV residual -0.828 -0.214 0.00914 0.205 -0.0484 -0.33 -0.484 -0.297 0.235
##              225
## Predicted   7.42
## cvpred      7.42
## yds         7.80
## CV residual 0.38
## 
## Sum of squares = 6.71    Mean square = 0.14    n = 47 
## 
## fold 2 
## Observations in test set: 48 
##                 8     11     16    18     21     26    28    31     32
## Predicted   7.549  7.890  7.510 7.582  7.639  8.034 7.317 7.564 7.6666
## cvpred      7.545  7.924  7.491 7.556  7.614  8.044 7.302 7.544 7.6522
## yds         8.131  7.455  7.351 7.965  7.200  7.793 7.334 8.160 7.7147
## CV residual 0.586 -0.469 -0.139 0.409 -0.415 -0.252 0.032 0.616 0.0626
##                33     35     36      39    42    54     56    66    68
## Predicted   7.397 7.4625  7.625  7.7655 7.548 7.399  7.631 7.756 7.586
## cvpred      7.401 7.4432  7.626  7.7850 7.536 7.384  7.641 7.813 7.590
## yds         7.555 7.4805  7.332  7.6958 7.806 7.682  7.116 7.985 7.738
## CV residual 0.154 0.0373 -0.293 -0.0892 0.269 0.298 -0.526 0.172 0.148
##                72     81    83     90     91     107   109   110    115
## Predicted   7.668  7.552 8.017  7.466  7.879  7.6689 7.544 7.727 7.6797
## cvpred      7.685  7.565 8.017  7.462  7.889  7.6906 7.560 7.715 7.6952
## yds         7.953  7.406 8.278  7.133  7.162  7.6751 8.024 7.822 7.7320
## CV residual 0.268 -0.159 0.261 -0.329 -0.727 -0.0154 0.465 0.107 0.0368
##                 125    126  133     137   141    142   146    152   164
## Predicted    7.6987  7.603 7.33  7.4371 7.320 7.5526 7.596  7.510 7.523
## cvpred       7.7453  7.630 7.34  7.4508 7.313 7.5685 7.581  7.505 7.513
## yds          7.6862  7.144 7.66  7.3512 7.455 7.6324 7.708  7.364 7.975
## CV residual -0.0591 -0.485 0.32 -0.0996 0.141 0.0639 0.127 -0.141 0.461
##               171    186    189    198   201    203   212   216    227
## Predicted   7.486  7.691  7.197  7.477 7.466 7.5991 7.314 7.311  7.549
## cvpred      7.489  7.775  7.211  7.488 7.467 7.5949 7.304 7.272  7.586
## yds         7.745  7.530  6.798  6.820 7.598 7.6770 7.526 8.009  7.398
## CV residual 0.256 -0.245 -0.413 -0.668 0.131 0.0821 0.222 0.737 -0.188
##                229   230     240
## Predicted    7.346  7.52  7.2423
## cvpred       7.363  7.56  7.3388
## yds          7.087  5.25  7.2620
## CV residual -0.276 -2.31 -0.0768
## 
## Sum of squares = 10.4    Mean square = 0.22    n = 48 
## 
## fold 3 
## Observations in test set: 48 
##                4     6    19    23    41    59     60    64    69   73
## Predicted   7.57 7.587 7.731 7.779 7.689 7.534  7.481 7.610 7.621 7.59
## cvpred      7.53 7.530 7.704 7.718 7.666 7.482  7.444 7.582 7.600 7.54
## yds         8.10 7.685 7.801 8.214 7.843 7.681  7.089 7.950 7.860 7.74
## CV residual 0.57 0.155 0.097 0.496 0.177 0.199 -0.355 0.369 0.261 0.20
##                 77    79    84    88    89    95   113    117  119     120
## Predicted    7.517 7.500 7.511 7.707 7.598 7.731 7.472  7.421 7.40  7.7265
## cvpred       7.485 7.461 7.467 7.675 7.564 7.706 7.440  7.381 7.34  7.6914
## yds          6.977 7.926 7.803 8.227 7.686 7.769 7.568  6.803 8.03  7.6271
## CV residual -0.508 0.465 0.336 0.551 0.122 0.063 0.128 -0.579 0.69 -0.0643
##               123   124   127   131   132   134    136   138   140    149
## Predicted   7.499 7.499 7.725 7.198 7.416  7.57  7.619 7.516 7.671  7.519
## cvpred      7.470 7.475 7.685 7.114 7.368  7.52  7.587 7.489 7.655  7.506
## yds         8.039 7.606 8.234 7.876 7.665  7.39  7.404 7.932 7.863  7.008
## CV residual 0.569 0.131 0.549 0.762 0.298 -0.13 -0.183 0.444 0.208 -0.498
##               150   153     154   158   163    177   178    182    192
## Predicted   7.600 7.320  7.5117 7.368 7.572 7.6059 7.226 7.5219  7.534
## cvpred      7.560 7.268  7.4608 7.365 7.528 7.5739 7.172 7.5161  7.495
## yds         8.144 7.597  7.3995 8.153 7.701 7.6203 8.131 7.6020  7.416
## CV residual 0.585 0.328 -0.0613 0.787 0.173 0.0464 0.958 0.0858 -0.079
##               196    208    209    213   221   223    226     228    235
## Predicted   7.283  7.443  7.374  7.354 7.541 7.431  7.589  7.4391  7.307
## cvpred      7.232  7.405  7.337  7.336 7.494 7.403  7.568  7.4096  7.270
## yds         7.457  7.143  7.200  7.089 8.080 7.705  7.426  7.3403  7.013
## CV residual 0.225 -0.262 -0.136 -0.246 0.586 0.302 -0.142 -0.0694 -0.257
## 
## Sum of squares = 7.4    Mean square = 0.15    n = 48 
## 
## fold 4 
## Observations in test set: 48 
##                 2      5     7     9    14       17    24    37    51
## Predicted   7.805  7.725 8.119 7.310 7.881  7.68575 7.539  7.29 7.665
## cvpred      7.779  7.756 7.878 7.425 7.800  7.73194 7.670  7.54 7.726
## yds         8.383  7.503 8.307 7.823 8.164  7.72626 8.143 -2.30 7.971
## CV residual 0.604 -0.252 0.429 0.397 0.364 -0.00569 0.473 -9.84 0.245
##                 52    53     55     62      74      76     85      86
## Predicted   7.7906 7.367  7.535  7.418  7.5522  7.5395  7.435  7.3353
## cvpred      7.8162 7.552  7.634  7.622  7.6519  7.6448  7.578  7.4601
## yds         7.8713 7.828  6.950  7.139  7.6286  7.6124  7.425  7.4056
## CV residual 0.0551 0.276 -0.684 -0.483 -0.0233 -0.0324 -0.153 -0.0545
##                87    92    93    99   102   111     118    128    129
## Predicted    7.69  7.70 7.661 7.661 7.534 7.624  7.3742  7.687  7.681
## cvpred       7.71  7.67 7.641 7.561 7.577 7.581  7.4864  7.649  7.725
## yds          6.88  6.87 8.013 7.699 8.046 7.949  7.3995  7.467  7.299
## CV residual -0.83 -0.80 0.372 0.137 0.469 0.368 -0.0869 -0.182 -0.426
##               130     151   155    156    157   159    162   165    166
## Predicted   7.888  7.3311 7.488 7.5436 7.4390 7.260  7.626 7.305  7.523
## cvpred      7.779  7.5293 7.595 7.6453 7.4894 7.459  7.680 7.397  7.610
## yds         8.083  7.5192 8.080 7.6672 7.5700 7.611  7.443 7.952  7.416
## CV residual 0.305 -0.0101 0.485 0.0219 0.0805 0.153 -0.237 0.555 -0.194
##                169    185    191    194    195    199   205    207   210
## Predicted    7.383  7.719  7.365  7.694  7.548  7.337  7.41  7.377 7.497
## cvpred       7.514  7.681  7.473  7.510  7.562  7.434  7.51  7.558 7.588
## yds          7.155  7.065  6.971  7.342  7.384  6.845  6.44  7.060 7.871
## CV residual -0.358 -0.617 -0.502 -0.168 -0.178 -0.589 -1.07 -0.499 0.282
##               231    236    237   239
## Predicted   7.252  7.692  7.723  7.24
## cvpred      7.370  7.535  7.591  7.40
## yds         7.923  7.377  6.923  6.36
## CV residual 0.553 -0.158 -0.668 -1.04
## 
## Sum of squares = 106    Mean square = 2.21    n = 48 
## 
## fold 5 
## Observations in test set: 48 
##                 1      10     13    25    27     29    30     34   38
## Predicted   7.436  7.6339  7.560 7.304 7.445  7.599 7.626  7.567 7.65
## cvpred      7.400  7.6122  7.506 7.042 7.386  7.565 7.607  7.504 7.65
## yds         8.045  7.5246  7.340 7.997 7.624  7.388 8.239  7.396 8.13
## CV residual 0.645 -0.0876 -0.165 0.955 0.238 -0.178 0.633 -0.108 0.48
##                   40      46      47     48    50    65      67     82
## Predicted    7.55961  7.5784  7.4504  7.470 7.445 7.394  7.5838  7.711
## cvpred       7.54016  7.5417  7.4080  7.409 7.378 7.321  7.5813  7.760
## yds          7.53802  7.5332  7.3512  7.055 7.818 8.097  7.5099  7.133
## CV residual -0.00215 -0.0085 -0.0568 -0.354 0.441 0.776 -0.0714 -0.626
##                 96      98   101  103     104    112   114   121   144
## Predicted    7.582 7.71228 7.438 7.56  7.9201  7.746 7.524 7.535 7.616
## cvpred       7.566 7.71897 7.458 7.52  7.9836  7.756 7.461 7.500 7.617
## yds          7.462 7.72228 7.976 7.68  7.9073  7.480 8.079 7.617 8.187
## CV residual -0.104 0.00331 0.518 0.16 -0.0763 -0.276 0.618 0.117 0.569
##              145   161    168   170    172  174    176   181     184
## Predicted   7.36 7.564  7.472 7.370  7.736 7.48 7.4938 7.452 7.56418
## cvpred      7.29 7.572  7.481 7.321  7.809 7.50 7.4474 7.454 7.53051
## yds         8.09 7.769  7.314 7.766  7.398 8.10 7.5230 7.861 7.54014
## CV residual 0.80 0.197 -0.167 0.445 -0.411 0.60 0.0756 0.407 0.00964
##                 187    190    193    204   211   214    217   219  222
## Predicted    7.2903 7.3734 7.1652 7.5098  7.69 7.214  7.435 7.207 6.79
## cvpred       7.1848 7.2704 7.0114 7.5257  7.69 7.123  7.394 7.063 6.41
## yds          7.1286 7.2897 7.0742 7.5401  6.65 7.268  7.016 7.370 7.50
## CV residual -0.0562 0.0193 0.0628 0.0144 -1.04 0.145 -0.378 0.307 1.09
##               224   232   233    238
## Predicted   7.453 7.275 7.480  7.682
## cvpred      7.434 7.186 7.421  7.744
## yds         7.559 7.599 7.757  7.390
## CV residual 0.125 0.414 0.336 -0.354
## 
## Sum of squares = 9.08    Mean square = 0.19    n = 48 
## 
## Overall (Sum over all 48 folds) 
##    ms 
## 0.585