# Load the quarterback table from disk (path is relative to the working
# directory).
qb_stats_w_combine <- read.csv(file = "../data/qb_stats_w_combine.csv")

# Columns used as candidate predictors. The order is kept as-is because it
# determines the column order of the modelling data frame built below.
predictors <- c(
    "height", "weight", "age",
    "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
    "c_numyrs", "c_avg_att",
    "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]

# Response variable, kept as a one-column data frame so that cbind() below
# carries the column name "yds" through.
yds <- qb_stats_w_combine[, "yds", drop = FALSE]

# Modelling data set: bind response and predictors, drop every row that has a
# missing value, then take log(x + 0.1) of all columns.
# NOTE(review): the 0.1 offset presumably keeps log() finite for zero
# entries -- confirm that this is the intent.
data.log.w_combine.for_yds <- data.frame(
    log(na.omit(cbind(yds, college_stats)) + 0.1)
)

# Full linear model: log-yds regressed on every other column.
lm.log.w_combine.yds <- lm(
    formula = yds ~ .,
    data = data.log.w_combine.for_yds
)

# Stepwise AIC search starting from the full model; direction = "both" lets
# each step either drop a term or re-add a previously dropped one.
step_reg.log.w_combine.yds <- stepAIC(
    lm.log.w_combine.yds,
    direction = "both"
)
## Start:  AIC=-54.97
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - broad_jump   1     0.000 3.78 -57.0
## - X40          1     0.006 3.79 -56.9
## - weight       1     0.010 3.79 -56.9
## - vert_leap    1     0.024 3.81 -56.7
## - age          1     0.026 3.81 -56.7
## - height       1     0.033 3.82 -56.6
## - cone         1     0.061 3.85 -56.4
## - shuttle      1     0.081 3.86 -56.2
## - c_avg_inter  1     0.082 3.87 -56.1
## - c_avg_yds    1     0.098 3.88 -56.0
## - c_rate       1     0.129 3.91 -55.7
## <none>                     3.78 -55.0
## - c_avg_tds    1     0.222 4.01 -54.7
## - wonderlic    1     0.249 4.03 -54.5
## - c_numyrs     1     0.253 4.04 -54.5
## - c_avg_att    1     0.657 4.44 -50.7
## - c_pct        1     0.671 4.46 -50.6
## - c_avg_cmpp   1     0.713 4.50 -50.2
## 
## Step:  AIC=-56.97
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - X40          1     0.006 3.79 -58.9
## - weight       1     0.009 3.79 -58.9
## - age          1     0.027 3.81 -58.7
## - height       1     0.032 3.82 -58.6
## - vert_leap    1     0.033 3.82 -58.6
## - cone         1     0.061 3.85 -58.4
## - shuttle      1     0.081 3.87 -58.1
## - c_avg_inter  1     0.089 3.87 -58.1
## - c_avg_yds    1     0.112 3.90 -57.8
## - c_rate       1     0.149 3.93 -57.5
## <none>                     3.78 -57.0
## - wonderlic    1     0.250 4.03 -56.5
## - c_avg_tds    1     0.255 4.04 -56.4
## - c_numyrs     1     0.269 4.05 -56.3
## + broad_jump   1     0.000 3.78 -55.0
## - c_avg_att    1     0.664 4.45 -52.7
## - c_pct        1     0.671 4.46 -52.6
## - c_avg_cmpp   1     0.713 4.50 -52.2
## 
## Step:  AIC=-58.91
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic + 
##     cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - weight       1     0.013 3.80 -60.8
## - height       1     0.027 3.82 -60.6
## - vert_leap    1     0.028 3.82 -60.6
## - age          1     0.029 3.82 -60.6
## - cone         1     0.062 3.85 -60.3
## - shuttle      1     0.076 3.87 -60.1
## - c_avg_inter  1     0.083 3.87 -60.1
## - c_avg_yds    1     0.120 3.91 -59.7
## - c_rate       1     0.156 3.95 -59.3
## <none>                     3.79 -58.9
## - wonderlic    1     0.263 4.05 -58.3
## - c_numyrs     1     0.263 4.05 -58.3
## - c_avg_tds    1     0.265 4.06 -58.3
## + X40          1     0.006 3.78 -57.0
## + broad_jump   1     0.000 3.79 -56.9
## - c_avg_att    1     0.658 4.45 -54.7
## - c_pct        1     0.666 4.46 -54.6
## - c_avg_cmpp   1     0.707 4.50 -54.2
## 
## Step:  AIC=-60.77
## yds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic + 
##     cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - vert_leap    1     0.020 3.82 -62.6
## - age          1     0.054 3.86 -62.2
## - cone         1     0.083 3.89 -61.9
## - c_avg_inter  1     0.087 3.89 -61.9
## - height       1     0.107 3.91 -61.7
## - shuttle      1     0.124 3.93 -61.5
## - c_avg_yds    1     0.133 3.94 -61.4
## - c_rate       1     0.182 3.99 -61.0
## <none>                     3.80 -60.8
## - c_numyrs     1     0.300 4.10 -59.8
## - c_avg_tds    1     0.327 4.13 -59.6
## - wonderlic    1     0.379 4.18 -59.1
## + weight       1     0.013 3.79 -58.9
## + X40          1     0.010 3.79 -58.9
## + broad_jump   1     0.001 3.80 -58.8
## - c_pct        1     0.713 4.52 -56.1
## - c_avg_att    1     0.730 4.53 -55.9
## - c_avg_cmpp   1     0.776 4.58 -55.5
## 
## Step:  AIC=-62.57
## yds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic + 
##     cone + shuttle
## 
##               Df Sum of Sq  RSS   AIC
## - age          1     0.050 3.87 -64.1
## - cone         1     0.066 3.89 -63.9
## - c_avg_inter  1     0.083 3.91 -63.7
## - height       1     0.092 3.92 -63.7
## - c_avg_yds    1     0.119 3.94 -63.4
## - c_rate       1     0.169 3.99 -62.9
## <none>                     3.82 -62.6
## - shuttle      1     0.207 4.03 -62.5
## - c_numyrs     1     0.301 4.12 -61.6
## - c_avg_tds    1     0.320 4.14 -61.4
## - wonderlic    1     0.370 4.19 -61.0
## + vert_leap    1     0.020 3.80 -60.8
## + weight       1     0.005 3.82 -60.6
## + broad_jump   1     0.004 3.82 -60.6
## + X40          1     0.000 3.82 -60.6
## - c_pct        1     0.707 4.53 -58.0
## - c_avg_att    1     0.716 4.54 -57.9
## - c_avg_cmpp   1     0.767 4.59 -57.4
## 
## Step:  AIC=-64.06
## yds ~ height + c_avg_cmpp + c_rate + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + wonderlic + cone + shuttle
## 
##               Df Sum of Sq  RSS   AIC
## - cone         1     0.094 3.97 -65.1
## - c_avg_yds    1     0.109 3.98 -65.0
## - c_avg_inter  1     0.115 3.99 -64.9
## - height       1     0.123 4.00 -64.8
## - c_rate       1     0.157 4.03 -64.5
## <none>                     3.87 -64.1
## - shuttle      1     0.226 4.10 -63.9
## - c_numyrs     1     0.303 4.18 -63.1
## - c_avg_tds    1     0.320 4.19 -63.0
## - wonderlic    1     0.321 4.20 -63.0
## + age          1     0.050 3.82 -62.6
## + weight       1     0.024 3.85 -62.3
## + vert_leap    1     0.016 3.86 -62.2
## + broad_jump   1     0.003 3.87 -62.1
## + X40          1     0.002 3.87 -62.1
## - c_pct        1     0.696 4.57 -59.6
## - c_avg_att    1     0.701 4.58 -59.6
## - c_avg_cmpp   1     0.752 4.63 -59.1
## 
## Step:  AIC=-65.13
## yds ~ height + c_avg_cmpp + c_rate + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + wonderlic + shuttle
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_inter  1     0.076 4.04 -66.4
## - height       1     0.113 4.08 -66.0
## - c_avg_yds    1     0.122 4.09 -65.9
## - shuttle      1     0.133 4.10 -65.8
## - c_rate       1     0.165 4.13 -65.5
## <none>                     3.97 -65.1
## - wonderlic    1     0.248 4.22 -64.8
## - c_numyrs     1     0.275 4.24 -64.5
## - c_avg_tds    1     0.316 4.28 -64.1
## + cone         1     0.094 3.87 -64.1
## + age          1     0.078 3.89 -63.9
## + weight       1     0.067 3.90 -63.8
## + X40          1     0.017 3.95 -63.3
## + broad_jump   1     0.003 3.96 -63.2
## + vert_leap    1     0.000 3.97 -63.1
## - c_pct        1     0.738 4.71 -60.5
## - c_avg_att    1     0.742 4.71 -60.4
## - c_avg_cmpp   1     0.796 4.76 -60.0
## 
## Step:  AIC=-66.39
## yds ~ height + c_avg_cmpp + c_rate + c_pct + c_avg_tds + c_avg_yds + 
##     c_numyrs + c_avg_att + wonderlic + shuttle
## 
##               Df Sum of Sq  RSS   AIC
## - shuttle      1     0.125 4.17 -67.2
## - height       1     0.144 4.19 -67.0
## <none>                     4.04 -66.4
## - c_numyrs     1     0.271 4.32 -65.9
## + age          1     0.102 3.94 -65.4
## + c_avg_inter  1     0.076 3.97 -65.1
## + weight       1     0.073 3.97 -65.1
## - c_avg_yds    1     0.365 4.41 -65.0
## - wonderlic    1     0.370 4.41 -65.0
## + cone         1     0.055 3.99 -64.9
## + broad_jump   1     0.009 4.04 -64.5
## + X40          1     0.007 4.04 -64.5
## + vert_leap    1     0.001 4.04 -64.4
## - c_rate       1     0.510 4.55 -63.8
## - c_pct        1     0.663 4.71 -62.5
## - c_avg_tds    1     0.666 4.71 -62.4
## - c_avg_att    1     0.734 4.78 -61.9
## - c_avg_cmpp   1     0.740 4.78 -61.8
## 
## Step:  AIC=-67.2
## yds ~ height + c_avg_cmpp + c_rate + c_pct + c_avg_tds + c_avg_yds + 
##     c_numyrs + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## <none>                     4.17 -67.2
## - height       1     0.274 4.44 -66.7
## + shuttle      1     0.125 4.04 -66.4
## - c_numyrs     1     0.315 4.48 -66.4
## + weight       1     0.096 4.07 -66.1
## + age          1     0.091 4.08 -66.1
## + c_avg_inter  1     0.069 4.10 -65.8
## - c_avg_yds    1     0.383 4.55 -65.8
## + vert_leap    1     0.057 4.11 -65.7
## + X40          1     0.026 4.14 -65.4
## + broad_jump   1     0.010 4.16 -65.3
## + cone         1     0.000 4.17 -65.2
## - c_rate       1     0.529 4.70 -64.5
## - wonderlic    1     0.541 4.71 -64.4
## - c_avg_tds    1     0.657 4.83 -63.5
## - c_pct        1     0.714 4.88 -63.0
## - c_avg_att    1     0.782 4.95 -62.5
## - c_avg_cmpp   1     0.794 4.96 -62.4
# Print the coefficient table and fit statistics of the model retained by the
# stepwise AIC search.
summary(step_reg.log.w_combine.yds)
## 
## Call:
## lm(formula = yds ~ height + c_avg_cmpp + c_rate + c_pct + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + wonderlic, data = data.log.w_combine.for_yds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7806 -0.1560  0.0142  0.2552  0.5404 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  228.627    100.921    2.27    0.031 *
## height         3.846      2.787    1.38    0.178  
## c_avg_cmpp    46.257     19.686    2.35    0.026 *
## c_rate       -12.795      6.668   -1.92    0.065 .
## c_pct        -38.400     17.235   -2.23    0.034 *
## c_avg_tds      2.140      1.001    2.14    0.041 *
## c_avg_yds      6.109      3.745    1.63    0.114  
## c_numyrs       0.541      0.365    1.48    0.149  
## c_avg_att    -54.328     23.299   -2.33    0.027 *
## wonderlic     -0.486      0.251   -1.94    0.062 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.379 on 29 degrees of freedom
## Multiple R-squared: 0.326,   Adjusted R-squared: 0.117 
## F-statistic: 1.56 on 9 and 29 DF,  p-value: 0.175
# Draw the diagnostic plots for the stepwise-selected linear model.
# NOTE(review): the rendered output reports "NaNs produced" warnings from this
# call; the cause is not visible here (possibly an observation with
# leverage 1) -- TODO confirm.
plot(step_reg.log.w_combine.yds)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

## Warning: NaNs produced
## Warning: NaNs produced

plot of chunk unnamed-chunk-1

# Best-subsets search on the same log-transformed data, keeping the 10 best
# models for each subset size.
leaps.log.w_combine.yds <- regsubsets(yds ~ ., data = data.log.w_combine.for_yds, 
    nbest = 10)

# Plot R-squared against subset size.
# The legend position is given explicitly: when `legend` is omitted, subsets()
# asks for a mouse click via locator(1), which returns nothing on a
# non-interactive device (e.g. when knitting) and aborts the call with
# "invalid coordinate lengths" after the points have been drawn.
subsets(leaps.log.w_combine.yds, statistic = "rsq", legend = "bottomright")
## Error: invalid coordinate lengths

plot of chunk unnamed-chunk-1

# Cross-validate the stepwise-selected model on the log-transformed data using
# m = 5 folds.
# NOTE(review): in the rendered output, observation 37 (fold 2) has a CV
# residual of -7.54, which dominates the overall mean square of 1.7 -- worth
# inspecting that row before trusting the figure.
cv.lm(df = data.log.w_combine.for_yds, step_reg.log.w_combine.yds, m = 5)  # 5-fold cross-validation
## Analysis of Variance Table
## 
## Response: yds
##            Df Sum Sq Mean Sq F value Pr(>F)  
## height      1   0.24   0.243    1.69  0.204  
## c_avg_cmpp  1   0.00   0.002    0.01  0.917  
## c_rate      1   0.22   0.216    1.50  0.230  
## c_pct       1   0.04   0.036    0.25  0.619  
## c_avg_tds   1   0.29   0.290    2.02  0.166  
## c_avg_yds   1   0.01   0.010    0.07  0.792  
## c_numyrs    1   0.01   0.006    0.04  0.841  
## c_avg_att   1   0.67   0.671    4.67  0.039 *
## wonderlic   1   0.54   0.541    3.76  0.062 .
## Residuals  29   4.17   0.144                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                  3     21     24    40    42     52    61
## Predicted    7.289  7.568  7.910 7.632  7.73  7.705 7.404
## cvpred       7.332  7.706  8.028 7.677  7.88  7.856 7.094
## yds          7.133  7.503  7.388 7.828  7.58  7.055 7.555
## CV residual -0.199 -0.202 -0.641 0.152 -0.30 -0.802 0.461
## 
## Sum of squares = 1.46    Mean square = 0.21    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                 6     18      25    37     43    50     55     63
## Predicted   7.655  8.033  8.2497  8.02  7.402 7.430  7.269  7.789
## cvpred      7.577  8.099  8.3497 15.54  7.345 7.445  7.247  7.845
## yds         8.131  7.618  8.2782  8.00  7.139 7.726  6.977  7.682
## CV residual 0.554 -0.481 -0.0715 -7.54 -0.206 0.282 -0.269 -0.163
## 
## Sum of squares = 57.6    Mean square = 7.21    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                 5     7    16    20    28     32     49    64
## Predicted   7.843  7.56 7.881  7.40  7.98  7.665  7.891  8.02
## cvpred      7.500  8.10 7.705  7.81  8.32  7.962  7.997  8.18
## yds         8.383  7.09 8.046  7.48  8.12  7.685  7.715  7.46
## CV residual 0.883 -1.01 0.341 -0.33 -0.20 -0.277 -0.282 -0.72
## 
## Sum of squares = 2.75    Mean square = 0.34    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                 12    13    26    30    38     39     59    65
## Predicted    7.547 7.949 7.904 7.674 7.446  7.585  7.425 7.638
## cvpred       7.551 7.573 7.718 7.730 7.215  7.654  7.817 7.434
## yds          7.406 8.307 7.860 8.097 7.738  7.526  7.334 7.953
## CV residual -0.145 0.734 0.142 0.367 0.523 -0.129 -0.483 0.518
## 
## Sum of squares = 1.51    Mean square = 0.19    n = 8 
## 
## fold 5 
## Observations in test set: 8 
##                 1     4    15    17     19    27     46    56
## Predicted   7.743  7.55 7.893 7.587  7.631 7.913  7.787 7.578
## cvpred      7.520  7.94 7.816 7.657  7.833 7.404  8.104 7.716
## yds         8.251  6.77 7.965 7.806  7.525 8.239  7.801 8.045
## CV residual 0.731 -1.16 0.149 0.149 -0.308 0.835 -0.303 0.329
## 
## Sum of squares = 2.92    Mean square = 0.37    n = 8 
## 
## Overall (Sum over all 8 folds) 
##  ms 
## 1.7