# Read the QB statistics CSV
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictor columns (the c_* columns plus physical / test measures).
# The order given here fixes the column order of the modelling frame and hence
# the term order of the full-model formula.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Response variable, kept as a one-column data frame so cbind() keeps its name
yds <- qb_stats_w_combine["yds"]

# Modelling frame: response first, then predictors; rows with any NA are
# dropped and every column (response included) is centred and scaled.
data.scaled.w_combine.for_yds <- local({
  combined <- cbind(yds, college_stats)
  complete_rows <- na.omit(combined)
  data.frame(scale(complete_rows))
})

# Full linear model: yds regressed on every predictor in the frame
lm.scaled.w_combine.yds <- lm(
  formula = yds ~ .,
  data = data.scaled.w_combine.for_yds
)

# Stepwise AIC search (terms may be dropped and re-added) starting from the
# full model
step_reg.scaled.w_combine.yds <- stepAIC(
  object = lm.scaled.w_combine.yds,
  direction = "both"
)
## Start:  AIC=16.18
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - weight       1     0.002 23.5 14.2
## - c_rate       1     0.017 23.5 14.2
## - X40          1     0.027 23.5 14.2
## - cone         1     0.029 23.5 14.2
## - c_avg_yds    1     0.044 23.5 14.2
## - shuttle      1     0.058 23.5 14.3
## - c_avg_tds    1     0.091 23.6 14.3
## - c_pct        1     0.141 23.6 14.4
## - vert_leap    1     0.206 23.7 14.5
## - broad_jump   1     0.570 24.0 15.1
## - c_avg_inter  1     0.607 24.1 15.2
## - height       1     0.794 24.3 15.5
## - c_avg_cmpp   1     0.924 24.4 15.7
## <none>                     23.5 16.2
## - age          1     1.443 24.9 16.5
## - wonderlic    1     1.786 25.2 17.0
## - c_numyrs     1     1.822 25.3 17.1
## - c_avg_att    1     2.641 26.1 18.3
## 
## Step:  AIC=14.18
## yds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_rate       1     0.018 23.5 12.2
## - cone         1     0.028 23.5 12.2
## - X40          1     0.029 23.5 12.2
## - c_avg_yds    1     0.043 23.5 12.3
## - shuttle      1     0.058 23.5 12.3
## - c_avg_tds    1     0.090 23.6 12.3
## - c_pct        1     0.145 23.6 12.4
## - vert_leap    1     0.213 23.7 12.5
## - broad_jump   1     0.571 24.0 13.1
## - c_avg_inter  1     0.607 24.1 13.2
## - c_avg_cmpp   1     0.926 24.4 13.7
## <none>                     23.5 14.2
## - height       1     1.399 24.9 14.4
## - age          1     1.773 25.2 15.0
## - c_numyrs     1     1.932 25.4 15.3
## - wonderlic    1     2.089 25.6 15.5
## + weight       1     0.002 23.5 16.2
## - c_avg_att    1     2.827 26.3 16.6
## 
## Step:  AIC=12.21
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + cone + 
##     shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - X40          1      0.03 23.5 10.3
## - cone         1      0.04 23.5 10.3
## - shuttle      1      0.05 23.5 10.3
## - vert_leap    1      0.27 23.8 10.7
## - c_avg_yds    1      0.27 23.8 10.7
## - c_avg_tds    1      0.28 23.8 10.7
## - c_avg_inter  1      0.63 24.1 11.2
## - broad_jump   1      0.67 24.1 11.3
## - c_pct        1      1.21 24.7 12.2
## <none>                     23.5 12.2
## - height       1      1.55 25.0 12.7
## - c_numyrs     1      1.92 25.4 13.3
## - age          1      2.00 25.5 13.4
## + c_rate       1      0.02 23.5 14.2
## + weight       1      0.00 23.5 14.2
## - wonderlic    1      2.81 26.3 14.6
## - c_avg_cmpp   1      3.12 26.6 15.1
## - c_avg_att    1      3.20 26.7 15.2
## 
## Step:  AIC=10.26
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + wonderlic + cone + shuttle + 
##     vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - cone         1      0.05 23.6  8.34
## - shuttle      1      0.09 23.6  8.41
## - c_avg_yds    1      0.26 23.8  8.69
## - c_avg_tds    1      0.26 23.8  8.70
## - vert_leap    1      0.36 23.9  8.85
## - broad_jump   1      0.71 24.2  9.43
## - c_avg_inter  1      0.74 24.2  9.47
## - c_pct        1      1.21 24.7 10.22
## <none>                     23.5 10.26
## - height       1      1.80 25.3 11.14
## - c_numyrs     1      1.89 25.4 11.28
## - age          1      1.99 25.5 11.43
## + X40          1      0.03 23.5 12.21
## + c_rate       1      0.02 23.5 12.23
## + weight       1      0.01 23.5 12.25
## - wonderlic    1      2.78 26.3 12.62
## - c_avg_cmpp   1      3.18 26.7 13.22
## - c_avg_att    1      3.25 26.8 13.32
## 
## Step:  AIC=8.34
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + wonderlic + shuttle + 
##     vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - shuttle      1      0.05 23.6  6.43
## - c_avg_tds    1      0.24 23.8  6.74
## - c_avg_yds    1      0.27 23.8  6.80
## - vert_leap    1      0.34 23.9  6.90
## - c_avg_inter  1      0.70 24.3  7.48
## - broad_jump   1      0.80 24.4  7.65
## <none>                     23.6  8.34
## - c_pct        1      1.25 24.8  8.37
## - height       1      1.77 25.3  9.18
## - c_numyrs     1      1.86 25.4  9.30
## - age          1      2.20 25.8  9.82
## + cone         1      0.05 23.5 10.26
## + X40          1      0.04 23.5 10.28
## + c_rate       1      0.03 23.5 10.29
## + weight       1      0.00 23.6 10.34
## - wonderlic    1      2.82 26.4 10.76
## - c_avg_cmpp   1      3.18 26.7 11.28
## - c_avg_att    1      3.22 26.8 11.33
## 
## Step:  AIC=6.43
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + wonderlic + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_tds    1      0.19 23.8  4.75
## - c_avg_yds    1      0.42 24.0  5.12
## - vert_leap    1      0.60 24.2  5.41
## - c_avg_inter  1      0.72 24.3  5.61
## - broad_jump   1      0.79 24.4  5.71
## <none>                     23.6  6.43
## - c_pct        1      1.26 24.9  6.45
## - c_numyrs     1      1.96 25.6  7.54
## - age          1      2.23 25.9  7.95
## - height       1      2.34 26.0  8.12
## + X40          1      0.07 23.6  8.32
## + shuttle      1      0.05 23.6  8.34
## + c_rate       1      0.02 23.6  8.40
## + cone         1      0.01 23.6  8.41
## + weight       1      0.00 23.6  8.43
## - c_avg_cmpp   1      3.19 26.8  9.38
## - c_avg_att    1      3.40 27.0  9.67
## - wonderlic    1      3.91 27.5 10.41
## 
## Step:  AIC=4.75
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_yds + 
##     c_numyrs + c_avg_att + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_inter  1      0.62 24.4 3.76
## - vert_leap    1      0.66 24.5 3.82
## - broad_jump   1      0.77 24.6 3.99
## - c_avg_yds    1      1.16 25.0 4.61
## <none>                     23.8 4.75
## - c_pct        1      1.26 25.1 4.76
## - c_numyrs     1      2.03 25.8 5.94
## - height       1      2.27 26.1 6.31
## + c_avg_tds    1      0.19 23.6 6.43
## + c_rate       1      0.16 23.6 6.49
## + X40          1      0.03 23.8 6.71
## - age          1      2.54 26.4 6.71
## + cone         1      0.02 23.8 6.72
## + weight       1      0.00 23.8 6.74
## + shuttle      1      0.00 23.8 6.74
## - c_avg_cmpp   1      3.34 27.1 7.87
## - c_avg_att    1      3.68 27.5 8.36
## - wonderlic    1      4.40 28.2 9.36
## 
## Step:  AIC=3.76
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_yds + c_numyrs + 
##     c_avg_att + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - vert_leap    1      0.55 25.0 2.62
## - c_avg_yds    1      0.66 25.1 2.79
## - broad_jump   1      0.80 25.2 3.01
## - c_pct        1      0.93 25.4 3.22
## <none>                     24.4 3.76
## - c_numyrs     1      1.50 25.9 4.08
## + c_avg_inter  1      0.62 23.8 4.75
## - height       1      2.24 26.7 5.18
## + X40          1      0.12 24.3 5.56
## + c_avg_tds    1      0.09 24.3 5.61
## + cone         1      0.02 24.4 5.72
## + weight       1      0.02 24.4 5.72
## + shuttle      1      0.02 24.4 5.72
## + c_rate       1      0.00 24.4 5.75
## - age          1      2.67 27.1 5.80
## - c_avg_cmpp   1      2.74 27.2 5.90
## - c_avg_att    1      4.36 28.8 8.16
## - wonderlic    1      5.16 29.6 9.23
## 
## Step:  AIC=2.62
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_yds + c_numyrs + 
##     c_avg_att + wonderlic + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - broad_jump   1      0.26 25.2 1.03
## - c_avg_yds    1      0.50 25.5 1.40
## - c_pct        1      0.98 25.9 2.11
## <none>                     25.0 2.62
## - c_numyrs     1      1.38 26.4 2.71
## + vert_leap    1      0.55 24.4 3.76
## + c_avg_inter  1      0.50 24.5 3.82
## + X40          1      0.38 24.6 4.02
## + shuttle      1      0.19 24.8 4.32
## + c_avg_tds    1      0.14 24.8 4.40
## - height       1      2.56 27.5 4.42
## + cone         1      0.11 24.9 4.45
## + c_rate       1      0.02 24.9 4.58
## + weight       1      0.01 25.0 4.60
## - age          1      2.70 27.7 4.62
## - c_avg_cmpp   1      3.36 28.3 5.54
## - c_avg_att    1      4.76 29.7 7.42
## - wonderlic    1      4.99 30.0 7.71
## 
## Step:  AIC=1.03
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_yds + c_numyrs + 
##     c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_yds    1      0.52 25.8 -0.17
## - c_pct        1      0.73 26.0  0.14
## - c_numyrs     1      1.13 26.4  0.73
## <none>                     25.2  1.03
## + c_avg_inter  1      0.60 24.6  2.08
## + broad_jump   1      0.26 25.0  2.62
## - height       1      2.45 27.7  2.65
## + c_avg_tds    1      0.09 25.1  2.89
## + weight       1      0.07 25.2  2.92
## + shuttle      1      0.03 25.2  2.98
## + c_rate       1      0.02 25.2  2.99
## + vert_leap    1      0.01 25.2  3.01
## + cone         1      0.00 25.2  3.02
## + X40          1      0.00 25.2  3.03
## - age          1      2.78 28.0  3.10
## - c_avg_cmpp   1      3.10 28.3  3.54
## - c_avg_att    1      4.51 29.7  5.43
## - wonderlic    1      5.25 30.5  6.40
## 
## Step:  AIC=-0.17
## yds ~ height + age + c_avg_cmpp + c_pct + c_numyrs + c_avg_att + 
##     wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_pct        1      0.51 26.3 -1.42
## - c_numyrs     1      0.85 26.6 -0.90
## <none>                     25.8 -0.17
## + c_avg_yds    1      0.52 25.2  1.03
## + c_rate       1      0.50 25.3  1.07
## + c_avg_tds    1      0.47 25.3  1.11
## - height       1      2.32 28.1  1.19
## + broad_jump   1      0.28 25.5  1.40
## + c_avg_inter  1      0.15 25.6  1.59
## + weight       1      0.12 25.6  1.64
## + shuttle      1      0.04 25.7  1.76
## + X40          1      0.01 25.8  1.80
## + cone         1      0.00 25.8  1.82
## + vert_leap    1      0.00 25.8  1.82
## - age          1      2.79 28.6  1.83
## - c_avg_att    1      3.98 29.7  3.43
## - c_avg_cmpp   1      4.50 30.3  4.11
## - wonderlic    1      5.69 31.4  5.61
## 
## Step:  AIC=-1.42
## yds ~ height + age + c_avg_cmpp + c_numyrs + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_numyrs     1      0.40 26.7 -2.83
## <none>                     26.3 -1.42
## + c_pct        1      0.51 25.8 -0.17
## - height       1      2.37 28.6 -0.05
## + c_avg_tds    1      0.41 25.9 -0.03
## - age          1      2.45 28.7  0.06
## + c_avg_yds    1      0.30 26.0  0.14
## + weight       1      0.12 26.1  0.40
## + shuttle      1      0.10 26.2  0.43
## + vert_leap    1      0.07 26.2  0.48
## + c_avg_inter  1      0.06 26.2  0.49
## + c_rate       1      0.06 26.2  0.49
## + broad_jump   1      0.04 26.2  0.52
## + cone         1      0.00 26.3  0.58
## + X40          1      0.00 26.3  0.58
## - c_avg_att    1      4.05 30.3  2.17
## - c_avg_cmpp   1      5.36 31.6  3.83
## - wonderlic    1      5.53 31.8  4.03
## 
## Step:  AIC=-2.83
## yds ~ height + age + c_avg_cmpp + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS    AIC
## <none>                     26.7 -2.829
## - height       1      2.22 28.9 -1.704
## + c_avg_tds    1      0.44 26.2 -1.480
## - age          1      2.39 29.1 -1.476
## + c_numyrs     1      0.40 26.3 -1.415
## + c_avg_yds    1      0.22 26.4 -1.152
## + weight       1      0.22 26.4 -1.147
## + shuttle      1      0.14 26.5 -1.041
## + vert_leap    1      0.09 26.6 -0.967
## + c_pct        1      0.05 26.6 -0.905
## + cone         1      0.01 26.6 -0.851
## + broad_jump   1      0.01 26.7 -0.841
## + X40          1      0.01 26.7 -0.840
## + c_avg_inter  1      0.01 26.7 -0.840
## + c_rate       1      0.00 26.7 -0.834
## - c_avg_att    1      3.66 30.3  0.183
## - c_avg_cmpp   1      4.98 31.6  1.845
## - wonderlic    1      5.22 31.9  2.142
# Print the call, residual quantiles, coefficient table and fit statistics of
# the model returned by the stepwise search
summary(step_reg.scaled.w_combine.yds)
## 
## Call:
## lm(formula = yds ~ height + age + c_avg_cmpp + c_avg_att + wonderlic, 
##     data = data.scaled.w_combine.for_yds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7245 -0.4369 -0.0374  0.4716  1.7353 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -4.69e-16   1.44e-01    0.00    1.000  
## height       2.60e-01   1.57e-01    1.66    0.107  
## age         -2.87e-01   1.67e-01   -1.72    0.095 .
## c_avg_cmpp   1.69e+00   6.82e-01    2.48    0.018 *
## c_avg_att   -1.44e+00   6.78e-01   -2.13    0.041 *
## wonderlic   -4.23e-01   1.67e-01   -2.54    0.016 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.899 on 33 degrees of freedom
## Multiple R-squared: 0.298,   Adjusted R-squared: 0.192 
## F-statistic: 2.81 on 5 and 33 DF,  p-value: 0.0322
# Draw the diagnostic plots for the stepwise-selected model (the transcript
# below records four figures from this call)
plot(step_reg.scaled.w_combine.yds)

## plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over the same standardised frame, recording up to 10
# candidate models for each subset size.
leaps.scaled.w_combine.yds <- regsubsets(yds ~ ., data = data.scaled.w_combine.for_yds, 
    nbest = 10)

# Plot R-squared against subset size.
# legend = FALSE is deliberate: with the default legend setting, subsets()
# (presumably car::subsets) asks for the legend position via locator(), which
# returns NULL on a non-interactive graphics device such as a knitr run.
# legend() then stops with "Error: invalid coordinate lengths" after the plot
# has been drawn. Suppressing the legend lets the call complete; predictor
# abbreviations can be looked up from the regsubsets object instead.
subsets(leaps.scaled.w_combine.yds, statistic = "rsq", legend = FALSE)

## plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model.
# The data frame and the fitted model are passed positionally: DAAG renamed
# the first argument of cv.lm() from `df` to `data`, so the named form
# `df = ...` is not portable across DAAG versions, while positional matching
# works with either signature.
# NOTE(review): the model terms were chosen by stepAIC on these same rows, so
# this cross-validated error is likely optimistic. Consider repeating the
# selection inside each fold.
cv.lm(data.scaled.w_combine.for_yds, step_reg.scaled.w_combine.yds, m = 5)
## Analysis of Variance Table
## 
## Response: yds
##            Df Sum Sq Mean Sq F value Pr(>F)  
## height      1   1.85    1.85    2.28  0.140  
## age         1   0.77    0.77    0.95  0.336  
## c_avg_cmpp  1   1.28    1.28    1.58  0.218  
## c_avg_att   1   2.22    2.22    2.75  0.107  
## wonderlic   1   5.22    5.22    6.46  0.016 *
## Residuals  33  26.66    0.81                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

## plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                  3     21     24     40      42     52     61
## Predicted   -0.278 -0.578  0.522 -0.698 -0.1943 -1.089 -0.080
## cvpred      -0.172 -0.390  0.810 -0.605  0.0484 -1.032 -0.191
## yds         -1.230 -0.615 -0.833  0.149 -0.4609 -1.335 -0.509
## CV residual -1.059 -0.226 -1.643  0.754 -0.5093 -0.302 -0.318
## 
## Sum of squares = 4.89    Mean square = 0.7    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                    6     18    25    37     43     50     55      63
## Predicted   -0.01642  0.206 1.539 0.260 -0.534 -0.446 -0.505  0.1233
## cvpred      -0.00115  0.170 1.401 0.107 -0.246 -0.292 -0.408  0.0921
## yds          1.12099 -0.373 1.712 0.653 -1.223 -0.119 -1.429 -0.2263
## CV residual  1.12214 -0.543 0.311 0.545 -0.976  0.173 -1.021 -0.3184
## 
## Sum of squares = 4.07    Mean square = 0.51    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                  5       7    16      20    28      32      49     64
## Predicted   0.4557 -0.4340 0.457 -0.9541 0.541  0.0667  0.2173  0.566
## cvpred      0.0963 -0.0198 0.328 -0.5982 0.806  0.2040  0.0811  0.649
## yds         2.1909 -1.2897 0.816 -0.6604 1.085 -0.2186 -0.1474 -0.709
## CV residual 2.0946 -1.2699 0.488 -0.0622 0.279 -0.4227 -0.2285 -1.357
## 
## Sum of squares = 8.39    Mean square = 1.05    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                 12    13     26      30      38     39     59     65
## Predicted   -0.439 0.929  0.449 0.00627 -0.4890  1.029 -0.480 -0.255
## cvpred      -0.434 0.867  0.391 0.07033 -0.5823  1.421 -0.555 -0.469
## yds         -0.800 1.837  0.237 0.99601 -0.0893 -0.571 -0.925  0.513
## CV residual -0.366 0.970 -0.154 0.92568  0.4930 -1.991 -0.369  0.982
## 
## Sum of squares = 7.26    Mean square = 0.91    n = 8 
## 
## fold 5 
## Observations in test set: 8 
##                  1       4    15      17     19     27      46      56
## Predicted    0.295  0.0796 0.423 -0.2180 -0.367  0.252  0.0248 -0.3879
## cvpred      -0.285  0.9490 0.056  0.1355  0.258 -1.068  0.5501 -0.0637
## yds          1.597 -1.6449 0.551  0.0861 -0.573  1.547  0.0752  0.8140
## CV residual  1.882 -2.5938 0.495 -0.0493 -0.831  2.615 -0.4749  0.8777
## 
## Sum of squares = 19    Mean square = 2.38    n = 8 
## 
## Overall (Sum over all 8 folds) 
##   ms 
## 1.12