# Fetch data
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictor columns. The c_* names look like college statistics and
# the remainder like physical / combine measurements -- confirm against the CSV.
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable. Single-bracket indexing keeps a one-column data
# frame, so the column name survives the cbind() below and can be used in the
# model formula.
cpct <- qb_stats_w_combine["completion_percentage"]

# Generate the clean data set: response first, then predictors. Rows with any
# NA are dropped *before* scaling, so every column is centred and scaled using
# only the rows that actually enter the model.
data.scaled.w_combine.for_cpct <- data.frame(
  scale(na.omit(cbind(cpct, college_stats)))
)

# Fit the full linear model: response against every remaining column.
lm.scaled.w_combine.cpct <- lm(
  formula = completion_percentage ~ .,
  data = data.scaled.w_combine.for_cpct
)

# Find the optimum linear regression model for cpct by stepwise AIC search,
# allowing terms to be both dropped and re-added at each step.
# stepAIC is namespace-qualified so this runs without a prior library(MASS).
step_reg.scaled.w_combine.cpct <- MASS::stepAIC(
  lm.scaled.w_combine.cpct,
  direction = "both"
)
## Start:  AIC=7.05
## completion_percentage ~ height + weight + age + c_avg_cmpp + 
##     c_rate + c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - height       1     0.002 17.7  5.06
## - c_rate       1     0.010 17.8  5.07
## - c_avg_yds    1     0.059 17.8  5.18
## - c_pct        1     0.061 17.8  5.18
## - broad_jump   1     0.079 17.8  5.22
## - wonderlic    1     0.113 17.9  5.30
## - X40          1     0.166 17.9  5.41
## - c_avg_inter  1     0.170 17.9  5.42
## - age          1     0.361 18.1  5.82
## - c_avg_att    1     0.526 18.3  6.16
## - c_numyrs     1     0.549 18.3  6.21
## - weight       1     0.881 18.6  6.90
## <none>                     17.7  7.05
## - c_avg_cmpp   1     0.975 18.7  7.09
## - c_avg_tds    1     1.134 18.9  7.41
## - shuttle      1     1.310 19.1  7.76
## - cone         1     1.386 19.1  7.91
## - vert_leap    1     2.558 20.3 10.17
## 
## Step:  AIC=5.06
## completion_percentage ~ weight + age + c_avg_cmpp + c_rate + 
##     c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_rate       1     0.013 17.8 3.09
## - c_avg_yds    1     0.068 17.8 3.20
## - c_pct        1     0.070 17.8 3.21
## - broad_jump   1     0.079 17.8 3.23
## - wonderlic    1     0.130 17.9 3.33
## - X40          1     0.167 17.9 3.41
## - c_avg_inter  1     0.172 17.9 3.43
## - age          1     0.390 18.1 3.88
## - c_avg_att    1     0.532 18.3 4.18
## - c_numyrs     1     0.560 18.3 4.24
## <none>                     17.7 5.06
## - c_avg_cmpp   1     0.985 18.7 5.11
## - c_avg_tds    1     1.223 19.0 5.59
## - shuttle      1     1.322 19.1 5.79
## - cone         1     1.477 19.2 6.10
## - weight       1     1.728 19.5 6.59
## + height       1     0.002 17.7 7.05
## - vert_leap    1     2.558 20.3 8.18
## 
## Step:  AIC=3.09
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_yds    1      0.08 17.8 1.26
## - broad_jump   1      0.11 17.9 1.31
## - X40          1      0.16 17.9 1.42
## - wonderlic    1      0.19 17.9 1.50
## - c_avg_inter  1      0.28 18.0 1.68
## - age          1      0.38 18.1 1.88
## - c_pct        1      0.45 18.2 2.04
## - c_numyrs     1      0.55 18.3 2.24
## - c_avg_att    1      0.58 18.3 2.30
## <none>                     17.8 3.09
## - shuttle      1      1.41 19.2 3.98
## - cone         1      1.47 19.2 4.11
## - weight       1      1.78 19.5 4.72
## + c_rate       1      0.01 17.7 5.06
## + height       1      0.00 17.8 5.07
## - c_avg_tds    1      2.01 19.8 5.15
## - vert_leap    1      2.76 20.5 6.57
## - c_avg_cmpp   1      3.52 21.3 7.95
## 
## Step:  AIC=1.26
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone + 
##     shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - broad_jump   1      0.12 18.0 -0.48
## - X40          1      0.13 18.0 -0.46
## - wonderlic    1      0.21 18.1 -0.30
## - c_avg_inter  1      0.23 18.1 -0.25
## - age          1      0.36 18.2  0.01
## - c_pct        1      0.57 18.4  0.45
## - c_numyrs     1      0.73 18.6  0.79
## <none>                     17.8  1.26
## - c_avg_att    1      0.99 18.8  1.30
## - cone         1      1.55 19.4  2.43
## + c_avg_yds    1      0.08 17.8  3.09
## - shuttle      1      1.90 19.7  3.09
## - weight       1      1.90 19.7  3.11
## + c_rate       1      0.02 17.8  3.20
## + height       1      0.01 17.8  3.24
## - vert_leap    1      3.25 21.1  5.62
## - c_avg_cmpp   1      3.49 21.3  6.04
## - c_avg_tds    1      3.75 21.6  6.50
## 
## Step:  AIC=-0.48
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone + 
##     shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_inter  1      0.18 18.1 -2.09
## - wonderlic    1      0.20 18.2 -2.07
## - X40          1      0.37 18.3 -1.71
## - age          1      0.43 18.4 -1.59
## - c_pct        1      0.45 18.4 -1.53
## - c_numyrs     1      0.62 18.6 -1.18
## <none>                     18.0 -0.48
## - c_avg_att    1      0.98 18.9 -0.47
## - cone         1      1.45 19.4  0.47
## - shuttle      1      1.80 19.8  1.16
## + broad_jump   1      0.12 17.8  1.26
## + c_avg_yds    1      0.10 17.9  1.31
## + c_rate       1      0.01 17.9  1.49
## + height       1      0.01 17.9  1.49
## - weight       1      2.19 20.1  1.89
## - vert_leap    1      3.18 21.1  3.71
## - c_avg_cmpp   1      3.42 21.4  4.14
## - c_avg_tds    1      4.06 22.0  5.25
## 
## Step:  AIC=-2.09
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_tds + 
##     c_numyrs + c_avg_att + X40 + wonderlic + cone + shuttle + 
##     vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - wonderlic    1      0.17 18.3 -3.74
## - X40          1      0.48 18.6 -3.09
## - age          1      0.49 18.6 -3.08
## - c_pct        1      0.55 18.7 -2.96
## <none>                     18.1 -2.09
## - c_numyrs     1      1.01 19.1 -2.04
## - cone         1      1.27 19.4 -1.53
## - shuttle      1      1.62 19.8 -0.84
## + c_avg_inter  1      0.18 18.0 -0.48
## + broad_jump   1      0.07 18.1 -0.25
## + c_avg_yds    1      0.05 18.1 -0.19
## + height       1      0.03 18.1 -0.17
## + c_rate       1      0.01 18.1 -0.11
## - weight       1      2.07 20.2  0.01
## - c_avg_att    1      3.44 21.6  2.51
## - vert_leap    1      3.59 21.7  2.76
## - c_avg_tds    1      3.94 22.1  3.38
## - c_avg_cmpp   1      5.56 23.7  6.07
## 
## Step:  AIC=-3.74
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_tds + 
##     c_numyrs + c_avg_att + X40 + cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - c_pct        1      0.56 18.9 -4.59
## - X40          1      0.58 18.9 -4.55
## - age          1      0.85 19.2 -4.02
## - c_numyrs     1      0.93 19.2 -3.86
## <none>                     18.3 -3.74
## - shuttle      1      1.54 19.9 -2.67
## + wonderlic    1      0.17 18.1 -2.09
## + c_avg_inter  1      0.16 18.2 -2.07
## - cone         1      1.89 20.2 -2.01
## + broad_jump   1      0.07 18.2 -1.88
## + c_avg_yds    1      0.06 18.2 -1.87
## + c_rate       1      0.02 18.3 -1.78
## + height       1      0.00 18.3 -1.74
## - weight       1      2.38 20.7 -1.09
## - c_avg_att    1      3.32 21.6  0.58
## - vert_leap    1      3.44 21.8  0.80
## - c_avg_tds    1      3.78 22.1  1.38
## - c_avg_cmpp   1      5.39 23.7  4.07
## 
## Step:  AIC=-4.59
## completion_percentage ~ weight + age + c_avg_cmpp + c_avg_tds + 
##     c_numyrs + c_avg_att + X40 + cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - c_numyrs     1      0.42 19.3 -5.75
## - X40          1      0.89 19.8 -4.84
## <none>                     18.9 -4.59
## - age          1      1.22 20.1 -4.22
## + c_pct        1      0.56 18.3 -3.74
## - shuttle      1      1.50 20.4 -3.69
## + c_rate       1      0.43 18.4 -3.47
## - cone         1      1.73 20.6 -3.26
## + c_avg_inter  1      0.24 18.6 -3.08
## + wonderlic    1      0.18 18.7 -2.96
## + c_avg_yds    1      0.14 18.7 -2.88
## + height       1      0.00 18.9 -2.60
## + broad_jump   1      0.00 18.9 -2.59
## - weight       1      2.59 21.5 -1.70
## - c_avg_att    1      3.13 22.0 -0.76
## - c_avg_tds    1      4.43 23.3  1.42
## - vert_leap    1      5.45 24.3  3.04
## - c_avg_cmpp   1      5.98 24.9  3.86
## 
## Step:  AIC=-5.75
## completion_percentage ~ weight + age + c_avg_cmpp + c_avg_tds + 
##     c_avg_att + X40 + cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - X40          1      0.69 20.0 -6.41
## <none>                     19.3 -5.75
## - age          1      1.28 20.6 -5.32
## + c_avg_inter  1      0.47 18.8 -4.69
## + c_numyrs     1      0.42 18.9 -4.59
## - shuttle      1      1.73 21.0 -4.48
## - cone         1      1.84 21.1 -4.29
## + c_avg_yds    1      0.21 19.1 -4.17
## + wonderlic    1      0.11 19.2 -3.96
## + c_rate       1      0.07 19.2 -3.89
## + c_pct        1      0.06 19.2 -3.86
## + height       1      0.03 19.3 -3.80
## + broad_jump   1      0.02 19.3 -3.79
## - c_avg_att    1      2.75 22.1 -2.68
## - weight       1      2.78 22.1 -2.64
## - c_avg_tds    1      4.17 23.5 -0.32
## - vert_leap    1      5.29 24.6  1.46
## - c_avg_cmpp   1      5.58 24.9  1.89
## 
## Step:  AIC=-6.41
## completion_percentage ~ weight + age + c_avg_cmpp + c_avg_tds + 
##     c_avg_att + cone + shuttle + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## <none>                     20.0 -6.41
## - age          1      1.29 21.3 -6.03
## + X40          1      0.69 19.3 -5.75
## + c_avg_inter  1      0.62 19.4 -5.60
## - cone         1      1.78 21.8 -5.17
## + wonderlic    1      0.23 19.8 -4.84
## + c_numyrs     1      0.22 19.8 -4.84
## + c_pct        1      0.22 19.8 -4.82
## + c_rate       1      0.19 19.8 -4.78
## + c_avg_yds    1      0.14 19.9 -4.68
## + height       1      0.13 19.9 -4.66
## + broad_jump   1      0.05 19.9 -4.50
## - c_avg_att    1      2.55 22.5 -3.85
## - shuttle      1      2.62 22.6 -3.73
## - weight       1      2.87 22.9 -3.32
## - c_avg_tds    1      3.55 23.5 -2.19
## - c_avg_cmpp   1      5.07 25.1  0.19
## - vert_leap    1      5.14 25.1  0.29
# Print the call, residual quantiles, coefficient table and overall fit
# statistics of the model retained by the stepwise search.
summary(step_reg.scaled.w_combine.cpct)
## 
## Call:
## lm(formula = completion_percentage ~ weight + age + c_avg_cmpp + 
##     c_avg_tds + c_avg_att + cone + shuttle + vert_leap, data = data.scaled.w_combine.for_cpct)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.449 -0.642  0.104  0.459  2.195 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  2.05e-16   1.35e-01    0.00    1.000  
## weight       3.75e-01   1.84e-01    2.04    0.051 .
## age          2.27e-01   1.66e-01    1.37    0.182  
## c_avg_cmpp   2.08e+00   7.67e-01    2.71    0.011 *
## c_avg_tds   -6.71e-01   2.96e-01   -2.27    0.031 *
## c_avg_att   -1.33e+00   6.89e-01   -1.92    0.064 .
## cone         3.03e-01   1.89e-01    1.61    0.119  
## shuttle     -4.23e-01   2.17e-01   -1.95    0.061 .
## vert_leap   -4.95e-01   1.81e-01   -2.73    0.011 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.83 on 29 degrees of freedom
## Multiple R-squared: 0.46,    Adjusted R-squared: 0.311 
## F-statistic: 3.08 on 8 and 29 DF,  p-value: 0.0121
# Draw the diagnostic plots for the stepwise-selected lm fit (one figure per
# diagnostic; the rendered output below shows four).
plot(step_reg.scaled.w_combine.cpct)

## plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over the same predictors, keeping the 10 best models of
# each subset size (sizes are capped by regsubsets' default nvmax).
leaps.scaled.w_combine.cpct <- leaps::regsubsets(
  completion_percentage ~ .,
  data = data.scaled.w_combine.for_cpct,
  nbest = 10
)

# Plot R-squared against subset size for the retained models.
# subsets() defaults to legend = "interactive", which waits for a mouse click
# through locator(). In a non-interactive run (knitr, Rscript) no coordinates
# come back and legend() aborts with "invalid coordinate lengths" after the
# points are already drawn. Pinning the legend to a fixed corner avoids that;
# R-squared rises with subset size, so the top-left corner is normally empty.
car::subsets(
  leaps.scaled.w_combine.cpct,
  statistic = "rsq",
  legend = "topleft"
)

## plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally rather than as `df =`: DAAG renamed
# that first argument to `data` in later releases, and the positional form is
# accepted by both. The fitted lm object supplies the model formula.
DAAG::cv.lm(
  data.scaled.w_combine.for_cpct,
  form.lm = step_reg.scaled.w_combine.cpct,
  m = 5
)
## Analysis of Variance Table
## 
## Response: completion_percentage
##            Df Sum Sq Mean Sq F value Pr(>F)  
## weight      1   0.56    0.56    0.82  0.373  
## age         1   1.67    1.67    2.42  0.130  
## c_avg_cmpp  1   1.40    1.40    2.03  0.165  
## c_avg_tds   1   1.71    1.71    2.49  0.126  
## c_avg_att   1   3.85    3.85    5.58  0.025 *
## cone        1   2.39    2.39    3.47  0.073 .
## shuttle     1   0.29    0.29    0.42  0.524  
## vert_leap   1   5.14    5.14    7.46  0.011 *
## Residuals  29  19.99    0.69                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

## plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                             6      24     26      32     38      46     59
## Predicted              0.3699  0.8773 -0.879 -0.3106 -0.420 -0.1654 -0.265
## cvpred                 0.3746  1.0313 -0.951 -0.3360 -0.366 -0.0801 -0.689
## completion_percentage  0.3100 -0.0706 -0.756 -0.0135 -1.213 -0.5084  0.348
## CV residual           -0.0645 -1.1020  0.195  0.3225 -0.847 -0.4283  1.037
## 
## Sum of squares = 3.34    Mean square = 0.48    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                            7     19      21       27     37       43
## Predicted              0.326  0.307  0.0476 -0.42391  0.199 -0.87796
## cvpred                 0.448  0.343 -0.3582 -0.75354 -0.105 -0.88005
## completion_percentage -0.699 -0.413  1.1285  0.00551  0.672 -0.88906
## CV residual           -1.146 -0.756  1.4867  0.75905  0.776 -0.00901
##                            55   65
## Predicted             -0.0838 1.25
## cvpred                 0.2249 1.05
## completion_percentage -1.0223 1.41
## CV residual           -1.2472 0.36
## 
## Sum of squares = 6.96    Mean square = 0.87    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                            5     12     13    17     39    40      50
## Predicted             -0.454 -0.748 -0.197 1.115  0.272 -0.61 -0.4750
## cvpred                -0.722 -1.001 -0.356 1.149  1.065 -1.11 -0.0713
## completion_percentage -0.451 -0.280  0.672 1.490 -0.375  0.12 -0.6036
## CV residual            0.271  0.721  1.028 0.342 -1.440  1.23 -0.5323
##                           56
## Predicted             -0.743
## cvpred                -1.546
## completion_percentage  1.452
## CV residual            2.998
## 
## Sum of squares = 14.6    Mean square = 1.83    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                            4      15     18      20     28     52     63
## Predicted             -0.219  0.7911 0.5648 -0.3582 -0.246 -0.659  1.470
## cvpred                 0.339  0.8693 0.7668 -0.4453 -0.199 -0.621  2.213
## completion_percentage -1.060  0.8239 0.8620 -0.0897  0.291 -2.107  0.843
## CV residual           -1.399 -0.0453 0.0952  0.3556  0.490 -1.487 -1.370
##                           64
## Predicted             -0.928
## cvpred                -0.586
## completion_percentage -1.898
## CV residual           -1.312
## 
## Sum of squares = 8.14    Mean square = 1.02    n = 8 
## 
## fold 5 
## Observations in test set: 7 
##                             1      3     25     30     42    49      61
## Predicted             -0.0913 -1.140 1.1002  0.146 0.0719 1.316  0.0659
## cvpred                -0.6521 -0.534 0.0351 -0.045 0.2204 0.216 -0.4387
## completion_percentage -0.1087 -2.336 1.1856  0.824 0.2339 1.890  0.3291
## CV residual            0.5434 -1.802 1.1504  0.869 0.0135 1.673  0.7678
## 
## Sum of squares = 9.01    Mean square = 1.29    n = 7 
## 
## Overall (Sum over all 7 folds) 
##   ms 
## 1.11