# Fetch Data
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Grab the candidate predictor columns
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable. Single-bracket indexing keeps it a one-column
# data frame, so cbind() below carries the column name "games_started" into
# the modelling data.
games_started <- qb_stats_w_combine["games_started"]

# Generate clean data set: drop every row with a missing value, then centre
# and scale each column (scale() defaults) so the coefficients are comparable.
data.scaled.w_combine.for_games_started <- data.frame(
  scale(na.omit(cbind(games_started, college_stats)))
)

# Generate the linear model: games_started regressed on all other columns
lm.scaled.w_combine.games_started <- lm(
  formula = games_started ~ .,
  data = data.scaled.w_combine.for_games_started
)

# Find optimum linear regression model for games_started.
# Stepwise AIC search that may both drop and re-add terms at each step.
# The call is namespace-qualified so it does not depend on library(MASS)
# having been attached elsewhere.
step_reg.scaled.w_combine.games_started <- MASS::stepAIC(
  lm.scaled.w_combine.games_started,
  direction = "both"
)
## Start:  AIC=15.01
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_pct        1      0.01 22.8 13.0
## - vert_leap    1      0.01 22.8 13.0
## - c_rate       1      0.04 22.8 13.1
## - c_avg_yds    1      0.05 22.8 13.1
## - shuttle      1      0.13 22.9 13.2
## - broad_jump   1      0.19 23.0 13.3
## - X40          1      0.22 23.0 13.4
## - cone         1      0.33 23.1 13.6
## - c_avg_inter  1      0.33 23.1 13.6
## - height       1      0.46 23.2 13.8
## - c_avg_tds    1      0.54 23.3 13.9
## - weight       1      0.59 23.4 14.0
## - c_avg_cmpp   1      0.62 23.4 14.1
## <none>                     22.8 15.0
## - c_numyrs     1      1.62 24.4 15.7
## - c_avg_att    1      2.56 25.3 17.2
## - wonderlic    1      3.28 26.1 18.3
## - age          1      4.74 27.5 20.4
## 
## Step:  AIC=13.02
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - vert_leap    1      0.01 22.8 11.0
## - shuttle      1      0.12 22.9 11.2
## - c_avg_yds    1      0.17 22.9 11.3
## - broad_jump   1      0.21 23.0 11.4
## - X40          1      0.22 23.0 11.4
## - c_avg_inter  1      0.36 23.1 11.6
## - cone         1      0.37 23.1 11.7
## - height       1      0.50 23.3 11.9
## - weight       1      0.60 23.4 12.0
## - c_avg_tds    1      1.06 23.8 12.8
## <none>                     22.8 13.0
## - c_rate       1      1.31 24.1 13.2
## - c_numyrs     1      1.67 24.4 13.8
## + c_pct        1      0.01 22.8 15.0
## - c_avg_cmpp   1      2.78 25.6 15.5
## - c_avg_att    1      3.15 25.9 16.1
## - wonderlic    1      4.16 26.9 17.6
## - age          1      5.05 27.8 18.8
## 
## Step:  AIC=11.04
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - shuttle      1      0.12 22.9  9.24
## - c_avg_yds    1      0.20 23.0  9.38
## - X40          1      0.22 23.0  9.40
## - broad_jump   1      0.27 23.1  9.49
## - cone         1      0.38 23.2  9.68
## - c_avg_inter  1      0.39 23.2  9.69
## - height       1      0.50 23.3  9.89
## - weight       1      0.59 23.4 10.04
## - c_avg_tds    1      1.08 23.9 10.85
## <none>                     22.8 11.04
## - c_rate       1      1.31 24.1 11.21
## - c_numyrs     1      1.72 24.5 11.88
## + vert_leap    1      0.01 22.8 13.02
## + c_pct        1      0.00 22.8 13.03
## - c_avg_cmpp   1      2.78 25.6 13.52
## - c_avg_att    1      3.20 26.0 14.16
## - wonderlic    1      4.27 27.1 15.74
## - age          1      5.08 27.9 16.88
## 
## Step:  AIC=9.24
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - cone         1      0.26 23.2  7.68
## - broad_jump   1      0.29 23.2  7.73
## - c_avg_inter  1      0.30 23.2  7.74
## - c_avg_yds    1      0.33 23.2  7.79
## - height       1      0.45 23.4  8.00
## - weight       1      0.48 23.4  8.05
## - X40          1      0.49 23.4  8.07
## - c_avg_tds    1      0.97 23.9  8.85
## <none>                     22.9  9.24
## - c_rate       1      1.40 24.3  9.56
## - c_numyrs     1      1.68 24.6 10.00
## + shuttle      1      0.12 22.8 11.04
## + c_pct        1      0.00 22.9 11.23
## + vert_leap    1      0.00 22.9 11.24
## - c_avg_cmpp   1      2.66 25.6 11.52
## - c_avg_att    1      3.11 26.0 12.20
## - wonderlic    1      4.36 27.3 14.03
## - age          1      5.12 28.0 15.11
## 
## Step:  AIC=7.68
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_inter  1      0.17 23.3  5.97
## - c_avg_yds    1      0.29 23.5  6.17
## - height       1      0.37 23.5  6.30
## - X40          1      0.38 23.6  6.32
## - broad_jump   1      0.42 23.6  6.38
## - weight       1      0.45 23.6  6.44
## - c_avg_tds    1      0.95 24.1  7.26
## <none>                     23.2  7.68
## - c_rate       1      1.36 24.5  7.91
## - c_numyrs     1      1.54 24.7  8.18
## + cone         1      0.26 22.9  9.24
## + c_pct        1      0.03 23.1  9.63
## + vert_leap    1      0.01 23.1  9.66
## + shuttle      1      0.00 23.2  9.68
## - c_avg_cmpp   1      2.56 25.7  9.77
## - c_avg_att    1      2.93 26.1 10.32
## - wonderlic    1      4.23 27.4 12.22
## - age          1      5.36 28.5 13.80
## 
## Step:  AIC=5.97
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_yds    1      0.21 23.6  4.33
## - height       1      0.31 23.6  4.48
## - weight       1      0.41 23.8  4.65
## - X40          1      0.51 23.9  4.82
## - broad_jump   1      0.58 23.9  4.92
## - c_avg_tds    1      0.85 24.2  5.38
## <none>                     23.3  5.97
## - c_rate       1      1.31 24.6  6.10
## - c_numyrs     1      1.37 24.7  6.20
## + c_avg_inter  1      0.17 23.2  7.68
## + cone         1      0.14 23.2  7.74
## + vert_leap    1      0.03 23.3  7.92
## + c_pct        1      0.00 23.3  7.97
## + shuttle      1      0.00 23.3  7.97
## - c_avg_cmpp   1      2.58 25.9  8.06
## - c_avg_att    1      3.94 27.3 10.06
## - wonderlic    1      4.50 27.8 10.86
## - age          1      5.49 28.8 12.21
## 
## Step:  AIC=4.33
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - height       1      0.27 23.8  2.77
## - weight       1      0.34 23.9  2.89
## - X40          1      0.42 24.0  3.02
## - broad_jump   1      0.48 24.0  3.12
## - c_rate       1      1.11 24.7  4.12
## - c_numyrs     1      1.16 24.7  4.21
## <none>                     23.6  4.33
## - c_avg_tds    1      1.43 25.0  4.62
## + c_avg_yds    1      0.21 23.3  5.97
## + cone         1      0.14 23.4  6.10
## + c_avg_inter  1      0.09 23.5  6.17
## + c_pct        1      0.08 23.5  6.19
## + vert_leap    1      0.05 23.5  6.24
## + shuttle      1      0.02 23.5  6.30
## - c_avg_cmpp   1      2.74 26.3  6.62
## - wonderlic    1      4.35 27.9  8.94
## - c_avg_att    1      4.62 28.2  9.31
## - age          1      5.28 28.8 10.21
## 
## Step:  AIC=2.77
## games_started ~ weight + age + c_avg_cmpp + c_rate + c_avg_tds + 
##     c_numyrs + c_avg_att + X40 + wonderlic + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - weight       1      0.09 23.9 0.92
## - broad_jump   1      0.48 24.3 1.55
## - X40          1      0.66 24.5 1.84
## - c_numyrs     1      0.96 24.8 2.31
## - c_rate       1      1.03 24.8 2.41
## <none>                     23.8 2.77
## - c_avg_tds    1      1.34 25.2 2.90
## + height       1      0.27 23.6 4.33
## + c_avg_yds    1      0.17 23.6 4.48
## + c_pct        1      0.11 23.7 4.58
## + cone         1      0.10 23.7 4.60
## - c_avg_cmpp   1      2.48 26.3 4.63
## + c_avg_inter  1      0.06 23.8 4.67
## + vert_leap    1      0.05 23.8 4.69
## + shuttle      1      0.01 23.8 4.75
## - wonderlic    1      4.30 28.1 7.24
## - c_avg_att    1      4.41 28.2 7.40
## - age          1      5.01 28.8 8.22
## 
## Step:  AIC=0.92
## games_started ~ age + c_avg_cmpp + c_rate + c_avg_tds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - broad_jump   1      0.39 24.3 -0.44
## - X40          1      0.58 24.5 -0.15
## - c_numyrs     1      0.89 24.8  0.35
## - c_rate       1      0.96 24.9  0.46
## <none>                     23.9  0.92
## - c_avg_tds    1      1.26 25.2  0.93
## - c_avg_cmpp   1      2.39 26.3  2.63
## + c_avg_yds    1      0.15 23.8  2.69
## + cone         1      0.12 23.8  2.73
## + weight       1      0.09 23.8  2.77
## + c_pct        1      0.09 23.8  2.77
## + c_avg_inter  1      0.06 23.9  2.82
## + vert_leap    1      0.03 23.9  2.87
## + height       1      0.02 23.9  2.89
## + shuttle      1      0.00 23.9  2.92
## - wonderlic    1      4.40 28.3  5.51
## - c_avg_att    1      4.43 28.4  5.56
## - age          1      5.78 29.7  7.37
## 
## Step:  AIC=-0.44
## games_started ~ age + c_avg_cmpp + c_rate + c_avg_tds + c_numyrs + 
##     c_avg_att + X40 + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - X40          1      0.19 24.5 -2.13
## - c_rate       1      0.61 24.9 -1.47
## - c_numyrs     1      0.61 24.9 -1.47
## - c_avg_tds    1      0.90 25.2 -1.02
## <none>                     24.3 -0.44
## + broad_jump   1      0.39 23.9  0.92
## - c_avg_cmpp   1      2.40 26.7  1.23
## + vert_leap    1      0.19 24.1  1.26
## + cone         1      0.16 24.1  1.30
## + c_avg_inter  1      0.16 24.1  1.30
## + c_avg_yds    1      0.09 24.2  1.42
## + height       1      0.09 24.2  1.42
## + c_pct        1      0.06 24.2  1.46
## + weight       1      0.01 24.3  1.55
## + shuttle      1      0.00 24.3  1.56
## - c_avg_att    1      4.20 28.5  3.78
## - wonderlic    1      4.52 28.8  4.22
## - age          1      5.71 30.0  5.79
## 
## Step:  AIC=-2.13
## games_started ~ age + c_avg_cmpp + c_rate + c_avg_tds + c_numyrs + 
##     c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_rate       1      0.77 25.3 -2.92
## - c_numyrs     1      0.79 25.3 -2.89
## - c_avg_tds    1      0.87 25.4 -2.77
## <none>                     24.5 -2.13
## + X40          1      0.19 24.3 -0.44
## + height       1      0.17 24.3 -0.41
## + c_avg_inter  1      0.16 24.3 -0.39
## + shuttle      1      0.08 24.4 -0.26
## + c_avg_yds    1      0.06 24.4 -0.24
## + c_pct        1      0.04 24.5 -0.20
## + cone         1      0.02 24.5 -0.16
## + broad_jump   1      0.01 24.5 -0.15
## + weight       1      0.01 24.5 -0.14
## + vert_leap    1      0.00 24.5 -0.14
## - c_avg_cmpp   1      2.84 27.3  0.15
## - wonderlic    1      4.38 28.9  2.28
## - c_avg_att    1      4.78 29.3  2.82
## - age          1      5.61 30.1  3.91
## 
## Step:  AIC=-2.92
## games_started ~ age + c_avg_cmpp + c_avg_tds + c_numyrs + c_avg_att + 
##     wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_numyrs     1      0.29 25.6 -4.47
## - c_avg_tds    1      0.35 25.6 -4.39
## <none>                     25.3 -2.92
## + c_pct        1      0.80 24.5 -2.18
## + c_rate       1      0.77 24.5 -2.13
## - c_avg_cmpp   1      2.16 27.4 -1.72
## + X40          1      0.36 24.9 -1.47
## + height       1      0.17 25.1 -1.18
## + shuttle      1      0.15 25.1 -1.15
## + vert_leap    1      0.13 25.1 -1.12
## + c_avg_inter  1      0.13 25.1 -1.12
## + broad_jump   1      0.09 25.2 -1.06
## + c_avg_yds    1      0.04 25.2 -0.98
## + weight       1      0.01 25.3 -0.93
## + cone         1      0.00 25.3 -0.92
## - c_avg_att    1      4.02 29.3  0.84
## - wonderlic    1      4.11 29.4  0.96
## - age          1      5.02 30.3  2.15
## 
## Step:  AIC=-4.47
## games_started ~ age + c_avg_cmpp + c_avg_tds + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_tds    1      0.38 25.9 -5.90
## <none>                     25.6 -4.47
## - c_avg_cmpp   1      1.93 27.5 -3.63
## + X40          1      0.44 25.1 -3.15
## + c_numyrs     1      0.29 25.3 -2.92
## + c_rate       1      0.27 25.3 -2.89
## + c_pct        1      0.18 25.4 -2.76
## + shuttle      1      0.18 25.4 -2.75
## + vert_leap    1      0.15 25.4 -2.71
## + broad_jump   1      0.15 25.4 -2.70
## + height       1      0.13 25.4 -2.68
## + c_avg_yds    1      0.09 25.5 -2.61
## + c_avg_inter  1      0.04 25.5 -2.54
## + cone         1      0.01 25.6 -2.48
## + weight       1      0.00 25.6 -2.47
## - c_avg_att    1      3.73 29.3 -1.16
## - wonderlic    1      3.89 29.4 -0.95
## - age          1      4.91 30.5  0.38
## 
## Step:  AIC=-5.9
## games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## <none>                     25.9 -5.90
## + c_avg_tds    1      0.38 25.6 -4.47
## + X40          1      0.33 25.6 -4.40
## + c_numyrs     1      0.32 25.6 -4.39
## + broad_jump   1      0.18 25.8 -4.17
## + vert_leap    1      0.17 25.8 -4.16
## + c_pct        1      0.15 25.8 -4.12
## + shuttle      1      0.11 25.8 -4.07
## + height       1      0.10 25.8 -4.06
## + c_rate       1      0.04 25.9 -3.96
## + c_avg_yds    1      0.02 25.9 -3.93
## + c_avg_inter  1      0.00 25.9 -3.90
## + cone         1      0.00 25.9 -3.90
## + weight       1      0.00 25.9 -3.90
## - wonderlic    1      4.50 30.4 -1.66
## - c_avg_cmpp   1      4.55 30.5 -1.60
## - c_avg_att    1      4.74 30.7 -1.36
## - age          1      5.35 31.3 -0.59
# Coefficient table and fit statistics for the model chosen by the stepwise search
summary(object = step_reg.scaled.w_combine.games_started)
## 
## Call:
## lm(formula = games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic, 
##     data = data.scaled.w_combine.for_games_started)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8516 -0.5632 -0.0246  0.6552  1.4239 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  6.37e-16   1.40e-01    0.00    1.000  
## age         -4.21e-01   1.59e-01   -2.65    0.012 *
## c_avg_cmpp   1.62e+00   6.62e-01    2.44    0.020 *
## c_avg_att   -1.64e+00   6.58e-01   -2.49    0.018 *
## wonderlic   -3.89e-01   1.60e-01   -2.43    0.021 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.873 on 34 degrees of freedom
## Multiple R-squared: 0.317,   Adjusted R-squared: 0.237 
## F-statistic: 3.95 on 4 and 34 DF,  p-value: 0.00971
# Diagnostic plots for the stepwise-selected model
plot(x = step_reg.scaled.w_combine.games_started)

## plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over all predictors, keeping the 10 best models of
# each subset size.
leaps.scaled.w_combine.games_started <- leaps::regsubsets(
  games_started ~ .,
  data = data.scaled.w_combine.for_games_started,
  nbest = 10
)

# Plot R-squared against subset size for the retained models.
# The legend position is given explicitly: by default subsets() asks for a
# mouse click to place the legend, which cannot happen when the script is
# rendered non-interactively and aborts with "invalid coordinate lengths".
car::subsets(
  leaps.scaled.w_combine.games_started,
  statistic = "rsq",
  legend = "bottomright"
)

## plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model on the scaled data
cv.lm(
  data.scaled.w_combine.for_games_started,
  step_reg.scaled.w_combine.games_started,
  m = 5
)
## Analysis of Variance Table
## 
## Response: games_started
##            Df Sum Sq Mean Sq F value Pr(>F)  
## age         1   4.20    4.20    5.50  0.025 *
## c_avg_cmpp  1   0.15    0.15    0.19  0.666  
## c_avg_att   1   3.21    3.21    4.21  0.048 *
## wonderlic   1   4.50    4.50    5.90  0.021 *
## Residuals  34  25.94    0.76                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

## plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                     3     21     24     40     42     52     61
## Predicted     -0.3433 -0.962 -0.220 -0.717 -0.684 -0.956  0.695
## cvpred        -0.2979 -0.923 -0.121 -0.708 -0.574 -0.935  0.687
## games_started -0.2013 -1.409 -1.409  0.403 -0.805 -1.409  0.101
## CV residual    0.0966 -0.486 -1.288  1.110 -0.231 -0.474 -0.587
## 
## Sum of squares = 3.76    Mean square = 0.54    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                    6      18    25    37     43      50     55     63
## Predicted     0.0289  0.1129 1.205 0.611 -0.433 -0.1114 -0.325 -0.273
## cvpred        0.0709  0.0654 1.169 0.384 -0.163  0.0643 -0.247 -0.200
## games_started 1.3086 -0.5033 1.309 1.309 -0.805 -0.5033 -1.107 -0.805
## CV residual   1.2377 -0.5687 0.139 0.925 -0.642 -0.5676 -0.861 -0.606
## 
## Sum of squares = 4.57    Mean square = 0.57    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                     5      7     16     20    28      32      49     64
## Predicted      0.2385 -0.188 -0.117 -1.107 0.743  0.0525 -0.1768  0.767
## cvpred        -0.0382  0.257 -0.099 -0.616 0.974  0.2230 -0.1158  0.832
## games_started  1.0066 -1.409  0.101 -1.409 1.309 -0.2013 -0.2013 -0.201
## CV residual    1.0449 -1.666  0.200 -0.793 0.334 -0.4243 -0.0855 -1.033
## 
## Sum of squares = 5.9    Mean square = 0.74    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                    12    13    26    30     38     39     59      65
## Predicted     -0.0924 1.231 0.696 0.213 -0.413  0.580 -0.425 -0.0693
## cvpred        -0.0108 1.159 0.669 0.355 -0.525  0.812 -0.327 -0.2899
## games_started -0.8053 1.309 1.309 1.309  0.101 -0.805 -1.107  0.7047
## CV residual   -0.7945 0.149 0.640 0.953  0.625 -1.617 -0.780  0.9946
## 
## Sum of squares = 6.58    Mean square = 0.82    n = 8 
## 
## fold 5 
## Observations in test set: 8 
##                    1      4      15     17       19     27    46      56
## Predicted      0.193  0.442  0.2508 -0.579  0.00636 -0.417 0.559 -0.0143
## cvpred        -0.172  1.087 -0.0556 -0.298  0.32811 -1.206 0.830 -0.1397
## games_started  1.309 -1.409  0.4027  0.403 -0.50332  1.007 1.007  1.3086
## CV residual    1.480 -2.496  0.4582  0.700 -0.83144  2.213 0.177  1.4483
## 
## Sum of squares = 16.8    Mean square = 2.11    n = 8 
## 
## Overall (Sum over all 8 folds) 
##    ms 
## 0.965