# Load the raw data set from CSV. The path is relative, so it is resolved
# against the working directory the script is run or knitted from.
qb_stats_w_combine <- read.csv(file = "../data/qb_stats_w_combine.csv")

# Candidate predictor columns for the regression.
# NOTE(review): the grouping comments below are inferred from the column
# names only; confirm against the data dictionary.
predictors <- c(
  # body measurements and age
  "height", "weight", "age",
  # columns carrying the "c_" prefix
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter",
  "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att",
  # remaining test / drill measurements
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)

# Keep only the predictor columns (result is still a data frame).
college_stats <- qb_stats_w_combine[predictors]

# Set the response variable. It is kept as a one-column data frame (not a
# bare vector) so the column name survives the cbind() with the predictors.
games_started <- qb_stats_w_combine[, "games_started", drop = FALSE]

# Build the modelling data set in three steps:
#   1. put the response next to the predictors,
#   2. drop every row that has a missing value in any column,
#   3. log-transform all columns (response included) after adding 0.1, which
#      keeps zero-valued entries finite under log().
# local() keeps the intermediate objects out of the global environment.
data.log.w_combine.for_games_started <- local({
  combined <- cbind(games_started, college_stats)
  complete <- na.omit(combined)
  data.frame(log(complete + 0.1))
})

# Fit the full linear model: games_started regressed on every other column
# of the log-transformed data set (`.` expands to all remaining columns).
lm.log.w_combine.games_started <- lm(
  formula = games_started ~ .,
  data = data.log.w_combine.for_games_started
)

# Stepwise model selection by AIC, starting from the full model. With
# direction = "both", each step may either drop a term or re-add one that
# was dropped earlier; the search trace is printed as it runs.
step_reg.log.w_combine.games_started <- stepAIC(
  object = lm.log.w_combine.games_started,
  direction = "both"
)
## Start:  AIC=-79.37
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - X40          1    0.0000 2.02 -81.4
## - c_avg_yds    1    0.0047 2.03 -81.3
## - height       1    0.0048 2.03 -81.3
## - vert_leap    1    0.0102 2.04 -81.2
## - broad_jump   1    0.0132 2.04 -81.1
## - c_rate       1    0.0137 2.04 -81.1
## - weight       1    0.0143 2.04 -81.1
## - cone         1    0.0389 2.06 -80.6
## - shuttle      1    0.0425 2.07 -80.6
## - c_avg_tds    1    0.0610 2.09 -80.2
## - c_numyrs     1    0.0663 2.09 -80.1
## - c_avg_inter  1    0.0725 2.10 -80.0
## <none>                     2.02 -79.4
## - wonderlic    1    0.1455 2.17 -78.7
## - c_avg_att    1    0.1515 2.18 -78.6
## - c_pct        1    0.1665 2.19 -78.3
## - c_avg_cmpp   1    0.1746 2.20 -78.1
## - age          1    0.1755 2.20 -78.1
## 
## Step:  AIC=-81.37
## games_started ~ height + weight + age + c_avg_cmpp + c_rate + 
##     c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att + wonderlic + cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - height       1    0.0049 2.03 -83.3
## - c_avg_yds    1    0.0052 2.03 -83.3
## - vert_leap    1    0.0107 2.04 -83.2
## - weight       1    0.0144 2.04 -83.1
## - c_rate       1    0.0149 2.04 -83.1
## - broad_jump   1    0.0166 2.04 -83.0
## - cone         1    0.0389 2.06 -82.6
## - shuttle      1    0.0486 2.07 -82.4
## - c_avg_tds    1    0.0655 2.09 -82.1
## - c_numyrs     1    0.0663 2.09 -82.1
## - c_avg_inter  1    0.0803 2.10 -81.9
## <none>                     2.02 -81.4
## - wonderlic    1    0.1481 2.17 -80.6
## - c_avg_att    1    0.1515 2.18 -80.6
## - c_pct        1    0.1686 2.19 -80.2
## - c_avg_cmpp   1    0.1754 2.20 -80.1
## - age          1    0.1777 2.20 -80.1
## + X40          1    0.0000 2.02 -79.4
## 
## Step:  AIC=-83.27
## games_started ~ weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     wonderlic + cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_yds    1    0.0034 2.03 -85.2
## - weight       1    0.0096 2.04 -85.1
## - vert_leap    1    0.0107 2.04 -85.1
## - c_rate       1    0.0116 2.04 -85.0
## - broad_jump   1    0.0171 2.05 -84.9
## - cone         1    0.0342 2.06 -84.6
## - shuttle      1    0.0465 2.08 -84.4
## - c_avg_tds    1    0.0605 2.09 -84.1
## - c_numyrs     1    0.0625 2.09 -84.1
## - c_avg_inter  1    0.0814 2.11 -83.7
## <none>                     2.03 -83.3
## - c_avg_att    1    0.1466 2.18 -82.6
## - wonderlic    1    0.1586 2.19 -82.3
## - c_pct        1    0.1652 2.19 -82.2
## - c_avg_cmpp   1    0.1708 2.20 -82.1
## - age          1    0.1728 2.20 -82.1
## + height       1    0.0049 2.02 -81.4
## + X40          1    0.0002 2.03 -81.3
## 
## Step:  AIC=-85.21
## games_started ~ weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_numyrs + c_avg_att + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - weight       1    0.0094 2.04 -87.0
## - vert_leap    1    0.0150 2.05 -86.9
## - broad_jump   1    0.0216 2.06 -86.8
## - cone         1    0.0357 2.07 -86.5
## - shuttle      1    0.0494 2.08 -86.3
## - c_rate       1    0.0519 2.08 -86.2
## - c_numyrs     1    0.0602 2.09 -86.1
## <none>                     2.03 -85.2
## - c_avg_tds    1    0.1407 2.17 -84.6
## - c_avg_inter  1    0.1450 2.18 -84.5
## - wonderlic    1    0.1552 2.19 -84.3
## - age          1    0.1697 2.20 -84.1
## - c_pct        1    0.1761 2.21 -84.0
## - c_avg_cmpp   1    0.2104 2.24 -83.4
## + c_avg_yds    1    0.0034 2.03 -83.3
## + height       1    0.0031 2.03 -83.3
## + X40          1    0.0000 2.03 -83.2
## - c_avg_att    1    0.2288 2.26 -83.0
## 
## Step:  AIC=-87.03
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + wonderlic + cone + shuttle + 
##     vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - vert_leap    1    0.0111 2.05 -88.8
## - broad_jump   1    0.0262 2.07 -88.5
## - cone         1    0.0330 2.08 -88.4
## - shuttle      1    0.0402 2.08 -88.3
## - c_rate       1    0.0448 2.09 -88.2
## - c_numyrs     1    0.0537 2.10 -88.0
## <none>                     2.04 -87.0
## - c_avg_tds    1    0.1314 2.17 -86.6
## - c_avg_inter  1    0.1401 2.18 -86.4
## - wonderlic    1    0.1476 2.19 -86.3
## - age          1    0.1655 2.21 -86.0
## - c_pct        1    0.1680 2.21 -85.9
## - c_avg_cmpp   1    0.2013 2.24 -85.4
## + weight       1    0.0094 2.03 -85.2
## + c_avg_yds    1    0.0032 2.04 -85.1
## - c_avg_att    1    0.2195 2.26 -85.0
## + height       1    0.0004 2.04 -85.0
## + X40          1    0.0000 2.04 -85.0
## 
## Step:  AIC=-88.82
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + wonderlic + cone + shuttle + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - broad_jump   1    0.0152 2.07 -90.5
## - shuttle      1    0.0316 2.08 -90.2
## - cone         1    0.0391 2.09 -90.1
## - c_rate       1    0.0394 2.09 -90.1
## - c_numyrs     1    0.0561 2.11 -89.8
## <none>                     2.05 -88.8
## - c_avg_tds    1    0.1235 2.18 -88.5
## - c_avg_inter  1    0.1442 2.20 -88.2
## - c_pct        1    0.1613 2.21 -87.9
## - age          1    0.1675 2.22 -87.8
## - wonderlic    1    0.1679 2.22 -87.8
## - c_avg_cmpp   1    0.1931 2.25 -87.3
## + vert_leap    1    0.0111 2.04 -87.0
## - c_avg_att    1    0.2109 2.26 -87.0
## + c_avg_yds    1    0.0067 2.05 -86.9
## + weight       1    0.0054 2.05 -86.9
## + X40          1    0.0009 2.05 -86.8
## + height       1    0.0001 2.05 -86.8
## 
## Step:  AIC=-90.53
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + wonderlic + cone + shuttle
## 
##               Df Sum of Sq  RSS   AIC
## - cone         1    0.0285 2.10 -92.0
## - shuttle      1    0.0469 2.12 -91.7
## - c_rate       1    0.0566 2.12 -91.5
## - c_numyrs     1    0.0678 2.14 -91.3
## <none>                     2.07 -90.5
## - c_avg_inter  1    0.1305 2.20 -90.1
## - c_avg_tds    1    0.1502 2.22 -89.8
## - wonderlic    1    0.1527 2.22 -89.8
## - c_pct        1    0.1564 2.23 -89.7
## - age          1    0.1587 2.23 -89.6
## - c_avg_cmpp   1    0.1888 2.26 -89.1
## - c_avg_att    1    0.2066 2.27 -88.8
## + broad_jump   1    0.0152 2.05 -88.8
## + weight       1    0.0121 2.06 -88.8
## + c_avg_yds    1    0.0079 2.06 -88.7
## + X40          1    0.0029 2.07 -88.6
## + height       1    0.0014 2.07 -88.6
## + vert_leap    1    0.0000 2.07 -88.5
## 
## Step:  AIC=-92
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + wonderlic + shuttle
## 
##               Df Sum of Sq  RSS   AIC
## - shuttle      1    0.0201 2.12 -93.6
## - c_rate       1    0.0482 2.15 -93.1
## - c_numyrs     1    0.0578 2.15 -92.9
## - c_avg_inter  1    0.1076 2.21 -92.0
## <none>                     2.10 -92.0
## - wonderlic    1    0.1316 2.23 -91.6
## - c_avg_tds    1    0.1345 2.23 -91.6
## - c_pct        1    0.1628 2.26 -91.1
## - age          1    0.1885 2.29 -90.6
## - c_avg_cmpp   1    0.1948 2.29 -90.5
## + cone         1    0.0285 2.07 -90.5
## - c_avg_att    1    0.2110 2.31 -90.3
## + c_avg_yds    1    0.0106 2.09 -90.2
## + weight       1    0.0053 2.09 -90.1
## + broad_jump   1    0.0046 2.09 -90.1
## + height       1    0.0027 2.10 -90.0
## + vert_leap    1    0.0026 2.10 -90.0
## + X40          1    0.0000 2.10 -90.0
## 
## Step:  AIC=-93.62
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_rate       1    0.0466 2.16 -94.8
## - c_numyrs     1    0.0668 2.18 -94.4
## - c_avg_inter  1    0.1086 2.23 -93.7
## <none>                     2.12 -93.6
## - c_avg_tds    1    0.1262 2.24 -93.4
## - wonderlic    1    0.1573 2.27 -92.8
## - c_pct        1    0.1740 2.29 -92.5
## - age          1    0.1905 2.31 -92.3
## + shuttle      1    0.0201 2.10 -92.0
## - c_avg_cmpp   1    0.2076 2.33 -92.0
## + broad_jump   1    0.0165 2.10 -91.9
## + c_avg_yds    1    0.0108 2.11 -91.8
## + X40          1    0.0098 2.11 -91.8
## - c_avg_att    1    0.2238 2.34 -91.7
## + vert_leap    1    0.0019 2.12 -91.7
## + cone         1    0.0017 2.12 -91.7
## + weight       1    0.0005 2.12 -91.6
## + height       1    0.0000 2.12 -91.6
## 
## Step:  AIC=-94.77
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds + 
##     c_numyrs + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_numyrs     1    0.0820 2.25 -95.3
## - c_avg_tds    1    0.0946 2.26 -95.1
## <none>                     2.16 -94.8
## - wonderlic    1    0.1253 2.29 -94.6
## - age          1    0.1786 2.34 -93.7
## - c_avg_inter  1    0.1790 2.34 -93.7
## + c_rate       1    0.0466 2.12 -93.6
## + c_avg_yds    1    0.0356 2.13 -93.4
## + broad_jump   1    0.0307 2.13 -93.3
## + shuttle      1    0.0185 2.15 -93.1
## + X40          1    0.0150 2.15 -93.0
## + vert_leap    1    0.0083 2.16 -92.9
## + cone         1    0.0006 2.16 -92.8
## + height       1    0.0003 2.16 -92.8
## + weight       1    0.0001 2.16 -92.8
## - c_pct        1    0.2623 2.43 -92.3
## - c_avg_cmpp   1    0.2742 2.44 -92.1
## - c_avg_att    1    0.2859 2.45 -91.9
## 
## Step:  AIC=-95.32
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_tds    1    0.0910 2.34 -95.8
## - wonderlic    1    0.1124 2.36 -95.4
## <none>                     2.25 -95.3
## - c_avg_inter  1    0.1339 2.38 -95.1
## + c_numyrs     1    0.0820 2.16 -94.8
## + c_rate       1    0.0618 2.18 -94.4
## + c_avg_yds    1    0.0580 2.19 -94.3
## + broad_jump   1    0.0578 2.19 -94.3
## - age          1    0.1818 2.43 -94.3
## - c_pct        1    0.1857 2.43 -94.2
## - c_avg_cmpp   1    0.1970 2.44 -94.0
## + X40          1    0.0338 2.21 -93.9
## - c_avg_att    1    0.2080 2.45 -93.9
## + shuttle      1    0.0280 2.22 -93.8
## + vert_leap    1    0.0198 2.23 -93.7
## + weight       1    0.0010 2.25 -93.3
## + cone         1    0.0008 2.25 -93.3
## + height       1    0.0000 2.25 -93.3
## 
## Step:  AIC=-95.77
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_att + 
##     wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_inter  1    0.0720 2.41 -96.6
## <none>                     2.34 -95.8
## + c_avg_tds    1    0.0910 2.25 -95.3
## - c_pct        1    0.1517 2.49 -95.3
## + c_numyrs     1    0.0784 2.26 -95.1
## - c_avg_cmpp   1    0.1684 2.50 -95.1
## - c_avg_att    1    0.1725 2.51 -95.0
## + broad_jump   1    0.0541 2.28 -94.7
## - wonderlic    1    0.1987 2.54 -94.6
## + X40          1    0.0218 2.31 -94.1
## + vert_leap    1    0.0177 2.32 -94.1
## + shuttle      1    0.0166 2.32 -94.1
## + c_rate       1    0.0096 2.33 -93.9
## + weight       1    0.0030 2.33 -93.8
## + cone         1    0.0006 2.34 -93.8
## + c_avg_yds    1    0.0005 2.34 -93.8
## + height       1    0.0000 2.34 -93.8
## - age          1    0.2637 2.60 -93.6
## 
## Step:  AIC=-96.59
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## - c_pct        1    0.0819 2.49 -97.3
## - c_avg_cmpp   1    0.0968 2.51 -97.1
## - c_avg_att    1    0.1006 2.51 -97.0
## <none>                     2.41 -96.6
## + c_avg_inter  1    0.0720 2.34 -95.8
## + c_numyrs     1    0.0420 2.37 -95.3
## + broad_jump   1    0.0413 2.37 -95.3
## + X40          1    0.0307 2.38 -95.1
## + c_avg_tds    1    0.0290 2.38 -95.1
## + shuttle      1    0.0185 2.39 -94.9
## + vert_leap    1    0.0152 2.39 -94.8
## + c_avg_yds    1    0.0134 2.40 -94.8
## - wonderlic    1    0.2490 2.66 -94.8
## + cone         1    0.0075 2.40 -94.7
## + weight       1    0.0055 2.40 -94.7
## + c_rate       1    0.0020 2.41 -94.6
## + height       1    0.0001 2.41 -94.6
## - age          1    0.2826 2.69 -94.3
## 
## Step:  AIC=-97.29
## games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic
## 
##               Df Sum of Sq  RSS   AIC
## <none>                     2.49 -97.3
## + c_pct        1    0.0819 2.41 -96.6
## - wonderlic    1    0.1957 2.69 -96.3
## + broad_jump   1    0.0436 2.45 -96.0
## + X40          1    0.0415 2.45 -95.9
## + c_avg_tds    1    0.0410 2.45 -95.9
## - c_avg_cmpp   1    0.2314 2.72 -95.8
## + c_avg_yds    1    0.0292 2.46 -95.7
## + vert_leap    1    0.0252 2.47 -95.7
## + shuttle      1    0.0225 2.47 -95.6
## + weight       1    0.0115 2.48 -95.5
## - age          1    0.2565 2.75 -95.5
## - c_avg_att    1    0.2566 2.75 -95.5
## + c_numyrs     1    0.0074 2.48 -95.4
## + c_avg_inter  1    0.0021 2.49 -95.3
## + c_rate       1    0.0015 2.49 -95.3
## + height       1    0.0014 2.49 -95.3
## + cone         1    0.0006 2.49 -95.3
# Print coefficient estimates and fit statistics for the selected model.
summary(object = step_reg.log.w_combine.games_started)
## 
## Call:
## lm(formula = games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic, 
##     data = data.log.w_combine.for_games_started)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6625 -0.1827 -0.0189  0.1733  0.4015 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    8.823      2.378    3.71  0.00074 ***
## age           -1.276      0.682   -1.87  0.06997 .  
## c_avg_cmpp     0.757      0.426    1.78  0.08446 .  
## c_avg_att     -0.934      0.499   -1.87  0.06988 .  
## wonderlic     -0.288      0.176   -1.63  0.11138    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.271 on 34 degrees of freedom
## Multiple R-squared: 0.245,   Adjusted R-squared: 0.156 
## F-statistic: 2.76 on 4 and 34 DF,  p-value: 0.0434
# Draw the standard lm diagnostic plots for the selected model.
plot(x = step_reg.log.w_combine.games_started)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over all predictors (regsubsets, leaps package),
# recording the 10 best models of each subset size.
leaps.log.w_combine.games_started <- regsubsets(
  games_started ~ .,
  data = data.log.w_combine.for_games_started,
  nbest = 10
)

# Plot R-squared against subset size (subsets, car package).
# The legend is given a fixed position: by default subsets() asks for the
# legend location with a mouse click, which is not available when the script
# is knitted or run non-interactively and makes the call abort with
# "invalid coordinate lengths".
subsets(
  leaps.log.w_combine.games_started,
  statistic = "rsq",
  legend = "topleft"
)
## Error: invalid coordinate lengths

plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model (cv.lm, DAAG
# package). The data frame and the fitted model are passed by position
# rather than by name.
# NOTE(review): the first formal is `df` in older DAAG releases and appears
# to be `data` in newer ones; positional matching works with either. Confirm
# against the installed DAAG version.
cv.lm(
  data.log.w_combine.for_games_started,
  step_reg.log.w_combine.games_started,
  m = 5
)
## Analysis of Variance Table
## 
## Response: games_started
##            Df Sum Sq Mean Sq F value Pr(>F)  
## age         1  0.324   0.324    4.43  0.043 *
## c_avg_cmpp  1  0.079   0.079    1.07  0.308  
## c_avg_att   1  0.210   0.210    2.87  0.099 .
## wonderlic   1  0.196   0.196    2.67  0.111  
## Residuals  34  2.491   0.073                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                  3     21     24    40      42     52      61
## Predicted     2.34  2.232  2.317 2.267  2.2272  2.204  2.5690
## cvpred        2.38  2.298  2.357 2.322  2.2910  2.269  2.5775
## games_started 2.41  1.960  1.960 2.573  2.2083  1.960  2.4932
## CV residual   0.03 -0.337 -0.397 0.251 -0.0827 -0.309 -0.0843
## 
## Sum of squares = 0.44    Mean square = 0.06    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                   6     18    25    37     43     50     55      63
## Predicted     2.398  2.454 2.619 2.621  2.339  2.433  2.350  2.2334
## cvpred        2.416  2.436 2.643 2.381  2.405  2.490  2.366  2.2568
## games_started 2.779  2.313 2.779 2.779  2.208  2.313  2.092  2.2083
## CV residual   0.363 -0.124 0.136 0.398 -0.197 -0.177 -0.275 -0.0485
## 
## Sum of squares = 0.47    Mean square = 0.06    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                   5     7    16     20     28      32     49     64
## Predicted     2.432  2.41 2.309  2.190 2.6157  2.4276 2.3770  2.584
## cvpred        2.366  2.54 2.350  2.324 2.7081  2.4765 2.3867  2.606
## games_started 2.715  1.96 2.493  1.960 2.7788  2.4069 2.4069  2.407
## CV residual   0.349 -0.58 0.143 -0.364 0.0707 -0.0696 0.0203 -0.199
## 
## Sum of squares = 0.66    Mean square = 0.08    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                   12      13    26    30    38     39     59    65
## Predicted      2.397  2.8623 2.598 2.418 2.351  2.529  2.347 2.481
## cvpred         2.407  2.8597 2.600 2.426 2.330  2.559  2.395 2.437
## games_started  2.208  2.7788 2.779 2.779 2.493  2.208  2.092 2.646
## CV residual   -0.198 -0.0809 0.178 0.353 0.163 -0.351 -0.303 0.209
## 
## Sum of squares = 0.49    Mean square = 0.06    n = 8 
## 
## fold 5 
## Observations in test set: 8 
##                   1      4   15   17     19    27      46    56
## Predicted     2.478  2.623 2.44 2.28  2.455 2.346  2.6193 2.377
## cvpred        2.367  2.818 2.38 2.33  2.545 2.164  2.7362 2.362
## games_started 2.779  1.960 2.57 2.57  2.313 2.715  2.7147 2.779
## CV residual   0.412 -0.858 0.19 0.24 -0.233 0.551 -0.0215 0.417
## 
## Sum of squares = 1.53    Mean square = 0.19    n = 8 
## 
## Overall (Sum over all 8 folds) 
##     ms 
## 0.0922