# Read the QB data set (statistics plus combine measurements) from disk
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as candidate predictors
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable; single-bracket indexing keeps it a one-column
# data frame
games_started <- qb_stats_w_combine["games_started"]

# Build the clean modelling data set: response first, then the predictors.
# Rows containing any NA are dropped, then every column is transformed as
# log(x + 0.1) (the 0.1 offset presumably keeps zero values finite under
# log() -- confirm).
data.log.w_combine.for_games_started <- local({
  complete_rows <- na.omit(cbind(games_started, college_stats))
  data.frame(log(complete_rows + 0.1))
})
# Full linear model: games_started regressed on every other column of the
# log-transformed data set
lm.log.w_combine.games_started <- lm(
  games_started ~ .,
  data = data.log.w_combine.for_games_started
)

# Stepwise model selection by AIC, starting from the full model.
# direction = "both" lets each step either drop a term or add back one that
# was removed earlier.
step_reg.log.w_combine.games_started <- stepAIC(
  lm.log.w_combine.games_started,
  direction = "both"
)
## Start: AIC=-79.37
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - X40 1 0.0000 2.02 -81.4
## - c_avg_yds 1 0.0047 2.03 -81.3
## - height 1 0.0048 2.03 -81.3
## - vert_leap 1 0.0102 2.04 -81.2
## - broad_jump 1 0.0132 2.04 -81.1
## - c_rate 1 0.0137 2.04 -81.1
## - weight 1 0.0143 2.04 -81.1
## - cone 1 0.0389 2.06 -80.6
## - shuttle 1 0.0425 2.07 -80.6
## - c_avg_tds 1 0.0610 2.09 -80.2
## - c_numyrs 1 0.0663 2.09 -80.1
## - c_avg_inter 1 0.0725 2.10 -80.0
## <none> 2.02 -79.4
## - wonderlic 1 0.1455 2.17 -78.7
## - c_avg_att 1 0.1515 2.18 -78.6
## - c_pct 1 0.1665 2.19 -78.3
## - c_avg_cmpp 1 0.1746 2.20 -78.1
## - age 1 0.1755 2.20 -78.1
##
## Step: AIC=-81.37
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.0049 2.03 -83.3
## - c_avg_yds 1 0.0052 2.03 -83.3
## - vert_leap 1 0.0107 2.04 -83.2
## - weight 1 0.0144 2.04 -83.1
## - c_rate 1 0.0149 2.04 -83.1
## - broad_jump 1 0.0166 2.04 -83.0
## - cone 1 0.0389 2.06 -82.6
## - shuttle 1 0.0486 2.07 -82.4
## - c_avg_tds 1 0.0655 2.09 -82.1
## - c_numyrs 1 0.0663 2.09 -82.1
## - c_avg_inter 1 0.0803 2.10 -81.9
## <none> 2.02 -81.4
## - wonderlic 1 0.1481 2.17 -80.6
## - c_avg_att 1 0.1515 2.18 -80.6
## - c_pct 1 0.1686 2.19 -80.2
## - c_avg_cmpp 1 0.1754 2.20 -80.1
## - age 1 0.1777 2.20 -80.1
## + X40 1 0.0000 2.02 -79.4
##
## Step: AIC=-83.27
## games_started ~ weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.0034 2.03 -85.2
## - weight 1 0.0096 2.04 -85.1
## - vert_leap 1 0.0107 2.04 -85.1
## - c_rate 1 0.0116 2.04 -85.0
## - broad_jump 1 0.0171 2.05 -84.9
## - cone 1 0.0342 2.06 -84.6
## - shuttle 1 0.0465 2.08 -84.4
## - c_avg_tds 1 0.0605 2.09 -84.1
## - c_numyrs 1 0.0625 2.09 -84.1
## - c_avg_inter 1 0.0814 2.11 -83.7
## <none> 2.03 -83.3
## - c_avg_att 1 0.1466 2.18 -82.6
## - wonderlic 1 0.1586 2.19 -82.3
## - c_pct 1 0.1652 2.19 -82.2
## - c_avg_cmpp 1 0.1708 2.20 -82.1
## - age 1 0.1728 2.20 -82.1
## + height 1 0.0049 2.02 -81.4
## + X40 1 0.0002 2.03 -81.3
##
## Step: AIC=-85.21
## games_started ~ weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_numyrs + c_avg_att + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.0094 2.04 -87.0
## - vert_leap 1 0.0150 2.05 -86.9
## - broad_jump 1 0.0216 2.06 -86.8
## - cone 1 0.0357 2.07 -86.5
## - shuttle 1 0.0494 2.08 -86.3
## - c_rate 1 0.0519 2.08 -86.2
## - c_numyrs 1 0.0602 2.09 -86.1
## <none> 2.03 -85.2
## - c_avg_tds 1 0.1407 2.17 -84.6
## - c_avg_inter 1 0.1450 2.18 -84.5
## - wonderlic 1 0.1552 2.19 -84.3
## - age 1 0.1697 2.20 -84.1
## - c_pct 1 0.1761 2.21 -84.0
## - c_avg_cmpp 1 0.2104 2.24 -83.4
## + c_avg_yds 1 0.0034 2.03 -83.3
## + height 1 0.0031 2.03 -83.3
## + X40 1 0.0000 2.03 -83.2
## - c_avg_att 1 0.2288 2.26 -83.0
##
## Step: AIC=-87.03
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + wonderlic + cone + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.0111 2.05 -88.8
## - broad_jump 1 0.0262 2.07 -88.5
## - cone 1 0.0330 2.08 -88.4
## - shuttle 1 0.0402 2.08 -88.3
## - c_rate 1 0.0448 2.09 -88.2
## - c_numyrs 1 0.0537 2.10 -88.0
## <none> 2.04 -87.0
## - c_avg_tds 1 0.1314 2.17 -86.6
## - c_avg_inter 1 0.1401 2.18 -86.4
## - wonderlic 1 0.1476 2.19 -86.3
## - age 1 0.1655 2.21 -86.0
## - c_pct 1 0.1680 2.21 -85.9
## - c_avg_cmpp 1 0.2013 2.24 -85.4
## + weight 1 0.0094 2.03 -85.2
## + c_avg_yds 1 0.0032 2.04 -85.1
## - c_avg_att 1 0.2195 2.26 -85.0
## + height 1 0.0004 2.04 -85.0
## + X40 1 0.0000 2.04 -85.0
##
## Step: AIC=-88.82
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + wonderlic + cone + shuttle +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.0152 2.07 -90.5
## - shuttle 1 0.0316 2.08 -90.2
## - cone 1 0.0391 2.09 -90.1
## - c_rate 1 0.0394 2.09 -90.1
## - c_numyrs 1 0.0561 2.11 -89.8
## <none> 2.05 -88.8
## - c_avg_tds 1 0.1235 2.18 -88.5
## - c_avg_inter 1 0.1442 2.20 -88.2
## - c_pct 1 0.1613 2.21 -87.9
## - age 1 0.1675 2.22 -87.8
## - wonderlic 1 0.1679 2.22 -87.8
## - c_avg_cmpp 1 0.1931 2.25 -87.3
## + vert_leap 1 0.0111 2.04 -87.0
## - c_avg_att 1 0.2109 2.26 -87.0
## + c_avg_yds 1 0.0067 2.05 -86.9
## + weight 1 0.0054 2.05 -86.9
## + X40 1 0.0009 2.05 -86.8
## + height 1 0.0001 2.05 -86.8
##
## Step: AIC=-90.53
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + wonderlic + cone + shuttle
##
## Df Sum of Sq RSS AIC
## - cone 1 0.0285 2.10 -92.0
## - shuttle 1 0.0469 2.12 -91.7
## - c_rate 1 0.0566 2.12 -91.5
## - c_numyrs 1 0.0678 2.14 -91.3
## <none> 2.07 -90.5
## - c_avg_inter 1 0.1305 2.20 -90.1
## - c_avg_tds 1 0.1502 2.22 -89.8
## - wonderlic 1 0.1527 2.22 -89.8
## - c_pct 1 0.1564 2.23 -89.7
## - age 1 0.1587 2.23 -89.6
## - c_avg_cmpp 1 0.1888 2.26 -89.1
## - c_avg_att 1 0.2066 2.27 -88.8
## + broad_jump 1 0.0152 2.05 -88.8
## + weight 1 0.0121 2.06 -88.8
## + c_avg_yds 1 0.0079 2.06 -88.7
## + X40 1 0.0029 2.07 -88.6
## + height 1 0.0014 2.07 -88.6
## + vert_leap 1 0.0000 2.07 -88.5
##
## Step: AIC=-92
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + wonderlic + shuttle
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.0201 2.12 -93.6
## - c_rate 1 0.0482 2.15 -93.1
## - c_numyrs 1 0.0578 2.15 -92.9
## - c_avg_inter 1 0.1076 2.21 -92.0
## <none> 2.10 -92.0
## - wonderlic 1 0.1316 2.23 -91.6
## - c_avg_tds 1 0.1345 2.23 -91.6
## - c_pct 1 0.1628 2.26 -91.1
## - age 1 0.1885 2.29 -90.6
## - c_avg_cmpp 1 0.1948 2.29 -90.5
## + cone 1 0.0285 2.07 -90.5
## - c_avg_att 1 0.2110 2.31 -90.3
## + c_avg_yds 1 0.0106 2.09 -90.2
## + weight 1 0.0053 2.09 -90.1
## + broad_jump 1 0.0046 2.09 -90.1
## + height 1 0.0027 2.10 -90.0
## + vert_leap 1 0.0026 2.10 -90.0
## + X40 1 0.0000 2.10 -90.0
##
## Step: AIC=-93.62
## games_started ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.0466 2.16 -94.8
## - c_numyrs 1 0.0668 2.18 -94.4
## - c_avg_inter 1 0.1086 2.23 -93.7
## <none> 2.12 -93.6
## - c_avg_tds 1 0.1262 2.24 -93.4
## - wonderlic 1 0.1573 2.27 -92.8
## - c_pct 1 0.1740 2.29 -92.5
## - age 1 0.1905 2.31 -92.3
## + shuttle 1 0.0201 2.10 -92.0
## - c_avg_cmpp 1 0.2076 2.33 -92.0
## + broad_jump 1 0.0165 2.10 -91.9
## + c_avg_yds 1 0.0108 2.11 -91.8
## + X40 1 0.0098 2.11 -91.8
## - c_avg_att 1 0.2238 2.34 -91.7
## + vert_leap 1 0.0019 2.12 -91.7
## + cone 1 0.0017 2.12 -91.7
## + weight 1 0.0005 2.12 -91.6
## + height 1 0.0000 2.12 -91.6
##
## Step: AIC=-94.77
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_numyrs + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.0820 2.25 -95.3
## - c_avg_tds 1 0.0946 2.26 -95.1
## <none> 2.16 -94.8
## - wonderlic 1 0.1253 2.29 -94.6
## - age 1 0.1786 2.34 -93.7
## - c_avg_inter 1 0.1790 2.34 -93.7
## + c_rate 1 0.0466 2.12 -93.6
## + c_avg_yds 1 0.0356 2.13 -93.4
## + broad_jump 1 0.0307 2.13 -93.3
## + shuttle 1 0.0185 2.15 -93.1
## + X40 1 0.0150 2.15 -93.0
## + vert_leap 1 0.0083 2.16 -92.9
## + cone 1 0.0006 2.16 -92.8
## + height 1 0.0003 2.16 -92.8
## + weight 1 0.0001 2.16 -92.8
## - c_pct 1 0.2623 2.43 -92.3
## - c_avg_cmpp 1 0.2742 2.44 -92.1
## - c_avg_att 1 0.2859 2.45 -91.9
##
## Step: AIC=-95.32
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.0910 2.34 -95.8
## - wonderlic 1 0.1124 2.36 -95.4
## <none> 2.25 -95.3
## - c_avg_inter 1 0.1339 2.38 -95.1
## + c_numyrs 1 0.0820 2.16 -94.8
## + c_rate 1 0.0618 2.18 -94.4
## + c_avg_yds 1 0.0580 2.19 -94.3
## + broad_jump 1 0.0578 2.19 -94.3
## - age 1 0.1818 2.43 -94.3
## - c_pct 1 0.1857 2.43 -94.2
## - c_avg_cmpp 1 0.1970 2.44 -94.0
## + X40 1 0.0338 2.21 -93.9
## - c_avg_att 1 0.2080 2.45 -93.9
## + shuttle 1 0.0280 2.22 -93.8
## + vert_leap 1 0.0198 2.23 -93.7
## + weight 1 0.0010 2.25 -93.3
## + cone 1 0.0008 2.25 -93.3
## + height 1 0.0000 2.25 -93.3
##
## Step: AIC=-95.77
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_att +
## wonderlic
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.0720 2.41 -96.6
## <none> 2.34 -95.8
## + c_avg_tds 1 0.0910 2.25 -95.3
## - c_pct 1 0.1517 2.49 -95.3
## + c_numyrs 1 0.0784 2.26 -95.1
## - c_avg_cmpp 1 0.1684 2.50 -95.1
## - c_avg_att 1 0.1725 2.51 -95.0
## + broad_jump 1 0.0541 2.28 -94.7
## - wonderlic 1 0.1987 2.54 -94.6
## + X40 1 0.0218 2.31 -94.1
## + vert_leap 1 0.0177 2.32 -94.1
## + shuttle 1 0.0166 2.32 -94.1
## + c_rate 1 0.0096 2.33 -93.9
## + weight 1 0.0030 2.33 -93.8
## + cone 1 0.0006 2.34 -93.8
## + c_avg_yds 1 0.0005 2.34 -93.8
## + height 1 0.0000 2.34 -93.8
## - age 1 0.2637 2.60 -93.6
##
## Step: AIC=-96.59
## games_started ~ age + c_avg_cmpp + c_pct + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.0819 2.49 -97.3
## - c_avg_cmpp 1 0.0968 2.51 -97.1
## - c_avg_att 1 0.1006 2.51 -97.0
## <none> 2.41 -96.6
## + c_avg_inter 1 0.0720 2.34 -95.8
## + c_numyrs 1 0.0420 2.37 -95.3
## + broad_jump 1 0.0413 2.37 -95.3
## + X40 1 0.0307 2.38 -95.1
## + c_avg_tds 1 0.0290 2.38 -95.1
## + shuttle 1 0.0185 2.39 -94.9
## + vert_leap 1 0.0152 2.39 -94.8
## + c_avg_yds 1 0.0134 2.40 -94.8
## - wonderlic 1 0.2490 2.66 -94.8
## + cone 1 0.0075 2.40 -94.7
## + weight 1 0.0055 2.40 -94.7
## + c_rate 1 0.0020 2.41 -94.6
## + height 1 0.0001 2.41 -94.6
## - age 1 0.2826 2.69 -94.3
##
## Step: AIC=-97.29
## games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## <none> 2.49 -97.3
## + c_pct 1 0.0819 2.41 -96.6
## - wonderlic 1 0.1957 2.69 -96.3
## + broad_jump 1 0.0436 2.45 -96.0
## + X40 1 0.0415 2.45 -95.9
## + c_avg_tds 1 0.0410 2.45 -95.9
## - c_avg_cmpp 1 0.2314 2.72 -95.8
## + c_avg_yds 1 0.0292 2.46 -95.7
## + vert_leap 1 0.0252 2.47 -95.7
## + shuttle 1 0.0225 2.47 -95.6
## + weight 1 0.0115 2.48 -95.5
## - age 1 0.2565 2.75 -95.5
## - c_avg_att 1 0.2566 2.75 -95.5
## + c_numyrs 1 0.0074 2.48 -95.4
## + c_avg_inter 1 0.0021 2.49 -95.3
## + c_rate 1 0.0015 2.49 -95.3
## + height 1 0.0014 2.49 -95.3
## + cone 1 0.0006 2.49 -95.3
# Print the coefficient table and fit statistics of the stepwise-selected model
summary(step_reg.log.w_combine.games_started)
##
## Call:
## lm(formula = games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic,
## data = data.log.w_combine.for_games_started)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6625 -0.1827 -0.0189 0.1733 0.4015
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.823 2.378 3.71 0.00074 ***
## age -1.276 0.682 -1.87 0.06997 .
## c_avg_cmpp 0.757 0.426 1.78 0.08446 .
## c_avg_att -0.934 0.499 -1.87 0.06988 .
## wonderlic -0.288 0.176 -1.63 0.11138
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.271 on 34 degrees of freedom
## Multiple R-squared: 0.245, Adjusted R-squared: 0.156
## F-statistic: 2.76 on 4 and 34 DF, p-value: 0.0434
# Diagnostic plots for the stepwise-selected model
plot(step_reg.log.w_combine.games_started)

# Best-subsets search over all predictors, keeping up to 10 models for each
# subset size
leaps.log.w_combine.games_started <- regsubsets(
  games_started ~ .,
  data = data.log.w_combine.for_games_started,
  nbest = 10
)

# Plot R-squared against subset size for the retained models.
# By default subsets() positions its legend interactively with the mouse,
# which is not possible when this script is rendered non-interactively; the
# call previously aborted with "invalid coordinate lengths". legend = FALSE
# suppresses the on-plot legend so the plot can be drawn without user input.
subsets(
  leaps.log.w_combine.games_started,
  statistic = "rsq",
  legend = FALSE
)
# 5-fold cross-validation of the stepwise-selected model on the modelling data
cv.lm(
  df = data.log.w_combine.for_games_started,
  form.lm = step_reg.log.w_combine.games_started,
  m = 5
)
## Analysis of Variance Table
##
## Response: games_started
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 0.324 0.324 4.43 0.043 *
## c_avg_cmpp 1 0.079 0.079 1.07 0.308
## c_avg_att 1 0.210 0.210 2.87 0.099 .
## wonderlic 1 0.196 0.196 2.67 0.111
## Residuals 34 2.491 0.073
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 3 21 24 40 42 52 61
## Predicted 2.34 2.232 2.317 2.267 2.2272 2.204 2.5690
## cvpred 2.38 2.298 2.357 2.322 2.2910 2.269 2.5775
## games_started 2.41 1.960 1.960 2.573 2.2083 1.960 2.4932
## CV residual 0.03 -0.337 -0.397 0.251 -0.0827 -0.309 -0.0843
##
## Sum of squares = 0.44 Mean square = 0.06 n = 7
##
## fold 2
## Observations in test set: 8
## 6 18 25 37 43 50 55 63
## Predicted 2.398 2.454 2.619 2.621 2.339 2.433 2.350 2.2334
## cvpred 2.416 2.436 2.643 2.381 2.405 2.490 2.366 2.2568
## games_started 2.779 2.313 2.779 2.779 2.208 2.313 2.092 2.2083
## CV residual 0.363 -0.124 0.136 0.398 -0.197 -0.177 -0.275 -0.0485
##
## Sum of squares = 0.47 Mean square = 0.06 n = 8
##
## fold 3
## Observations in test set: 8
## 5 7 16 20 28 32 49 64
## Predicted 2.432 2.41 2.309 2.190 2.6157 2.4276 2.3770 2.584
## cvpred 2.366 2.54 2.350 2.324 2.7081 2.4765 2.3867 2.606
## games_started 2.715 1.96 2.493 1.960 2.7788 2.4069 2.4069 2.407
## CV residual 0.349 -0.58 0.143 -0.364 0.0707 -0.0696 0.0203 -0.199
##
## Sum of squares = 0.66 Mean square = 0.08 n = 8
##
## fold 4
## Observations in test set: 8
## 12 13 26 30 38 39 59 65
## Predicted 2.397 2.8623 2.598 2.418 2.351 2.529 2.347 2.481
## cvpred 2.407 2.8597 2.600 2.426 2.330 2.559 2.395 2.437
## games_started 2.208 2.7788 2.779 2.779 2.493 2.208 2.092 2.646
## CV residual -0.198 -0.0809 0.178 0.353 0.163 -0.351 -0.303 0.209
##
## Sum of squares = 0.49 Mean square = 0.06 n = 8
##
## fold 5
## Observations in test set: 8
## 1 4 15 17 19 27 46 56
## Predicted 2.478 2.623 2.44 2.28 2.455 2.346 2.6193 2.377
## cvpred 2.367 2.818 2.38 2.33 2.545 2.164 2.7362 2.362
## games_started 2.779 1.960 2.57 2.57 2.313 2.715 2.7147 2.779
## CV residual 0.412 -0.858 0.19 0.24 -0.233 0.551 -0.0215 0.417
##
## Sum of squares = 1.53 Mean square = 0.19 n = 8
##
## Overall (Sum over all 8 folds)
## ms
## 0.0922