# Read the input table
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as predictors (order matters: it fixes the column order of the
# model frame and therefore the order of terms in the stepwise trace)
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable; single-bracket indexing keeps it a one-column
# data frame, so the column is still called "tds" after cbind()
tds <- qb_stats_w_combine["tds"]

# Generate the clean data set: drop every row with a missing value, then
# log-transform all columns (response included) after adding 0.1
data.log.w_combine.for_tds <- na.omit(cbind(tds, college_stats))
data.log.w_combine.for_tds <- data.frame(log(data.log.w_combine.for_tds + 0.1))

# Fit the full linear model: tds against every remaining column
lm.log.w_combine.tds <- lm(
  formula = tds ~ .,
  data = data.log.w_combine.for_tds
)

# Stepwise AIC search starting from the full model; terms may be dropped or
# re-added at each step
step_reg.log.w_combine.tds <- stepAIC(
  lm.log.w_combine.tds,
  direction = "both"
)
## Start: AIC=-19.68
## tds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - X40 1 0.000 8.78 -21.7
## - age 1 0.001 8.78 -21.7
## - cone 1 0.040 8.82 -21.5
## - shuttle 1 0.065 8.84 -21.4
## - broad_jump 1 0.119 8.90 -21.2
## - height 1 0.166 8.95 -21.0
## - vert_leap 1 0.186 8.97 -20.9
## - c_avg_yds 1 0.192 8.97 -20.9
## - c_rate 1 0.215 8.99 -20.8
## - c_avg_inter 1 0.256 9.04 -20.6
## - weight 1 0.268 9.05 -20.5
## - c_avg_tds 1 0.269 9.05 -20.5
## <none> 8.78 -19.7
## - wonderlic 1 0.514 9.29 -19.5
## - c_numyrs 1 1.073 9.85 -17.3
## - c_avg_att 1 1.530 10.31 -15.6
## - c_avg_cmpp 1 1.743 10.52 -14.8
## - c_pct 1 1.804 10.58 -14.6
##
## Step: AIC=-21.68
## tds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - age 1 0.001 8.78 -23.7
## - cone 1 0.040 8.82 -23.5
## - shuttle 1 0.072 8.85 -23.4
## - broad_jump 1 0.159 8.94 -23.0
## - height 1 0.180 8.96 -22.9
## - vert_leap 1 0.191 8.97 -22.9
## - c_avg_yds 1 0.204 8.98 -22.8
## - c_rate 1 0.228 9.01 -22.7
## - weight 1 0.274 9.05 -22.5
## - c_avg_inter 1 0.286 9.06 -22.5
## - c_avg_tds 1 0.287 9.07 -22.5
## <none> 8.78 -21.7
## - wonderlic 1 0.522 9.30 -21.5
## + X40 1 0.000 8.78 -19.7
## - c_numyrs 1 1.073 9.85 -19.3
## - c_avg_att 1 1.530 10.31 -17.6
## - c_avg_cmpp 1 1.752 10.53 -16.8
## - c_pct 1 1.826 10.61 -16.5
##
## Step: AIC=-23.67
## tds ~ height + weight + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.041 8.82 -25.5
## - shuttle 1 0.072 8.85 -25.4
## - broad_jump 1 0.159 8.94 -25.0
## - height 1 0.187 8.97 -24.9
## - vert_leap 1 0.192 8.97 -24.9
## - c_avg_yds 1 0.203 8.98 -24.8
## - c_rate 1 0.228 9.01 -24.7
## - c_avg_tds 1 0.286 9.07 -24.4
## - c_avg_inter 1 0.300 9.08 -24.4
## - weight 1 0.309 9.09 -24.4
## <none> 8.78 -23.7
## - wonderlic 1 0.606 9.39 -23.1
## + age 1 0.001 8.78 -21.7
## + X40 1 0.000 8.78 -21.7
## - c_numyrs 1 1.073 9.85 -21.3
## - c_avg_att 1 1.545 10.32 -19.5
## - c_avg_cmpp 1 1.767 10.55 -18.7
## - c_pct 1 1.835 10.62 -18.5
##
## Step: AIC=-25.49
## tds ~ height + weight + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.034 8.86 -27.4
## - broad_jump 1 0.167 8.99 -26.8
## - vert_leap 1 0.175 9.00 -26.8
## - c_avg_yds 1 0.237 9.06 -26.5
## - c_rate 1 0.256 9.08 -26.4
## - c_avg_inter 1 0.259 9.08 -26.4
## - c_avg_tds 1 0.291 9.11 -26.3
## - height 1 0.303 9.12 -26.2
## <none> 8.82 -25.5
## - wonderlic 1 0.568 9.39 -25.1
## - weight 1 0.571 9.39 -25.1
## + cone 1 0.041 8.78 -23.7
## + age 1 0.002 8.82 -23.5
## + X40 1 0.000 8.82 -23.5
## - c_numyrs 1 1.034 9.86 -23.3
## - c_avg_att 1 1.687 10.51 -20.9
## - c_avg_cmpp 1 1.930 10.75 -20.0
## - c_pct 1 1.996 10.82 -19.7
##
## Step: AIC=-27.35
## tds ~ height + weight + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.147 9.00 -28.7
## - c_avg_inter 1 0.247 9.10 -28.3
## - vert_leap 1 0.285 9.14 -28.1
## - c_avg_yds 1 0.287 9.14 -28.1
## - c_rate 1 0.291 9.15 -28.1
## - c_avg_tds 1 0.292 9.15 -28.1
## - height 1 0.304 9.16 -28.1
## <none> 8.86 -27.4
## - wonderlic 1 0.801 9.66 -26.1
## - weight 1 0.929 9.78 -25.6
## + shuttle 1 0.034 8.82 -25.5
## + X40 1 0.005 8.85 -25.4
## + cone 1 0.003 8.85 -25.4
## + age 1 0.001 8.85 -25.4
## - c_numyrs 1 1.018 9.87 -25.2
## - c_avg_att 1 1.816 10.67 -22.3
## - c_avg_cmpp 1 2.055 10.91 -21.4
## - c_pct 1 2.110 10.97 -21.2
##
## Step: AIC=-28.72
## tds ~ height + weight + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## vert_leap
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.140 9.14 -30.1
## - c_avg_yds 1 0.219 9.22 -29.8
## - c_avg_tds 1 0.220 9.22 -29.8
## - c_rate 1 0.220 9.22 -29.8
## - c_avg_inter 1 0.341 9.34 -29.3
## - height 1 0.345 9.35 -29.3
## <none> 9.00 -28.7
## - wonderlic 1 0.786 9.79 -27.5
## + broad_jump 1 0.147 8.86 -27.4
## - c_numyrs 1 0.888 9.89 -27.1
## + X40 1 0.019 8.98 -26.8
## + shuttle 1 0.014 8.99 -26.8
## + cone 1 0.012 8.99 -26.8
## + age 1 0.001 9.00 -26.7
## - weight 1 1.091 10.09 -26.4
## - c_avg_att 1 1.756 10.76 -23.9
## - c_avg_cmpp 1 2.029 11.03 -23.0
## - c_pct 1 2.122 11.12 -22.7
##
## Step: AIC=-30.13
## tds ~ height + weight + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.207 9.35 -31.3
## - c_rate 1 0.228 9.37 -31.2
## - c_avg_tds 1 0.246 9.39 -31.1
## - height 1 0.293 9.44 -30.9
## - c_avg_inter 1 0.331 9.47 -30.8
## <none> 9.14 -30.1
## - wonderlic 1 0.811 9.95 -28.9
## + vert_leap 1 0.140 9.00 -28.7
## + shuttle 1 0.111 9.03 -28.6
## + X40 1 0.032 9.11 -28.3
## - c_numyrs 1 0.991 10.13 -28.2
## + cone 1 0.016 9.13 -28.2
## + broad_jump 1 0.002 9.14 -28.1
## + age 1 0.001 9.14 -28.1
## - weight 1 1.025 10.17 -28.1
## - c_avg_att 1 1.840 10.98 -25.2
## - c_avg_cmpp 1 2.146 11.29 -24.1
## - c_pct 1 2.231 11.37 -23.8
##
## Step: AIC=-31.28
## tds ~ height + weight + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.025 9.37 -33.2
## - c_avg_tds 1 0.040 9.39 -33.1
## - height 1 0.377 9.73 -31.8
## <none> 9.35 -31.3
## - wonderlic 1 0.672 10.02 -30.6
## - c_numyrs 1 0.802 10.15 -30.2
## + c_avg_yds 1 0.207 9.14 -30.1
## + shuttle 1 0.161 9.19 -29.9
## + vert_leap 1 0.128 9.22 -29.8
## - c_avg_inter 1 0.961 10.31 -29.6
## + X40 1 0.026 9.32 -29.4
## + cone 1 0.016 9.33 -29.3
## - weight 1 1.026 10.38 -29.3
## + broad_jump 1 0.008 9.34 -29.3
## + age 1 0.002 9.35 -29.3
## - c_pct 1 2.098 11.45 -25.6
## - c_avg_cmpp 1 2.236 11.59 -25.1
## - c_avg_att 1 2.263 11.61 -25.0
##
## Step: AIC=-33.18
## tds ~ height + weight + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_numyrs + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.016 9.39 -35.1
## - height 1 0.407 9.78 -33.6
## <none> 9.37 -33.2
## - wonderlic 1 0.653 10.03 -32.6
## - c_numyrs 1 0.847 10.22 -31.9
## + shuttle 1 0.161 9.21 -31.8
## + vert_leap 1 0.150 9.22 -31.8
## + X40 1 0.035 9.34 -31.3
## + c_rate 1 0.025 9.35 -31.3
## + cone 1 0.021 9.35 -31.3
## + broad_jump 1 0.017 9.36 -31.3
## + c_avg_yds 1 0.004 9.37 -31.2
## + age 1 0.003 9.37 -31.2
## - weight 1 1.058 10.43 -31.1
## - c_avg_inter 1 1.157 10.53 -30.8
## - c_pct 1 2.458 11.83 -26.3
## - c_avg_att 1 2.461 11.84 -26.3
## - c_avg_cmpp 1 2.465 11.84 -26.3
##
## Step: AIC=-35.12
## tds ~ height + weight + c_avg_cmpp + c_pct + c_avg_inter + c_numyrs +
## c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - height 1 0.433 9.82 -35.4
## <none> 9.39 -35.1
## - wonderlic 1 0.710 10.10 -34.3
## - c_numyrs 1 0.836 10.23 -33.9
## + vert_leap 1 0.146 9.24 -33.7
## + shuttle 1 0.131 9.26 -33.7
## + X40 1 0.028 9.36 -33.2
## + cone 1 0.021 9.37 -33.2
## + broad_jump 1 0.016 9.37 -33.2
## + c_avg_tds 1 0.016 9.37 -33.2
## + age 1 0.001 9.39 -33.1
## + c_avg_yds 1 0.001 9.39 -33.1
## + c_rate 1 0.000 9.39 -33.1
## - weight 1 1.181 10.57 -32.6
## - c_avg_inter 1 1.235 10.62 -32.4
## - c_avg_cmpp 1 2.454 11.84 -28.3
## - c_pct 1 2.455 11.84 -28.3
## - c_avg_att 1 2.458 11.85 -28.3
##
## Step: AIC=-35.41
## tds ~ weight + c_avg_cmpp + c_pct + c_avg_inter + c_numyrs +
## c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## <none> 9.82 -35.4
## + height 1 0.433 9.39 -35.1
## - weight 1 0.763 10.59 -34.6
## - c_numyrs 1 0.895 10.72 -34.1
## + vert_leap 1 0.095 9.73 -33.8
## + shuttle 1 0.086 9.74 -33.7
## + c_avg_tds 1 0.041 9.78 -33.6
## + cone 1 0.006 9.82 -33.4
## + age 1 0.004 9.82 -33.4
## + broad_jump 1 0.003 9.82 -33.4
## + c_rate 1 0.001 9.82 -33.4
## + X40 1 0.001 9.82 -33.4
## + c_avg_yds 1 0.001 9.82 -33.4
## - wonderlic 1 1.234 11.06 -32.9
## - c_avg_inter 1 1.277 11.10 -32.8
## - c_pct 1 2.364 12.19 -29.2
## - c_avg_cmpp 1 2.383 12.21 -29.2
## - c_avg_att 1 2.391 12.21 -29.1
# Coefficient table and fit statistics for the model chosen by the stepwise search
summary(object = step_reg.log.w_combine.tds)
##
## Call:
## lm(formula = tds ~ weight + c_avg_cmpp + c_pct + c_avg_inter +
## c_numyrs + c_avg_att + wonderlic, data = data.log.w_combine.for_tds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.297 -0.295 -0.067 0.347 1.183
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 284.429 111.167 2.56 0.016 *
## weight 2.992 1.960 1.53 0.137
## c_avg_cmpp 66.302 24.578 2.70 0.011 *
## c_pct -63.631 23.680 -2.69 0.012 *
## c_avg_inter 0.992 0.503 1.97 0.058 .
## c_numyrs 0.862 0.521 1.65 0.109
## c_avg_att -67.085 24.828 -2.70 0.011 *
## wonderlic -0.728 0.375 -1.94 0.062 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.572 on 30 degrees of freedom
## Multiple R-squared: 0.314, Adjusted R-squared: 0.154
## F-statistic: 1.96 on 7 and 30 DF, p-value: 0.0937
# Diagnostic plots for the selected model (this call emitted the
# "NaNs produced" warnings recorded below)
plot(x = step_reg.log.w_combine.tds)
## Warning: NaNs produced
## Warning: NaNs produced
# Search for the best predictor subsets, keeping up to 10 candidate models
# per subset size
leaps.log.w_combine.tds <- regsubsets(tds ~ ., data = data.log.w_combine.for_tds,
    nbest = 10)
# Plot R-squared against subset size. The legend position is given explicitly:
# by default subsets() asks for the legend location with the mouse, which has
# no coordinates to use in a non-interactive render and previously aborted
# this call with "Error: invalid coordinate lengths".
subsets(leaps.log.w_combine.tds, statistic = "rsq", legend = "topleft")
# 5-fold cross-validation of the stepwise-selected model on the same
# log-transformed data it was fitted to
cv.lm(
  df = data.log.w_combine.for_tds,
  form.lm = step_reg.log.w_combine.tds,
  m = 5
)
## Analysis of Variance Table
##
## Response: tds
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 1.01 1.006 3.07 0.090 .
## c_avg_cmpp 1 0.06 0.062 0.19 0.666
## c_pct 1 0.03 0.030 0.09 0.765
## c_avg_inter 1 0.14 0.141 0.43 0.516
## c_numyrs 1 0.12 0.120 0.37 0.549
## c_avg_att 1 1.91 1.910 5.83 0.022 *
## wonderlic 1 1.23 1.234 3.77 0.062 .
## Residuals 30 9.82 0.327
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 6 21 26 32 38 46 59
## Predicted 2.282 2.049 2.446 2.728 2.001 2.563 2.364
## cvpred 2.184 1.726 2.441 2.692 1.767 2.695 2.758
## tds 3.001 2.313 2.208 2.313 2.493 2.493 2.407
## CV residual 0.816 0.587 -0.233 -0.379 0.727 -0.202 -0.351
##
## Sum of squares = 1.9 Mean square = 0.27 n = 7
##
## fold 2
## Observations in test set: 8
## 7 18 20 27 37 43 55 65
## Predicted 2.420181 2.819 2.313 2.795 2.71 2.177 2.048 2.468
## cvpred 2.207996 3.022 2.480 2.703 22.16 1.752 2.098 2.430
## tds 2.208274 2.313 1.808 3.371 2.65 1.960 1.629 2.896
## CV residual 0.000278 -0.709 -0.671 0.668 -19.51 0.208 -0.469 0.466
##
## Sum of squares = 382 Mean square = 47.8 n = 8
##
## fold 3
## Observations in test set: 8
## 5 12 13 16 39 40 50 56
## Predicted 2.32 2.323 2.773 2.427 2.6119 2.240 2.308 2.08
## cvpred 1.98 2.307 2.612 2.164 2.3683 2.214 2.082 1.96
## tds 3.14 2.092 3.049 3.096 2.3125 2.407 2.573 3.26
## CV residual 1.16 -0.215 0.437 0.932 -0.0557 0.193 0.491 1.30
##
## Sum of squares = 4.43 Mean square = 0.55 n = 8
##
## fold 4
## Observations in test set: 8
## 4 15 17 19 28 52 63 64
## Predicted 1.3922 2.777 2.22 2.593 3.116 2.541 2.1600 2.652
## cvpred 2.1257 2.702 2.48 2.679 2.884 2.455 2.3647 2.793
## tds 0.0953 2.493 2.31 2.573 3.049 1.629 2.3125 2.493
## CV residual -2.0304 -0.209 -0.17 -0.106 0.165 -0.826 -0.0522 -0.299
##
## Sum of squares = 5.01 Mean square = 0.63 n = 8
##
## fold 5
## Observations in test set: 7
## 1 3 24 30 42 49 61
## Predicted 2.562 1.74 2.683 2.4681 2.285 2.777 1.666
## cvpred 2.334 1.48 3.043 2.7544 2.516 3.108 -0.116
## tds 3.262 1.13 2.092 2.8391 2.208 2.208 2.208
## CV residual 0.928 -0.35 -0.951 0.0847 -0.308 -0.899 2.324
##
## Sum of squares = 8.2 Mean square = 1.17 n = 7
##
## Overall (Sum over all 7 folds)
## ms
## 10.6