# Fetch data
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as predictors in the full model
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable. Single-bracket indexing keeps it as a one-column
# data frame, so cbind() below carries the column name "ints" through.
ints <- qb_stats_w_combine["ints"]

# Generate the clean data set:
#   1. bind response and predictors and drop every row that has any NA;
#   2. add a small constant and take the natural log of EVERY column,
#      the response included.
# The offset keeps log() finite for entries that are exactly 0.
# NOTE(review): the value 0.1 is not justified anywhere visible here, and a
# column with values <= -0.1 would produce NaN -- confirm against the data.
log_offset <- 0.1
complete_rows <- na.omit(cbind(ints, college_stats))
data.log.w_combine.for_ints <- data.frame(log(complete_rows + log_offset))

# Generate the full linear model: log response on all log predictors.
# The call is spelled with the data set's global name on purpose: stepAIC()
# refits by re-evaluating this call, and summary() prints it verbatim.
lm.log.w_combine.ints <- lm(formula = ints ~ ., data = data.log.w_combine.for_ints)

# Find the optimum linear regression model for ints by stepwise AIC search,
# allowing terms to be both dropped and re-added at each step. The search
# trace is printed (default behaviour) and is what is recorded below.
# Assumes stepAIC (package MASS) is already attached earlier in the file.
step_reg.log.w_combine.ints <- stepAIC(lm.log.w_combine.ints, direction = "both")
## Start: AIC=-53.61
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.027 3.95 -55.3
## - cone 1 0.029 3.95 -55.3
## - height 1 0.036 3.96 -55.3
## - shuttle 1 0.039 3.96 -55.2
## - c_avg_inter 1 0.044 3.96 -55.2
## - X40 1 0.052 3.97 -55.1
## - c_avg_yds 1 0.055 3.97 -55.1
## - weight 1 0.063 3.98 -55.0
## - c_rate 1 0.076 4.00 -54.9
## - wonderlic 1 0.082 4.00 -54.8
## - c_avg_tds 1 0.092 4.01 -54.7
## - broad_jump 1 0.095 4.01 -54.7
## - c_pct 1 0.154 4.07 -54.1
## - c_avg_cmpp 1 0.191 4.11 -53.7
## - c_avg_att 1 0.194 4.11 -53.7
## <none> 3.92 -53.6
## - c_numyrs 1 0.269 4.19 -53.0
## - age 1 2.291 6.21 -37.7
##
## Step: AIC=-55.34
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.033 3.98 -57.0
## - cone 1 0.035 3.98 -57.0
## - c_avg_yds 1 0.042 3.99 -56.9
## - c_avg_inter 1 0.048 3.99 -56.9
## - shuttle 1 0.061 4.01 -56.7
## - c_rate 1 0.065 4.01 -56.7
## - X40 1 0.067 4.01 -56.7
## - broad_jump 1 0.069 4.02 -56.7
## - wonderlic 1 0.071 4.02 -56.6
## - weight 1 0.074 4.02 -56.6
## - c_avg_tds 1 0.085 4.03 -56.5
## - c_pct 1 0.152 4.10 -55.9
## - c_avg_att 1 0.185 4.13 -55.6
## - c_avg_cmpp 1 0.186 4.13 -55.5
## <none> 3.95 -55.3
## - c_numyrs 1 0.255 4.20 -54.9
## + vert_leap 1 0.027 3.92 -53.6
## - age 1 2.319 6.27 -39.3
##
## Step: AIC=-57.01
## ints ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.034 4.01 -58.7
## - weight 1 0.040 4.02 -58.6
## - c_avg_inter 1 0.043 4.02 -58.6
## - wonderlic 1 0.044 4.02 -58.6
## - shuttle 1 0.047 4.03 -58.6
## - c_rate 1 0.052 4.03 -58.5
## - cone 1 0.059 4.04 -58.4
## - c_avg_tds 1 0.068 4.05 -58.3
## - broad_jump 1 0.086 4.07 -58.2
## - X40 1 0.103 4.08 -58.0
## - c_pct 1 0.139 4.12 -57.7
## - c_avg_att 1 0.164 4.14 -57.4
## - c_avg_cmpp 1 0.167 4.15 -57.4
## <none> 3.98 -57.0
## - c_numyrs 1 0.235 4.21 -56.8
## + height 1 0.033 3.95 -55.3
## + vert_leap 1 0.025 3.96 -55.3
## - age 1 2.286 6.27 -41.3
##
## Step: AIC=-58.68
## ints ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone +
## shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - wonderlic 1 0.032 4.05 -60.4
## - weight 1 0.035 4.05 -60.3
## - c_avg_tds 1 0.042 4.06 -60.3
## - c_rate 1 0.050 4.06 -60.2
## - cone 1 0.054 4.07 -60.2
## - shuttle 1 0.060 4.07 -60.1
## - broad_jump 1 0.064 4.08 -60.1
## - X40 1 0.076 4.09 -59.9
## - c_pct 1 0.107 4.12 -59.6
## - c_avg_cmpp 1 0.135 4.15 -59.4
## - c_avg_att 1 0.146 4.16 -59.3
## - c_avg_inter 1 0.156 4.17 -59.2
## - c_numyrs 1 0.201 4.22 -58.8
## <none> 4.01 -58.7
## + c_avg_yds 1 0.034 3.98 -57.0
## + height 1 0.025 3.99 -56.9
## + vert_leap 1 0.014 4.00 -56.8
## - age 1 2.253 6.27 -43.3
##
## Step: AIC=-60.37
## ints ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + X40 + cone + shuttle +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.026 4.07 -62.1
## - c_rate 1 0.038 4.08 -62.0
## - c_avg_tds 1 0.039 4.08 -62.0
## - X40 1 0.057 4.10 -61.8
## - broad_jump 1 0.066 4.11 -61.7
## - shuttle 1 0.069 4.11 -61.7
## - cone 1 0.092 4.14 -61.5
## - c_pct 1 0.112 4.16 -61.3
## - c_avg_cmpp 1 0.137 4.18 -61.1
## - c_avg_att 1 0.148 4.19 -61.0
## - c_avg_inter 1 0.185 4.23 -60.6
## - c_numyrs 1 0.195 4.24 -60.5
## <none> 4.05 -60.4
## + wonderlic 1 0.032 4.01 -58.7
## + c_avg_yds 1 0.022 4.02 -58.6
## + vert_leap 1 0.010 4.04 -58.5
## + height 1 0.005 4.04 -58.4
## - age 1 2.455 6.50 -43.9
##
## Step: AIC=-62.12
## ints ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + c_avg_tds +
## c_numyrs + c_avg_att + X40 + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.027 4.10 -63.9
## - c_rate 1 0.029 4.10 -63.8
## - shuttle 1 0.045 4.12 -63.7
## - broad_jump 1 0.050 4.12 -63.6
## - X40 1 0.063 4.13 -63.5
## - cone 1 0.098 4.17 -63.2
## - c_pct 1 0.099 4.17 -63.2
## - c_avg_cmpp 1 0.122 4.19 -63.0
## - c_avg_att 1 0.132 4.20 -62.9
## - c_avg_inter 1 0.171 4.24 -62.5
## - c_numyrs 1 0.176 4.25 -62.5
## <none> 4.07 -62.1
## + weight 1 0.026 4.05 -60.4
## + wonderlic 1 0.023 4.05 -60.3
## + c_avg_yds 1 0.020 4.05 -60.3
## + vert_leap 1 0.018 4.05 -60.3
## + height 1 0.002 4.07 -60.1
## - age 1 2.610 6.68 -44.8
##
## Step: AIC=-63.86
## ints ~ age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + c_numyrs +
## c_avg_att + X40 + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.004 4.10 -65.8
## - shuttle 1 0.036 4.13 -65.5
## - broad_jump 1 0.037 4.14 -65.5
## - X40 1 0.056 4.15 -65.3
## - cone 1 0.108 4.21 -64.9
## - c_pct 1 0.128 4.23 -64.7
## - c_avg_cmpp 1 0.145 4.24 -64.5
## - c_avg_att 1 0.151 4.25 -64.5
## - c_avg_inter 1 0.182 4.28 -64.2
## - c_numyrs 1 0.182 4.28 -64.2
## <none> 4.10 -63.9
## + c_avg_tds 1 0.027 4.07 -62.1
## + vert_leap 1 0.025 4.07 -62.1
## + wonderlic 1 0.023 4.08 -62.1
## + weight 1 0.014 4.08 -62.0
## + c_avg_yds 1 0.005 4.09 -61.9
## + height 1 0.001 4.10 -61.9
## - age 1 2.724 6.82 -46.0
##
## Step: AIC=-65.83
## ints ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_numyrs + c_avg_att +
## X40 + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.035 4.14 -67.5
## - broad_jump 1 0.035 4.14 -67.5
## - X40 1 0.062 4.16 -67.2
## - cone 1 0.104 4.21 -66.9
## - c_pct 1 0.149 4.25 -66.4
## - c_avg_cmpp 1 0.161 4.26 -66.3
## - c_avg_att 1 0.168 4.27 -66.3
## - c_numyrs 1 0.185 4.29 -66.1
## <none> 4.10 -65.8
## - c_avg_inter 1 0.253 4.36 -65.5
## + vert_leap 1 0.027 4.08 -64.1
## + wonderlic 1 0.016 4.09 -64.0
## + weight 1 0.015 4.09 -64.0
## + c_avg_yds 1 0.007 4.10 -63.9
## + c_rate 1 0.004 4.10 -63.9
## + c_avg_tds 1 0.002 4.10 -63.8
## + height 1 0.001 4.10 -63.8
## - age 1 2.735 6.84 -47.9
##
## Step: AIC=-67.5
## ints ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_numyrs + c_avg_att +
## X40 + cone + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.047 4.18 -69.1
## - X40 1 0.129 4.27 -68.3
## - c_pct 1 0.142 4.28 -68.2
## - c_avg_cmpp 1 0.152 4.29 -68.1
## - c_avg_att 1 0.159 4.30 -68.0
## - c_numyrs 1 0.174 4.31 -67.9
## <none> 4.14 -67.5
## - c_avg_inter 1 0.227 4.36 -67.4
## - cone 1 0.236 4.37 -67.3
## + shuttle 1 0.035 4.10 -65.8
## + vert_leap 1 0.034 4.10 -65.8
## + wonderlic 1 0.025 4.11 -65.7
## + c_avg_yds 1 0.003 4.13 -65.5
## + c_rate 1 0.003 4.13 -65.5
## + c_avg_tds 1 0.001 4.14 -65.5
## + weight 1 0.001 4.14 -65.5
## + height 1 0.000 4.14 -65.5
## - age 1 2.830 6.97 -49.2
##
## Step: AIC=-69.06
## ints ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_numyrs + c_avg_att +
## X40 + cone
##
## Df Sum of Sq RSS AIC
## - X40 1 0.085 4.27 -70.3
## - c_numyrs 1 0.156 4.34 -69.6
## - c_pct 1 0.170 4.35 -69.5
## - c_avg_cmpp 1 0.185 4.37 -69.4
## - c_avg_att 1 0.194 4.38 -69.3
## - cone 1 0.215 4.40 -69.1
## <none> 4.18 -69.1
## - c_avg_inter 1 0.309 4.49 -68.3
## + broad_jump 1 0.047 4.14 -67.5
## + shuttle 1 0.046 4.14 -67.5
## + wonderlic 1 0.033 4.15 -67.4
## + vert_leap 1 0.003 4.18 -67.1
## + height 1 0.002 4.18 -67.1
## + c_avg_yds 1 0.001 4.18 -67.1
## + c_rate 1 0.001 4.18 -67.1
## + weight 1 0.001 4.18 -67.1
## + c_avg_tds 1 0.000 4.18 -67.1
## - age 1 2.820 7.00 -51.0
##
## Step: AIC=-70.28
## ints ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_numyrs + c_avg_att +
## cone
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.210 4.48 -70.4
## <none> 4.27 -70.3
## - c_pct 1 0.262 4.53 -70.0
## - c_avg_cmpp 1 0.283 4.55 -69.8
## - c_avg_att 1 0.292 4.56 -69.7
## - c_avg_inter 1 0.302 4.57 -69.6
## + shuttle 1 0.103 4.17 -69.2
## + X40 1 0.085 4.18 -69.1
## + vert_leap 1 0.056 4.21 -68.8
## + c_avg_yds 1 0.013 4.26 -68.4
## + c_rate 1 0.013 4.26 -68.4
## + height 1 0.010 4.26 -68.4
## + broad_jump 1 0.003 4.27 -68.3
## + c_avg_tds 1 0.002 4.27 -68.3
## + wonderlic 1 0.002 4.27 -68.3
## + weight 1 0.000 4.27 -68.3
## - cone 1 0.473 4.74 -68.2
## - age 1 2.845 7.11 -52.4
##
## Step: AIC=-70.41
## ints ~ age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_att + cone
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.124 4.60 -71.3
## - c_avg_cmpp 1 0.140 4.62 -71.2
## - c_avg_att 1 0.147 4.63 -71.2
## - c_avg_inter 1 0.188 4.67 -70.8
## <none> 4.48 -70.4
## + c_numyrs 1 0.210 4.27 -70.3
## + X40 1 0.138 4.34 -69.6
## + shuttle 1 0.104 4.37 -69.3
## + vert_leap 1 0.087 4.39 -69.2
## + c_avg_yds 1 0.040 4.44 -68.8
## + c_rate 1 0.027 4.45 -68.6
## + broad_jump 1 0.021 4.46 -68.6
## + height 1 0.015 4.46 -68.5
## + c_avg_tds 1 0.006 4.47 -68.5
## + weight 1 0.001 4.48 -68.4
## + wonderlic 1 0.000 4.48 -68.4
## - cone 1 0.589 5.07 -67.6
## - age 1 2.981 7.46 -52.5
##
## Step: AIC=-71.35
## ints ~ age + c_avg_cmpp + c_avg_inter + c_avg_att + cone
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.080 4.68 -72.7
## - c_avg_cmpp 1 0.231 4.83 -71.4
## - c_avg_att 1 0.236 4.84 -71.4
## <none> 4.60 -71.3
## + X40 1 0.193 4.41 -71.0
## + shuttle 1 0.127 4.48 -70.4
## + c_pct 1 0.124 4.48 -70.4
## + vert_leap 1 0.118 4.48 -70.4
## + c_avg_yds 1 0.101 4.50 -70.2
## + c_numyrs 1 0.071 4.53 -70.0
## + c_rate 1 0.070 4.53 -69.9
## + height 1 0.027 4.58 -69.6
## + broad_jump 1 0.021 4.58 -69.5
## + c_avg_tds 1 0.018 4.58 -69.5
## + weight 1 0.004 4.60 -69.4
## + wonderlic 1 0.001 4.60 -69.4
## - cone 1 0.559 5.16 -68.9
## - age 1 2.958 7.56 -54.0
##
## Step: AIC=-72.68
## ints ~ age + c_avg_cmpp + c_avg_att + cone
##
## Df Sum of Sq RSS AIC
## - c_avg_cmpp 1 0.154 4.84 -73.4
## - c_avg_att 1 0.159 4.84 -73.4
## <none> 4.68 -72.7
## + X40 1 0.127 4.55 -71.8
## + c_rate 1 0.113 4.57 -71.6
## + c_avg_yds 1 0.111 4.57 -71.6
## + c_avg_inter 1 0.080 4.60 -71.3
## + shuttle 1 0.073 4.61 -71.3
## + c_numyrs 1 0.065 4.62 -71.2
## + vert_leap 1 0.048 4.63 -71.1
## + c_avg_tds 1 0.046 4.64 -71.1
## + c_pct 1 0.015 4.67 -70.8
## + height 1 0.012 4.67 -70.8
## + weight 1 0.005 4.68 -70.7
## + broad_jump 1 0.002 4.68 -70.7
## + wonderlic 1 0.000 4.68 -70.7
## - cone 1 0.854 5.54 -68.1
## - age 1 3.079 7.76 -55.0
##
## Step: AIC=-73.41
## ints ~ age + c_avg_att + cone
##
## Df Sum of Sq RSS AIC
## - c_avg_att 1 0.01 4.84 -75.4
## <none> 4.84 -73.4
## + X40 1 0.19 4.65 -72.9
## + c_avg_cmpp 1 0.15 4.68 -72.7
## + c_pct 1 0.15 4.69 -72.6
## + shuttle 1 0.10 4.74 -72.2
## + c_numyrs 1 0.09 4.74 -72.2
## + vert_leap 1 0.08 4.76 -72.0
## + c_rate 1 0.03 4.80 -71.7
## + weight 1 0.03 4.81 -71.6
## + height 1 0.02 4.82 -71.6
## + c_avg_tds 1 0.01 4.83 -71.5
## + c_avg_yds 1 0.00 4.83 -71.4
## + c_avg_inter 1 0.00 4.83 -71.4
## + broad_jump 1 0.00 4.83 -71.4
## + wonderlic 1 0.00 4.84 -71.4
## - cone 1 0.74 5.57 -69.9
## - age 1 3.49 8.33 -54.2
##
## Step: AIC=-75.36
## ints ~ age + cone
##
## Df Sum of Sq RSS AIC
## <none> 4.84 -75.4
## + X40 1 0.19 4.65 -74.9
## + shuttle 1 0.10 4.74 -74.2
## + vert_leap 1 0.08 4.76 -74.0
## + weight 1 0.03 4.81 -73.6
## + c_numyrs 1 0.03 4.81 -73.6
## + height 1 0.02 4.82 -73.5
## + c_pct 1 0.02 4.82 -73.5
## + c_avg_att 1 0.01 4.84 -73.4
## + c_avg_yds 1 0.01 4.84 -73.4
## + c_avg_inter 1 0.00 4.84 -73.4
## + c_avg_tds 1 0.00 4.84 -73.4
## + broad_jump 1 0.00 4.84 -73.4
## + c_avg_cmpp 1 0.00 4.84 -73.4
## + c_rate 1 0.00 4.84 -73.4
## + wonderlic 1 0.00 4.84 -73.4
## - cone 1 0.73 5.58 -71.9
## - age 1 3.64 8.48 -55.5
# Coefficient table and fit statistics for the model selected by stepAIC
summary(object = step_reg.log.w_combine.ints)
##
## Call:
## lm(formula = ints ~ age + cone, data = data.log.w_combine.for_ints)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9370 -0.2568 0.0337 0.2138 0.7093
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.093 3.873 2.09 0.044 *
## age -4.457 0.857 -5.20 8.2e-06 ***
## cone 4.301 1.843 2.33 0.025 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.367 on 36 degrees of freedom
## Multiple R-squared: 0.436, Adjusted R-squared: 0.404
## F-statistic: 13.9 on 2 and 36 DF, p-value: 3.36e-05
# Diagnostic plots for the stepwise-selected model (plot method for lm fits)
plot(step_reg.log.w_combine.ints)

# Best-subsets search over the same response and predictors, keeping up to
# 10 candidate models per subset size.
leaps.log.w_combine.ints <- regsubsets(
  ints ~ .,
  data = data.log.w_combine.for_ints,
  nbest = 10
)

# Plot R-squared against subset size for the candidate models.
# The legend position is given explicitly: when left at its default, the
# legend is placed with a mouse click, and on a non-interactive graphics
# device (e.g. when this script is knitted) no coordinates are returned, so
# legend() aborts with "Error: invalid coordinate lengths" -- the failure that
# was previously recorded here.
# NOTE(review): assumes subsets() is car::subsets(); confirm that the installed
# version accepts a legend() position keyword for `legend`.
subsets(leaps.log.w_combine.ints, statistic = "rsq", legend = "topright")
# 5 fold cross-validation of the stepwise-selected model.
# The fitted model is passed positionally, exactly as before.
# NOTE(review): newer DAAG releases appear to name the first argument `data`
# instead of `df` -- confirm against the installed version before upgrading.
cv.lm(
  df = data.log.w_combine.for_ints,
  step_reg.log.w_combine.ints,
  m = 5
)
## Analysis of Variance Table
##
## Response: ints
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 3.01 3.007 22.36 3.4e-05 ***
## cone 1 0.73 0.733 5.45 0.025 *
## Residuals 36 4.84 0.135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 3 21 24 40 42 52 61
## Predicted 2.154 2.068 2.36 2.068 1.782 2.0582 2.583
## cvpred 2.204 2.128 2.38 2.128 1.850 2.1060 2.596
## ints 1.808 1.131 2.09 2.313 2.092 2.0919 2.493
## CV residual -0.396 -0.997 -0.29 0.184 0.242 -0.0142 -0.103
##
## Sum of squares = 1.34 Mean square = 0.19 n = 7
##
## fold 2
## Observations in test set: 8
## 6 18 25 37 43 50 55 63
## Predicted 2.299 2.384 2.6556 2.4198 2.847 2.9701 2.413 2.020
## cvpred 2.307 2.429 2.7271 2.4380 2.943 3.0752 2.448 2.013
## ints 2.573 1.960 2.7788 2.4932 2.208 3.0007 1.960 2.313
## CV residual 0.266 -0.469 0.0517 0.0552 -0.735 -0.0745 -0.488 0.299
##
## Sum of squares = 1.17 Mean square = 0.15 n = 8
##
## fold 3
## Observations in test set: 8
## 5 7 16 20 28 32 49 64
## Predicted 2.407 2.0828 2.058 1.852 2.5194 2.20 2.363 2.130
## cvpred 2.363 2.1604 2.090 1.931 2.5411 2.17 2.446 2.086
## ints 2.896 2.0919 1.808 1.808 2.5726 2.41 2.208 1.808
## CV residual 0.533 -0.0685 -0.282 -0.123 0.0315 0.24 -0.238 -0.278
##
## Sum of squares = 0.58 Mean square = 0.07 n = 8
##
## fold 4
## Observations in test set: 8
## 12 13 26 30 38 39 59 65
## Predicted 2.691 2.679 2.56 2.58 2.341 3.0000 1.645 2.4609
## cvpred 2.665 2.631 2.54 2.56 2.338 2.9285 1.720 2.4571
## ints 2.208 2.839 2.71 2.78 2.779 2.8959 1.131 2.4932
## CV residual -0.457 0.208 0.18 0.22 0.441 -0.0326 -0.588 0.0361
##
## Sum of squares = 0.88 Mean square = 0.11 n = 8
##
## fold 5
## Observations in test set: 8
## 1 4 15 17 19 27 46 56
## Predicted 2.464 2.823 2.443 2.130 2.438 2.729 2.566 2.3232
## cvpred 2.313 2.850 2.370 2.095 2.441 2.557 2.524 2.2916
## ints 3.096 2.407 2.646 2.839 2.573 2.950 3.001 2.3125
## CV residual 0.782 -0.443 0.276 0.745 0.132 0.392 0.476 0.0209
##
## Sum of squares = 1.84 Mean square = 0.23 n = 8
##
## Overall (Sum over all 8 folds)
## ms
## 0.149