# Load the quarterback data set
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictor columns, in the order they enter the full model
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]

# Response variable, kept as a one-column data frame so that cbind() below
# carries the column name "wins" into the combined table
wins <- qb_stats_w_combine[, "wins", drop = FALSE]

# Drop rows with any missing value, then centre and scale every column
# (the response included)
data.scaled.w_combine.for_wins <- data.frame(
  scale(
    na.omit(cbind(wins, college_stats))
  )
)

# Full linear model: wins regressed on all remaining columns
lm.scaled.w_combine.wins <- lm(
  formula = wins ~ .,
  data = data.scaled.w_combine.for_wins
)

# Stepwise AIC search (terms may be dropped or re-added), starting from the
# full model
step_reg.scaled.w_combine.wins <- stepAIC(
  lm.scaled.w_combine.wins,
  direction = "both"
)
## Start: AIC=8.65
## wins ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.06 19.4 6.77
## - height 1 0.12 19.5 6.89
## - cone 1 0.17 19.5 7.00
## - c_avg_inter 1 0.30 19.6 7.25
## - shuttle 1 0.40 19.7 7.45
## - c_avg_att 1 0.49 19.8 7.63
## - c_avg_cmpp 1 0.71 20.1 8.05
## - vert_leap 1 0.86 20.2 8.34
## - X40 1 0.97 20.3 8.56
## <none> 19.3 8.65
## - c_pct 1 1.80 21.1 10.13
## - c_numyrs 1 1.84 21.2 10.20
## - broad_jump 1 2.59 21.9 11.56
## - age 1 2.91 22.3 12.13
## - c_avg_yds 1 2.96 22.3 12.20
## - c_avg_tds 1 3.11 22.4 12.46
## - c_rate 1 3.36 22.7 12.90
## - wonderlic 1 4.32 23.7 14.51
##
## Step: AIC=6.77
## wins ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.22 19.6 5.21
## - c_avg_inter 1 0.27 19.7 5.31
## - shuttle 1 0.34 19.7 5.45
## - height 1 0.49 19.9 5.74
## - c_avg_att 1 0.64 20.0 6.03
## - c_avg_cmpp 1 0.67 20.1 6.09
## - vert_leap 1 0.81 20.2 6.37
## - X40 1 0.93 20.3 6.59
## <none> 19.4 6.77
## - c_pct 1 1.87 21.3 8.35
## + weight 1 0.06 19.3 8.65
## - c_numyrs 1 2.19 21.6 8.93
## - broad_jump 1 2.73 22.1 9.89
## - c_avg_yds 1 3.02 22.4 10.40
## - c_avg_tds 1 3.47 22.9 11.19
## - c_rate 1 3.51 22.9 11.25
## - age 1 4.20 23.6 12.40
## - wonderlic 1 5.69 25.1 14.79
##
## Step: AIC=5.21
## wins ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.42 20.0 4.03
## - height 1 0.42 20.1 4.04
## - c_avg_cmpp 1 0.56 20.2 4.30
## - c_avg_att 1 0.66 20.3 4.51
## - shuttle 1 0.69 20.3 4.56
## - vert_leap 1 0.70 20.3 4.58
## - X40 1 1.02 20.6 5.19
## <none> 19.6 5.21
## - c_pct 1 1.67 21.3 6.40
## + cone 1 0.22 19.4 6.77
## + weight 1 0.11 19.5 7.00
## - c_numyrs 1 2.15 21.8 7.26
## - c_avg_yds 1 2.86 22.5 8.52
## - broad_jump 1 2.99 22.6 8.74
## - c_avg_tds 1 3.26 22.9 9.20
## - c_rate 1 3.29 22.9 9.26
## - age 1 4.59 24.2 11.41
## - wonderlic 1 5.61 25.2 13.02
##
## Step: AIC=4.03
## wins ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_cmpp 1 0.19 20.2 2.40
## - height 1 0.40 20.4 2.81
## - shuttle 1 0.57 20.6 3.12
## - vert_leap 1 0.72 20.8 3.40
## - X40 1 0.80 20.8 3.56
## <none> 20.0 4.03
## - c_pct 1 1.26 21.3 4.40
## + c_avg_inter 1 0.42 19.6 5.21
## + cone 1 0.36 19.7 5.31
## + weight 1 0.08 20.0 5.87
## - c_avg_yds 1 2.49 22.5 6.60
## - broad_jump 1 2.63 22.7 6.84
## - c_avg_tds 1 2.88 22.9 7.27
## - c_rate 1 2.96 23.0 7.40
## - c_numyrs 1 3.24 23.3 7.87
## - age 1 4.20 24.2 9.44
## - c_avg_att 1 4.64 24.7 10.16
## - wonderlic 1 5.22 25.3 11.06
##
## Step: AIC=2.4
## wins ~ height + age + c_rate + c_pct + c_avg_tds + c_avg_yds +
## c_numyrs + c_avg_att + X40 + wonderlic + shuttle + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.32 20.6 1.02
## - shuttle 1 0.48 20.7 1.31
## - vert_leap 1 0.55 20.8 1.45
## - X40 1 0.85 21.1 2.01
## <none> 20.2 2.40
## + c_avg_cmpp 1 0.19 20.0 4.03
## + cone 1 0.15 20.1 4.10
## + weight 1 0.05 20.2 4.30
## + c_avg_inter 1 0.05 20.2 4.30
## - broad_jump 1 2.48 22.7 4.92
## - c_pct 1 2.64 22.9 5.18
## - c_avg_tds 1 3.20 23.4 6.13
## - c_avg_yds 1 3.23 23.5 6.17
## - c_numyrs 1 3.80 24.0 7.11
## - age 1 4.05 24.3 7.52
## - wonderlic 1 5.05 25.3 9.09
## - c_rate 1 6.08 26.3 10.65
## - c_avg_att 1 7.16 27.4 12.22
##
## Step: AIC=1.02
## wins ~ age + c_rate + c_pct + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.31 20.9 -0.40
## - vert_leap 1 0.45 21.0 -0.14
## <none> 20.6 1.02
## - X40 1 1.21 21.8 1.24
## + height 1 0.32 20.2 2.40
## + weight 1 0.31 20.2 2.43
## + cone 1 0.13 20.4 2.76
## + c_avg_cmpp 1 0.11 20.4 2.81
## + c_avg_inter 1 0.07 20.5 2.87
## - c_pct 1 2.39 22.9 3.31
## - broad_jump 1 2.80 23.4 3.99
## - c_avg_yds 1 2.99 23.5 4.31
## - c_avg_tds 1 3.34 23.9 4.89
## - c_numyrs 1 3.60 24.2 5.32
## - age 1 4.56 25.1 6.84
## - wonderlic 1 4.73 25.3 7.09
## - c_rate 1 5.80 26.4 8.71
## - c_avg_att 1 6.89 27.4 10.29
##
## Step: AIC=-0.4
## wins ~ age + c_rate + c_pct + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.28 21.1 -1.88
## - X40 1 0.91 21.8 -0.74
## <none> 20.9 -0.40
## + cone 1 0.32 20.6 1.01
## + shuttle 1 0.31 20.6 1.02
## + height 1 0.16 20.7 1.31
## + weight 1 0.08 20.8 1.44
## + c_avg_inter 1 0.07 20.8 1.47
## + c_avg_cmpp 1 0.07 20.8 1.48
## - c_pct 1 2.42 23.3 1.87
## - broad_jump 1 2.55 23.4 2.09
## - c_avg_yds 1 2.68 23.5 2.31
## - c_numyrs 1 3.34 24.2 3.40
## - c_avg_tds 1 4.16 25.0 4.70
## - age 1 4.28 25.1 4.88
## - wonderlic 1 4.60 25.5 5.37
## - c_rate 1 5.64 26.5 6.93
## - c_avg_att 1 6.59 27.4 8.31
##
## Step: AIC=-1.88
## wins ~ age + c_rate + c_pct + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + broad_jump
##
## Df Sum of Sq RSS AIC
## <none> 21.1 -1.88
## - X40 1 1.72 22.9 -0.83
## + vert_leap 1 0.28 20.9 -0.40
## + cone 1 0.23 20.9 -0.31
## + c_avg_inter 1 0.14 21.0 -0.14
## + shuttle 1 0.14 21.0 -0.14
## + height 1 0.13 21.0 -0.13
## + weight 1 0.07 21.1 -0.01
## + c_avg_cmpp 1 0.01 21.1 0.10
## - broad_jump 1 2.29 23.4 0.13
## - c_pct 1 2.39 23.5 0.29
## - c_avg_yds 1 2.52 23.7 0.52
## - c_numyrs 1 3.18 24.3 1.59
## - age 1 4.37 25.5 3.44
## - wonderlic 1 4.40 25.5 3.48
## - c_avg_tds 1 4.84 26.0 4.16
## - c_rate 1 5.67 26.8 5.38
## - c_avg_att 1 6.51 27.6 6.58
# Coefficient table and overall fit statistics for the stepwise-selected model
summary(step_reg.scaled.w_combine.wins)
##
## Call:
## lm(formula = wins ~ age + c_rate + c_pct + c_avg_tds + c_avg_yds +
## c_numyrs + c_avg_att + X40 + wonderlic + broad_jump, data = data.scaled.w_combine.for_wins)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.598 -0.566 0.035 0.431 1.414
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.21e-16 1.39e-01 0.00 1.0000
## age -4.02e-01 1.67e-01 -2.41 0.0230 *
## c_rate -1.68e+00 6.14e-01 -2.74 0.0106 *
## c_pct 8.94e-01 5.03e-01 1.78 0.0863 .
## c_avg_tds 1.10e+00 4.35e-01 2.53 0.0172 *
## c_avg_yds 1.91e+00 1.05e+00 1.83 0.0781 .
## c_numyrs 4.06e-01 1.98e-01 2.05 0.0494 *
## c_avg_att -2.71e+00 9.25e-01 -2.94 0.0066 **
## X40 3.43e-01 2.27e-01 1.51 0.1423
## wonderlic -4.05e-01 1.68e-01 -2.41 0.0226 *
## broad_jump 4.28e-01 2.45e-01 1.74 0.0922 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.869 on 28 degrees of freedom
## Multiple R-squared: 0.444, Adjusted R-squared: 0.245
## F-statistic: 2.23 on 10 and 28 DF, p-value: 0.046
# Diagnostic plots for the stepwise-selected linear model
plot(step_reg.scaled.w_combine.wins)
# Best-subsets search over the same predictors, recording up to 10 candidate
# models for each subset size
leaps.scaled.w_combine.wins <- regsubsets(wins ~ ., data = data.scaled.w_combine.for_wins,
    nbest = 10)
# Plot R-squared against subset size. The legend is switched off because the
# default placement waits for a mouse click via locator(); in a
# non-interactive run no point is returned and legend() stops with
# "invalid coordinate lengths". With legend = FALSE the key of predictor
# abbreviations is returned as a data frame instead of being drawn.
subsets(leaps.scaled.w_combine.wins, statistic = "rsq", legend = FALSE)
## Error: invalid coordinate lengths
# 5-fold cross-validation of the stepwise-selected model. The data frame and
# the model are passed positionally because DAAG renamed the first argument
# from `df` to `data` between releases; positional matching works with both.
cv.lm(data.scaled.w_combine.for_wins, step_reg.scaled.w_combine.wins, m = 5)
## Analysis of Variance Table
##
## Response: wins
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 1.65 1.65 2.18 0.151
## c_rate 1 0.13 0.13 0.17 0.683
## c_pct 1 0.46 0.46 0.60 0.444
## c_avg_tds 1 0.31 0.31 0.41 0.525
## c_avg_yds 1 3.66 3.66 4.85 0.036 *
## c_numyrs 1 0.03 0.03 0.03 0.854
## c_avg_att 1 3.68 3.68 4.88 0.036 *
## X40 1 0.00 0.00 0.00 0.946
## wonderlic 1 4.65 4.65 6.16 0.019 *
## broad_jump 1 2.29 2.29 3.04 0.092 .
## Residuals 28 21.14 0.75
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 3 21 24 40 42 52 61
## Predicted -0.815 -0.954 -0.636 -0.618 -0.369 -0.103 1.266
## cvpred -0.845 -1.251 -0.490 -0.655 -0.325 -0.162 1.597
## wins -1.033 -0.151 -1.621 0.143 -0.739 -1.033 0.731
## CV residual -0.188 1.100 -1.131 0.798 -0.413 -0.871 -0.866
##
## Sum of squares = 4.84 Mean square = 0.69 n = 7
##
## fold 2
## Observations in test set: 8
## 6 18 25 37 43 50 55 63
## Predicted -0.226 -0.0273 1.201 1.181 -0.11491 -0.681 -1.223 -0.249
## cvpred -0.359 -0.0628 0.994 0.739 -0.14611 -0.932 -1.490 0.326
## wins 1.025 0.1432 1.613 1.613 -0.15075 -1.033 -0.739 -1.327
## CV residual 1.384 0.2060 0.619 0.874 -0.00464 -0.101 0.751 -1.652
##
## Sum of squares = 6.41 Mean square = 0.8 n = 8
##
## fold 3
## Observations in test set: 8
## 5 7 16 20 28 32 49 64
## Predicted 0.642 0.0539 0.667 -1.056 1.205 -0.175 0.0726 1.034
## cvpred -0.184 0.5387 0.197 -0.335 1.162 0.161 0.3052 0.869
## wins 1.613 -0.1508 0.731 -1.327 1.613 -0.445 -0.4447 0.437
## CV residual 1.797 -0.6894 0.535 -0.991 0.451 -0.605 -0.7500 -0.432
##
## Sum of squares = 6.29 Mean square = 0.79 n = 8
##
## fold 4
## Observations in test set: 8
## 12 13 26 30 38 39 59 65
## Predicted 0.271 -0.0492 0.310 -0.266 -0.0219 0.217 -1.116 0.199
## cvpred 1.093 0.0156 0.208 -0.339 0.1159 0.755 -1.295 -0.121
## wins -1.327 0.1432 -0.445 0.731 -0.7387 -0.739 -0.739 1.613
## CV residual -2.419 0.1276 -0.653 1.070 -0.8546 -1.494 0.557 1.734
##
## Sum of squares = 13.7 Mean square = 1.71 n = 8
##
## fold 5
## Observations in test set: 8
## 1 4 15 17 19 27 46 56
## Predicted 0.0153 -0.418 0.4022 -0.875 0.0176 0.161 0.597 0.478
## cvpred -0.0991 0.336 0.0543 -0.570 0.6055 -0.901 0.859 0.389
## wins 0.4372 -1.033 0.4372 -0.445 -1.0326 1.319 0.731 1.613
## CV residual 0.5363 -1.369 0.3828 0.125 -1.6381 2.220 -0.128 1.224
##
## Sum of squares = 11.4 Mean square = 1.43 n = 8
##
## Overall (Sum over all 8 folds)
## ms
## 1.1