# Load the qb_stats_w_combine data set from CSV.
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Response variable. Single-bracket indexing keeps it as a one-column data
# frame, so the cbind() below produces a data frame with a `yds` column.
yds <- qb_stats_w_combine["yds"]

# Candidate predictor columns: body measurements, the c_* statistics, and the
# remaining test/drill measurements.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Modelling data: response plus predictors, rows with any missing value
# dropped, then every column centred and scaled.
data.scaled.w_combine.for_yds <- data.frame(
  scale(na.omit(cbind(yds, college_stats)))
)

# Full linear model: yds regressed on every predictor in the scaled data.
lm.scaled.w_combine.yds <- lm(
  formula = yds ~ .,
  data = data.scaled.w_combine.for_yds
)

# Stepwise selection by AIC, allowing terms to be both dropped and re-added,
# starting from the full model. The search trace is printed below.
step_reg.scaled.w_combine.yds <- stepAIC(
  lm.scaled.w_combine.yds,
  direction = "both"
)
## Start: AIC=16.18
## yds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.002 23.5 14.2
## - c_rate 1 0.017 23.5 14.2
## - X40 1 0.027 23.5 14.2
## - cone 1 0.029 23.5 14.2
## - c_avg_yds 1 0.044 23.5 14.2
## - shuttle 1 0.058 23.5 14.3
## - c_avg_tds 1 0.091 23.6 14.3
## - c_pct 1 0.141 23.6 14.4
## - vert_leap 1 0.206 23.7 14.5
## - broad_jump 1 0.570 24.0 15.1
## - c_avg_inter 1 0.607 24.1 15.2
## - height 1 0.794 24.3 15.5
## - c_avg_cmpp 1 0.924 24.4 15.7
## <none> 23.5 16.2
## - age 1 1.443 24.9 16.5
## - wonderlic 1 1.786 25.2 17.0
## - c_numyrs 1 1.822 25.3 17.1
## - c_avg_att 1 2.641 26.1 18.3
##
## Step: AIC=14.18
## yds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.018 23.5 12.2
## - cone 1 0.028 23.5 12.2
## - X40 1 0.029 23.5 12.2
## - c_avg_yds 1 0.043 23.5 12.3
## - shuttle 1 0.058 23.5 12.3
## - c_avg_tds 1 0.090 23.6 12.3
## - c_pct 1 0.145 23.6 12.4
## - vert_leap 1 0.213 23.7 12.5
## - broad_jump 1 0.571 24.0 13.1
## - c_avg_inter 1 0.607 24.1 13.2
## - c_avg_cmpp 1 0.926 24.4 13.7
## <none> 23.5 14.2
## - height 1 1.399 24.9 14.4
## - age 1 1.773 25.2 15.0
## - c_numyrs 1 1.932 25.4 15.3
## - wonderlic 1 2.089 25.6 15.5
## + weight 1 0.002 23.5 16.2
## - c_avg_att 1 2.827 26.3 16.6
##
## Step: AIC=12.21
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + cone +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - X40 1 0.03 23.5 10.3
## - cone 1 0.04 23.5 10.3
## - shuttle 1 0.05 23.5 10.3
## - vert_leap 1 0.27 23.8 10.7
## - c_avg_yds 1 0.27 23.8 10.7
## - c_avg_tds 1 0.28 23.8 10.7
## - c_avg_inter 1 0.63 24.1 11.2
## - broad_jump 1 0.67 24.1 11.3
## - c_pct 1 1.21 24.7 12.2
## <none> 23.5 12.2
## - height 1 1.55 25.0 12.7
## - c_numyrs 1 1.92 25.4 13.3
## - age 1 2.00 25.5 13.4
## + c_rate 1 0.02 23.5 14.2
## + weight 1 0.00 23.5 14.2
## - wonderlic 1 2.81 26.3 14.6
## - c_avg_cmpp 1 3.12 26.6 15.1
## - c_avg_att 1 3.20 26.7 15.2
##
## Step: AIC=10.26
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + wonderlic + cone + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.05 23.6 8.34
## - shuttle 1 0.09 23.6 8.41
## - c_avg_yds 1 0.26 23.8 8.69
## - c_avg_tds 1 0.26 23.8 8.70
## - vert_leap 1 0.36 23.9 8.85
## - broad_jump 1 0.71 24.2 9.43
## - c_avg_inter 1 0.74 24.2 9.47
## - c_pct 1 1.21 24.7 10.22
## <none> 23.5 10.26
## - height 1 1.80 25.3 11.14
## - c_numyrs 1 1.89 25.4 11.28
## - age 1 1.99 25.5 11.43
## + X40 1 0.03 23.5 12.21
## + c_rate 1 0.02 23.5 12.23
## + weight 1 0.01 23.5 12.25
## - wonderlic 1 2.78 26.3 12.62
## - c_avg_cmpp 1 3.18 26.7 13.22
## - c_avg_att 1 3.25 26.8 13.32
##
## Step: AIC=8.34
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + wonderlic + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.05 23.6 6.43
## - c_avg_tds 1 0.24 23.8 6.74
## - c_avg_yds 1 0.27 23.8 6.80
## - vert_leap 1 0.34 23.9 6.90
## - c_avg_inter 1 0.70 24.3 7.48
## - broad_jump 1 0.80 24.4 7.65
## <none> 23.6 8.34
## - c_pct 1 1.25 24.8 8.37
## - height 1 1.77 25.3 9.18
## - c_numyrs 1 1.86 25.4 9.30
## - age 1 2.20 25.8 9.82
## + cone 1 0.05 23.5 10.26
## + X40 1 0.04 23.5 10.28
## + c_rate 1 0.03 23.5 10.29
## + weight 1 0.00 23.6 10.34
## - wonderlic 1 2.82 26.4 10.76
## - c_avg_cmpp 1 3.18 26.7 11.28
## - c_avg_att 1 3.22 26.8 11.33
##
## Step: AIC=6.43
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + wonderlic + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.19 23.8 4.75
## - c_avg_yds 1 0.42 24.0 5.12
## - vert_leap 1 0.60 24.2 5.41
## - c_avg_inter 1 0.72 24.3 5.61
## - broad_jump 1 0.79 24.4 5.71
## <none> 23.6 6.43
## - c_pct 1 1.26 24.9 6.45
## - c_numyrs 1 1.96 25.6 7.54
## - age 1 2.23 25.9 7.95
## - height 1 2.34 26.0 8.12
## + X40 1 0.07 23.6 8.32
## + shuttle 1 0.05 23.6 8.34
## + c_rate 1 0.02 23.6 8.40
## + cone 1 0.01 23.6 8.41
## + weight 1 0.00 23.6 8.43
## - c_avg_cmpp 1 3.19 26.8 9.38
## - c_avg_att 1 3.40 27.0 9.67
## - wonderlic 1 3.91 27.5 10.41
##
## Step: AIC=4.75
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_yds +
## c_numyrs + c_avg_att + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.62 24.4 3.76
## - vert_leap 1 0.66 24.5 3.82
## - broad_jump 1 0.77 24.6 3.99
## - c_avg_yds 1 1.16 25.0 4.61
## <none> 23.8 4.75
## - c_pct 1 1.26 25.1 4.76
## - c_numyrs 1 2.03 25.8 5.94
## - height 1 2.27 26.1 6.31
## + c_avg_tds 1 0.19 23.6 6.43
## + c_rate 1 0.16 23.6 6.49
## + X40 1 0.03 23.8 6.71
## - age 1 2.54 26.4 6.71
## + cone 1 0.02 23.8 6.72
## + weight 1 0.00 23.8 6.74
## + shuttle 1 0.00 23.8 6.74
## - c_avg_cmpp 1 3.34 27.1 7.87
## - c_avg_att 1 3.68 27.5 8.36
## - wonderlic 1 4.40 28.2 9.36
##
## Step: AIC=3.76
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_yds + c_numyrs +
## c_avg_att + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.55 25.0 2.62
## - c_avg_yds 1 0.66 25.1 2.79
## - broad_jump 1 0.80 25.2 3.01
## - c_pct 1 0.93 25.4 3.22
## <none> 24.4 3.76
## - c_numyrs 1 1.50 25.9 4.08
## + c_avg_inter 1 0.62 23.8 4.75
## - height 1 2.24 26.7 5.18
## + X40 1 0.12 24.3 5.56
## + c_avg_tds 1 0.09 24.3 5.61
## + cone 1 0.02 24.4 5.72
## + weight 1 0.02 24.4 5.72
## + shuttle 1 0.02 24.4 5.72
## + c_rate 1 0.00 24.4 5.75
## - age 1 2.67 27.1 5.80
## - c_avg_cmpp 1 2.74 27.2 5.90
## - c_avg_att 1 4.36 28.8 8.16
## - wonderlic 1 5.16 29.6 9.23
##
## Step: AIC=2.62
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_yds + c_numyrs +
## c_avg_att + wonderlic + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.26 25.2 1.03
## - c_avg_yds 1 0.50 25.5 1.40
## - c_pct 1 0.98 25.9 2.11
## <none> 25.0 2.62
## - c_numyrs 1 1.38 26.4 2.71
## + vert_leap 1 0.55 24.4 3.76
## + c_avg_inter 1 0.50 24.5 3.82
## + X40 1 0.38 24.6 4.02
## + shuttle 1 0.19 24.8 4.32
## + c_avg_tds 1 0.14 24.8 4.40
## - height 1 2.56 27.5 4.42
## + cone 1 0.11 24.9 4.45
## + c_rate 1 0.02 24.9 4.58
## + weight 1 0.01 25.0 4.60
## - age 1 2.70 27.7 4.62
## - c_avg_cmpp 1 3.36 28.3 5.54
## - c_avg_att 1 4.76 29.7 7.42
## - wonderlic 1 4.99 30.0 7.71
##
## Step: AIC=1.03
## yds ~ height + age + c_avg_cmpp + c_pct + c_avg_yds + c_numyrs +
## c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.52 25.8 -0.17
## - c_pct 1 0.73 26.0 0.14
## - c_numyrs 1 1.13 26.4 0.73
## <none> 25.2 1.03
## + c_avg_inter 1 0.60 24.6 2.08
## + broad_jump 1 0.26 25.0 2.62
## - height 1 2.45 27.7 2.65
## + c_avg_tds 1 0.09 25.1 2.89
## + weight 1 0.07 25.2 2.92
## + shuttle 1 0.03 25.2 2.98
## + c_rate 1 0.02 25.2 2.99
## + vert_leap 1 0.01 25.2 3.01
## + cone 1 0.00 25.2 3.02
## + X40 1 0.00 25.2 3.03
## - age 1 2.78 28.0 3.10
## - c_avg_cmpp 1 3.10 28.3 3.54
## - c_avg_att 1 4.51 29.7 5.43
## - wonderlic 1 5.25 30.5 6.40
##
## Step: AIC=-0.17
## yds ~ height + age + c_avg_cmpp + c_pct + c_numyrs + c_avg_att +
## wonderlic
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.51 26.3 -1.42
## - c_numyrs 1 0.85 26.6 -0.90
## <none> 25.8 -0.17
## + c_avg_yds 1 0.52 25.2 1.03
## + c_rate 1 0.50 25.3 1.07
## + c_avg_tds 1 0.47 25.3 1.11
## - height 1 2.32 28.1 1.19
## + broad_jump 1 0.28 25.5 1.40
## + c_avg_inter 1 0.15 25.6 1.59
## + weight 1 0.12 25.6 1.64
## + shuttle 1 0.04 25.7 1.76
## + X40 1 0.01 25.8 1.80
## + cone 1 0.00 25.8 1.82
## + vert_leap 1 0.00 25.8 1.82
## - age 1 2.79 28.6 1.83
## - c_avg_att 1 3.98 29.7 3.43
## - c_avg_cmpp 1 4.50 30.3 4.11
## - wonderlic 1 5.69 31.4 5.61
##
## Step: AIC=-1.42
## yds ~ height + age + c_avg_cmpp + c_numyrs + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.40 26.7 -2.83
## <none> 26.3 -1.42
## + c_pct 1 0.51 25.8 -0.17
## - height 1 2.37 28.6 -0.05
## + c_avg_tds 1 0.41 25.9 -0.03
## - age 1 2.45 28.7 0.06
## + c_avg_yds 1 0.30 26.0 0.14
## + weight 1 0.12 26.1 0.40
## + shuttle 1 0.10 26.2 0.43
## + vert_leap 1 0.07 26.2 0.48
## + c_avg_inter 1 0.06 26.2 0.49
## + c_rate 1 0.06 26.2 0.49
## + broad_jump 1 0.04 26.2 0.52
## + cone 1 0.00 26.3 0.58
## + X40 1 0.00 26.3 0.58
## - c_avg_att 1 4.05 30.3 2.17
## - c_avg_cmpp 1 5.36 31.6 3.83
## - wonderlic 1 5.53 31.8 4.03
##
## Step: AIC=-2.83
## yds ~ height + age + c_avg_cmpp + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## <none> 26.7 -2.829
## - height 1 2.22 28.9 -1.704
## + c_avg_tds 1 0.44 26.2 -1.480
## - age 1 2.39 29.1 -1.476
## + c_numyrs 1 0.40 26.3 -1.415
## + c_avg_yds 1 0.22 26.4 -1.152
## + weight 1 0.22 26.4 -1.147
## + shuttle 1 0.14 26.5 -1.041
## + vert_leap 1 0.09 26.6 -0.967
## + c_pct 1 0.05 26.6 -0.905
## + cone 1 0.01 26.6 -0.851
## + broad_jump 1 0.01 26.7 -0.841
## + X40 1 0.01 26.7 -0.840
## + c_avg_inter 1 0.01 26.7 -0.840
## + c_rate 1 0.00 26.7 -0.834
## - c_avg_att 1 3.66 30.3 0.183
## - c_avg_cmpp 1 4.98 31.6 1.845
## - wonderlic 1 5.22 31.9 2.142
# Print coefficient estimates and overall fit statistics for the model that
# stepAIC() selected above.
summary(step_reg.scaled.w_combine.yds)
##
## Call:
## lm(formula = yds ~ height + age + c_avg_cmpp + c_avg_att + wonderlic,
## data = data.scaled.w_combine.for_yds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7245 -0.4369 -0.0374 0.4716 1.7353
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.69e-16 1.44e-01 0.00 1.000
## height 2.60e-01 1.57e-01 1.66 0.107
## age -2.87e-01 1.67e-01 -1.72 0.095 .
## c_avg_cmpp 1.69e+00 6.82e-01 2.48 0.018 *
## c_avg_att -1.44e+00 6.78e-01 -2.13 0.041 *
## wonderlic -4.23e-01 1.67e-01 -2.54 0.016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.899 on 33 degrees of freedom
## Multiple R-squared: 0.298, Adjusted R-squared: 0.192
## F-statistic: 2.81 on 5 and 33 DF, p-value: 0.0322
# Diagnostic plots for the stepwise-selected linear model.
plot(step_reg.scaled.w_combine.yds)

# Best-subsets search over all predictors in the scaled data, keeping the 10
# best models of each subset size.
leaps.scaled.w_combine.yds <- regsubsets(
  yds ~ .,
  data = data.scaled.w_combine.for_yds,
  nbest = 10
)

# Plot R-squared for each retained subset.
# NOTE(review): assumes this is car::subsets. By default it places the legend
# interactively with locator(); on a non-interactive device that yields no
# coordinates and legend() stops with "invalid coordinate lengths". Passing
# legend = FALSE skips the on-plot legend, and the call instead returns the
# abbreviation key as a data frame, which prints here.
subsets(leaps.scaled.w_combine.yds, statistic = "rsq", legend = FALSE)
# Cross-validate the stepwise-selected model on the scaled data using 5 folds.
# NOTE(review): cv.lm is presumably DAAG's; confirm the package is attached
# earlier in the file.
# NOTE(review): the data were standardised (and the predictors selected) using
# all rows before the folds are formed, so the reported CV error is likely
# optimistic — confirm whether that is acceptable for this analysis.
cv.lm(df = data.scaled.w_combine.for_yds, step_reg.scaled.w_combine.yds, m = 5) # 5-fold cross-validation
## Analysis of Variance Table
##
## Response: yds
## Df Sum Sq Mean Sq F value Pr(>F)
## height 1 1.85 1.85 2.28 0.140
## age 1 0.77 0.77 0.95 0.336
## c_avg_cmpp 1 1.28 1.28 1.58 0.218
## c_avg_att 1 2.22 2.22 2.75 0.107
## wonderlic 1 5.22 5.22 6.46 0.016 *
## Residuals 33 26.66 0.81
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 3 21 24 40 42 52 61
## Predicted -0.278 -0.578 0.522 -0.698 -0.1943 -1.089 -0.080
## cvpred -0.172 -0.390 0.810 -0.605 0.0484 -1.032 -0.191
## yds -1.230 -0.615 -0.833 0.149 -0.4609 -1.335 -0.509
## CV residual -1.059 -0.226 -1.643 0.754 -0.5093 -0.302 -0.318
##
## Sum of squares = 4.89 Mean square = 0.7 n = 7
##
## fold 2
## Observations in test set: 8
## 6 18 25 37 43 50 55 63
## Predicted -0.01642 0.206 1.539 0.260 -0.534 -0.446 -0.505 0.1233
## cvpred -0.00115 0.170 1.401 0.107 -0.246 -0.292 -0.408 0.0921
## yds 1.12099 -0.373 1.712 0.653 -1.223 -0.119 -1.429 -0.2263
## CV residual 1.12214 -0.543 0.311 0.545 -0.976 0.173 -1.021 -0.3184
##
## Sum of squares = 4.07 Mean square = 0.51 n = 8
##
## fold 3
## Observations in test set: 8
## 5 7 16 20 28 32 49 64
## Predicted 0.4557 -0.4340 0.457 -0.9541 0.541 0.0667 0.2173 0.566
## cvpred 0.0963 -0.0198 0.328 -0.5982 0.806 0.2040 0.0811 0.649
## yds 2.1909 -1.2897 0.816 -0.6604 1.085 -0.2186 -0.1474 -0.709
## CV residual 2.0946 -1.2699 0.488 -0.0622 0.279 -0.4227 -0.2285 -1.357
##
## Sum of squares = 8.39 Mean square = 1.05 n = 8
##
## fold 4
## Observations in test set: 8
## 12 13 26 30 38 39 59 65
## Predicted -0.439 0.929 0.449 0.00627 -0.4890 1.029 -0.480 -0.255
## cvpred -0.434 0.867 0.391 0.07033 -0.5823 1.421 -0.555 -0.469
## yds -0.800 1.837 0.237 0.99601 -0.0893 -0.571 -0.925 0.513
## CV residual -0.366 0.970 -0.154 0.92568 0.4930 -1.991 -0.369 0.982
##
## Sum of squares = 7.26 Mean square = 0.91 n = 8
##
## fold 5
## Observations in test set: 8
## 1 4 15 17 19 27 46 56
## Predicted 0.295 0.0796 0.423 -0.2180 -0.367 0.252 0.0248 -0.3879
## cvpred -0.285 0.9490 0.056 0.1355 0.258 -1.068 0.5501 -0.0637
## yds 1.597 -1.6449 0.551 0.0861 -0.573 1.547 0.0752 0.8140
## CV residual 1.882 -2.5938 0.495 -0.0493 -0.831 2.615 -0.4749 0.8777
##
## Sum of squares = 19 Mean square = 2.38 n = 8
##
## Overall (Sum over all 8 folds)
## ms
## 1.12