# Fetch data
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictor columns, named as they appear in the CSV
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable. Single-bracket indexing keeps a one-column data
# frame, so the column name "completion_percentage" survives the cbind() below
# and can be referenced in the model formula.
cpct <- qb_stats_w_combine["completion_percentage"]

# Generate clean data set: drop every row with a missing value, then centre
# and scale all columns (response included). scale() returns a matrix, so the
# result is wrapped back into a data frame for lm().
data.scaled.w_combine.for_cpct <- data.frame(
  scale(na.omit(cbind(cpct, college_stats)))
)

# Generate the full linear model: response regressed on every predictor
lm.scaled.w_combine.cpct <- lm(
  formula = completion_percentage ~ .,
  data = data.scaled.w_combine.for_cpct
)

# Find optimum linear regression model for cpct by stepwise AIC selection;
# direction = "both" lets terms be dropped and re-added at each step.
step_reg.scaled.w_combine.cpct <- stepAIC(
  lm.scaled.w_combine.cpct,
  direction = "both"
)
## Start: AIC=7.05
## completion_percentage ~ height + weight + age + c_avg_cmpp +
## c_rate + c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.002 17.7 5.06
## - c_rate 1 0.010 17.8 5.07
## - c_avg_yds 1 0.059 17.8 5.18
## - c_pct 1 0.061 17.8 5.18
## - broad_jump 1 0.079 17.8 5.22
## - wonderlic 1 0.113 17.9 5.30
## - X40 1 0.166 17.9 5.41
## - c_avg_inter 1 0.170 17.9 5.42
## - age 1 0.361 18.1 5.82
## - c_avg_att 1 0.526 18.3 6.16
## - c_numyrs 1 0.549 18.3 6.21
## - weight 1 0.881 18.6 6.90
## <none> 17.7 7.05
## - c_avg_cmpp 1 0.975 18.7 7.09
## - c_avg_tds 1 1.134 18.9 7.41
## - shuttle 1 1.310 19.1 7.76
## - cone 1 1.386 19.1 7.91
## - vert_leap 1 2.558 20.3 10.17
##
## Step: AIC=5.06
## completion_percentage ~ weight + age + c_avg_cmpp + c_rate +
## c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.013 17.8 3.09
## - c_avg_yds 1 0.068 17.8 3.20
## - c_pct 1 0.070 17.8 3.21
## - broad_jump 1 0.079 17.8 3.23
## - wonderlic 1 0.130 17.9 3.33
## - X40 1 0.167 17.9 3.41
## - c_avg_inter 1 0.172 17.9 3.43
## - age 1 0.390 18.1 3.88
## - c_avg_att 1 0.532 18.3 4.18
## - c_numyrs 1 0.560 18.3 4.24
## <none> 17.7 5.06
## - c_avg_cmpp 1 0.985 18.7 5.11
## - c_avg_tds 1 1.223 19.0 5.59
## - shuttle 1 1.322 19.1 5.79
## - cone 1 1.477 19.2 6.10
## - weight 1 1.728 19.5 6.59
## + height 1 0.002 17.7 7.05
## - vert_leap 1 2.558 20.3 8.18
##
## Step: AIC=3.09
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.08 17.8 1.26
## - broad_jump 1 0.11 17.9 1.31
## - X40 1 0.16 17.9 1.42
## - wonderlic 1 0.19 17.9 1.50
## - c_avg_inter 1 0.28 18.0 1.68
## - age 1 0.38 18.1 1.88
## - c_pct 1 0.45 18.2 2.04
## - c_numyrs 1 0.55 18.3 2.24
## - c_avg_att 1 0.58 18.3 2.30
## <none> 17.8 3.09
## - shuttle 1 1.41 19.2 3.98
## - cone 1 1.47 19.2 4.11
## - weight 1 1.78 19.5 4.72
## + c_rate 1 0.01 17.7 5.06
## + height 1 0.00 17.8 5.07
## - c_avg_tds 1 2.01 19.8 5.15
## - vert_leap 1 2.76 20.5 6.57
## - c_avg_cmpp 1 3.52 21.3 7.95
##
## Step: AIC=1.26
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.12 18.0 -0.48
## - X40 1 0.13 18.0 -0.46
## - wonderlic 1 0.21 18.1 -0.30
## - c_avg_inter 1 0.23 18.1 -0.25
## - age 1 0.36 18.2 0.01
## - c_pct 1 0.57 18.4 0.45
## - c_numyrs 1 0.73 18.6 0.79
## <none> 17.8 1.26
## - c_avg_att 1 0.99 18.8 1.30
## - cone 1 1.55 19.4 2.43
## + c_avg_yds 1 0.08 17.8 3.09
## - shuttle 1 1.90 19.7 3.09
## - weight 1 1.90 19.7 3.11
## + c_rate 1 0.02 17.8 3.20
## + height 1 0.01 17.8 3.24
## - vert_leap 1 3.25 21.1 5.62
## - c_avg_cmpp 1 3.49 21.3 6.04
## - c_avg_tds 1 3.75 21.6 6.50
##
## Step: AIC=-0.48
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone +
## shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.18 18.1 -2.09
## - wonderlic 1 0.20 18.2 -2.07
## - X40 1 0.37 18.3 -1.71
## - age 1 0.43 18.4 -1.59
## - c_pct 1 0.45 18.4 -1.53
## - c_numyrs 1 0.62 18.6 -1.18
## <none> 18.0 -0.48
## - c_avg_att 1 0.98 18.9 -0.47
## - cone 1 1.45 19.4 0.47
## - shuttle 1 1.80 19.8 1.16
## + broad_jump 1 0.12 17.8 1.26
## + c_avg_yds 1 0.10 17.9 1.31
## + c_rate 1 0.01 17.9 1.49
## + height 1 0.01 17.9 1.49
## - weight 1 2.19 20.1 1.89
## - vert_leap 1 3.18 21.1 3.71
## - c_avg_cmpp 1 3.42 21.4 4.14
## - c_avg_tds 1 4.06 22.0 5.25
##
## Step: AIC=-2.09
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_tds +
## c_numyrs + c_avg_att + X40 + wonderlic + cone + shuttle +
## vert_leap
##
## Df Sum of Sq RSS AIC
## - wonderlic 1 0.17 18.3 -3.74
## - X40 1 0.48 18.6 -3.09
## - age 1 0.49 18.6 -3.08
## - c_pct 1 0.55 18.7 -2.96
## <none> 18.1 -2.09
## - c_numyrs 1 1.01 19.1 -2.04
## - cone 1 1.27 19.4 -1.53
## - shuttle 1 1.62 19.8 -0.84
## + c_avg_inter 1 0.18 18.0 -0.48
## + broad_jump 1 0.07 18.1 -0.25
## + c_avg_yds 1 0.05 18.1 -0.19
## + height 1 0.03 18.1 -0.17
## + c_rate 1 0.01 18.1 -0.11
## - weight 1 2.07 20.2 0.01
## - c_avg_att 1 3.44 21.6 2.51
## - vert_leap 1 3.59 21.7 2.76
## - c_avg_tds 1 3.94 22.1 3.38
## - c_avg_cmpp 1 5.56 23.7 6.07
##
## Step: AIC=-3.74
## completion_percentage ~ weight + age + c_avg_cmpp + c_pct + c_avg_tds +
## c_numyrs + c_avg_att + X40 + cone + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.56 18.9 -4.59
## - X40 1 0.58 18.9 -4.55
## - age 1 0.85 19.2 -4.02
## - c_numyrs 1 0.93 19.2 -3.86
## <none> 18.3 -3.74
## - shuttle 1 1.54 19.9 -2.67
## + wonderlic 1 0.17 18.1 -2.09
## + c_avg_inter 1 0.16 18.2 -2.07
## - cone 1 1.89 20.2 -2.01
## + broad_jump 1 0.07 18.2 -1.88
## + c_avg_yds 1 0.06 18.2 -1.87
## + c_rate 1 0.02 18.3 -1.78
## + height 1 0.00 18.3 -1.74
## - weight 1 2.38 20.7 -1.09
## - c_avg_att 1 3.32 21.6 0.58
## - vert_leap 1 3.44 21.8 0.80
## - c_avg_tds 1 3.78 22.1 1.38
## - c_avg_cmpp 1 5.39 23.7 4.07
##
## Step: AIC=-4.59
## completion_percentage ~ weight + age + c_avg_cmpp + c_avg_tds +
## c_numyrs + c_avg_att + X40 + cone + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.42 19.3 -5.75
## - X40 1 0.89 19.8 -4.84
## <none> 18.9 -4.59
## - age 1 1.22 20.1 -4.22
## + c_pct 1 0.56 18.3 -3.74
## - shuttle 1 1.50 20.4 -3.69
## + c_rate 1 0.43 18.4 -3.47
## - cone 1 1.73 20.6 -3.26
## + c_avg_inter 1 0.24 18.6 -3.08
## + wonderlic 1 0.18 18.7 -2.96
## + c_avg_yds 1 0.14 18.7 -2.88
## + height 1 0.00 18.9 -2.60
## + broad_jump 1 0.00 18.9 -2.59
## - weight 1 2.59 21.5 -1.70
## - c_avg_att 1 3.13 22.0 -0.76
## - c_avg_tds 1 4.43 23.3 1.42
## - vert_leap 1 5.45 24.3 3.04
## - c_avg_cmpp 1 5.98 24.9 3.86
##
## Step: AIC=-5.75
## completion_percentage ~ weight + age + c_avg_cmpp + c_avg_tds +
## c_avg_att + X40 + cone + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - X40 1 0.69 20.0 -6.41
## <none> 19.3 -5.75
## - age 1 1.28 20.6 -5.32
## + c_avg_inter 1 0.47 18.8 -4.69
## + c_numyrs 1 0.42 18.9 -4.59
## - shuttle 1 1.73 21.0 -4.48
## - cone 1 1.84 21.1 -4.29
## + c_avg_yds 1 0.21 19.1 -4.17
## + wonderlic 1 0.11 19.2 -3.96
## + c_rate 1 0.07 19.2 -3.89
## + c_pct 1 0.06 19.2 -3.86
## + height 1 0.03 19.3 -3.80
## + broad_jump 1 0.02 19.3 -3.79
## - c_avg_att 1 2.75 22.1 -2.68
## - weight 1 2.78 22.1 -2.64
## - c_avg_tds 1 4.17 23.5 -0.32
## - vert_leap 1 5.29 24.6 1.46
## - c_avg_cmpp 1 5.58 24.9 1.89
##
## Step: AIC=-6.41
## completion_percentage ~ weight + age + c_avg_cmpp + c_avg_tds +
## c_avg_att + cone + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## <none> 20.0 -6.41
## - age 1 1.29 21.3 -6.03
## + X40 1 0.69 19.3 -5.75
## + c_avg_inter 1 0.62 19.4 -5.60
## - cone 1 1.78 21.8 -5.17
## + wonderlic 1 0.23 19.8 -4.84
## + c_numyrs 1 0.22 19.8 -4.84
## + c_pct 1 0.22 19.8 -4.82
## + c_rate 1 0.19 19.8 -4.78
## + c_avg_yds 1 0.14 19.9 -4.68
## + height 1 0.13 19.9 -4.66
## + broad_jump 1 0.05 19.9 -4.50
## - c_avg_att 1 2.55 22.5 -3.85
## - shuttle 1 2.62 22.6 -3.73
## - weight 1 2.87 22.9 -3.32
## - c_avg_tds 1 3.55 23.5 -2.19
## - c_avg_cmpp 1 5.07 25.1 0.19
## - vert_leap 1 5.14 25.1 0.29
# Print the coefficient table and fit statistics of the AIC-selected model
summary(step_reg.scaled.w_combine.cpct)
##
## Call:
## lm(formula = completion_percentage ~ weight + age + c_avg_cmpp +
## c_avg_tds + c_avg_att + cone + shuttle + vert_leap, data = data.scaled.w_combine.for_cpct)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.449 -0.642 0.104 0.459 2.195
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.05e-16 1.35e-01 0.00 1.000
## weight 3.75e-01 1.84e-01 2.04 0.051 .
## age 2.27e-01 1.66e-01 1.37 0.182
## c_avg_cmpp 2.08e+00 7.67e-01 2.71 0.011 *
## c_avg_tds -6.71e-01 2.96e-01 -2.27 0.031 *
## c_avg_att -1.33e+00 6.89e-01 -1.92 0.064 .
## cone 3.03e-01 1.89e-01 1.61 0.119
## shuttle -4.23e-01 2.17e-01 -1.95 0.061 .
## vert_leap -4.95e-01 1.81e-01 -2.73 0.011 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.83 on 29 degrees of freedom
## Multiple R-squared: 0.46, Adjusted R-squared: 0.311
## F-statistic: 3.08 on 8 and 29 DF, p-value: 0.0121
# Diagnostic plots for the AIC-selected model
plot(step_reg.scaled.w_combine.cpct)

# Best-subsets search over all predictors, keeping the 10 best models per size
leaps.scaled.w_combine.cpct <- regsubsets(
  completion_percentage ~ .,
  data = data.scaled.w_combine.for_cpct,
  nbest = 10
)

# Plot R-squared against subset size. The legend is switched off because the
# default legend is positioned with the mouse, which cannot work in a
# non-interactive (knitted) run; with the default this call aborted with
# "Error: invalid coordinate lengths".
# NOTE(review): per the car documentation, subsets() returns the table of
# predictor abbreviations when no legend is drawn -- confirm it is printed.
subsets(
  leaps.scaled.w_combine.cpct,
  statistic = "rsq",
  legend = FALSE
)
# 5-fold cross-validation of the AIC-selected model.
# The data frame is passed positionally because DAAG renamed this first
# argument (`df` in older releases, `data` in newer ones); `form.lm` and `m`
# keep the same names in both, so this form runs on either version.
cv.lm(
  data.scaled.w_combine.for_cpct,
  form.lm = step_reg.scaled.w_combine.cpct,
  m = 5
)
## Analysis of Variance Table
##
## Response: completion_percentage
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 0.56 0.56 0.82 0.373
## age 1 1.67 1.67 2.42 0.130
## c_avg_cmpp 1 1.40 1.40 2.03 0.165
## c_avg_tds 1 1.71 1.71 2.49 0.126
## c_avg_att 1 3.85 3.85 5.58 0.025 *
## cone 1 2.39 2.39 3.47 0.073 .
## shuttle 1 0.29 0.29 0.42 0.524
## vert_leap 1 5.14 5.14 7.46 0.011 *
## Residuals 29 19.99 0.69
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 6 24 26 32 38 46 59
## Predicted 0.3699 0.8773 -0.879 -0.3106 -0.420 -0.1654 -0.265
## cvpred 0.3746 1.0313 -0.951 -0.3360 -0.366 -0.0801 -0.689
## completion_percentage 0.3100 -0.0706 -0.756 -0.0135 -1.213 -0.5084 0.348
## CV residual -0.0645 -1.1020 0.195 0.3225 -0.847 -0.4283 1.037
##
## Sum of squares = 3.34 Mean square = 0.48 n = 7
##
## fold 2
## Observations in test set: 8
## 7 19 21 27 37 43
## Predicted 0.326 0.307 0.0476 -0.42391 0.199 -0.87796
## cvpred 0.448 0.343 -0.3582 -0.75354 -0.105 -0.88005
## completion_percentage -0.699 -0.413 1.1285 0.00551 0.672 -0.88906
## CV residual -1.146 -0.756 1.4867 0.75905 0.776 -0.00901
## 55 65
## Predicted -0.0838 1.25
## cvpred 0.2249 1.05
## completion_percentage -1.0223 1.41
## CV residual -1.2472 0.36
##
## Sum of squares = 6.96 Mean square = 0.87 n = 8
##
## fold 3
## Observations in test set: 8
## 5 12 13 17 39 40 50
## Predicted -0.454 -0.748 -0.197 1.115 0.272 -0.61 -0.4750
## cvpred -0.722 -1.001 -0.356 1.149 1.065 -1.11 -0.0713
## completion_percentage -0.451 -0.280 0.672 1.490 -0.375 0.12 -0.6036
## CV residual 0.271 0.721 1.028 0.342 -1.440 1.23 -0.5323
## 56
## Predicted -0.743
## cvpred -1.546
## completion_percentage 1.452
## CV residual 2.998
##
## Sum of squares = 14.6 Mean square = 1.83 n = 8
##
## fold 4
## Observations in test set: 8
## 4 15 18 20 28 52 63
## Predicted -0.219 0.7911 0.5648 -0.3582 -0.246 -0.659 1.470
## cvpred 0.339 0.8693 0.7668 -0.4453 -0.199 -0.621 2.213
## completion_percentage -1.060 0.8239 0.8620 -0.0897 0.291 -2.107 0.843
## CV residual -1.399 -0.0453 0.0952 0.3556 0.490 -1.487 -1.370
## 64
## Predicted -0.928
## cvpred -0.586
## completion_percentage -1.898
## CV residual -1.312
##
## Sum of squares = 8.14 Mean square = 1.02 n = 8
##
## fold 5
## Observations in test set: 7
## 1 3 25 30 42 49 61
## Predicted -0.0913 -1.140 1.1002 0.146 0.0719 1.316 0.0659
## cvpred -0.6521 -0.534 0.0351 -0.045 0.2204 0.216 -0.4387
## completion_percentage -0.1087 -2.336 1.1856 0.824 0.2339 1.890 0.3291
## CV residual 0.5434 -1.802 1.1504 0.869 0.0135 1.673 0.7678
##
## Sum of squares = 9.01 Mean square = 1.29 n = 7
##
## Overall (Sum over all 7 folds)
## ms
## 1.11