# Load the quarterback data set from disk
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictor columns used to model the rating
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Response variable, kept as a one-column data frame so that cbind() below
# carries the "rating" column name through
rating <- qb_stats_w_combine["rating"]

# Clean data set: drop every row containing an NA, then center and scale all
# columns (response included). local() keeps the intermediate out of the
# global environment.
data.scaled.w_combine.for_rating <- local({
  complete_rows <- na.omit(cbind(rating, college_stats))
  data.frame(scale(complete_rows))
})

# Full linear model: rating regressed on every candidate predictor
lm.scaled.w_combine.rating <- lm(
  formula = rating ~ .,
  data = data.scaled.w_combine.for_rating
)

# Stepwise AIC search (terms may be dropped or re-added) starting from the
# full model, to find the best linear regression model for rating
step_reg.scaled.w_combine.rating <- stepAIC(
  lm.scaled.w_combine.rating,
  direction = "both"
)
## Start: AIC=13.81
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.000 21.2 11.8
## - height 1 0.008 21.2 11.8
## - c_avg_tds 1 0.012 21.2 11.8
## - c_pct 1 0.020 21.2 11.8
## - c_avg_inter 1 0.031 21.2 11.9
## - c_avg_cmpp 1 0.110 21.3 12.0
## - c_rate 1 0.135 21.3 12.1
## - c_avg_yds 1 0.180 21.4 12.1
## - shuttle 1 0.199 21.4 12.2
## - X40 1 0.260 21.4 12.3
## - broad_jump 1 0.339 21.5 12.4
## - c_avg_att 1 0.454 21.6 12.6
## - wonderlic 1 0.704 21.9 13.1
## <none> 21.2 13.8
## - c_numyrs 1 1.147 22.3 13.8
## - vert_leap 1 1.512 22.7 14.4
## - weight 1 1.773 23.0 14.9
## - age 1 2.156 23.4 15.5
##
## Step: AIC=11.81
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.008 21.2 9.83
## - c_avg_tds 1 0.012 21.2 9.84
## - c_pct 1 0.022 21.2 9.85
## - c_avg_inter 1 0.032 21.2 9.87
## - c_avg_cmpp 1 0.111 21.3 10.01
## - c_rate 1 0.143 21.3 10.07
## - c_avg_yds 1 0.184 21.4 10.14
## - shuttle 1 0.235 21.4 10.23
## - X40 1 0.265 21.5 10.29
## - broad_jump 1 0.341 21.5 10.42
## - c_avg_att 1 0.455 21.6 10.62
## - wonderlic 1 0.837 22.0 11.29
## <none> 21.2 11.81
## - c_numyrs 1 1.159 22.4 11.84
## - vert_leap 1 1.544 22.7 12.49
## - weight 1 1.842 23.0 12.98
## - age 1 2.160 23.4 13.50
## + cone 1 0.000 21.2 13.81
##
## Step: AIC=9.83
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.009 21.2 7.84
## - c_pct 1 0.029 21.2 7.88
## - c_avg_inter 1 0.031 21.2 7.88
## - c_avg_cmpp 1 0.108 21.3 8.02
## - c_rate 1 0.168 21.4 8.13
## - c_avg_yds 1 0.210 21.4 8.20
## - shuttle 1 0.238 21.4 8.25
## - X40 1 0.294 21.5 8.35
## - broad_jump 1 0.345 21.6 8.44
## - c_avg_att 1 0.492 21.7 8.70
## - wonderlic 1 1.132 22.3 9.80
## <none> 21.2 9.83
## - c_numyrs 1 1.246 22.4 10.00
## - vert_leap 1 1.551 22.8 10.51
## - age 1 2.216 23.4 11.61
## + height 1 0.008 21.2 11.81
## + cone 1 0.000 21.2 11.83
## - weight 1 3.001 24.2 12.86
##
## Step: AIC=7.84
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.045 21.3 5.93
## - c_pct 1 0.092 21.3 6.01
## - c_avg_cmpp 1 0.118 21.3 6.05
## - shuttle 1 0.236 21.4 6.27
## - c_avg_yds 1 0.257 21.5 6.30
## - X40 1 0.286 21.5 6.35
## - broad_jump 1 0.410 21.6 6.57
## - c_rate 1 0.415 21.6 6.58
## - c_avg_att 1 0.518 21.7 6.76
## <none> 21.2 7.84
## - wonderlic 1 1.160 22.4 7.87
## - c_numyrs 1 1.249 22.5 8.02
## - vert_leap 1 1.542 22.8 8.51
## - age 1 2.208 23.4 9.61
## + c_avg_tds 1 0.009 21.2 9.83
## + height 1 0.005 21.2 9.84
## + cone 1 0.000 21.2 9.84
## - weight 1 2.994 24.2 10.86
##
## Step: AIC=5.93
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_yds +
## c_numyrs + c_avg_att + X40 + wonderlic + shuttle + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.052 21.3 4.02
## - shuttle 1 0.206 21.5 4.29
## - c_avg_yds 1 0.222 21.5 4.32
## - X40 1 0.347 21.6 4.54
## - c_avg_cmpp 1 0.375 21.6 4.59
## - broad_jump 1 0.375 21.6 4.59
## - c_rate 1 0.380 21.6 4.60
## - wonderlic 1 1.141 22.4 5.91
## <none> 21.3 5.93
## - vert_leap 1 1.537 22.8 6.58
## - c_numyrs 1 1.591 22.9 6.67
## - c_avg_att 1 1.877 23.1 7.14
## + c_avg_inter 1 0.045 21.2 7.84
## + c_avg_tds 1 0.023 21.2 7.88
## + cone 1 0.007 21.2 7.91
## + height 1 0.003 21.2 7.92
## - age 1 2.366 23.6 7.94
## - weight 1 2.951 24.2 8.86
##
## Step: AIC=4.02
## rating ~ weight + age + c_avg_cmpp + c_rate + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.166 21.5 2.31
## - c_avg_yds 1 0.214 21.5 2.40
## - X40 1 0.310 21.6 2.57
## - broad_jump 1 0.381 21.7 2.69
## - wonderlic 1 1.092 22.4 3.92
## <none> 21.3 4.02
## - c_rate 1 1.374 22.7 4.39
## - vert_leap 1 1.572 22.9 4.72
## - c_numyrs 1 1.737 23.1 5.00
## - c_avg_att 1 1.983 23.3 5.40
## - c_avg_cmpp 1 2.085 23.4 5.57
## + c_avg_tds 1 0.067 21.2 5.90
## + c_pct 1 0.052 21.3 5.93
## + height 1 0.008 21.3 6.00
## + c_avg_inter 1 0.006 21.3 6.01
## + cone 1 0.001 21.3 6.02
## - age 1 2.455 23.8 6.16
## - weight 1 2.914 24.2 6.89
##
## Step: AIC=2.31
## rating ~ weight + age + c_avg_cmpp + c_rate + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.122 21.6 0.53
## - broad_jump 1 0.317 21.8 0.87
## - X40 1 0.705 22.2 1.54
## - wonderlic 1 0.939 22.4 1.94
## <none> 21.5 2.31
## - c_rate 1 1.238 22.7 2.44
## - vert_leap 1 1.409 22.9 2.73
## - c_numyrs 1 1.657 23.1 3.14
## - c_avg_att 1 1.926 23.4 3.58
## + shuttle 1 0.166 21.3 4.02
## + cone 1 0.036 21.4 4.25
## + c_avg_tds 1 0.025 21.4 4.27
## - age 1 2.363 23.8 4.28
## + c_pct 1 0.012 21.5 4.29
## + height 1 0.008 21.5 4.30
## + c_avg_inter 1 0.004 21.5 4.31
## - c_avg_cmpp 1 2.459 23.9 4.43
## - weight 1 2.838 24.3 5.03
##
## Step: AIC=0.53
## rating ~ weight + age + c_avg_cmpp + c_rate + c_numyrs + c_avg_att +
## X40 + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.231 21.8 -1.07
## - wonderlic 1 0.915 22.5 0.10
## - X40 1 0.966 22.6 0.19
## <none> 21.6 0.53
## - c_rate 1 1.373 23.0 0.87
## - vert_leap 1 1.400 23.0 0.91
## - c_numyrs 1 1.612 23.2 1.26
## - c_avg_att 1 2.179 23.8 2.18
## + c_avg_yds 1 0.122 21.5 2.31
## + shuttle 1 0.074 21.5 2.40
## + cone 1 0.038 21.6 2.46
## + c_pct 1 0.036 21.6 2.46
## + c_avg_inter 1 0.024 21.6 2.49
## + height 1 0.016 21.6 2.50
## + c_avg_tds 1 0.000 21.6 2.53
## - age 1 2.662 24.3 2.94
## - c_avg_cmpp 1 2.823 24.4 3.20
## - weight 1 3.055 24.6 3.56
##
## Step: AIC=-1.07
## rating ~ weight + age + c_avg_cmpp + c_rate + c_numyrs + c_avg_att +
## X40 + wonderlic + vert_leap
##
## Df Sum of Sq RSS AIC
## - wonderlic 1 0.82 22.6 -1.660
## - vert_leap 1 1.18 23.0 -1.071
## <none> 21.8 -1.068
## - c_rate 1 1.19 23.0 -1.046
## - c_numyrs 1 1.40 23.2 -0.706
## - X40 1 1.59 23.4 -0.403
## - c_avg_att 1 2.08 23.9 0.398
## + broad_jump 1 0.23 21.6 0.528
## + shuttle 1 0.06 21.8 0.824
## + cone 1 0.04 21.8 0.856
## + c_avg_yds 1 0.04 21.8 0.870
## + c_avg_tds 1 0.03 21.8 0.879
## + height 1 0.01 21.8 0.913
## + c_pct 1 0.00 21.8 0.927
## + c_avg_inter 1 0.00 21.8 0.931
## - c_avg_cmpp 1 2.70 24.5 1.357
## - age 1 2.89 24.7 1.659
## - weight 1 3.65 25.5 2.808
##
## Step: AIC=-1.66
## rating ~ weight + age + c_avg_cmpp + c_rate + c_numyrs + c_avg_att +
## X40 + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.98 23.6 -2.05
## - c_numyrs 1 1.09 23.7 -1.87
## <none> 22.6 -1.66
## - vert_leap 1 1.24 23.9 -1.64
## - c_avg_att 1 1.50 24.1 -1.23
## + wonderlic 1 0.82 21.8 -1.07
## - X40 1 1.85 24.5 -0.68
## - c_avg_cmpp 1 2.02 24.7 -0.41
## + height 1 0.26 22.4 -0.09
## + broad_jump 1 0.14 22.5 0.10
## + cone 1 0.04 22.6 0.27
## + c_avg_yds 1 0.04 22.6 0.28
## + c_pct 1 0.03 22.6 0.29
## + c_avg_inter 1 0.01 22.6 0.32
## + c_avg_tds 1 0.00 22.6 0.33
## + shuttle 1 0.00 22.6 0.34
## - weight 1 5.60 28.2 4.73
## - age 1 6.17 28.8 5.49
##
## Step: AIC=-2.05
## rating ~ weight + age + c_avg_cmpp + c_numyrs + c_avg_att + X40 +
## vert_leap
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.44 24.1 -3.36
## - c_avg_att 1 0.68 24.3 -2.98
## - c_avg_cmpp 1 1.07 24.7 -2.37
## <none> 23.6 -2.05
## + c_rate 1 0.98 22.6 -1.66
## + c_pct 1 0.88 22.8 -1.49
## - X40 1 1.81 25.4 -1.25
## - vert_leap 1 1.82 25.4 -1.23
## + wonderlic 1 0.61 23.0 -1.05
## + c_avg_tds 1 0.36 23.3 -0.63
## + height 1 0.26 23.4 -0.47
## + c_avg_yds 1 0.22 23.4 -0.40
## + c_avg_inter 1 0.08 23.6 -0.18
## + cone 1 0.04 23.6 -0.11
## + broad_jump 1 0.03 23.6 -0.09
## + shuttle 1 0.00 23.6 -0.06
## - weight 1 5.56 29.2 3.97
## - age 1 7.29 30.9 6.17
##
## Step: AIC=-3.36
## rating ~ weight + age + c_avg_cmpp + c_avg_att + X40 + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_att 1 0.47 24.5 -4.62
## - c_avg_cmpp 1 0.87 24.9 -4.01
## <none> 24.1 -3.36
## - X40 1 1.60 25.7 -2.91
## - vert_leap 1 1.73 25.8 -2.72
## + wonderlic 1 0.48 23.6 -2.12
## + c_numyrs 1 0.44 23.6 -2.05
## + height 1 0.37 23.7 -1.95
## + c_rate 1 0.32 23.7 -1.87
## + c_avg_tds 1 0.26 23.8 -1.77
## + c_avg_yds 1 0.26 23.8 -1.76
## + c_pct 1 0.13 23.9 -1.57
## + cone 1 0.04 24.0 -1.42
## + shuttle 1 0.02 24.0 -1.39
## + c_avg_inter 1 0.00 24.1 -1.36
## + broad_jump 1 0.00 24.1 -1.36
## - weight 1 5.68 29.7 2.69
## - age 1 7.36 31.4 4.78
##
## Step: AIC=-4.62
## rating ~ weight + age + c_avg_cmpp + X40 + vert_leap
##
## Df Sum of Sq RSS AIC
## <none> 24.5 -4.62
## - c_avg_cmpp 1 1.41 25.9 -4.50
## - X40 1 1.71 26.3 -4.06
## - vert_leap 1 2.08 26.6 -3.53
## + height 1 0.53 24.0 -3.45
## + c_avg_att 1 0.47 24.1 -3.36
## + c_avg_yds 1 0.46 24.1 -3.33
## + c_avg_inter 1 0.24 24.3 -3.00
## + wonderlic 1 0.23 24.3 -2.98
## + c_numyrs 1 0.23 24.3 -2.98
## + c_avg_tds 1 0.14 24.4 -2.84
## + shuttle 1 0.10 24.4 -2.77
## + c_rate 1 0.05 24.5 -2.70
## + broad_jump 1 0.01 24.5 -2.65
## + c_pct 1 0.00 24.5 -2.63
## + cone 1 0.00 24.5 -2.63
## - weight 1 6.92 31.5 2.82
## - age 1 7.19 31.7 3.14
# Print the coefficient table and fit statistics of the model chosen by the
# stepwise AIC search
summary(step_reg.scaled.w_combine.rating)
##
## Call:
## lm(formula = rating ~ weight + age + c_avg_cmpp + X40 + vert_leap,
## data = data.scaled.w_combine.for_rating)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8369 -0.5566 -0.0951 0.5703 2.3523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.16e-15 1.42e-01 0.00 1.0000
## weight 4.87e-01 1.62e-01 3.00 0.0051 **
## age 4.96e-01 1.62e-01 3.06 0.0044 **
## c_avg_cmpp 1.99e-01 1.47e-01 1.36 0.1848
## X40 -3.32e-01 2.22e-01 -1.50 0.1447
## vert_leap -3.67e-01 2.23e-01 -1.65 0.1092
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.876 on 32 degrees of freedom
## Multiple R-squared: 0.337, Adjusted R-squared: 0.233
## F-statistic: 3.25 on 5 and 32 DF, p-value: 0.0174
# Diagnostic plots for the model chosen by the stepwise AIC search
plot(step_reg.scaled.w_combine.rating)

# Best-subsets search over all predictors, keeping the 10 best models of each
# subset size
leaps.scaled.w_combine.rating <- regsubsets(
  rating ~ .,
  data = data.scaled.w_combine.for_rating,
  nbest = 10
)

# Plot R-squared for the retained subsets.
# legend = FALSE: by default subsets() places its legend interactively via
# locator(); in a non-interactive run (knitr / Rscript) no point can be
# picked, and the call aborted with "Error: invalid coordinate lengths".
# NOTE(review): per the car documentation the abbreviation key is returned
# (invisibly) as a data frame when the legend is suppressed -- capture and
# print it if the key is needed alongside the plot.
subsets(leaps.scaled.w_combine.rating, statistic = "rsq", legend = FALSE)
# 5-fold cross-validation of the AIC-selected model on the scaled data.
# The data frame and the fitted model are passed positionally: the first
# argument was named `df` in older DAAG releases and `data` in later ones,
# so naming it ties the script to one package version.
cv.lm(
  data.scaled.w_combine.for_rating,
  step_reg.scaled.w_combine.rating,
  m = 5
)
## Analysis of Variance Table
##
## Response: rating
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 1.78 1.78 2.32 0.1378
## age 1 7.08 7.08 9.24 0.0047 **
## c_avg_cmpp 1 1.41 1.41 1.84 0.1841
## X40 1 0.11 0.11 0.14 0.7098
## vert_leap 1 2.08 2.08 2.71 0.1092
## Residuals 32 24.54 0.77
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 6 24 26 32 38 46 59
## Predicted -0.0371 0.775 -0.7961 0.43217 -0.484 -0.746 0.0304
## cvpred -0.0162 0.894 -0.8552 0.42652 -0.450 -0.656 -0.3169
## rating 0.5204 -0.102 -0.8837 0.00987 -1.115 -0.868 1.0470
## CV residual 0.5366 -0.996 -0.0284 -0.41666 -0.665 -0.211 1.3639
##
## Sum of squares = 3.8 Mean square = 0.54 n = 7
##
## fold 2
## Observations in test set: 8
## 7 19 21 27 37 43 55 65
## Predicted 0.525 -0.0601 1.096 0.113 -0.212 -0.767 -0.386 -0.30
## cvpred 0.354 -0.0598 1.168 -0.431 -0.986 -0.595 -0.147 -1.45
## rating -0.126 -0.3013 1.948 0.688 0.512 -0.916 -0.868 1.01
## CV residual -0.480 -0.2415 0.781 1.119 1.499 -0.320 -0.720 2.46
##
## Sum of squares = 11.1 Mean square = 1.38 n = 8
##
## fold 3
## Observations in test set: 8
## 5 12 13 17 39 40 50 56
## Predicted 0.2503 -0.900 0.1243 0.10263 -0.450 -0.6119 -0.446 -0.268
## cvpred 0.1429 -1.205 0.0898 0.00675 -0.163 -0.9399 -0.457 -0.845
## rating 0.2093 -0.429 0.8475 -0.13373 -1.123 0.0178 -1.027 2.084
## CV residual 0.0664 0.776 0.7578 -0.14048 -0.960 0.9578 -0.571 2.929
##
## Sum of squares = 11.9 Mean square = 1.49 n = 8
##
## fold 4
## Observations in test set: 8
## 4 15 18 20 28 52 63 64
## Predicted -1.238 0.692 0.0462 0.195 0.679 -0.0598 0.925 -0.0953
## cvpred -0.959 0.992 0.2086 0.190 0.763 0.0439 1.448 -0.2195
## rating -2.639 0.106 0.6002 -0.229 0.313 -1.4900 0.337 -0.0779
## CV residual -1.680 -0.886 0.3917 -0.419 -0.450 -1.5339 -1.112 0.1416
##
## Sum of squares = 7.75 Mean square = 0.97 n = 8
##
## fold 5
## Observations in test set: 7
## 1 3 25 30 42 49 61
## Predicted 0.000154 0.155 1.166 -0.654 0.67691 0.680 -0.156
## cvpred 0.089007 0.189 0.647 -0.765 0.56778 0.507 0.145
## rating 0.201336 -1.681 1.925 0.241 0.57629 1.063 -0.245
## CV residual 0.112328 -1.871 1.278 1.006 0.00851 0.556 -0.391
##
## Sum of squares = 6.62 Mean square = 0.95 n = 7
##
## Overall (Sum over all 7 folds)
## ms
## 1.08