# Load the quarterback table from disk
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Names of the columns used as predictors (order is kept as-is because it
# determines the column order of the modelling data frame)
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]

# Response variable, kept as a one-column data frame so the column name
# "ints" carries through the cbind() below
ints <- qb_stats_w_combine["ints"]

# Build the modelling data set in three steps:
#   1. response first, then the predictors
#   2. drop every row that has at least one NA
#   3. centre and scale all columns (scale() returns a matrix, so convert
#      back to a data frame)
data.scaled.w_combine.for_ints <- cbind(ints, college_stats)
data.scaled.w_combine.for_ints <- na.omit(data.scaled.w_combine.for_ints)
data.scaled.w_combine.for_ints <- data.frame(scale(data.scaled.w_combine.for_ints))

# Full linear model: ints regressed on every predictor
lm.scaled.w_combine.ints <- lm(
  formula = ints ~ .,
  data = data.scaled.w_combine.for_ints
)

# Stepwise AIC search (terms may be dropped or re-added) starting from the
# full model; the trace is printed below
step_reg.scaled.w_combine.ints <- stepAIC(
  lm.scaled.w_combine.ints,
  direction = "both"
)
## Start: AIC=7.24
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.01 18.7 5.25
## - vert_leap 1 0.03 18.7 5.30
## - c_rate 1 0.07 18.7 5.38
## - c_pct 1 0.10 18.8 5.45
## - height 1 0.13 18.8 5.50
## - c_avg_tds 1 0.20 18.9 5.65
## - shuttle 1 0.21 18.9 5.68
## - wonderlic 1 0.22 18.9 5.68
## - weight 1 0.23 18.9 5.72
## - X40 1 0.29 18.9 5.84
## - c_avg_inter 1 0.29 18.9 5.84
## - cone 1 0.40 19.1 6.07
## - c_numyrs 1 0.41 19.1 6.08
## - c_avg_cmpp 1 0.55 19.2 6.37
## - broad_jump 1 0.74 19.4 6.75
## - c_avg_att 1 0.85 19.5 6.98
## <none> 18.6 7.24
## - age 1 9.12 27.8 20.76
##
## Step: AIC=5.25
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.05 18.7 3.34
## - c_rate 1 0.11 18.8 3.48
## - height 1 0.14 18.8 3.55
## - c_pct 1 0.16 18.8 3.59
## - c_avg_tds 1 0.20 18.9 3.66
## - shuttle 1 0.21 18.9 3.68
## - weight 1 0.24 18.9 3.74
## - wonderlic 1 0.29 18.9 3.84
## - c_avg_inter 1 0.29 18.9 3.85
## - X40 1 0.30 19.0 3.87
## - cone 1 0.40 19.1 4.07
## - c_numyrs 1 0.42 19.1 4.12
## - broad_jump 1 0.82 19.5 4.93
## - c_avg_att 1 0.85 19.5 4.98
## - c_avg_cmpp 1 0.91 19.6 5.10
## <none> 18.7 5.25
## + c_avg_yds 1 0.01 18.6 7.24
## - age 1 9.93 28.6 19.89
##
## Step: AIC=3.34
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.10 18.8 1.56
## - height 1 0.15 18.9 1.65
## - c_pct 1 0.16 18.9 1.68
## - c_avg_tds 1 0.18 18.9 1.71
## - c_avg_inter 1 0.27 19.0 1.89
## - shuttle 1 0.27 19.0 1.90
## - wonderlic 1 0.27 19.0 1.90
## - weight 1 0.28 19.0 1.91
## - c_numyrs 1 0.42 19.1 2.21
## - X40 1 0.42 19.1 2.21
## - cone 1 0.43 19.1 2.22
## - broad_jump 1 0.78 19.5 2.94
## - c_avg_att 1 0.87 19.6 3.11
## - c_avg_cmpp 1 0.93 19.6 3.23
## <none> 18.7 3.34
## + vert_leap 1 0.05 18.7 5.25
## + c_avg_yds 1 0.02 18.7 5.30
## - age 1 10.44 29.1 18.64
##
## Step: AIC=1.56
## ints ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone +
## shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.08 18.9 -0.28
## - c_pct 1 0.10 18.9 -0.24
## - c_avg_inter 1 0.18 19.0 -0.07
## - height 1 0.18 19.0 -0.07
## - weight 1 0.32 19.1 0.21
## - c_numyrs 1 0.34 19.1 0.24
## - cone 1 0.36 19.2 0.29
## - shuttle 1 0.37 19.2 0.31
## - wonderlic 1 0.38 19.2 0.33
## - X40 1 0.39 19.2 0.35
## - c_avg_att 1 0.82 19.6 1.22
## - broad_jump 1 0.88 19.7 1.33
## <none> 18.8 1.56
## - c_avg_cmpp 1 1.08 19.9 1.73
## + c_rate 1 0.10 18.7 3.34
## + vert_leap 1 0.04 18.8 3.48
## + c_avg_yds 1 0.03 18.8 3.50
## - age 1 10.79 29.6 17.24
##
## Step: AIC=-0.28
## ints ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_numyrs + c_avg_att + X40 + wonderlic + cone + shuttle +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.14 19.0 -1.99
## - height 1 0.21 19.1 -1.85
## - cone 1 0.32 19.2 -1.62
## - c_avg_inter 1 0.33 19.2 -1.61
## - wonderlic 1 0.34 19.2 -1.58
## - weight 1 0.39 19.3 -1.48
## - c_numyrs 1 0.43 19.3 -1.42
## - X40 1 0.43 19.3 -1.41
## - shuttle 1 0.45 19.3 -1.36
## <none> 18.9 -0.28
## - c_avg_att 1 1.01 19.9 -0.25
## - broad_jump 1 1.01 19.9 -0.24
## - c_avg_cmpp 1 1.07 19.9 -0.14
## + c_avg_tds 1 0.08 18.8 1.56
## + vert_leap 1 0.03 18.9 1.66
## + c_avg_yds 1 0.00 18.9 1.71
## + c_rate 1 0.00 18.9 1.71
## - age 1 10.73 29.6 15.26
##
## Step: AIC=-1.99
## ints ~ height + weight + age + c_avg_cmpp + c_avg_inter + c_numyrs +
## c_avg_att + X40 + wonderlic + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.20 19.2 -3.57
## - c_numyrs 1 0.28 19.3 -3.41
## - cone 1 0.30 19.3 -3.39
## - c_avg_inter 1 0.32 19.3 -3.34
## - wonderlic 1 0.32 19.4 -3.33
## - X40 1 0.35 19.4 -3.28
## - weight 1 0.35 19.4 -3.27
## - shuttle 1 0.49 19.5 -2.99
## - broad_jump 1 0.87 19.9 -2.24
## - c_avg_att 1 0.88 19.9 -2.22
## <none> 19.0 -1.99
## - c_avg_cmpp 1 1.03 20.1 -1.93
## + c_pct 1 0.14 18.9 -0.28
## + c_rate 1 0.12 18.9 -0.24
## + c_avg_tds 1 0.12 18.9 -0.24
## + vert_leap 1 0.05 19.0 -0.09
## + c_avg_yds 1 0.02 19.0 -0.04
## - age 1 10.71 29.7 13.43
##
## Step: AIC=-3.57
## ints ~ weight + age + c_avg_cmpp + c_avg_inter + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.15 19.4 -5.27
## - wonderlic 1 0.17 19.4 -5.23
## - c_numyrs 1 0.18 19.4 -5.22
## - c_avg_inter 1 0.24 19.5 -5.08
## - cone 1 0.40 19.6 -4.77
## - shuttle 1 0.42 19.6 -4.73
## - X40 1 0.60 19.8 -4.38
## - c_avg_att 1 0.70 19.9 -4.18
## - c_avg_cmpp 1 0.83 20.1 -3.93
## - broad_jump 1 0.99 20.2 -3.61
## <none> 19.2 -3.57
## + height 1 0.20 19.0 -1.99
## + c_avg_tds 1 0.16 19.1 -1.89
## + c_pct 1 0.14 19.1 -1.85
## + c_rate 1 0.12 19.1 -1.81
## + vert_leap 1 0.05 19.2 -1.67
## + c_avg_yds 1 0.04 19.2 -1.65
## - age 1 10.52 29.8 11.44
##
## Step: AIC=-5.27
## ints ~ age + c_avg_cmpp + c_avg_inter + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - wonderlic 1 0.11 19.5 -7.05
## - c_numyrs 1 0.14 19.5 -6.99
## - c_avg_inter 1 0.23 19.6 -6.81
## - shuttle 1 0.30 19.7 -6.67
## - cone 1 0.45 19.8 -6.37
## - c_avg_att 1 0.59 20.0 -6.10
## - X40 1 0.59 20.0 -6.09
## - c_avg_cmpp 1 0.70 20.1 -5.88
## - broad_jump 1 0.86 20.2 -5.57
## <none> 19.4 -5.27
## + c_avg_tds 1 0.20 19.2 -3.66
## + weight 1 0.15 19.2 -3.57
## + c_rate 1 0.10 19.3 -3.47
## + c_pct 1 0.10 19.3 -3.47
## + vert_leap 1 0.08 19.3 -3.42
## + c_avg_yds 1 0.04 19.4 -3.34
## + height 1 0.00 19.4 -3.27
## - age 1 12.03 31.4 11.56
##
## Step: AIC=-7.05
## ints ~ age + c_avg_cmpp + c_avg_inter + c_numyrs + c_avg_att +
## X40 + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.12 19.6 -8.82
## - c_avg_inter 1 0.25 19.7 -8.56
## - shuttle 1 0.37 19.9 -8.32
## - X40 1 0.50 20.0 -8.06
## - c_avg_att 1 0.59 20.1 -7.90
## - cone 1 0.66 20.1 -7.76
## - c_avg_cmpp 1 0.68 20.2 -7.72
## - broad_jump 1 0.88 20.4 -7.32
## <none> 19.5 -7.05
## + c_avg_tds 1 0.13 19.4 -5.32
## + wonderlic 1 0.11 19.4 -5.27
## + c_pct 1 0.10 19.4 -5.25
## + weight 1 0.09 19.4 -5.23
## + c_rate 1 0.07 19.4 -5.20
## + vert_leap 1 0.05 19.4 -5.15
## + c_avg_yds 1 0.03 19.5 -5.10
## + height 1 0.00 19.5 -5.05
## - age 1 12.77 32.3 10.60
##
## Step: AIC=-8.82
## ints ~ age + c_avg_cmpp + c_avg_inter + c_avg_att + X40 + cone +
## shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.18 19.8 -10.47
## - shuttle 1 0.32 19.9 -10.19
## - c_avg_att 1 0.48 20.1 -9.89
## - X40 1 0.57 20.2 -9.71
## - c_avg_cmpp 1 0.57 20.2 -9.71
## - cone 1 0.75 20.4 -9.36
## - broad_jump 1 0.89 20.5 -9.09
## <none> 19.6 -8.82
## + c_avg_tds 1 0.15 19.5 -7.12
## + c_numyrs 1 0.12 19.5 -7.05
## + wonderlic 1 0.09 19.5 -6.99
## + weight 1 0.07 19.5 -6.96
## + c_avg_yds 1 0.05 19.6 -6.92
## + vert_leap 1 0.03 19.6 -6.88
## + c_rate 1 0.01 19.6 -6.84
## + height 1 0.00 19.6 -6.83
## + c_pct 1 0.00 19.6 -6.82
## - age 1 12.96 32.6 8.96
##
## Step: AIC=-10.47
## ints ~ age + c_avg_cmpp + c_avg_att + X40 + cone + shuttle +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.23 20.0 -12.03
## - c_avg_att 1 0.35 20.1 -11.80
## - c_avg_cmpp 1 0.41 20.2 -11.68
## - X40 1 0.80 20.6 -10.92
## <none> 19.8 -10.47
## - cone 1 1.22 21.0 -10.13
## - broad_jump 1 1.47 21.3 -9.68
## + c_avg_tds 1 0.25 19.5 -8.98
## + c_avg_inter 1 0.18 19.6 -8.82
## + wonderlic 1 0.11 19.7 -8.68
## + c_avg_yds 1 0.11 19.7 -8.68
## + weight 1 0.06 19.7 -8.60
## + c_rate 1 0.05 19.7 -8.57
## + c_numyrs 1 0.05 19.7 -8.56
## + vert_leap 1 0.01 19.8 -8.50
## + c_pct 1 0.01 19.8 -8.49
## + height 1 0.01 19.8 -8.49
## - age 1 13.55 33.3 7.87
##
## Step: AIC=-12.03
## ints ~ age + c_avg_cmpp + c_avg_att + X40 + cone + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_att 1 0.38 20.4 -13.29
## - c_avg_cmpp 1 0.45 20.5 -13.16
## <none> 20.0 -12.03
## - X40 1 1.45 21.5 -11.30
## - broad_jump 1 1.60 21.6 -11.02
## + c_avg_tds 1 0.26 19.8 -10.54
## + shuttle 1 0.23 19.8 -10.47
## + wonderlic 1 0.16 19.8 -10.34
## + c_avg_inter 1 0.08 19.9 -10.19
## + c_rate 1 0.06 20.0 -10.14
## + c_avg_yds 1 0.04 20.0 -10.10
## + vert_leap 1 0.04 20.0 -10.10
## + c_numyrs 1 0.03 20.0 -10.09
## + c_pct 1 0.03 20.0 -10.08
## + weight 1 0.00 20.0 -10.03
## + height 1 0.00 20.0 -10.03
## - cone 1 2.42 22.4 -9.57
## - age 1 14.06 34.1 6.73
##
## Step: AIC=-13.29
## ints ~ age + c_avg_cmpp + X40 + cone + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_cmpp 1 0.10 20.5 -15.10
## <none> 20.4 -13.29
## - broad_jump 1 1.70 22.1 -12.16
## + c_avg_att 1 0.38 20.0 -12.03
## - X40 1 1.83 22.2 -11.93
## + shuttle 1 0.26 20.1 -11.80
## + wonderlic 1 0.14 20.3 -11.56
## - cone 1 2.07 22.5 -11.52
## + c_avg_yds 1 0.11 20.3 -11.51
## + c_avg_tds 1 0.10 20.3 -11.49
## + vert_leap 1 0.10 20.3 -11.48
## + c_avg_inter 1 0.10 20.3 -11.48
## + c_pct 1 0.03 20.4 -11.34
## + weight 1 0.02 20.4 -11.32
## + c_numyrs 1 0.00 20.4 -11.29
## + c_rate 1 0.00 20.4 -11.29
## + height 1 0.00 20.4 -11.29
## - age 1 14.48 34.9 5.63
##
## Step: AIC=-15.1
## ints ~ age + X40 + cone + broad_jump
##
## Df Sum of Sq RSS AIC
## <none> 20.5 -15.1
## - broad_jump 1 1.66 22.1 -14.1
## - X40 1 1.87 22.4 -13.7
## + shuttle 1 0.27 20.2 -13.6
## - cone 1 2.02 22.5 -13.4
## + c_pct 1 0.11 20.4 -13.3
## + c_avg_cmpp 1 0.10 20.4 -13.3
## + vert_leap 1 0.09 20.4 -13.3
## + wonderlic 1 0.08 20.4 -13.3
## + c_avg_yds 1 0.06 20.4 -13.2
## + c_rate 1 0.03 20.5 -13.2
## + c_avg_att 1 0.03 20.5 -13.2
## + c_avg_tds 1 0.02 20.5 -13.1
## + weight 1 0.01 20.5 -13.1
## + c_numyrs 1 0.01 20.5 -13.1
## + height 1 0.00 20.5 -13.1
## + c_avg_inter 1 0.00 20.5 -13.1
## - age 1 14.62 35.1 3.9
# Coefficient table, residual summary and fit statistics for the model
# chosen by the stepwise search
summary(step_reg.scaled.w_combine.ints)
##
## Call:
## lm(formula = ints ~ age + X40 + cone + broad_jump, data = data.scaled.w_combine.for_ints)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3891 -0.5983 0.0201 0.5724 1.8927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.12e-16 1.24e-01 0.00 1.000
## age -6.49e-01 1.32e-01 -4.92 2.1e-05 ***
## X40 3.35e-01 1.90e-01 1.76 0.087 .
## cone 2.63e-01 1.44e-01 1.83 0.076 .
## broad_jump 3.11e-01 1.88e-01 1.66 0.106
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.776 on 34 degrees of freedom
## Multiple R-squared: 0.461, Adjusted R-squared: 0.397
## F-statistic: 7.26 on 4 and 34 DF, p-value: 0.000243
# Diagnostic plots for the stepwise-selected linear model
plot(step_reg.scaled.w_combine.ints)

# Best-subsets search over all predictors, keeping the 10 best models of
# each size
leaps.scaled.w_combine.ints <- regsubsets(
  ints ~ .,
  data = data.scaled.w_combine.for_ints,
  nbest = 10
)

# Plot R-squared against subset size.
# legend = FALSE is deliberate: by default subsets() positions the legend
# with a mouse click, which is not available when the script is run
# non-interactively (e.g. knitted) and made this call abort with
# "Error: invalid coordinate lengths". With the legend suppressed the plot
# is still drawn and the table of predictor abbreviations is returned and
# printed instead.
subsets(leaps.scaled.w_combine.ints, statistic = "rsq", legend = FALSE)
# Cross-validate the stepwise-selected model on the scaled data set.
# NOTE(review): the predictors were chosen by stepAIC on these same rows, so
# the cross-validated mean square reported below is probably optimistic;
# repeating the selection inside each fold would give a fairer estimate.
# NOTE(review): the printed "Overall (Sum over all 8 folds)" line does not
# match m = 5 -- looks like a labelling quirk in cv.lm's printout; verify.
cv.lm(df = data.scaled.w_combine.for_ints, step_reg.scaled.w_combine.ints, m = 5) # 5 fold cross-validation
## Analysis of Variance Table
##
## Response: ints
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 12.61 12.61 20.92 6.1e-05 ***
## X40 1 1.59 1.59 2.63 0.11
## cone 1 1.65 1.65 2.75 0.11
## broad_jump 1 1.66 1.66 2.75 0.11
## Residuals 34 20.49 0.60
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 3 21 24 40 42 52 61
## Predicted -0.532 -0.887 0.0399 -0.5491 -1.043 -0.486 0.0319
## cvpred -0.412 -0.720 0.0902 -0.4347 -0.937 -0.382 0.1445
## ints -1.166 -1.776 -0.7602 -0.3541 -0.760 -0.760 0.0521
## CV residual -0.755 -1.055 -0.8505 0.0807 0.176 -0.379 -0.0924
##
## Sum of squares = 2.6 Mean square = 0.37 n = 7
##
## fold 2
## Observations in test set: 8
## 6 18 25 37 43 50 55 63
## Predicted -0.260 -0.257 0.5261 0.1863 0.832 0.986 -0.2234 -1.033
## cvpred -0.361 -0.239 0.7967 0.0925 1.072 1.069 -0.0951 -1.158
## ints 0.255 -0.963 0.8644 0.0521 -0.557 1.677 -0.9633 -0.354
## CV residual 0.616 -0.724 0.0677 -0.0405 -1.629 0.607 -0.8682 0.804
##
## Sum of squares = 5.33 Mean square = 0.67 n = 8
##
## fold 3
## Observations in test set: 8
## 5 7 16 20 28 32 49 64
## Predicted 0.309 -0.663 -0.605 -1.220 0.2635 -0.645 0.362 -0.379
## cvpred 0.169 -0.410 -0.455 -0.909 0.3201 -0.879 0.847 -0.515
## ints 1.271 -0.760 -1.166 -1.166 0.2551 -0.151 -0.557 -1.166
## CV residual 1.102 -0.350 -0.711 -0.257 -0.0649 0.728 -1.404 -0.652
##
## Sum of squares = 4.84 Mean square = 0.6 n = 8
##
## fold 4
## Observations in test set: 8
## 12 13 26 30 38 39 59 65
## Predicted 0.592 0.8363 0.018 0.234 0.198 1.748 -1.255 0.5822
## cvpred 0.627 1.0288 -0.187 0.112 0.337 2.108 -1.003 0.7679
## ints -0.557 1.0675 0.661 0.864 0.864 1.271 -1.776 0.0521
## CV residual -1.184 0.0387 0.849 0.752 0.528 -0.838 -0.772 -0.7158
##
## Sum of squares = 4.78 Mean square = 0.6 n = 8
##
## fold 5
## Observations in test set: 8
## 1 4 15 17 19 27 46 56
## Predicted 0.190 0.804 0.1710 -0.37 0.0829 0.786 0.884 -0.2577
## cvpred -0.394 0.961 0.0542 -0.41 0.1628 0.262 0.629 -0.2842
## ints 2.083 -0.151 0.4582 1.07 0.2551 1.474 1.677 -0.3541
## CV residual 2.477 -1.112 0.4041 1.48 0.0924 1.211 1.048 -0.0698
##
## Sum of squares = 12.3 Mean square = 1.54 n = 8
##
## Overall (Sum over all 8 folds)
## ms
## 0.765