# Fetch data: read the quarterback data set from CSV (the path is relative to
# the working directory).
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictor columns (the original note calls these the college
# predictors; the list also holds physical and workout-style columns).
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable. Single-bracket indexing keeps `sacks` a one-column
# data frame, so the column name "sacked" survives the cbind() below.
sacks <- qb_stats_w_combine["sacked"]

# Generate the clean data set: bind response and predictors, drop every row
# that has a missing value, then centre and scale each column (response
# included) with scale()'s defaults.
data.scaled.w_combine.for_sacks <- data.frame(
  scale(na.omit(cbind(sacks, college_stats)))
)

# Generate the full linear model: `sacked` regressed on every other column.
lm.scaled.w_combine.sacks <- lm(
  formula = sacked ~ .,
  data = data.scaled.w_combine.for_sacks
)

# Find the optimum linear regression model for sacks by stepwise AIC selection;
# direction = "both" lets each step either drop a term or re-add one.
# stepAIC() lives in MASS, so the call is namespace-qualified and works even
# when MASS has not been attached with library().
step_reg.scaled.w_combine.sacks <- MASS::stepAIC(
  lm.scaled.w_combine.sacks,
  direction = "both"
)
## Start: AIC=18.18
## sacked ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.001 23.8 16.2
## - X40 1 0.002 23.8 16.2
## - c_pct 1 0.015 23.8 16.2
## - cone 1 0.015 23.8 16.2
## - weight 1 0.018 23.8 16.2
## - broad_jump 1 0.124 23.9 16.4
## - c_avg_inter 1 0.151 23.9 16.4
## - age 1 0.193 24.0 16.5
## - height 1 0.194 24.0 16.5
## - c_avg_tds 1 0.223 24.0 16.5
## - c_avg_cmpp 1 0.304 24.1 16.7
## - vert_leap 1 0.523 24.3 17.0
## - wonderlic 1 0.669 24.4 17.2
## - c_numyrs 1 0.889 24.7 17.6
## - shuttle 1 1.080 24.9 17.9
## <none> 23.8 18.2
## - c_avg_yds 1 1.373 25.1 18.3
## - c_avg_att 1 2.887 26.7 20.5
##
## Step: AIC=16.18
## sacked ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - X40 1 0.00 23.8 14.2
## - cone 1 0.01 23.8 14.2
## - weight 1 0.02 23.8 14.2
## - broad_jump 1 0.14 23.9 14.4
## - c_pct 1 0.16 23.9 14.4
## - c_avg_inter 1 0.17 23.9 14.4
## - height 1 0.21 24.0 14.5
## - age 1 0.22 24.0 14.5
## - c_avg_tds 1 0.37 24.1 14.8
## - vert_leap 1 0.56 24.3 15.1
## - wonderlic 1 0.85 24.6 15.5
## - c_numyrs 1 0.89 24.7 15.6
## - c_avg_cmpp 1 0.92 24.7 15.6
## - shuttle 1 1.09 24.9 15.9
## <none> 23.8 16.2
## + c_rate 1 0.00 23.8 18.2
## - c_avg_att 1 3.24 27.0 19.0
## - c_avg_yds 1 3.81 27.6 19.8
##
## Step: AIC=14.19
## sacked ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.01 23.8 12.2
## - weight 1 0.03 23.8 12.2
## - c_pct 1 0.16 23.9 12.4
## - broad_jump 1 0.17 23.9 12.5
## - c_avg_inter 1 0.20 24.0 12.5
## - age 1 0.22 24.0 12.5
## - height 1 0.27 24.1 12.6
## - c_avg_tds 1 0.37 24.1 12.8
## - vert_leap 1 0.57 24.4 13.1
## - wonderlic 1 0.86 24.6 13.5
## - c_numyrs 1 0.90 24.7 13.6
## - c_avg_cmpp 1 0.99 24.8 13.7
## <none> 23.8 14.2
## - shuttle 1 1.31 25.1 14.2
## + X40 1 0.00 23.8 16.2
## + c_rate 1 0.00 23.8 16.2
## - c_avg_att 1 3.40 27.2 17.3
## - c_avg_yds 1 3.87 27.6 17.9
##
## Step: AIC=12.21
## sacked ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.04 23.8 10.3
## - c_pct 1 0.16 23.9 10.5
## - broad_jump 1 0.16 23.9 10.5
## - age 1 0.22 24.0 10.6
## - c_avg_inter 1 0.27 24.1 10.6
## - height 1 0.31 24.1 10.7
## - c_avg_tds 1 0.35 24.1 10.8
## - vert_leap 1 0.57 24.4 11.1
## - c_numyrs 1 0.95 24.7 11.7
## - c_avg_cmpp 1 1.04 24.8 11.8
## - wonderlic 1 1.11 24.9 11.9
## <none> 23.8 12.2
## - shuttle 1 1.55 25.3 12.6
## + cone 1 0.01 23.8 14.2
## + X40 1 0.00 23.8 14.2
## + c_rate 1 0.00 23.8 14.2
## - c_avg_att 1 3.56 27.4 15.5
## - c_avg_yds 1 3.86 27.6 15.9
##
## Step: AIC=10.27
## sacked ~ height + age + c_avg_cmpp + c_pct + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + wonderlic + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.14 24.0 8.48
## - broad_jump 1 0.16 24.0 8.52
## - age 1 0.19 24.0 8.56
## - c_avg_inter 1 0.25 24.1 8.67
## - height 1 0.35 24.2 8.83
## - c_avg_tds 1 0.44 24.3 8.96
## - vert_leap 1 0.80 24.6 9.52
## - c_numyrs 1 0.93 24.8 9.73
## - c_avg_cmpp 1 1.10 24.9 9.99
## - wonderlic 1 1.17 25.0 10.09
## <none> 23.8 10.27
## - shuttle 1 2.12 25.9 11.51
## + weight 1 0.04 23.8 12.21
## + cone 1 0.02 23.8 12.23
## + X40 1 0.01 23.8 12.26
## + c_rate 1 0.00 23.8 12.27
## - c_avg_att 1 3.86 27.7 13.97
## - c_avg_yds 1 3.96 27.8 14.11
##
## Step: AIC=8.48
## sacked ~ height + age + c_avg_cmpp + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + wonderlic + shuttle +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.10 24.1 6.64
## - age 1 0.15 24.1 6.72
## - c_avg_inter 1 0.19 24.1 6.78
## - height 1 0.34 24.3 7.02
## - c_avg_tds 1 0.44 24.4 7.17
## - vert_leap 1 0.78 24.8 7.70
## - c_numyrs 1 0.92 24.9 7.91
## - c_avg_cmpp 1 1.11 25.1 8.21
## - wonderlic 1 1.22 25.2 8.36
## <none> 24.0 8.48
## - shuttle 1 2.11 26.1 9.68
## + c_pct 1 0.14 23.8 10.27
## + c_rate 1 0.13 23.8 10.28
## + weight 1 0.02 23.9 10.46
## + cone 1 0.02 23.9 10.46
## + X40 1 0.00 24.0 10.48
## - c_avg_yds 1 3.83 27.8 12.11
## - c_avg_att 1 4.66 28.6 13.23
##
## Step: AIC=6.64
## sacked ~ height + age + c_avg_cmpp + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + wonderlic + shuttle +
## vert_leap
##
## Df Sum of Sq RSS AIC
## - age 1 0.19 24.2 4.93
## - c_avg_inter 1 0.21 24.3 4.97
## - height 1 0.36 24.4 5.21
## - c_avg_tds 1 0.44 24.5 5.33
## - vert_leap 1 0.85 24.9 5.96
## - c_numyrs 1 0.87 24.9 5.99
## - wonderlic 1 1.25 25.3 6.57
## - c_avg_cmpp 1 1.28 25.4 6.61
## <none> 24.1 6.64
## - shuttle 1 2.10 26.2 7.83
## + broad_jump 1 0.10 24.0 8.48
## + c_pct 1 0.08 24.0 8.52
## + c_rate 1 0.06 24.0 8.55
## + X40 1 0.04 24.0 8.58
## + weight 1 0.02 24.1 8.62
## + cone 1 0.00 24.1 8.64
## - c_avg_yds 1 3.81 27.9 10.23
## - c_avg_att 1 4.98 29.1 11.79
##
## Step: AIC=4.93
## sacked ~ height + c_avg_cmpp + c_avg_inter + c_avg_tds + c_avg_yds +
## c_numyrs + c_avg_att + wonderlic + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.26 24.5 3.33
## - c_avg_tds 1 0.36 24.6 3.49
## - height 1 0.48 24.7 3.67
## - vert_leap 1 0.80 25.1 4.17
## - c_numyrs 1 0.84 25.1 4.23
## - wonderlic 1 1.07 25.3 4.57
## - c_avg_cmpp 1 1.29 25.5 4.90
## <none> 24.2 4.93
## - shuttle 1 2.07 26.3 6.05
## + age 1 0.19 24.1 6.64
## + broad_jump 1 0.13 24.1 6.72
## + X40 1 0.07 24.2 6.82
## + c_pct 1 0.05 24.2 6.86
## + c_rate 1 0.02 24.2 6.90
## + weight 1 0.02 24.2 6.90
## + cone 1 0.00 24.2 6.93
## - c_avg_yds 1 3.74 28.0 8.38
## - c_avg_att 1 5.13 29.4 10.22
##
## Step: AIC=3.33
## sacked ~ height + c_avg_cmpp + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + wonderlic + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.44 25.0 2.01
## - height 1 0.47 25.0 2.05
## - vert_leap 1 0.65 25.2 2.33
## - c_numyrs 1 0.65 25.2 2.34
## - c_avg_cmpp 1 1.03 25.5 2.90
## - wonderlic 1 1.22 25.7 3.17
## <none> 24.5 3.33
## - shuttle 1 1.99 26.5 4.31
## + c_avg_inter 1 0.26 24.3 4.93
## + age 1 0.23 24.3 4.97
## + broad_jump 1 0.17 24.3 5.08
## + weight 1 0.04 24.5 5.27
## + X40 1 0.03 24.5 5.29
## + cone 1 0.02 24.5 5.30
## + c_pct 1 0.01 24.5 5.32
## + c_rate 1 0.01 24.5 5.32
## - c_avg_yds 1 3.48 28.0 6.38
## - c_avg_att 1 8.63 33.1 12.80
##
## Step: AIC=2.01
## sacked ~ height + c_avg_cmpp + c_avg_yds + c_numyrs + c_avg_att +
## wonderlic + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.36 25.3 0.56
## - height 1 0.37 25.3 0.57
## - c_numyrs 1 0.51 25.5 0.79
## - c_avg_cmpp 1 0.84 25.8 1.28
## - wonderlic 1 0.93 25.9 1.41
## <none> 25.0 2.01
## - shuttle 1 1.57 26.5 2.34
## + c_avg_tds 1 0.44 24.5 3.33
## + c_avg_inter 1 0.34 24.6 3.49
## + broad_jump 1 0.17 24.8 3.76
## + age 1 0.14 24.8 3.80
## + c_rate 1 0.04 24.9 3.95
## + cone 1 0.01 24.9 3.99
## + X40 1 0.01 24.9 4.00
## + c_pct 1 0.01 24.9 4.01
## + weight 1 0.00 25.0 4.01
## - c_avg_yds 1 3.76 28.7 5.35
## - c_avg_att 1 8.97 33.9 11.69
##
## Step: AIC=0.56
## sacked ~ height + c_avg_cmpp + c_avg_yds + c_numyrs + c_avg_att +
## wonderlic + shuttle
##
## Df Sum of Sq RSS AIC
## - height 1 0.28 25.6 -1.02
## - c_numyrs 1 0.53 25.8 -0.65
## - wonderlic 1 0.68 26.0 -0.44
## - c_avg_cmpp 1 1.21 26.5 0.33
## - shuttle 1 1.25 26.6 0.39
## <none> 25.3 0.56
## + vert_leap 1 0.36 25.0 2.01
## + c_avg_inter 1 0.16 25.2 2.32
## + c_avg_tds 1 0.15 25.2 2.33
## + age 1 0.13 25.2 2.37
## + c_rate 1 0.08 25.2 2.43
## + X40 1 0.07 25.2 2.46
## + weight 1 0.06 25.3 2.48
## + c_pct 1 0.05 25.3 2.48
## + cone 1 0.05 25.3 2.48
## + broad_jump 1 0.03 25.3 2.52
## - c_avg_yds 1 3.42 28.7 3.38
## - c_avg_att 1 9.35 34.7 10.51
##
## Step: AIC=-1.02
## sacked ~ c_avg_cmpp + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## shuttle
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.44 26.0 -2.36
## - wonderlic 1 0.48 26.1 -2.32
## - shuttle 1 0.98 26.6 -1.59
## - c_avg_cmpp 1 1.16 26.8 -1.33
## <none> 25.6 -1.02
## + height 1 0.28 25.3 0.56
## + vert_leap 1 0.28 25.3 0.57
## + age 1 0.21 25.4 0.67
## + c_avg_inter 1 0.16 25.4 0.74
## + c_avg_tds 1 0.13 25.5 0.79
## + X40 1 0.09 25.5 0.85
## + cone 1 0.05 25.5 0.91
## + c_rate 1 0.04 25.6 0.92
## + weight 1 0.03 25.6 0.93
## + c_pct 1 0.03 25.6 0.93
## + broad_jump 1 0.01 25.6 0.97
## - c_avg_yds 1 3.32 28.9 1.61
## - c_avg_att 1 9.21 34.8 8.67
##
## Step: AIC=-2.36
## sacked ~ c_avg_cmpp + c_avg_yds + c_avg_att + wonderlic + shuttle
##
## Df Sum of Sq RSS AIC
## - wonderlic 1 0.37 26.4 -3.83
## - shuttle 1 0.91 26.9 -3.06
## - c_avg_cmpp 1 1.00 27.0 -2.94
## <none> 26.0 -2.36
## + c_numyrs 1 0.44 25.6 -1.02
## + vert_leap 1 0.30 25.7 -0.80
## + height 1 0.20 25.8 -0.65
## + age 1 0.17 25.9 -0.61
## + X40 1 0.13 25.9 -0.56
## + cone 1 0.08 26.0 -0.49
## + c_avg_tds 1 0.07 26.0 -0.46
## + c_pct 1 0.06 26.0 -0.45
## + weight 1 0.06 26.0 -0.44
## + c_rate 1 0.05 26.0 -0.43
## + c_avg_inter 1 0.03 26.0 -0.41
## + broad_jump 1 0.03 26.0 -0.40
## - c_avg_yds 1 3.09 29.1 -0.11
## - c_avg_att 1 8.86 34.9 6.77
##
## Step: AIC=-3.83
## sacked ~ c_avg_cmpp + c_avg_yds + c_avg_att + shuttle
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.72 27.1 -4.80
## - c_avg_cmpp 1 0.73 27.1 -4.79
## <none> 26.4 -3.83
## + wonderlic 1 0.37 26.0 -2.36
## + c_numyrs 1 0.34 26.1 -2.32
## + cone 1 0.25 26.2 -2.19
## + vert_leap 1 0.13 26.3 -2.01
## + c_avg_inter 1 0.10 26.3 -1.97
## + weight 1 0.09 26.3 -1.95
## + height 1 0.06 26.3 -1.91
## + c_avg_tds 1 0.04 26.4 -1.88
## + c_rate 1 0.04 26.4 -1.88
## + X40 1 0.03 26.4 -1.88
## + c_pct 1 0.03 26.4 -1.88
## + age 1 0.02 26.4 -1.86
## + broad_jump 1 0.00 26.4 -1.83
## - c_avg_yds 1 3.45 29.9 -1.16
## - c_avg_att 1 8.49 34.9 4.77
##
## Step: AIC=-4.8
## sacked ~ c_avg_cmpp + c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_cmpp 1 0.77 27.9 -5.75
## <none> 27.1 -4.80
## + shuttle 1 0.72 26.4 -3.83
## + c_numyrs 1 0.30 26.8 -3.23
## + wonderlic 1 0.18 26.9 -3.06
## + X40 1 0.16 27.0 -3.02
## + c_avg_inter 1 0.12 27.0 -2.97
## + broad_jump 1 0.11 27.0 -2.96
## + c_rate 1 0.10 27.0 -2.95
## + c_pct 1 0.08 27.1 -2.91
## + age 1 0.04 27.1 -2.86
## + vert_leap 1 0.03 27.1 -2.85
## + cone 1 0.02 27.1 -2.83
## + c_avg_tds 1 0.01 27.1 -2.81
## + height 1 0.00 27.1 -2.81
## + weight 1 0.00 27.1 -2.81
## - c_avg_yds 1 3.42 30.5 -2.30
## - c_avg_att 1 8.66 35.8 3.72
##
## Step: AIC=-5.75
## sacked ~ c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## <none> 27.9 -5.75
## + c_avg_cmpp 1 0.77 27.1 -4.80
## + shuttle 1 0.76 27.1 -4.79
## + c_pct 1 0.45 27.4 -4.36
## + c_numyrs 1 0.22 27.7 -4.04
## + c_rate 1 0.22 27.7 -4.04
## + age 1 0.08 27.8 -3.85
## + cone 1 0.07 27.8 -3.84
## + broad_jump 1 0.06 27.8 -3.83
## + X40 1 0.03 27.9 -3.78
## + weight 1 0.03 27.9 -3.78
## + c_avg_inter 1 0.02 27.9 -3.78
## + wonderlic 1 0.02 27.9 -3.77
## + c_avg_tds 1 0.01 27.9 -3.76
## + vert_leap 1 0.00 27.9 -3.75
## + height 1 0.00 27.9 -3.75
## - c_avg_yds 1 8.76 36.7 2.63
## - c_avg_att 1 9.05 37.0 2.94
# Print the coefficient table and overall fit statistics of the model that the
# stepwise search settled on.
summary(step_reg.scaled.w_combine.sacks)
##
## Call:
## lm(formula = sacked ~ c_avg_yds + c_avg_att, data = data.scaled.w_combine.for_sacks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8050 -0.5894 0.0471 0.4827 2.1989
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.43e-16 1.45e-01 0.00 1.0000
## c_avg_yds 1.83e+00 5.53e-01 3.32 0.0021 **
## c_avg_att -1.86e+00 5.53e-01 -3.37 0.0018 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.893 on 35 degrees of freedom
## Multiple R-squared: 0.246, Adjusted R-squared: 0.203
## F-statistic: 5.71 on 2 and 35 DF, p-value: 0.00714
# Draw the standard lm diagnostic plots for the stepwise-selected model.
plot(step_reg.scaled.w_combine.sacks)
# Best-subsets search over the same predictors as a cross-check on the stepwise
# result; nbest = 10 keeps up to ten candidate models per subset size.
leaps.scaled.w_combine.sacks <- leaps::regsubsets(
  sacked ~ .,
  data = data.scaled.w_combine.for_sacks,
  nbest = 10
)
# Plot R-squared against subset size. The legend is switched off on purpose:
# by default car::subsets() places it with a mouse click, and on a
# non-interactive device (e.g. while knitting) there is no click to read, which
# made this call stop with "Error: invalid coordinate lengths".
# NOTE(review): with legend = FALSE the abbreviation key should be returned as
# a data frame and printed instead -- confirm against ?car::subsets for the
# installed version.
car::subsets(leaps.scaled.w_combine.sacks, statistic = "rsq", legend = FALSE)
# 5 fold cross-validation of the stepwise-selected model.
# The data frame and the fitted model are passed positionally: the first
# argument of DAAG::cv.lm() is called `df` in older DAAG releases and `data` in
# newer ones, so naming it would tie the script to one version.
# NOTE(review): the model's terms were chosen on these same rows, so the
# cross-validated error is presumably optimistic -- confirm whether the
# selection step should be repeated inside each fold.
DAAG::cv.lm(
  data.scaled.w_combine.for_sacks,
  step_reg.scaled.w_combine.sacks,
  m = 5
)
## Analysis of Variance Table
##
## Response: sacked
## Df Sum Sq Mean Sq F value Pr(>F)
## c_avg_yds 1 0.05 0.05 0.06 0.8059
## c_avg_att 1 9.05 9.05 11.36 0.0018 **
## Residuals 35 27.90 0.80
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 6 21 25 32 38 46 59
## Predicted 0.0288 0.114 0.5135 -0.633 -0.0807 0.2858 -0.293
## cvpred 0.1514 0.238 0.6257 -0.497 0.0620 0.4337 -0.172
## sacked -0.2867 -1.086 0.7127 -0.187 -1.8857 -0.0868 -1.286
## CV residual -0.4381 -1.324 0.0871 0.310 -1.9477 -0.5205 -1.114
##
## Sum of squares = 7.35 Mean square = 1.05 n = 7
##
## fold 2
## Observations in test set: 8
## 7 18 20 27 37 43 55 65
## Predicted -0.159 0.1001 -0.512 -0.335 0.2840 -1.1077 -0.0912 0.0608
## cvpred -0.200 0.0723 -0.478 -0.233 0.0796 -1.0192 -0.0693 -0.0125
## sacked -0.786 0.2130 -0.187 -1.286 0.5129 -0.9863 -1.4860 1.4123
## CV residual -0.586 0.1408 0.291 -1.053 0.4333 0.0329 -1.4166 1.4248
##
## Sum of squares = 5.78 Mean square = 0.72 n = 8
##
## fold 3
## Observations in test set: 8
## 5 12 13 16 39 40 50 56
## Predicted 0.764 0.4897 0.670 0.613 -0.6677 -0.686 0.030 0.0200
## cvpred 0.978 0.6043 0.654 0.869 -0.7323 -0.888 0.068 0.0913
## sacked 1.412 0.0132 0.813 -0.487 -0.6864 0.413 -0.287 0.6128
## CV residual 0.435 -0.5912 0.159 -1.356 0.0458 1.301 -0.355 0.5215
##
## Sum of squares = 4.49 Mean square = 0.56 n = 8
##
## fold 4
## Observations in test set: 8
## 4 15 17 19 28 52 63 64
## Predicted 0.569 -0.5816 -0.10 -0.246 0.627 -0.421 0.266 0.876
## cvpred 0.484 -0.7338 -0.30 -0.317 0.491 -0.482 -0.119 0.715
## sacked 0.213 -0.0868 1.71 0.313 1.812 -0.786 1.013 0.613
## CV residual -0.271 0.6470 2.01 0.630 1.321 -0.304 1.131 -0.102
##
## Sum of squares = 8.07 Mean square = 1.01 n = 8
##
## fold 5
## Observations in test set: 7
## 1 3 24 30 42 49 61
## Predicted 0.113 0.770 -0.645 -0.728 0.339 -0.365 0.121
## cvpred 0.106 0.686 -0.646 -0.715 0.262 -0.342 0.174
## sacked 2.312 0.913 -1.286 -0.287 0.313 -1.086 -0.786
## CV residual 2.206 0.227 -0.640 0.429 0.051 -0.744 -0.960
##
## Sum of squares = 6.99 Mean square = 1 n = 7
##
## Overall (Sum over all 7 folds)
## ms
## 0.86