# Fetch data ----
# Path is relative to the working directory the script is run from.
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Candidate predictors: player measurements, college passing statistics
# (c_* columns) and combine results.
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
# drop = FALSE keeps a data frame even if `predictors` is ever cut to one name.
college_stats <- qb_stats_w_combine[, predictors, drop = FALSE]

# Set the response variable. Single-bracket indexing keeps a one-column
# data frame, so cbind() below carries the column name "tds" through.
tds <- qb_stats_w_combine["tds"]

# Generate the clean data set: drop every row with a missing value in the
# response or any predictor, then centre and scale all columns (the response
# included), so the fitted coefficients are standardized.
data.scaled.w_combine.for_tds <- data.frame(
  scale(na.omit(cbind(tds, college_stats)))
)

# Generate the full linear model (all predictors).
lm.scaled.w_combine.tds <- lm(
  formula = tds ~ .,
  data = data.scaled.w_combine.for_tds
)

# Find the optimum linear regression model for tds by stepwise AIC selection,
# allowing terms to be both dropped and re-added. stepAIC is namespace-
# qualified so this does not depend on MASS having been attached elsewhere.
step_reg.scaled.w_combine.tds <- MASS::stepAIC(
  lm.scaled.w_combine.tds,
  direction = "both"
)
## Start: AIC=19.32
## tds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.005 24.5 17.3
## - weight 1 0.007 24.5 17.3
## - shuttle 1 0.013 24.5 17.3
## - c_avg_cmpp 1 0.019 24.5 17.4
## - c_avg_inter 1 0.036 24.5 17.4
## - height 1 0.175 24.7 17.6
## - c_pct 1 0.219 24.7 17.7
## - X40 1 0.299 24.8 17.8
## - c_rate 1 0.596 25.1 18.2
## - c_avg_tds 1 0.638 25.1 18.3
## - vert_leap 1 0.871 25.4 18.6
## - c_avg_att 1 0.885 25.4 18.7
## - c_avg_yds 1 1.295 25.8 19.3
## <none> 24.5 19.3
## - c_numyrs 1 1.945 26.4 20.2
## - age 1 2.332 26.8 20.8
## - broad_jump 1 2.537 27.0 21.1
## - wonderlic 1 2.784 27.3 21.4
##
## Step: AIC=17.33
## tds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.003 24.5 15.3
## - shuttle 1 0.008 24.5 15.3
## - c_avg_cmpp 1 0.023 24.5 15.4
## - c_avg_inter 1 0.044 24.6 15.4
## - height 1 0.208 24.7 15.7
## - c_pct 1 0.238 24.7 15.7
## - X40 1 0.294 24.8 15.8
## - c_rate 1 0.626 25.1 16.3
## - c_avg_tds 1 0.747 25.2 16.5
## - c_avg_att 1 0.880 25.4 16.7
## - vert_leap 1 0.905 25.4 16.7
## - c_avg_yds 1 1.293 25.8 17.3
## <none> 24.5 17.3
## - c_numyrs 1 1.976 26.5 18.3
## - age 1 2.327 26.8 18.8
## - broad_jump 1 2.532 27.0 19.1
## + cone 1 0.005 24.5 19.3
## - wonderlic 1 3.132 27.6 19.9
##
## Step: AIC=15.33
## tds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.00 24.5 13.3
## - c_avg_cmpp 1 0.02 24.5 13.4
## - c_avg_inter 1 0.04 24.6 13.4
## - c_pct 1 0.23 24.7 13.7
## - X40 1 0.29 24.8 13.8
## - height 1 0.39 24.9 13.9
## - c_rate 1 0.62 25.1 14.3
## - c_avg_tds 1 0.80 25.3 14.6
## - vert_leap 1 0.91 25.4 14.7
## - c_avg_att 1 0.91 25.4 14.7
## - c_avg_yds 1 1.31 25.8 15.3
## <none> 24.5 15.3
## - c_numyrs 1 2.11 26.6 16.5
## - broad_jump 1 2.67 27.2 17.3
## + weight 1 0.00 24.5 17.3
## + cone 1 0.00 24.5 17.3
## - age 1 2.80 27.3 17.4
## - wonderlic 1 3.21 27.7 18.0
##
## Step: AIC=13.34
## tds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_cmpp 1 0.02 24.5 11.4
## - c_avg_inter 1 0.05 24.6 11.4
## - c_pct 1 0.23 24.7 11.7
## - X40 1 0.30 24.8 11.8
## - height 1 0.41 24.9 12.0
## - c_rate 1 0.64 25.1 12.3
## - c_avg_tds 1 0.82 25.3 12.6
## - c_avg_att 1 0.90 25.4 12.7
## - vert_leap 1 0.97 25.5 12.8
## <none> 24.5 13.3
## - c_avg_yds 1 1.54 26.1 13.7
## - c_numyrs 1 2.12 26.6 14.5
## - broad_jump 1 2.71 27.2 15.3
## + shuttle 1 0.00 24.5 15.3
## + weight 1 0.00 24.5 15.3
## + cone 1 0.00 24.5 15.3
## - age 1 2.90 27.4 15.6
## - wonderlic 1 4.33 28.9 17.5
##
## Step: AIC=11.37
## tds ~ height + age + c_rate + c_pct + c_avg_inter + c_avg_tds +
## c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.15 24.7 9.60
## - X40 1 0.29 24.8 9.81
## - height 1 0.40 24.9 9.98
## - c_pct 1 0.64 25.2 10.34
## - vert_leap 1 0.96 25.5 10.82
## - c_avg_tds 1 1.12 25.6 11.06
## <none> 24.5 11.37
## - c_rate 1 1.58 26.1 11.75
## - c_avg_yds 1 2.24 26.8 12.68
## - c_avg_att 1 2.31 26.8 12.79
## + c_avg_cmpp 1 0.02 24.5 13.34
## + cone 1 0.00 24.5 13.36
## + shuttle 1 0.00 24.5 13.36
## + weight 1 0.00 24.5 13.37
## - broad_jump 1 2.76 27.3 13.42
## - c_numyrs 1 2.78 27.3 13.44
## - age 1 2.91 27.4 13.62
## - wonderlic 1 4.40 28.9 15.64
##
## Step: AIC=9.6
## tds ~ height + age + c_rate + c_pct + c_avg_tds + c_avg_yds +
## c_numyrs + c_avg_att + X40 + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.34 25.0 8.12
## - X40 1 0.42 25.1 8.23
## - c_pct 1 0.52 25.2 8.39
## - vert_leap 1 0.84 25.5 8.87
## - c_avg_tds 1 0.99 25.7 9.09
## <none> 24.7 9.60
## - c_rate 1 1.45 26.1 9.77
## - c_avg_yds 1 2.12 26.8 10.73
## + c_avg_inter 1 0.15 24.5 11.37
## + c_avg_cmpp 1 0.12 24.6 11.41
## - c_numyrs 1 2.64 27.3 11.46
## + cone 1 0.03 24.6 11.55
## - c_avg_att 1 2.72 27.4 11.57
## + weight 1 0.00 24.7 11.60
## + shuttle 1 0.00 24.7 11.60
## - broad_jump 1 2.95 27.6 11.89
## - age 1 3.02 27.7 11.98
## - wonderlic 1 4.71 29.4 14.23
##
## Step: AIC=8.12
## tds ~ age + c_rate + c_pct + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.33 25.4 6.63
## - vert_leap 1 0.85 25.9 7.38
## - X40 1 0.89 25.9 7.45
## - c_avg_tds 1 0.95 26.0 7.54
## - c_rate 1 1.20 26.2 7.90
## <none> 25.0 8.12
## - c_avg_yds 1 1.91 26.9 8.91
## + height 1 0.34 24.7 9.60
## - c_avg_att 1 2.48 27.5 9.72
## - c_numyrs 1 2.51 27.5 9.75
## + weight 1 0.14 24.9 9.90
## + c_avg_inter 1 0.09 24.9 9.98
## + c_avg_cmpp 1 0.07 24.9 10.01
## + cone 1 0.06 25.0 10.03
## + shuttle 1 0.03 25.0 10.08
## - age 1 3.25 28.3 10.77
## - broad_jump 1 3.49 28.5 11.08
## - wonderlic 1 4.37 29.4 12.23
##
## Step: AIC=6.63
## tds ~ age + c_rate + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.75 26.1 5.73
## - vert_leap 1 0.91 26.3 5.97
## - X40 1 1.09 26.4 6.22
## <none> 25.4 6.63
## - c_avg_yds 1 1.57 26.9 6.91
## - c_rate 1 1.76 27.1 7.17
## - c_avg_att 1 2.27 27.6 7.88
## - c_numyrs 1 2.30 27.7 7.93
## + c_pct 1 0.33 25.0 8.12
## + height 1 0.16 25.2 8.39
## + c_avg_cmpp 1 0.14 25.2 8.41
## + weight 1 0.13 25.2 8.43
## + cone 1 0.05 25.3 8.55
## + shuttle 1 0.04 25.3 8.57
## + c_avg_inter 1 0.02 25.3 8.59
## - age 1 2.92 28.3 8.77
## - broad_jump 1 3.50 28.9 9.54
## - wonderlic 1 4.17 29.5 10.41
##
## Step: AIC=5.73
## tds ~ age + c_rate + c_avg_yds + c_numyrs + c_avg_att + X40 +
## wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - X40 1 0.69 26.8 4.72
## - c_rate 1 1.24 27.3 5.49
## - vert_leap 1 1.35 27.4 5.65
## <none> 26.1 5.73
## - c_numyrs 1 2.05 28.1 6.60
## + c_avg_tds 1 0.75 25.4 6.63
## + c_avg_cmpp 1 0.28 25.8 7.32
## + weight 1 0.27 25.8 7.34
## - c_avg_att 1 2.69 28.8 7.46
## + height 1 0.19 25.9 7.46
## + c_pct 1 0.13 26.0 7.54
## + cone 1 0.03 26.1 7.69
## + shuttle 1 0.00 26.1 7.73
## + c_avg_inter 1 0.00 26.1 7.73
## - broad_jump 1 3.00 29.1 7.87
## - age 1 3.12 29.2 8.02
## - c_avg_yds 1 3.42 29.5 8.41
## - wonderlic 1 4.63 30.7 9.94
##
## Step: AIC=4.72
## tds ~ age + c_rate + c_avg_yds + c_numyrs + c_avg_att + wonderlic +
## vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.87 27.7 3.94
## <none> 26.8 4.72
## - c_numyrs 1 1.79 28.6 5.18
## + X40 1 0.69 26.1 5.73
## - c_avg_att 1 2.21 29.0 5.74
## - broad_jump 1 2.33 29.1 5.90
## + weight 1 0.53 26.3 5.96
## + height 1 0.48 26.3 6.04
## + c_avg_tds 1 0.35 26.4 6.22
## + c_avg_cmpp 1 0.32 26.5 6.27
## + c_pct 1 0.28 26.5 6.32
## - vert_leap 1 2.70 29.5 6.37
## - age 1 2.77 29.6 6.46
## + shuttle 1 0.14 26.7 6.53
## + cone 1 0.11 26.7 6.56
## - c_avg_yds 1 2.88 29.7 6.60
## + c_avg_inter 1 0.03 26.8 6.68
## - wonderlic 1 4.37 31.2 8.46
##
## Step: AIC=3.94
## tds ~ age + c_avg_yds + c_numyrs + c_avg_att + wonderlic + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.94 28.6 3.21
## - c_avg_att 1 1.36 29.0 3.77
## <none> 27.7 3.94
## - broad_jump 1 1.71 29.4 4.23
## + c_rate 1 0.87 26.8 4.72
## - age 1 2.19 29.9 4.83
## - c_avg_yds 1 2.35 30.0 5.05
## - vert_leap 1 2.42 30.1 5.13
## + weight 1 0.47 27.2 5.29
## + height 1 0.41 27.3 5.38
## + c_pct 1 0.38 27.3 5.41
## + X40 1 0.33 27.3 5.49
## + shuttle 1 0.24 27.4 5.61
## + c_avg_tds 1 0.12 27.5 5.78
## + cone 1 0.11 27.6 5.79
## + c_avg_inter 1 0.06 27.6 5.85
## + c_avg_cmpp 1 0.04 27.6 5.89
## - wonderlic 1 4.28 31.9 7.41
##
## Step: AIC=3.21
## tds ~ age + c_avg_yds + c_avg_att + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_att 1 0.91 29.5 2.41
## - broad_jump 1 1.27 29.9 2.87
## <none> 28.6 3.21
## - c_avg_yds 1 1.90 30.5 3.66
## - vert_leap 1 2.00 30.6 3.78
## + c_numyrs 1 0.94 27.7 3.94
## - age 1 2.19 30.8 4.02
## + weight 1 0.68 27.9 4.30
## + X40 1 0.37 28.2 4.72
## + height 1 0.33 28.3 4.77
## + shuttle 1 0.29 28.3 4.83
## + c_avg_tds 1 0.20 28.4 4.95
## + cone 1 0.19 28.4 4.97
## + c_rate 1 0.03 28.6 5.18
## + c_avg_cmpp 1 0.02 28.6 5.19
## + c_avg_inter 1 0.00 28.6 5.21
## + c_pct 1 0.00 28.6 5.21
## - wonderlic 1 4.06 32.7 6.25
##
## Step: AIC=2.41
## tds ~ age + c_avg_yds + wonderlic + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 1.50 31.0 2.29
## <none> 29.5 2.41
## - vert_leap 1 2.12 31.6 3.04
## + weight 1 0.92 28.6 3.20
## + c_avg_att 1 0.91 28.6 3.21
## - age 1 2.37 31.9 3.34
## + c_avg_tds 1 0.69 28.8 3.51
## - c_avg_yds 1 2.70 32.2 3.73
## + c_avg_inter 1 0.51 29.0 3.75
## + c_numyrs 1 0.49 29.0 3.77
## + shuttle 1 0.35 29.2 3.96
## + height 1 0.29 29.2 4.04
## + c_avg_cmpp 1 0.27 29.2 4.05
## + X40 1 0.26 29.3 4.07
## + c_rate 1 0.16 29.4 4.21
## + c_pct 1 0.05 29.5 4.34
## + cone 1 0.03 29.5 4.36
## - wonderlic 1 3.93 33.5 5.15
##
## Step: AIC=2.29
## tds ~ age + c_avg_yds + wonderlic + vert_leap
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.65 31.7 1.08
## <none> 31.0 2.29
## + broad_jump 1 1.50 29.5 2.41
## + weight 1 1.38 29.6 2.56
## + c_avg_att 1 1.14 29.9 2.87
## - c_avg_yds 1 2.40 33.4 3.12
## + c_avg_tds 1 0.71 30.3 3.41
## - age 1 2.80 33.8 3.57
## + c_avg_inter 1 0.57 30.5 3.59
## + height 1 0.38 30.6 3.82
## + c_rate 1 0.33 30.7 3.88
## + shuttle 1 0.32 30.7 3.90
## + c_avg_cmpp 1 0.21 30.8 4.03
## + c_numyrs 1 0.16 30.9 4.09
## + c_pct 1 0.16 30.9 4.10
## + X40 1 0.00 31.0 4.29
## + cone 1 0.00 31.0 4.29
## - wonderlic 1 3.94 35.0 4.83
##
## Step: AIC=1.08
## tds ~ age + c_avg_yds + wonderlic
##
## Df Sum of Sq RSS AIC
## <none> 31.7 1.08
## + weight 1 1.54 30.1 1.19
## + c_avg_att 1 1.06 30.6 1.79
## + c_avg_tds 1 0.93 30.7 1.95
## - c_avg_yds 1 2.51 34.2 1.98
## + shuttle 1 0.85 30.8 2.05
## + c_avg_inter 1 0.76 30.9 2.15
## + vert_leap 1 0.65 31.0 2.29
## - age 1 2.81 34.5 2.30
## + height 1 0.56 31.1 2.41
## + X40 1 0.38 31.3 2.62
## + c_rate 1 0.19 31.5 2.85
## + c_numyrs 1 0.17 31.5 2.88
## + cone 1 0.14 31.5 2.91
## + c_pct 1 0.10 31.6 2.96
## + c_avg_cmpp 1 0.06 31.6 3.01
## + broad_jump 1 0.03 31.6 3.04
## - wonderlic 1 3.63 35.3 3.20
# Coefficients, residual summary and fit statistics of the AIC-selected model
summary(object = step_reg.scaled.w_combine.tds)
##
## Call:
## lm(formula = tds ~ age + c_avg_yds + wonderlic, data = data.scaled.w_combine.for_tds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.502 -0.692 -0.202 0.520 1.806
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.79e-16 1.57e-01 0.00 1.000
## age -3.17e-01 1.83e-01 -1.74 0.092 .
## c_avg_yds 2.78e-01 1.69e-01 1.64 0.110
## wonderlic -3.54e-01 1.80e-01 -1.97 0.057 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.965 on 34 degrees of freedom
## Multiple R-squared: 0.144, Adjusted R-squared: 0.0685
## F-statistic: 1.91 on 3 and 34 DF, p-value: 0.147
# Standard lm diagnostic plots for the AIC-selected model.
plot(step_reg.scaled.w_combine.tds)

# Best-subsets search over the same predictors, recording up to 10 candidate
# models per subset size.
leaps.scaled.w_combine.tds <- leaps::regsubsets(
  tds ~ .,
  data = data.scaled.w_combine.for_tds,
  nbest = 10
)

# Plot R-squared against subset size. By default subsets() positions its
# legend with a mouse click, which is not available when the script is run
# non-interactively and aborts the call with "invalid coordinate lengths".
# legend = FALSE skips the on-plot legend; the table of predictor
# abbreviations is returned (and printed) instead.
car::subsets(leaps.scaled.w_combine.tds, statistic = "rsq", legend = FALSE)

# 5-fold cross-validation of the selected model. The data frame is passed
# positionally because its argument name differs between DAAG releases
# (`df` in older versions, `data` in newer ones).
DAAG::cv.lm(
  data.scaled.w_combine.for_tds,
  step_reg.scaled.w_combine.tds,
  m = 5
)
## Analysis of Variance Table
##
## Response: tds
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 0.5 0.45 0.49 0.490
## c_avg_yds 1 1.2 1.25 1.34 0.255
## wonderlic 1 3.6 3.63 3.89 0.057 .
## Residuals 34 31.7 0.93
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 6 21 26 32 38 46 59
## Predicted 0.0551 -0.605 0.222 0.308 -0.4200 -0.11484 0.139
## cvpred 0.0327 -0.781 0.290 0.420 -0.4894 -0.10757 0.278
## tds 1.1019 -0.399 -0.549 -0.399 -0.0987 -0.09874 -0.249
## CV residual 1.0693 0.382 -0.839 -0.819 0.3907 0.00883 -0.526
##
## Sum of squares = 3.09 Mean square = 0.44 n = 7
##
## fold 2
## Observations in test set: 8
## 7 18 20 27 37 43 55 65
## Predicted -0.179 -0.0173 -0.296 0.646 -0.480 0.329 -0.133 -0.576
## cvpred -0.124 -0.0105 -0.205 0.649 -0.564 0.259 -0.149 -0.611
## tds -0.549 -0.3989 -0.999 2.453 0.201 -0.849 -1.149 0.802
## CV residual -0.425 -0.3884 -0.794 1.804 0.765 -1.108 -1.000 1.413
##
## Sum of squares = 9.03 Mean square = 1.13 n = 8
##
## fold 3
## Observations in test set: 8
## 5 12 13 16 39 40 50 56
## Predicted -0.000613 0.289 0.313 0.299 0.729 -0.568 0.1238 0.218640
## cvpred -0.309489 0.124 0.404 -0.020 0.597 -0.654 -0.0997 0.000097
## tds 1.552209 -0.699 1.252 1.402 -0.399 -0.249 0.0513 2.002468
## CV residual 1.861698 -0.823 0.848 1.422 -0.996 0.405 0.1511 2.002371
##
## Sum of squares = 12.1 Mean square = 1.51 n = 8
##
## fold 4
## Observations in test set: 8
## 4 15 17 19 28 52 63 64
## Predicted -0.2476 0.3693 -0.530 -0.3506 0.693 -0.492 0.155 0.3352
## cvpred 0.0633 0.3756 -0.230 -0.1161 0.556 -0.381 0.268 0.3416
## tds -1.7497 -0.0987 -0.399 0.0513 1.252 -1.149 -0.399 -0.0987
## CV residual -1.8130 -0.4743 -0.169 0.1674 0.696 -0.768 -0.667 -0.4403
##
## Sum of squares = 5.28 Mean square = 0.66 n = 8
##
## fold 5
## Observations in test set: 7
## 1 3 24 30 42 49 61
## Predicted 0.236 0.0307 0.201 0.381 -0.2168 -0.6281 -0.217
## cvpred 0.160 0.1014 0.303 0.433 -0.0266 -0.5155 -0.399
## tds 2.002 -1.4495 -0.699 0.652 -0.5490 -0.5490 -0.549
## CV residual 1.842 -1.5509 -1.003 0.218 -0.5224 -0.0335 -0.150
##
## Sum of squares = 7.15 Mean square = 1.02 n = 7
##
## Overall (Sum over all 7 folds)
## ms
## 0.964