# Load the quarterback data set from CSV
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as candidate predictors
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Response variable, kept as a one-column data frame so that cbind() below
# carries the column name through
games_started <- qb_stats_w_combine["games_started"]

# Modelling data: response + predictors, rows containing any NA dropped first,
# then every column centred and scaled
data.scaled.w_combine.for_games_started <- data.frame(
  scale(
    na.omit(
      cbind(games_started, college_stats)
    )
  )
)

# Full linear model: games_started regressed on all predictors
lm.scaled.w_combine.games_started <- lm(
  formula = games_started ~ .,
  data = data.scaled.w_combine.for_games_started
)

# Stepwise (forward and backward) AIC search starting from the full model
step_reg.scaled.w_combine.games_started <- stepAIC(
  lm.scaled.w_combine.games_started,
  direction = "both"
)
## Start: AIC=15.01
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_pct + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs +
## c_avg_att + X40 + wonderlic + cone + shuttle + vert_leap +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.01 22.8 13.0
## - vert_leap 1 0.01 22.8 13.0
## - c_rate 1 0.04 22.8 13.1
## - c_avg_yds 1 0.05 22.8 13.1
## - shuttle 1 0.13 22.9 13.2
## - broad_jump 1 0.19 23.0 13.3
## - X40 1 0.22 23.0 13.4
## - cone 1 0.33 23.1 13.6
## - c_avg_inter 1 0.33 23.1 13.6
## - height 1 0.46 23.2 13.8
## - c_avg_tds 1 0.54 23.3 13.9
## - weight 1 0.59 23.4 14.0
## - c_avg_cmpp 1 0.62 23.4 14.1
## <none> 22.8 15.0
## - c_numyrs 1 1.62 24.4 15.7
## - c_avg_att 1 2.56 25.3 17.2
## - wonderlic 1 3.28 26.1 18.3
## - age 1 4.74 27.5 20.4
##
## Step: AIC=13.02
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - vert_leap 1 0.01 22.8 11.0
## - shuttle 1 0.12 22.9 11.2
## - c_avg_yds 1 0.17 22.9 11.3
## - broad_jump 1 0.21 23.0 11.4
## - X40 1 0.22 23.0 11.4
## - c_avg_inter 1 0.36 23.1 11.6
## - cone 1 0.37 23.1 11.7
## - height 1 0.50 23.3 11.9
## - weight 1 0.60 23.4 12.0
## - c_avg_tds 1 1.06 23.8 12.8
## <none> 22.8 13.0
## - c_rate 1 1.31 24.1 13.2
## - c_numyrs 1 1.67 24.4 13.8
## + c_pct 1 0.01 22.8 15.0
## - c_avg_cmpp 1 2.78 25.6 15.5
## - c_avg_att 1 3.15 25.9 16.1
## - wonderlic 1 4.16 26.9 17.6
## - age 1 5.05 27.8 18.8
##
## Step: AIC=11.04
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + broad_jump
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.12 22.9 9.24
## - c_avg_yds 1 0.20 23.0 9.38
## - X40 1 0.22 23.0 9.40
## - broad_jump 1 0.27 23.1 9.49
## - cone 1 0.38 23.2 9.68
## - c_avg_inter 1 0.39 23.2 9.69
## - height 1 0.50 23.3 9.89
## - weight 1 0.59 23.4 10.04
## - c_avg_tds 1 1.08 23.9 10.85
## <none> 22.8 11.04
## - c_rate 1 1.31 24.1 11.21
## - c_numyrs 1 1.72 24.5 11.88
## + vert_leap 1 0.01 22.8 13.02
## + c_pct 1 0.00 22.8 13.03
## - c_avg_cmpp 1 2.78 25.6 13.52
## - c_avg_att 1 3.20 26.0 14.16
## - wonderlic 1 4.27 27.1 15.74
## - age 1 5.08 27.9 16.88
##
## Step: AIC=9.24
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.26 23.2 7.68
## - broad_jump 1 0.29 23.2 7.73
## - c_avg_inter 1 0.30 23.2 7.74
## - c_avg_yds 1 0.33 23.2 7.79
## - height 1 0.45 23.4 8.00
## - weight 1 0.48 23.4 8.05
## - X40 1 0.49 23.4 8.07
## - c_avg_tds 1 0.97 23.9 8.85
## <none> 22.9 9.24
## - c_rate 1 1.40 24.3 9.56
## - c_numyrs 1 1.68 24.6 10.00
## + shuttle 1 0.12 22.8 11.04
## + c_pct 1 0.00 22.9 11.23
## + vert_leap 1 0.00 22.9 11.24
## - c_avg_cmpp 1 2.66 25.6 11.52
## - c_avg_att 1 3.11 26.0 12.20
## - wonderlic 1 4.36 27.3 14.03
## - age 1 5.12 28.0 15.11
##
## Step: AIC=7.68
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.17 23.3 5.97
## - c_avg_yds 1 0.29 23.5 6.17
## - height 1 0.37 23.5 6.30
## - X40 1 0.38 23.6 6.32
## - broad_jump 1 0.42 23.6 6.38
## - weight 1 0.45 23.6 6.44
## - c_avg_tds 1 0.95 24.1 7.26
## <none> 23.2 7.68
## - c_rate 1 1.36 24.5 7.91
## - c_numyrs 1 1.54 24.7 8.18
## + cone 1 0.26 22.9 9.24
## + c_pct 1 0.03 23.1 9.63
## + vert_leap 1 0.01 23.1 9.66
## + shuttle 1 0.00 23.2 9.68
## - c_avg_cmpp 1 2.56 25.7 9.77
## - c_avg_att 1 2.93 26.1 10.32
## - wonderlic 1 4.23 27.4 12.22
## - age 1 5.36 28.5 13.80
##
## Step: AIC=5.97
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## broad_jump
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.21 23.6 4.33
## - height 1 0.31 23.6 4.48
## - weight 1 0.41 23.8 4.65
## - X40 1 0.51 23.9 4.82
## - broad_jump 1 0.58 23.9 4.92
## - c_avg_tds 1 0.85 24.2 5.38
## <none> 23.3 5.97
## - c_rate 1 1.31 24.6 6.10
## - c_numyrs 1 1.37 24.7 6.20
## + c_avg_inter 1 0.17 23.2 7.68
## + cone 1 0.14 23.2 7.74
## + vert_leap 1 0.03 23.3 7.92
## + c_pct 1 0.00 23.3 7.97
## + shuttle 1 0.00 23.3 7.97
## - c_avg_cmpp 1 2.58 25.9 8.06
## - c_avg_att 1 3.94 27.3 10.06
## - wonderlic 1 4.50 27.8 10.86
## - age 1 5.49 28.8 12.21
##
## Step: AIC=4.33
## games_started ~ height + weight + age + c_avg_cmpp + c_rate +
## c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + broad_jump
##
## Df Sum of Sq RSS AIC
## - height 1 0.27 23.8 2.77
## - weight 1 0.34 23.9 2.89
## - X40 1 0.42 24.0 3.02
## - broad_jump 1 0.48 24.0 3.12
## - c_rate 1 1.11 24.7 4.12
## - c_numyrs 1 1.16 24.7 4.21
## <none> 23.6 4.33
## - c_avg_tds 1 1.43 25.0 4.62
## + c_avg_yds 1 0.21 23.3 5.97
## + cone 1 0.14 23.4 6.10
## + c_avg_inter 1 0.09 23.5 6.17
## + c_pct 1 0.08 23.5 6.19
## + vert_leap 1 0.05 23.5 6.24
## + shuttle 1 0.02 23.5 6.30
## - c_avg_cmpp 1 2.74 26.3 6.62
## - wonderlic 1 4.35 27.9 8.94
## - c_avg_att 1 4.62 28.2 9.31
## - age 1 5.28 28.8 10.21
##
## Step: AIC=2.77
## games_started ~ weight + age + c_avg_cmpp + c_rate + c_avg_tds +
## c_numyrs + c_avg_att + X40 + wonderlic + broad_jump
##
## Df Sum of Sq RSS AIC
## - weight 1 0.09 23.9 0.92
## - broad_jump 1 0.48 24.3 1.55
## - X40 1 0.66 24.5 1.84
## - c_numyrs 1 0.96 24.8 2.31
## - c_rate 1 1.03 24.8 2.41
## <none> 23.8 2.77
## - c_avg_tds 1 1.34 25.2 2.90
## + height 1 0.27 23.6 4.33
## + c_avg_yds 1 0.17 23.6 4.48
## + c_pct 1 0.11 23.7 4.58
## + cone 1 0.10 23.7 4.60
## - c_avg_cmpp 1 2.48 26.3 4.63
## + c_avg_inter 1 0.06 23.8 4.67
## + vert_leap 1 0.05 23.8 4.69
## + shuttle 1 0.01 23.8 4.75
## - wonderlic 1 4.30 28.1 7.24
## - c_avg_att 1 4.41 28.2 7.40
## - age 1 5.01 28.8 8.22
##
## Step: AIC=0.92
## games_started ~ age + c_avg_cmpp + c_rate + c_avg_tds + c_numyrs +
## c_avg_att + X40 + wonderlic + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.39 24.3 -0.44
## - X40 1 0.58 24.5 -0.15
## - c_numyrs 1 0.89 24.8 0.35
## - c_rate 1 0.96 24.9 0.46
## <none> 23.9 0.92
## - c_avg_tds 1 1.26 25.2 0.93
## - c_avg_cmpp 1 2.39 26.3 2.63
## + c_avg_yds 1 0.15 23.8 2.69
## + cone 1 0.12 23.8 2.73
## + weight 1 0.09 23.8 2.77
## + c_pct 1 0.09 23.8 2.77
## + c_avg_inter 1 0.06 23.9 2.82
## + vert_leap 1 0.03 23.9 2.87
## + height 1 0.02 23.9 2.89
## + shuttle 1 0.00 23.9 2.92
## - wonderlic 1 4.40 28.3 5.51
## - c_avg_att 1 4.43 28.4 5.56
## - age 1 5.78 29.7 7.37
##
## Step: AIC=-0.44
## games_started ~ age + c_avg_cmpp + c_rate + c_avg_tds + c_numyrs +
## c_avg_att + X40 + wonderlic
##
## Df Sum of Sq RSS AIC
## - X40 1 0.19 24.5 -2.13
## - c_rate 1 0.61 24.9 -1.47
## - c_numyrs 1 0.61 24.9 -1.47
## - c_avg_tds 1 0.90 25.2 -1.02
## <none> 24.3 -0.44
## + broad_jump 1 0.39 23.9 0.92
## - c_avg_cmpp 1 2.40 26.7 1.23
## + vert_leap 1 0.19 24.1 1.26
## + cone 1 0.16 24.1 1.30
## + c_avg_inter 1 0.16 24.1 1.30
## + c_avg_yds 1 0.09 24.2 1.42
## + height 1 0.09 24.2 1.42
## + c_pct 1 0.06 24.2 1.46
## + weight 1 0.01 24.3 1.55
## + shuttle 1 0.00 24.3 1.56
## - c_avg_att 1 4.20 28.5 3.78
## - wonderlic 1 4.52 28.8 4.22
## - age 1 5.71 30.0 5.79
##
## Step: AIC=-2.13
## games_started ~ age + c_avg_cmpp + c_rate + c_avg_tds + c_numyrs +
## c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.77 25.3 -2.92
## - c_numyrs 1 0.79 25.3 -2.89
## - c_avg_tds 1 0.87 25.4 -2.77
## <none> 24.5 -2.13
## + X40 1 0.19 24.3 -0.44
## + height 1 0.17 24.3 -0.41
## + c_avg_inter 1 0.16 24.3 -0.39
## + shuttle 1 0.08 24.4 -0.26
## + c_avg_yds 1 0.06 24.4 -0.24
## + c_pct 1 0.04 24.5 -0.20
## + cone 1 0.02 24.5 -0.16
## + broad_jump 1 0.01 24.5 -0.15
## + weight 1 0.01 24.5 -0.14
## + vert_leap 1 0.00 24.5 -0.14
## - c_avg_cmpp 1 2.84 27.3 0.15
## - wonderlic 1 4.38 28.9 2.28
## - c_avg_att 1 4.78 29.3 2.82
## - age 1 5.61 30.1 3.91
##
## Step: AIC=-2.92
## games_started ~ age + c_avg_cmpp + c_avg_tds + c_numyrs + c_avg_att +
## wonderlic
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.29 25.6 -4.47
## - c_avg_tds 1 0.35 25.6 -4.39
## <none> 25.3 -2.92
## + c_pct 1 0.80 24.5 -2.18
## + c_rate 1 0.77 24.5 -2.13
## - c_avg_cmpp 1 2.16 27.4 -1.72
## + X40 1 0.36 24.9 -1.47
## + height 1 0.17 25.1 -1.18
## + shuttle 1 0.15 25.1 -1.15
## + vert_leap 1 0.13 25.1 -1.12
## + c_avg_inter 1 0.13 25.1 -1.12
## + broad_jump 1 0.09 25.2 -1.06
## + c_avg_yds 1 0.04 25.2 -0.98
## + weight 1 0.01 25.3 -0.93
## + cone 1 0.00 25.3 -0.92
## - c_avg_att 1 4.02 29.3 0.84
## - wonderlic 1 4.11 29.4 0.96
## - age 1 5.02 30.3 2.15
##
## Step: AIC=-4.47
## games_started ~ age + c_avg_cmpp + c_avg_tds + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.38 25.9 -5.90
## <none> 25.6 -4.47
## - c_avg_cmpp 1 1.93 27.5 -3.63
## + X40 1 0.44 25.1 -3.15
## + c_numyrs 1 0.29 25.3 -2.92
## + c_rate 1 0.27 25.3 -2.89
## + c_pct 1 0.18 25.4 -2.76
## + shuttle 1 0.18 25.4 -2.75
## + vert_leap 1 0.15 25.4 -2.71
## + broad_jump 1 0.15 25.4 -2.70
## + height 1 0.13 25.4 -2.68
## + c_avg_yds 1 0.09 25.5 -2.61
## + c_avg_inter 1 0.04 25.5 -2.54
## + cone 1 0.01 25.6 -2.48
## + weight 1 0.00 25.6 -2.47
## - c_avg_att 1 3.73 29.3 -1.16
## - wonderlic 1 3.89 29.4 -0.95
## - age 1 4.91 30.5 0.38
##
## Step: AIC=-5.9
## games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic
##
## Df Sum of Sq RSS AIC
## <none> 25.9 -5.90
## + c_avg_tds 1 0.38 25.6 -4.47
## + X40 1 0.33 25.6 -4.40
## + c_numyrs 1 0.32 25.6 -4.39
## + broad_jump 1 0.18 25.8 -4.17
## + vert_leap 1 0.17 25.8 -4.16
## + c_pct 1 0.15 25.8 -4.12
## + shuttle 1 0.11 25.8 -4.07
## + height 1 0.10 25.8 -4.06
## + c_rate 1 0.04 25.9 -3.96
## + c_avg_yds 1 0.02 25.9 -3.93
## + c_avg_inter 1 0.00 25.9 -3.90
## + cone 1 0.00 25.9 -3.90
## + weight 1 0.00 25.9 -3.90
## - wonderlic 1 4.50 30.4 -1.66
## - c_avg_cmpp 1 4.55 30.5 -1.60
## - c_avg_att 1 4.74 30.7 -1.36
## - age 1 5.35 31.3 -0.59
# Print the coefficient table and fit statistics of the model selected by the
# stepwise AIC search
summary(step_reg.scaled.w_combine.games_started)
##
## Call:
## lm(formula = games_started ~ age + c_avg_cmpp + c_avg_att + wonderlic,
## data = data.scaled.w_combine.for_games_started)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8516 -0.5632 -0.0246 0.6552 1.4239
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.37e-16 1.40e-01 0.00 1.000
## age -4.21e-01 1.59e-01 -2.65 0.012 *
## c_avg_cmpp 1.62e+00 6.62e-01 2.44 0.020 *
## c_avg_att -1.64e+00 6.58e-01 -2.49 0.018 *
## wonderlic -3.89e-01 1.60e-01 -2.43 0.021 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.873 on 34 degrees of freedom
## Multiple R-squared: 0.317, Adjusted R-squared: 0.237
## F-statistic: 3.95 on 4 and 34 DF, p-value: 0.00971
# Diagnostic plots for the stepwise-selected model
plot(step_reg.scaled.w_combine.games_started)

# Best-subsets search over all predictors, keeping the 10 best models of each
# subset size
leaps.scaled.w_combine.games_started <- regsubsets(
  games_started ~ .,
  data = data.scaled.w_combine.for_games_started,
  nbest = 10
)

# Plot R-squared against subset size. The legend position is given explicitly:
# when it is left unspecified, subsets() waits for a mouse click to place the
# legend, which cannot happen when the script is rendered non-interactively and
# makes the call fail with "invalid coordinate lengths".
subsets(
  leaps.scaled.w_combine.games_started,
  statistic = "rsq",
  legend = "bottomright"
)

# 5-fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally rather than as `df =`.
# NOTE(review): the first formal is believed to be `df` in older DAAG releases
# and `data` in newer ones -- confirm against the installed version.
cv.lm(
  data.scaled.w_combine.for_games_started,
  form.lm = step_reg.scaled.w_combine.games_started,
  m = 5
)
## Analysis of Variance Table
##
## Response: games_started
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 4.20 4.20 5.50 0.025 *
## c_avg_cmpp 1 0.15 0.15 0.19 0.666
## c_avg_att 1 3.21 3.21 4.21 0.048 *
## wonderlic 1 4.50 4.50 5.90 0.021 *
## Residuals 34 25.94 0.76
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 3 21 24 40 42 52 61
## Predicted -0.3433 -0.962 -0.220 -0.717 -0.684 -0.956 0.695
## cvpred -0.2979 -0.923 -0.121 -0.708 -0.574 -0.935 0.687
## games_started -0.2013 -1.409 -1.409 0.403 -0.805 -1.409 0.101
## CV residual 0.0966 -0.486 -1.288 1.110 -0.231 -0.474 -0.587
##
## Sum of squares = 3.76 Mean square = 0.54 n = 7
##
## fold 2
## Observations in test set: 8
## 6 18 25 37 43 50 55 63
## Predicted 0.0289 0.1129 1.205 0.611 -0.433 -0.1114 -0.325 -0.273
## cvpred 0.0709 0.0654 1.169 0.384 -0.163 0.0643 -0.247 -0.200
## games_started 1.3086 -0.5033 1.309 1.309 -0.805 -0.5033 -1.107 -0.805
## CV residual 1.2377 -0.5687 0.139 0.925 -0.642 -0.5676 -0.861 -0.606
##
## Sum of squares = 4.57 Mean square = 0.57 n = 8
##
## fold 3
## Observations in test set: 8
## 5 7 16 20 28 32 49 64
## Predicted 0.2385 -0.188 -0.117 -1.107 0.743 0.0525 -0.1768 0.767
## cvpred -0.0382 0.257 -0.099 -0.616 0.974 0.2230 -0.1158 0.832
## games_started 1.0066 -1.409 0.101 -1.409 1.309 -0.2013 -0.2013 -0.201
## CV residual 1.0449 -1.666 0.200 -0.793 0.334 -0.4243 -0.0855 -1.033
##
## Sum of squares = 5.9 Mean square = 0.74 n = 8
##
## fold 4
## Observations in test set: 8
## 12 13 26 30 38 39 59 65
## Predicted -0.0924 1.231 0.696 0.213 -0.413 0.580 -0.425 -0.0693
## cvpred -0.0108 1.159 0.669 0.355 -0.525 0.812 -0.327 -0.2899
## games_started -0.8053 1.309 1.309 1.309 0.101 -0.805 -1.107 0.7047
## CV residual -0.7945 0.149 0.640 0.953 0.625 -1.617 -0.780 0.9946
##
## Sum of squares = 6.58 Mean square = 0.82 n = 8
##
## fold 5
## Observations in test set: 8
## 1 4 15 17 19 27 46 56
## Predicted 0.193 0.442 0.2508 -0.579 0.00636 -0.417 0.559 -0.0143
## cvpred -0.172 1.087 -0.0556 -0.298 0.32811 -1.206 0.830 -0.1397
## games_started 1.309 -1.409 0.4027 0.403 -0.50332 1.007 1.007 1.3086
## CV residual 1.480 -2.496 0.4582 0.700 -0.83144 2.213 0.177 1.4483
##
## Sum of squares = 16.8 Mean square = 2.11 n = 8
##
## Overall (Sum over all 8 folds)
## ms
## 0.965