# Load the quarterback stats table from disk
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")
# Names of the columns used as predictors (kept in this order)
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]
# Set the response variable; single-bracket indexing keeps it a one-column
# data frame, so the column is still called "rating" after cbind() below
rating <- qb_stats_w_combine["rating"]
# Generate the clean data set: bind response and predictors, drop every row
# containing an NA, then take log(x + 0.1) of all columns (response included).
# NOTE(review): the 0.1 offset presumably keeps zero values finite under
# log() -- confirm that no column can be <= -0.1.
data.log.w_combine.for_rating <- data.frame(
  log(na.omit(cbind(rating, college_stats)) + 0.1)
)
# Full model: regress (log) rating on every other column of the cleaned data
lm.log.w_combine.rating <- lm(
  formula = rating ~ .,
  data = data.log.w_combine.for_rating
)
# Stepwise model selection by AIC starting from the full model; terms may be
# both dropped and re-added (direction = "both"). The trace is printed below.
step_reg.log.w_combine.rating <- stepAIC(
  lm.log.w_combine.rating,
  direction = "both"
)
## Start: AIC=-122.7
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - cone 1 0.0000 0.583 -125
## - broad_jump 1 0.0005 0.584 -125
## - c_avg_tds 1 0.0014 0.585 -125
## - shuttle 1 0.0021 0.585 -125
## - c_avg_yds 1 0.0030 0.586 -124
## - c_avg_inter 1 0.0044 0.588 -124
## - c_rate 1 0.0046 0.588 -124
## - height 1 0.0066 0.590 -124
## - wonderlic 1 0.0141 0.597 -124
## <none> 0.583 -123
## - vert_leap 1 0.0323 0.616 -123
## - X40 1 0.0457 0.629 -122
## - weight 1 0.0489 0.632 -122
## - c_numyrs 1 0.0601 0.643 -121
## - c_avg_att 1 0.0858 0.669 -120
## - c_avg_cmpp 1 0.1074 0.691 -118
## - c_pct 1 0.1092 0.693 -118
## - age 1 0.1217 0.705 -118
##
## Step: AIC=-124.7
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + shuttle + vert_leap + broad_jump
##
## Df Sum of Sq RSS AIC
## - broad_jump 1 0.0005 0.584 -127
## - c_avg_tds 1 0.0014 0.585 -127
## - shuttle 1 0.0029 0.586 -126
## - c_avg_yds 1 0.0030 0.586 -126
## - c_rate 1 0.0046 0.588 -126
## - c_avg_inter 1 0.0051 0.589 -126
## - height 1 0.0068 0.590 -126
## - wonderlic 1 0.0171 0.601 -126
## <none> 0.583 -125
## - vert_leap 1 0.0329 0.616 -125
## - X40 1 0.0459 0.629 -124
## - weight 1 0.0515 0.635 -124
## - c_numyrs 1 0.0614 0.645 -123
## + cone 1 0.0000 0.583 -123
## - c_avg_att 1 0.0858 0.669 -122
## - c_avg_cmpp 1 0.1074 0.691 -120
## - c_pct 1 0.1091 0.693 -120
## - age 1 0.1219 0.705 -120
##
## Step: AIC=-126.7
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att +
## X40 + wonderlic + shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.0022 0.586 -128
## - shuttle 1 0.0027 0.587 -128
## - c_avg_yds 1 0.0042 0.588 -128
## - c_avg_inter 1 0.0046 0.589 -128
## - c_rate 1 0.0063 0.590 -128
## - height 1 0.0072 0.591 -128
## - wonderlic 1 0.0177 0.602 -128
## <none> 0.584 -127
## - vert_leap 1 0.0471 0.631 -126
## - weight 1 0.0510 0.635 -126
## - X40 1 0.0537 0.638 -125
## + broad_jump 1 0.0005 0.583 -125
## + cone 1 0.0001 0.584 -125
## - c_numyrs 1 0.0671 0.651 -124
## - c_avg_att 1 0.0873 0.671 -123
## - c_avg_cmpp 1 0.1075 0.691 -122
## - c_pct 1 0.1086 0.693 -122
## - age 1 0.1213 0.705 -122
##
## Step: AIC=-128.5
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic +
## shuttle + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.0023 0.588 -130
## - shuttle 1 0.0028 0.589 -130
## - c_rate 1 0.0094 0.596 -130
## - height 1 0.0095 0.596 -130
## - c_avg_inter 1 0.0114 0.598 -130
## - wonderlic 1 0.0157 0.602 -130
## <none> 0.586 -128
## - vert_leap 1 0.0503 0.636 -127
## - weight 1 0.0577 0.644 -127
## - X40 1 0.0583 0.644 -127
## + c_avg_tds 1 0.0022 0.584 -127
## + broad_jump 1 0.0014 0.585 -127
## + cone 1 0.0002 0.586 -126
## - c_numyrs 1 0.0650 0.651 -126
## - c_avg_att 1 0.1080 0.694 -124
## - c_pct 1 0.1086 0.695 -124
## - c_avg_cmpp 1 0.1167 0.703 -124
## - age 1 0.1245 0.711 -123
##
## Step: AIC=-130.4
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_numyrs + c_avg_att + X40 + wonderlic + shuttle +
## vert_leap
##
## Df Sum of Sq RSS AIC
## - shuttle 1 0.0049 0.593 -132
## - height 1 0.0091 0.598 -132
## - wonderlic 1 0.0147 0.603 -131
## - c_avg_inter 1 0.0158 0.604 -131
## - c_rate 1 0.0243 0.613 -131
## <none> 0.588 -130
## - vert_leap 1 0.0484 0.637 -129
## - weight 1 0.0555 0.644 -129
## - X40 1 0.0593 0.648 -129
## - c_numyrs 1 0.0627 0.651 -128
## + c_avg_yds 1 0.0023 0.586 -128
## + broad_jump 1 0.0015 0.587 -128
## + c_avg_tds 1 0.0003 0.588 -128
## + cone 1 0.0001 0.588 -128
## - c_pct 1 0.1101 0.699 -126
## - c_avg_att 1 0.1201 0.709 -125
## - c_avg_cmpp 1 0.1217 0.710 -125
## - age 1 0.1275 0.716 -125
##
## Step: AIC=-132.1
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_numyrs + c_avg_att + X40 + wonderlic + vert_leap
##
## Df Sum of Sq RSS AIC
## - height 1 0.0101 0.604 -133
## - c_avg_inter 1 0.0145 0.608 -133
## - wonderlic 1 0.0197 0.613 -133
## - c_rate 1 0.0231 0.616 -133
## <none> 0.593 -132
## - X40 1 0.0564 0.650 -131
## - vert_leap 1 0.0582 0.652 -130
## - c_numyrs 1 0.0585 0.652 -130
## + shuttle 1 0.0049 0.588 -130
## + c_avg_yds 1 0.0044 0.589 -130
## + cone 1 0.0013 0.592 -130
## + broad_jump 1 0.0011 0.592 -130
## + c_avg_tds 1 0.0011 0.592 -130
## - weight 1 0.0823 0.676 -129
## - c_pct 1 0.1055 0.699 -128
## - c_avg_att 1 0.1153 0.709 -127
## - c_avg_cmpp 1 0.1169 0.710 -127
## - age 1 0.1368 0.730 -126
##
## Step: AIC=-133.4
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter +
## c_numyrs + c_avg_att + X40 + wonderlic + vert_leap
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.0167 0.620 -134
## - c_rate 1 0.0263 0.630 -134
## <none> 0.604 -133
## - wonderlic 1 0.0357 0.639 -133
## + height 1 0.0101 0.593 -132
## + shuttle 1 0.0059 0.598 -132
## + c_avg_yds 1 0.0041 0.599 -132
## - c_numyrs 1 0.0641 0.668 -132
## + broad_jump 1 0.0023 0.601 -132
## - vert_leap 1 0.0665 0.670 -131
## + c_avg_tds 1 0.0004 0.603 -131
## + cone 1 0.0002 0.603 -131
## - X40 1 0.0809 0.684 -131
## - weight 1 0.0885 0.692 -130
## - c_pct 1 0.1101 0.714 -129
## - c_avg_att 1 0.1220 0.726 -128
## - c_avg_cmpp 1 0.1235 0.727 -128
## - age 1 0.1297 0.733 -128
##
## Step: AIC=-134.4
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_numyrs +
## c_avg_att + X40 + wonderlic + vert_leap
##
## Df Sum of Sq RSS AIC
## <none> 0.620 -134
## + c_avg_inter 1 0.0167 0.604 -133
## - c_numyrs 1 0.0513 0.672 -133
## - wonderlic 1 0.0533 0.674 -133
## + height 1 0.0124 0.608 -133
## - vert_leap 1 0.0579 0.678 -133
## + c_avg_yds 1 0.0089 0.611 -133
## - c_rate 1 0.0631 0.683 -133
## + shuttle 1 0.0044 0.616 -133
## + cone 1 0.0023 0.618 -132
## + broad_jump 1 0.0007 0.620 -132
## + c_avg_tds 1 0.0000 0.620 -132
## - X40 1 0.0714 0.692 -132
## - weight 1 0.0958 0.716 -131
## - c_pct 1 0.1086 0.729 -130
## - age 1 0.1209 0.741 -130
## - c_avg_cmpp 1 0.1252 0.745 -129
## - c_avg_att 1 0.1257 0.746 -129
# Print the coefficient table and overall fit statistics of the model that
# the stepwise search settled on
summary(step_reg.log.w_combine.rating)
##
## Call:
## lm(formula = rating ~ weight + age + c_avg_cmpp + c_rate + c_pct +
## c_numyrs + c_avg_att + X40 + wonderlic + vert_leap, data = data.log.w_combine.for_rating)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3132 -0.0839 -0.0043 0.0871 0.3266
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.547 23.831 2.25 0.033 *
## weight 1.049 0.513 2.04 0.051 .
## age 1.023 0.446 2.29 0.030 *
## c_avg_cmpp 11.851 5.076 2.33 0.027 *
## c_rate -0.619 0.374 -1.66 0.109
## c_pct -10.714 4.927 -2.17 0.039 *
## c_numyrs 0.206 0.138 1.49 0.147
## c_avg_att -11.816 5.051 -2.34 0.027 *
## X40 -1.963 1.114 -1.76 0.089 .
## wonderlic -0.173 0.113 -1.52 0.139
## vert_leap -0.568 0.358 -1.59 0.124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.152 on 27 degrees of freedom
## Multiple R-squared: 0.478, Adjusted R-squared: 0.284
## F-statistic: 2.47 on 10 and 27 DF, p-value: 0.0302
# Draw the diagnostic plots for the selected model.
# NOTE(review): the recorded run emits "NaNs produced" warnings here; this is
# presumably caused by an observation with leverage (hat value) of ~1 -- verify.
plot(step_reg.log.w_combine.rating)
## Warning: NaNs produced
## Warning: NaNs produced
# Best-subsets search over the same predictors, keeping the 10 best models of
# each size (nbest = 10)
leaps.log.w_combine.rating <- regsubsets(rating ~ ., data = data.log.w_combine.for_rating,
    nbest = 10)
# Plot R-squared against subset size. legend = FALSE is required when this
# script is run non-interactively: by default subsets() positions its legend
# with locator(), which returns NULL on a non-interactive device and makes
# legend() stop with "invalid coordinate lengths" (the error this call used to
# produce). With legend = FALSE the key of predictor abbreviations is returned
# as a data frame and printed instead of being drawn on the plot.
subsets(leaps.log.w_combine.rating, statistic = "rsq", legend = FALSE)
# 5-fold cross-validation of the stepwise-selected model on the cleaned data
cv.lm(
  df = data.log.w_combine.for_rating,
  form.lm = step_reg.log.w_combine.rating,
  m = 5
)
## Analysis of Variance Table
##
## Response: rating
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 0.046 0.0463 2.02 0.1670
## age 1 0.263 0.2631 11.45 0.0022 **
## c_avg_cmpp 1 0.000 0.0001 0.01 0.9373
## c_rate 1 0.000 0.0005 0.02 0.8863
## c_pct 1 0.024 0.0243 1.06 0.3130
## c_numyrs 1 0.007 0.0066 0.29 0.5960
## c_avg_att 1 0.092 0.0918 4.00 0.0558 .
## X40 1 0.026 0.0257 1.12 0.2994
## wonderlic 1 0.051 0.0506 2.20 0.1494
## vert_leap 1 0.058 0.0579 2.52 0.1240
## Residuals 27 0.620 0.0230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 7
## 6 24 26 32 38 46 59
## Predicted 4.3134 4.3782 4.2370 4.444 4.1452 4.1440 4.342
## cvpred 4.3229 4.3591 4.2347 4.474 4.1876 4.1243 4.173
## rating 4.3883 4.2863 4.1415 4.305 4.0943 4.1447 4.467
## CV residual 0.0654 -0.0728 -0.0932 -0.168 -0.0933 0.0204 0.294
##
## Sum of squares = 0.14 Mean square = 0.02 n = 7
##
## fold 2
## Observations in test set: 8
## 7 19 21 27 37 43 55 65
## Predicted 4.47 4.3367 4.398 4.254 4.41 4.115 4.2067 4.286
## cvpred 4.38 4.2716 4.368 4.095 9.66 3.968 4.2396 4.132
## rating 4.28 4.2513 4.589 4.414 4.39 4.135 4.1447 4.461
## CV residual -0.10 -0.0203 0.221 0.319 -5.27 0.167 -0.0949 0.329
##
## Sum of squares = 28.1 Mean square = 3.51 n = 8
##
## fold 3
## Observations in test set: 8
## 5 12 13 17 39 40 50 56
## Predicted 4.262 4.106 4.280 4.3228 4.182 4.186 4.0948 4.280
## cvpred 4.207 4.017 4.145 4.2994 4.295 4.171 4.1017 4.137
## rating 4.339 4.228 4.438 4.2808 4.093 4.307 4.1125 4.606
## CV residual 0.132 0.211 0.293 -0.0186 -0.203 0.135 0.0108 0.469
##
## Sum of squares = 0.43 Mean square = 0.05 n = 8
##
## fold 4
## Observations in test set: 8
## 4 15 18 20 28 52 63 64
## Predicted 4.024 4.472 4.3501 4.30535 4.435 4.274 4.397 4.3256
## cvpred 4.135 4.535 4.3853 4.26513 4.468 4.277 4.487 4.3197
## rating 3.711 4.321 4.4006 4.26409 4.355 4.013 4.359 4.2905
## CV residual -0.424 -0.214 0.0153 -0.00105 -0.112 -0.265 -0.127 -0.0293
##
## Sum of squares = 0.33 Mean square = 0.04 n = 8
##
## fold 5
## Observations in test set: 7
## 1 3 25 30 42 49 61
## Predicted 4.3134 4.094 4.515 4.24 4.3841 4.4786 4.171
## cvpred 4.3027 4.250 4.480 4.19 4.4315 4.4187 3.964
## rating 4.3373 3.968 4.586 4.34 4.3969 4.4694 4.261
## CV residual 0.0346 -0.282 0.106 0.15 -0.0345 0.0506 0.297
##
## Sum of squares = 0.21 Mean square = 0.03 n = 7
##
## Overall (Sum over all 7 folds)
## ms
## 0.768