# Load the quarterback data set
qb_stats <- read.csv("../data/qb_stats.csv")

# Candidate predictor columns used to explain the rating
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter",
  "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[, predictors]

# Response variable; single-bracket indexing keeps it as a one-column
# data frame, so the column is still named "rating" after cbind()
rating <- qb_stats["rating"]

# Clean data set: drop every row containing an NA, then centre and scale
# all columns (response included)
data.scaled.no_combine.for_rating <- data.frame(
  scale(
    na.omit(cbind(rating, college_stats))
  )
)

# Full linear model: rating regressed on every predictor
lm.scaled.no_combine.rating <- lm(
  formula = rating ~ .,
  data = data.scaled.no_combine.for_rating
)

# Stepwise AIC search (terms may be dropped or re-added at each step),
# starting from the full model
step_reg.scaled.no_combine.rating <- stepAIC(
  lm.scaled.no_combine.rating,
  direction = "both"
)
## Start: AIC=-21.26
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.01 196 -23.26
## - c_pct 1 0.02 196 -23.24
## - c_avg_tds 1 0.03 196 -23.22
## - c_avg_yds 1 0.18 196 -23.04
## - c_rate 1 0.19 196 -23.03
## - height 1 1.08 197 -21.96
## - c_avg_inter 1 1.24 197 -21.76
## - c_avg_cmpp 1 1.28 197 -21.72
## - c_avg_att 1 1.52 197 -21.43
## <none> 196 -21.26
## - weight 1 3.70 200 -18.83
## - age 1 14.74 210 -6.06
##
## Step: AIC=-23.26
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_pct 1 0.02 196 -25.23
## - c_avg_tds 1 0.03 196 -25.22
## - c_avg_yds 1 0.18 196 -25.04
## - c_rate 1 0.19 196 -25.03
## - height 1 1.07 197 -23.96
## - c_avg_inter 1 1.27 197 -23.72
## - c_avg_cmpp 1 1.36 197 -23.62
## - c_avg_att 1 1.65 198 -23.26
## <none> 196 -23.26
## + c_numyrs 1 0.01 196 -21.26
## - weight 1 3.74 200 -20.77
## - age 1 14.76 211 -8.03
##
## Step: AIC=-25.23
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_avg_inter +
## c_avg_tds + c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.02 196 -27.21
## - c_avg_yds 1 0.17 196 -27.03
## - c_rate 1 0.28 196 -26.89
## - height 1 1.07 197 -25.95
## - c_avg_inter 1 1.30 197 -25.67
## <none> 196 -25.23
## - c_avg_att 1 2.05 198 -24.77
## - c_avg_cmpp 1 3.26 199 -23.32
## + c_pct 1 0.02 196 -23.26
## + c_numyrs 1 0.00 196 -23.24
## - weight 1 3.80 200 -22.68
## - age 1 15.32 211 -9.38
##
## Step: AIC=-27.21
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_avg_inter +
## c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.26 196 -28.9
## - c_avg_yds 1 0.30 196 -28.9
## - height 1 1.09 197 -27.9
## - c_avg_inter 1 1.35 197 -27.6
## <none> 196 -27.2
## - c_avg_att 1 2.06 198 -26.7
## - c_avg_cmpp 1 3.24 199 -25.3
## + c_avg_tds 1 0.02 196 -25.2
## + c_pct 1 0.01 196 -25.2
## + c_numyrs 1 0.00 196 -25.2
## - weight 1 3.79 200 -24.7
## - age 1 15.56 211 -11.1
##
## Step: AIC=-28.89
## rating ~ height + weight + age + c_avg_cmpp + c_avg_inter + c_avg_yds +
## c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.08 196 -30.8
## - height 1 1.10 197 -29.6
## - c_avg_inter 1 1.15 197 -29.5
## <none> 196 -28.9
## - c_avg_att 1 1.88 198 -28.6
## - c_avg_cmpp 1 3.01 199 -27.3
## + c_rate 1 0.26 196 -27.2
## + c_pct 1 0.11 196 -27.0
## + c_numyrs 1 0.02 196 -26.9
## + c_avg_tds 1 0.00 196 -26.9
## - weight 1 3.75 200 -26.4
## - age 1 15.77 212 -12.6
##
## Step: AIC=-30.8
## rating ~ height + weight + age + c_avg_cmpp + c_avg_inter + c_avg_att
##
## Df Sum of Sq RSS AIC
## - height 1 1.13 197 -31.4
## - c_avg_inter 1 1.15 197 -31.4
## <none> 196 -30.8
## - c_avg_att 1 1.80 198 -30.6
## + c_pct 1 0.10 196 -28.9
## + c_avg_yds 1 0.08 196 -28.9
## + c_avg_tds 1 0.04 196 -28.9
## + c_rate 1 0.04 196 -28.9
## + c_numyrs 1 0.02 196 -28.8
## - weight 1 3.79 200 -28.3
## - c_avg_cmpp 1 4.43 201 -27.5
## - age 1 15.72 212 -14.5
##
## Step: AIC=-31.43
## rating ~ weight + age + c_avg_cmpp + c_avg_inter + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 1.39 199 -31.8
## - c_avg_att 1 1.55 199 -31.6
## <none> 197 -31.4
## + height 1 1.13 196 -30.8
## - weight 1 2.70 200 -30.2
## + c_pct 1 0.12 197 -29.6
## + c_avg_yds 1 0.11 197 -29.6
## + c_avg_tds 1 0.08 197 -29.5
## + c_rate 1 0.03 197 -29.5
## + c_numyrs 1 0.00 197 -29.4
## - c_avg_cmpp 1 4.08 201 -28.6
## - age 1 15.07 212 -16.0
##
## Step: AIC=-31.77
## rating ~ weight + age + c_avg_cmpp + c_avg_att
##
## Df Sum of Sq RSS AIC
## <none> 199 -31.8
## + c_avg_inter 1 1.39 197 -31.4
## + height 1 1.37 197 -31.4
## + c_avg_tds 1 0.18 198 -30.0
## + c_pct 1 0.16 198 -30.0
## + c_avg_yds 1 0.11 199 -29.9
## + c_numyrs 1 0.04 199 -29.8
## + c_rate 1 0.00 199 -29.8
## - weight 1 3.92 203 -29.1
## - c_avg_att 1 6.88 206 -25.7
## - c_avg_cmpp 1 9.76 208 -22.4
## - age 1 14.79 214 -16.8
# Print the coefficient table and fit statistics of the model selected by stepAIC
summary(step_reg.scaled.no_combine.rating)
##
## Call:
## lm(formula = rating ~ weight + age + c_avg_cmpp + c_avg_att,
## data = data.scaled.no_combine.for_rating)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.925 -0.581 0.002 0.591 2.632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.34e-16 6.01e-02 0.00 1.00000
## weight 1.39e-01 6.49e-02 2.14 0.03334 *
## age 2.57e-01 6.19e-02 4.16 4.6e-05 ***
## c_avg_cmpp 1.12e+00 3.33e-01 3.38 0.00086 ***
## c_avg_att -9.35e-01 3.30e-01 -2.84 0.00498 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.925 on 232 degrees of freedom
## Multiple R-squared: 0.158, Adjusted R-squared: 0.144
## F-statistic: 10.9 on 4 and 232 DF, p-value: 4.17e-08
# Diagnostic plots for the stepwise-selected linear model
plot(step_reg.scaled.no_combine.rating)

# Best-subsets search over all predictors, keeping the 10 best models
# of each subset size
leaps.scaled.no_combine.rating <- regsubsets(
  rating ~ .,
  data = data.scaled.no_combine.for_rating,
  nbest = 10
)

# Plot R-squared against subset size.
# By default subsets() positions its legend interactively with the mouse,
# which cannot work in a non-interactive (knitted/batch) run; this call
# previously failed with "Error: invalid coordinate lengths".
# legend = FALSE skips the on-plot legend; subsets() then returns the
# abbreviation key as a data frame instead.
subsets(leaps.scaled.no_combine.rating, statistic = "rsq", legend = FALSE)
# 5-fold cross-validation of the stepwise-selected model
cv.lm(
  df = data.scaled.no_combine.for_rating,
  step_reg.scaled.no_combine.rating,
  m = 5
)
## Analysis of Variance Table
##
## Response: rating
## Df Sum Sq Mean Sq F value Pr(>F)
## weight 1 7.1 7.11 8.30 0.0043 **
## age 1 15.0 14.98 17.49 4.1e-05 ***
## c_avg_cmpp 1 8.3 8.32 9.71 0.0021 **
## c_avg_att 1 6.9 6.88 8.04 0.0050 **
## Residuals 232 198.7 0.86
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 47
## 3 8 15 18 19 20 21 23 26
## Predicted 1.128 0.223 0.781 0.264 0.0254 0.0511 0.366 0.102 0.5188
## cvpred 1.151 0.265 0.887 0.277 0.1058 0.0738 0.339 0.162 0.5290
## rating 0.228 0.808 0.369 0.421 -0.4874 -0.7258 -0.175 1.479 0.5627
## CV residual -0.923 0.543 -0.518 0.144 -0.5932 -0.7996 -0.514 1.317 0.0337
## 35 45 54 56 69 71 72 73
## Predicted -0.0108 0.116 1.028 -0.1036 -0.0344 -0.0353 0.0678 0.817
## cvpred -0.0686 0.148 1.050 -0.0638 0.0448 -0.0177 0.1279 0.805
## rating 0.1084 0.883 0.637 -0.5321 -0.5023 0.5106 1.2628 0.399
## CV residual 0.1770 0.735 -0.413 -0.4684 -0.5471 0.5283 1.1349 -0.406
## 76 79 81 82 91 96 115 118
## Predicted 0.112 -0.236 -0.3134 0.0724 -0.151 0.0922 -0.0784 -0.00859
## cvpred 0.132 -0.207 -0.2787 0.0804 -0.152 0.1541 -0.0440 -0.01998
## rating 0.101 0.466 -0.0778 -1.2471 -2.275 0.0860 -1.0311 0.16797
## CV residual -0.031 0.673 0.2009 -1.3275 -2.123 -0.0680 -0.9872 0.18795
## 121 122 124 131 132 133 135 140
## Predicted 0.357 -0.417 -0.0957 -0.179 -0.313 -0.543 -0.3955 0.193
## cvpred 0.331 -0.417 -0.0351 -0.115 -0.270 -0.479 -0.3866 0.262
## rating 1.084 -0.048 -0.1746 -0.316 -0.167 -0.130 -0.4651 -0.458
## CV residual 0.753 0.369 -0.1395 -0.201 0.103 0.349 -0.0785 -0.719
## 150 155 164 176 183 187 194 206 215
## Predicted -0.113 0.443 0.2015 -0.647 0.240 -0.307 -0.0337 -0.220 -0.251
## cvpred -0.109 0.457 0.1874 -0.559 0.199 -0.270 0.0315 -0.166 -0.213
## rating 1.762 1.777 0.2797 -0.480 1.010 -1.202 1.1585 -1.746 -3.176
## CV residual 1.870 1.320 0.0923 0.079 0.811 -0.932 1.1270 -1.580 -2.963
## 224 229 236 237 238
## Predicted -0.600 -0.337 -0.371 -0.398 -0.485
## cvpred -0.514 -0.261 -0.277 -0.306 -0.384
## rating -1.255 -1.627 -2.133 -1.739 0.622
## CV residual -0.740 -1.366 -1.857 -1.433 1.006
##
## Sum of squares = 44.9 Mean square = 0.95 n = 47
##
## fold 2
## Observations in test set: 48
## 24 31 33 36 37 39 41 42 59
## Predicted 0.115 0.0580 -0.2433 0.165 -0.100 0.129 0.564 0.518 0.177
## cvpred 0.145 0.0664 -0.2815 0.194 -0.104 0.157 0.669 0.603 0.205
## rating 1.352 1.3820 0.0935 -0.644 -2.230 -0.212 0.332 0.198 0.466
## CV residual 1.207 1.3155 0.3750 -0.838 -2.126 -0.369 -0.337 -0.405 0.261
## 62 74 85 88 89 99 103 108
## Predicted -0.722 0.870 0.199 0.221 -0.00323 1.025 -0.162 -0.124
## cvpred -0.820 0.945 0.213 0.274 0.00163 1.100 -0.190 -0.123
## rating -0.532 0.511 1.159 0.123 1.03937 -0.048 0.972 -0.741
## CV residual 0.288 -0.434 0.946 -0.150 1.03774 -1.148 1.162 -0.618
## 110 111 116 119 126 128 138 139
## Predicted -0.2935 -0.402 0.118 0.332 -0.0447 -0.0418 0.0165 0.121
## cvpred -0.3272 -0.442 0.159 0.355 -0.0503 -0.0295 0.0315 0.125
## rating -0.3385 -0.190 -0.845 0.220 0.1903 -1.0311 1.0021 0.995
## CV residual -0.0113 0.253 -1.004 -0.135 0.2406 -1.0016 0.9707 0.870
## 146 149 152 153 159 166 170 173
## Predicted 0.390 -0.322 0.0553 -0.315 -0.404 0.108 -0.373 -0.0354
## cvpred 0.454 -0.351 0.1012 -0.340 -0.458 0.148 -0.404 -0.0476
## rating -1.195 -1.441 -0.0182 -0.972 0.682 -1.091 0.287 1.5011
## CV residual -1.649 -1.090 -0.1194 -0.632 1.139 -1.238 0.691 1.5488
## 175 178 191 192 198 202 209 210
## Predicted -0.447 -0.5908 -0.230 -0.455 0.207 -0.501 0.0649 0.0336
## cvpred -0.495 -0.6652 -0.275 -0.512 0.216 -0.551 0.0622 0.0299
## rating 1.360 -0.0182 -0.376 -0.763 -0.324 -0.830 -1.0609 1.1809
## CV residual 1.855 0.6470 -0.101 -0.251 -0.539 -0.279 -1.1231 1.1509
## 212 214 216 218 233 234 240
## Predicted -0.168 -0.3788 -0.496 0.255 -0.755 -0.126 -0.414
## cvpred -0.191 -0.4209 -0.557 0.257 -0.851 -0.151 -0.452
## rating -0.376 -0.3310 -0.830 -1.590 -1.337 1.382 -0.122
## CV residual -0.185 0.0899 -0.273 -1.847 -0.485 1.533 0.330
##
## Sum of squares = 41.3 Mean square = 0.86 n = 48
##
## fold 3
## Observations in test set: 48
## 2 4 5 6 7 14 17 46
## Predicted 0.6601 0.1029 0.312 -0.0582 0.0317 0.5721 -0.216 0.071830
## cvpred 0.5636 0.0197 0.255 -0.1280 -0.1868 0.4538 -0.321 0.000727
## rating 0.5180 0.4882 2.142 0.3318 1.1139 0.5180 -0.636 -0.733224
## CV residual -0.0456 0.4685 1.886 0.4598 1.3006 0.0642 -0.315 -0.733952
## 47 51 55 60 66 67 70 77 78
## Predicted -0.121 0.320 0.1104 0.0719 -0.409 0.220 0.572 -0.108 0.1902
## cvpred -0.121 0.267 0.0663 0.0735 -0.506 0.188 0.498 -0.133 0.0746
## rating 0.250 0.578 -1.0535 0.2052 0.898 2.380 -0.242 -0.487 0.6148
## CV residual 0.371 0.311 -1.1198 0.1317 1.404 2.192 -0.740 -0.355 0.5402
## 80 86 90 100 102 112 114 141
## Predicted 0.478 0.5985 0.5593 0.0943 0.7636 0.397 -0.302 -0.140
## cvpred 0.505 0.7278 0.6078 -0.0107 0.8112 0.345 -0.367 -0.110
## rating 0.943 0.8234 0.0637 0.1084 0.7415 1.092 1.173 -0.338
## CV residual 0.437 0.0956 -0.5441 0.1191 -0.0697 0.747 1.540 -0.228
## 144 156 157 158 160 163 165 167
## Predicted 0.04902 -0.0821 0.1787 -0.20546 0.0908 -0.164 0.330 0.315
## cvpred 0.00721 -0.1327 0.1573 -0.17590 0.0822 -0.247 0.404 0.319
## rating 1.02448 0.2871 0.1829 -0.16718 0.5478 1.970 -0.204 -0.547
## CV residual 1.01726 0.4199 0.0256 0.00871 0.4656 2.217 -0.609 -0.866
## 171 174 182 184 190 199 201 203
## Predicted -0.112 -0.0303 -0.691 0.0409 -0.0769 -0.561 -0.298 -0.0724
## cvpred -0.103 0.0439 -0.742 0.0164 -0.0317 -0.574 -0.299 -0.0996
## rating -0.793 0.8308 -0.450 -1.1875 -1.2918 -1.768 -0.636 -0.0704
## CV residual -0.690 0.7869 0.292 -1.2040 -1.2601 -1.194 -0.337 0.0292
## 208 211 219 226 232 235 239
## Predicted -0.2544 -0.500 -0.714 -0.145 -0.759 -0.122 -0.749
## cvpred -0.2964 -0.634 -0.762 -0.214 -0.826 -0.106 -0.828
## rating -0.0629 -1.389 -1.493 1.092 0.384 -0.897 0.183
## CV residual 0.2334 -0.754 -0.731 1.306 1.210 -0.791 1.011
##
## Sum of squares = 38.3 Mean square = 0.8 n = 48
##
## fold 4
## Observations in test set: 47
## 9 13 25 27 29 34 43 44 48
## Predicted 0.288 0.601 -0.245 0.378 0.574 0.0102 0.345 0.00425 -0.185
## cvpred 0.273 0.750 -0.231 0.433 0.700 0.1044 0.371 0.15217 -0.073
## rating 1.307 -0.219 0.801 1.337 0.228 0.0637 0.518 -1.02369 -1.068
## CV residual 1.035 -0.969 1.032 0.904 -0.472 -0.0407 0.147 -1.17586 -0.995
## 50 53 64 65 68 75 97 101 106
## Predicted 0.568 -0.251 0.8815 0.143 -0.170 0.736 -0.0686 0.717 0.0215
## cvpred 0.654 -0.192 1.0712 0.129 -0.115 0.925 0.0350 0.699 0.0719
## rating 1.307 0.339 1.1660 0.548 -0.718 -0.562 -0.8375 0.838 -0.7034
## CV residual 0.654 0.532 0.0948 0.419 -0.604 -1.487 -0.8725 0.139 -0.7754
## 107 113 117 123 129 130 134 137
## Predicted -0.164 -0.0655 0.362 0.00152 0.0149 0.276 -0.2489 -0.0949
## cvpred -0.112 -0.0446 0.468 -0.01084 0.0891 0.367 -0.1932 -0.1014
## rating 0.548 -0.2193 -1.627 0.24245 -0.1895 -1.545 -0.1746 -1.5376
## CV residual 0.659 -0.1747 -2.095 0.25329 -0.2786 -1.912 0.0186 -1.4362
## 147 148 154 161 169 177 180 181
## Predicted -0.177 0.476 -0.176 -0.123 -0.632 -0.3078 -0.297 0.177
## cvpred -0.199 0.471 -0.226 -0.134 -0.667 -0.2729 -0.301 0.167
## rating -0.122 -0.301 -1.195 -0.435 -0.532 -0.2491 0.295 -0.592
## CV residual 0.076 -0.772 -0.969 -0.301 0.135 0.0238 0.596 -0.759
## 185 189 193 195 197 200 205 213 220
## Predicted -0.414 -0.751 -0.263 -0.453 0.283 0.104 -0.174 -0.753 -0.359
## cvpred -0.296 -0.894 -0.249 -0.477 0.331 0.133 -0.163 -0.864 -0.391
## rating -1.202 1.881 -0.763 -0.532 1.397 0.473 -0.912 -1.783 -0.726
## CV residual -0.906 2.775 -0.514 -0.055 1.066 0.340 -0.749 -0.919 -0.334
## 222 225 227 230
## Predicted -1.004 -0.358 -0.564 -0.646
## cvpred -0.916 -0.400 -0.616 -0.703
## rating -1.135 -0.271 -1.955 1.635
## CV residual -0.220 0.129 -1.338 2.338
##
## Sum of squares = 43.5 Mean square = 0.93 n = 47
##
## fold 5
## Observations in test set: 47
## 1 10 11 12 16 22 28 30 32
## Predicted 0.1334 0.1190 0.509 0.682 0.253 0.538 0.288 -0.264 0.792
## cvpred 0.0527 0.0875 0.417 0.476 0.169 0.403 0.134 -0.346 0.632
## rating 2.2683 0.0414 0.250 0.861 -0.830 1.807 1.300 0.965 1.315
## CV residual 2.2155 -0.0461 -0.167 0.384 -0.999 1.403 1.166 1.311 0.683
## 38 40 49 52 57 58 61 83 84
## Predicted 0.752 0.967 -0.196 0.691 0.1667 0.515 0.1563 0.905 0.515
## cvpred 0.553 0.713 -0.125 0.539 0.0187 0.415 0.0738 0.720 0.417
## rating 1.672 2.119 -2.141 2.127 0.8234 0.257 -0.1523 2.119 0.272
## CV residual 1.119 1.407 -2.016 1.588 0.8047 -0.157 -0.2261 1.399 -0.145
## 87 92 93 94 95 98 104 105
## Predicted 0.448 0.200 0.04260 -0.294 0.39320 0.403 0.0367 -0.1492
## cvpred 0.336 0.124 0.00972 -0.290 0.31420 0.333 0.0232 -0.1779
## rating -0.048 -1.783 0.42120 0.265 -0.00333 1.479 -0.5694 0.0488
## CV residual -0.384 -1.908 0.41148 0.555 -0.31753 1.146 -0.5925 0.2267
## 109 120 125 127 136 142 143 145
## Predicted -0.2046 -0.02386 0.313 0.229 -0.23485 -0.09370 -0.0994 -0.299
## cvpred -0.2054 -0.00736 0.241 0.147 -0.21680 -0.08832 -0.0632 -0.300
## rating -0.2342 -0.56192 0.920 1.382 -0.21932 -0.08526 0.2499 -0.249
## CV residual -0.0288 -0.55456 0.679 1.235 -0.00252 0.00306 0.3131 0.051
## 151 162 168 172 179 186 188 196
## Predicted -0.120 -0.323 -0.189 -0.1175 -0.694 -0.353 -0.311 -0.62023
## cvpred -0.263 -0.307 -0.168 -0.0985 -0.564 -0.266 -0.241 -0.60096
## rating -0.659 -0.770 0.771 -0.7258 -1.024 -0.689 -1.500 -0.60661
## CV residual -0.396 -0.463 0.940 -0.6273 -0.459 -0.423 -1.259 -0.00565
## 207 217 221 223 228
## Predicted -0.526 -0.396 -0.125 -0.6205 -0.168
## cvpred -0.475 -0.337 -0.107 -0.4925 -0.128
## rating -0.703 -1.917 0.838 -0.0629 1.181
## CV residual -0.228 -1.580 0.945 0.4295 1.309
##
## Sum of squares = 41.6 Mean square = 0.88 n = 47
##
## Overall (Sum over all 47 folds)
## ms
## 0.884