# Load the quarterback data set (path is relative to the working directory).
qb_stats <- read.csv("../data/qb_stats.csv")

# Names of the predictor columns: player measurements plus the c_* college
# statistics.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter",
  "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[predictors]

# Response variable, kept as a one-column data frame so that cbind() below
# produces a data frame with a column named "rating".
rating <- qb_stats["rating"]

# Clean modelling data set: drop every row that has a missing value, then
# replace each column (response included) by log(x + 0.1).
# NOTE(review): the 0.1 offset presumably keeps zero entries finite under
# log() -- confirm this is the intended transform for the response as well.
data.log.no_combine.for_rating <- local({
  complete_rows <- na.omit(cbind(rating, college_stats))
  data.frame(log(complete_rows + 0.1))
})

# Full linear model: log-rating regressed on every log-transformed predictor.
lm.log.no_combine.rating <- lm(
  formula = rating ~ .,
  data = data.log.no_combine.for_rating
)

# Stepwise search by AIC starting from the full model; with
# direction = "both" a term can be dropped or added back at each step.
step_reg.log.no_combine.rating <- stepAIC(
  lm.log.no_combine.rating,
  direction = "both"
)
## Start: AIC=-776.9
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_numyrs 1 0.000 8.07 -779
## - c_avg_inter 1 0.010 8.08 -779
## - c_avg_tds 1 0.057 8.13 -777
## <none> 8.07 -777
## - c_avg_yds 1 0.079 8.15 -777
## - c_rate 1 0.082 8.16 -777
## - height 1 0.102 8.18 -776
## - weight 1 0.176 8.25 -774
## - c_avg_att 1 0.188 8.26 -773
## - c_avg_cmpp 1 0.219 8.29 -773
## - c_pct 1 0.220 8.29 -773
## - age 1 0.562 8.64 -763
##
## Step: AIC=-778.9
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_inter + c_avg_tds + c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_inter 1 0.010 8.08 -781
## - c_avg_tds 1 0.059 8.13 -779
## <none> 8.07 -779
## - c_avg_yds 1 0.080 8.15 -779
## - c_rate 1 0.084 8.16 -778
## - height 1 0.103 8.18 -778
## + c_numyrs 1 0.000 8.07 -777
## - weight 1 0.180 8.25 -776
## - c_avg_att 1 0.193 8.27 -775
## - c_avg_cmpp 1 0.227 8.30 -774
## - c_pct 1 0.230 8.30 -774
## - age 1 0.562 8.64 -765
##
## Step: AIC=-780.6
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_tds + c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_tds 1 0.053 8.14 -781
## <none> 8.08 -781
## - height 1 0.100 8.18 -780
## - c_avg_yds 1 0.105 8.19 -780
## + c_avg_inter 1 0.010 8.07 -779
## - c_rate 1 0.128 8.21 -779
## + c_numyrs 1 0.000 8.08 -779
## - weight 1 0.170 8.25 -778
## - c_avg_att 1 0.183 8.27 -777
## - c_avg_cmpp 1 0.218 8.30 -776
## - c_pct 1 0.224 8.31 -776
## - age 1 0.555 8.64 -767
##
## Step: AIC=-781.1
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_yds + c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_avg_yds 1 0.061 8.20 -781
## <none> 8.14 -781
## - c_rate 1 0.078 8.21 -781
## + c_avg_tds 1 0.053 8.08 -781
## - height 1 0.101 8.24 -780
## + c_avg_inter 1 0.004 8.13 -779
## + c_numyrs 1 0.001 8.14 -779
## - weight 1 0.176 8.31 -778
## - c_avg_att 1 0.209 8.35 -777
## - c_pct 1 0.231 8.37 -776
## - c_avg_cmpp 1 0.235 8.37 -776
## - age 1 0.599 8.74 -766
##
## Step: AIC=-781.3
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct +
## c_avg_att
##
## Df Sum of Sq RSS AIC
## - c_rate 1 0.017 8.21 -783
## <none> 8.20 -781
## + c_avg_yds 1 0.061 8.14 -781
## + c_avg_inter 1 0.040 8.16 -780
## - height 1 0.101 8.30 -780
## + c_avg_tds 1 0.008 8.19 -780
## + c_numyrs 1 0.000 8.20 -779
## - weight 1 0.195 8.39 -778
## - c_pct 1 0.382 8.58 -773
## - c_avg_cmpp 1 0.399 8.60 -772
## - c_avg_att 1 0.400 8.60 -772
## - age 1 0.594 8.79 -767
##
## Step: AIC=-782.8
## rating ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_att
##
## Df Sum of Sq RSS AIC
## <none> 8.21 -783
## + c_avg_inter 1 0.056 8.16 -782
## - height 1 0.105 8.32 -782
## + c_rate 1 0.017 8.20 -781
## + c_avg_tds 1 0.002 8.21 -781
## + c_numyrs 1 0.000 8.21 -781
## + c_avg_yds 1 0.000 8.21 -781
## - weight 1 0.199 8.41 -779
## - c_pct 1 0.379 8.59 -774
## - c_avg_cmpp 1 0.403 8.62 -773
## - c_avg_att 1 0.403 8.62 -773
## - age 1 0.578 8.79 -769
# Print the coefficient table and overall fit statistics of the model chosen
# by the stepwise search above.
summary(step_reg.log.no_combine.rating)
##
## Call:
## lm(formula = rating ~ height + weight + age + c_avg_cmpp + c_pct +
## c_avg_att, data = data.log.no_combine.for_rating)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7778 -0.1042 0.0189 0.1203 0.4547
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.804 16.441 3.39 0.00081 ***
## height -1.292 0.753 -1.72 0.08763 .
## weight 0.618 0.262 2.36 0.01900 *
## age 0.583 0.145 4.02 7.8e-05 ***
## c_avg_cmpp 11.557 3.442 3.36 0.00092 ***
## c_pct -11.080 3.399 -3.26 0.00128 **
## c_avg_att -11.535 3.433 -3.36 0.00091 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.189 on 230 degrees of freedom
## Multiple R-squared: 0.166, Adjusted R-squared: 0.144
## F-statistic: 7.64 on 6 and 230 DF, p-value: 1.72e-07
# Standard lm diagnostic plots for the stepwise-selected model.
plot(step_reg.log.no_combine.rating)

# Exhaustive best-subsets search over the same predictors, keeping the 10 best
# models of each size.
leaps.log.no_combine.rating <- regsubsets(
  rating ~ .,
  data = data.log.no_combine.for_rating,
  nbest = 10
)

# Plot R-squared against subset size. By default subsets() places its legend
# with a mouse click; in a non-interactive run (e.g. knitr) no click is
# available and the call aborts with "invalid coordinate lengths". The legend
# is therefore suppressed and the key of abbreviated predictor names returned
# by subsets() is printed instead.
print(
  subsets(leaps.log.no_combine.rating, statistic = "rsq", legend = FALSE)
)
# 5-fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally rather than as `df =`: older DAAG
# releases name the first argument `df`, later ones name it `data`, and the
# positional form works with both. The fitted lm object supplies the model
# formula.
cv.lm(
  data.log.no_combine.for_rating,
  step_reg.log.no_combine.rating,
  m = 5
)
## Analysis of Variance Table
##
## Response: rating
## Df Sum Sq Mean Sq F value Pr(>F)
## height 1 0.06 0.056 1.56 0.21249
## weight 1 0.23 0.232 6.51 0.01139 *
## age 1 0.61 0.615 17.21 4.7e-05 ***
## c_avg_cmpp 1 0.11 0.111 3.10 0.07961 .
## c_pct 1 0.22 0.220 6.15 0.01386 *
## c_avg_att 1 0.40 0.403 11.29 0.00091 ***
## Residuals 230 8.21 0.036
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
##
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values. Lines that are shown for the different folds are approximate
##
## fold 1
## Observations in test set: 47
## 3 8 15 18 19 20 21 23 26
## Predicted 4.424 4.267 4.3074 4.2788 4.280 4.1761 4.2835 4.31 4.3010
## cvpred 4.405 4.273 4.2989 4.2848 4.291 4.1812 4.2866 4.30 4.3082
## rating 4.286 4.388 4.3121 4.3215 4.145 4.0927 4.2092 4.49 4.3464
## CV residual -0.119 0.115 0.0133 0.0367 -0.146 -0.0886 -0.0774 0.19 0.0382
## 35 45 54 56 69 71 72 73
## Predicted 4.25829 4.253 4.37238 4.1984 4.2284 4.217 4.255 4.432
## cvpred 4.27027 4.253 4.36688 4.2009 4.2325 4.222 4.258 4.431
## rating 4.26409 4.401 4.35927 4.1352 4.1415 4.337 4.461 4.317
## CV residual -0.00618 0.148 -0.00761 -0.0657 -0.0909 0.115 0.203 -0.113
## 76 79 81 82 91 96 115 118
## Predicted 4.2547 4.2172 4.1965 4.244 4.175 4.255052 4.224 4.2564
## cvpred 4.2586 4.2346 4.2116 4.253 4.193 4.259285 4.238 4.2626
## rating 4.2627 4.3294 4.2283 3.968 3.666 4.259859 4.022 4.2753
## CV residual 0.0041 0.0948 0.0167 -0.284 -0.527 0.000574 -0.216 0.0126
## 121 122 124 131 132 133 135 140
## Predicted 4.277 4.1707 4.21338 4.16006 4.1768 4.1047 4.1970 4.258
## cvpred 4.285 4.1998 4.21438 4.17316 4.1925 4.1213 4.2153 4.254
## rating 4.433 4.2341 4.20916 4.18052 4.2106 4.2180 4.1495 4.151
## CV residual 0.148 0.0343 -0.00522 0.00736 0.0181 0.0967 -0.0659 -0.103
## 150 155 164 176 183 187 194 206 215
## Predicted 4.191 4.27 4.2834 4.0645 4.246 4.154 4.116 4.207 4.074
## cvpred 4.194 4.26 4.2855 4.0949 4.246 4.161 4.117 4.214 4.104
## rating 4.536 4.54 4.2959 4.1463 4.421 3.980 4.445 3.833 3.296
## CV residual 0.341 0.28 0.0105 0.0514 0.175 -0.181 0.328 -0.381 -0.808
## 224 229 236 237 238
## Predicted 4.064 4.095 4.171 4.140 4.152
## cvpred 4.100 4.116 4.193 4.157 4.174
## rating 3.967 3.867 3.714 3.835 4.357
## CV residual -0.133 -0.249 -0.479 -0.322 0.183
##
## Sum of squares = 2.28 Mean square = 0.05 n = 47
##
## fold 2
## Observations in test set: 48
## 24 31 33 36 37 39 41 42 59
## Predicted 4.203 4.241 4.060 4.289 4.212 4.2430 4.260 4.2834 4.290
## cvpred 4.203 4.241 4.015 4.297 4.212 4.2466 4.269 4.2955 4.297
## rating 4.475 4.480 4.261 4.111 3.681 4.2017 4.305 4.2808 4.329
## CV residual 0.273 0.238 0.246 -0.186 -0.531 -0.0449 0.036 -0.0146 0.032
## 62 74 85 88 89 99 103 108 110
## Predicted 4.0889 4.4152 4.276 4.2210 4.257 4.420 4.224 4.240 4.1554
## cvpred 4.0660 4.4332 4.281 4.2234 4.261 4.443 4.218 4.245 4.1425
## rating 4.1352 4.3373 4.445 4.2669 4.426 4.234 4.415 4.089 4.1759
## CV residual 0.0691 -0.0959 0.164 0.0435 0.165 -0.209 0.197 -0.156 0.0335
## 111 116 119 126 128 138 139 146 149
## Predicted 4.1445 4.211 4.2644 4.2286 4.220 4.202 4.284 4.314 4.193
## cvpred 4.1354 4.215 4.2678 4.2292 4.224 4.203 4.292 4.327 4.192
## rating 4.2062 4.066 4.2850 4.2794 4.022 4.420 4.419 3.982 3.918
## CV residual 0.0708 -0.149 0.0172 0.0502 -0.202 0.217 0.126 -0.346 -0.274
## 152 153 159 166 170 173 175 178 191
## Predicted 4.1692 4.172 4.189 4.239 4.152 4.229 4.143 4.121 4.17680
## cvpred 4.1714 4.169 4.182 4.246 4.148 4.228 4.135 4.108 4.16543
## rating 4.2399 4.036 4.367 4.007 4.297 4.498 4.476 4.240 4.16821
## CV residual 0.0685 -0.133 0.184 -0.239 0.149 0.269 0.342 0.132 0.00279
## 192 198 202 209 210 212 214 216
## Predicted 4.10546 4.2660 4.1167 4.235 4.220 4.2445 4.1944 4.142
## cvpred 4.08746 4.2693 4.1052 4.237 4.218 4.2469 4.1946 4.126
## rating 4.08429 4.1790 4.0690 4.015 4.449 4.1682 4.1775 4.069
## CV residual -0.00316 -0.0903 -0.0361 -0.222 0.231 -0.0786 -0.0172 -0.057
## 218 233 234 240
## Predicted 4.285 4.0202 4.199 4.1288
## cvpred 4.284 3.9883 4.188 4.1209
## rating 3.877 3.9455 4.480 4.2195
## CV residual -0.406 -0.0428 0.291 0.0986
##
## Sum of squares = 1.74 Mean square = 0.04 n = 48
##
## fold 3
## Observations in test set: 48
## 2 4 5 6 7 14 17 46 47
## Predicted 4.3126 4.2599 4.273 4.2379 4.270 4.3226 4.2104 4.221 4.2347
## cvpred 4.2940 4.2449 4.263 4.2231 4.225 4.3004 4.1873 4.208 4.2308
## rating 4.3386 4.3334 4.589 4.3054 4.438 4.3386 4.1125 4.091 4.2905
## CV residual 0.0446 0.0885 0.326 0.0823 0.213 0.0382 -0.0747 -0.117 0.0596
## 51 55 60 66 67 70 77 78 80
## Predicted 4.2684 4.223 4.2718 4.196 4.273 4.3062 4.2296 4.2997 4.300
## cvpred 4.2598 4.216 4.2658 4.170 4.266 4.2924 4.2229 4.2722 4.306
## rating 4.3490 4.016 4.2822 4.403 4.621 4.1957 4.1447 4.3554 4.410
## CV residual 0.0892 -0.199 0.0164 0.233 0.355 -0.0967 -0.0781 0.0833 0.104
## 86 90 100 102 112 114 141 144 156
## Predicted 4.2957 4.3186 4.2417 4.3420 4.27 4.205 4.2292 4.252 4.20
## cvpred 4.3168 4.3230 4.2220 4.3413 4.26 4.190 4.2294 4.242 4.19
## rating 4.3907 4.2556 4.2641 4.3770 4.43 4.447 4.1759 4.424 4.30
## CV residual 0.0739 -0.0674 0.0421 0.0357 0.17 0.257 -0.0534 0.182 0.11
## 157 158 160 163 165 167 171 174 182
## Predicted 4.148 4.1914 4.23 4.172 4.314 4.31 4.1526 4.209 4.034
## cvpred 4.132 4.1955 4.22 4.157 4.325 4.30 4.1621 4.221 4.025
## rating 4.278 4.2106 4.34 4.565 4.203 4.13 4.0775 4.392 4.153
## CV residual 0.146 0.0151 0.12 0.409 -0.122 -0.17 -0.0845 0.171 0.128
## 184 190 199 201 203 208 211 219
## Predicted 4.208 4.225 4.133 4.1555 4.22847 4.128 4.080 4.0064
## cvpred 4.207 4.231 4.130 4.1554 4.22239 4.123 4.056 3.9972
## rating 3.983 3.957 3.826 4.1125 4.22975 4.231 3.932 3.9040
## CV residual -0.224 -0.274 -0.304 -0.0429 0.00736 0.109 -0.124 -0.0932
## 226 232 235 239
## Predicted 4.223 4.107 4.232 4.044
## cvpred 4.207 4.088 4.233 4.025
## rating 4.434 4.315 4.054 4.278
## CV residual 0.227 0.227 -0.179 0.253
##
## Sum of squares = 1.36 Mean square = 0.03 n = 48
##
## fold 4
## Observations in test set: 47
## 9 13 25 27 29 34 43 44
## Predicted 4.250 4.318 4.49 4.280 4.2889 4.236143 4.2909 4.188
## cvpred 4.265 4.345 5.70 4.308 4.3238 4.255755 4.3200 4.202
## rating 4.468 4.200 4.39 4.473 4.2863 4.255613 4.3386 4.024
## CV residual 0.203 -0.144 -1.31 0.165 -0.0374 -0.000143 0.0186 -0.178
## 48 50 53 64 65 68 75 97 101
## Predicted 4.205 4.348 4.158 4.3691 4.2684 4.189 4.374 4.1716 4.3604
## cvpred 4.199 4.359 4.160 4.3843 4.2707 4.202 4.408 4.1645 4.3748
## rating 4.013 4.468 4.307 4.4462 4.3438 4.094 4.129 4.0673 4.3932
## CV residual -0.187 0.109 0.146 0.0618 0.0731 -0.108 -0.279 -0.0972 0.0184
## 106 107 113 117 123 129 130 134
## Predicted 4.216 4.232 4.203780 4.303 4.2391 4.2921 4.282 4.17889
## cvpred 4.222 4.218 4.200963 4.333 4.2338 4.2904 4.293 4.19968
## rating 4.098 4.344 4.200205 3.867 4.2891 4.2062 3.890 4.20916
## CV residual -0.124 0.125 -0.000758 -0.466 0.0553 -0.0842 -0.403 0.00948
## 137 147 148 154 161 169 177 180 181
## Predicted 4.198 4.190 4.324 4.17 4.1870 4.1066 4.1193 4.130 4.295
## cvpred 4.211 4.205 4.336 4.33 4.1968 4.1011 4.0997 4.127 4.274
## rating 3.892 4.220 4.184 3.98 4.1558 4.1352 4.1942 4.299 4.122
## CV residual -0.319 0.014 -0.153 -0.35 -0.0411 0.0341 0.0945 0.172 -0.152
## 185 189 193 195 197 200 205 213
## Predicted 4.147 4.098 4.1746 4.13258 4.309 4.272 4.200 4.053
## cvpred 4.163 4.058 4.1554 4.13717 4.313 4.289 4.198 4.005
## rating 3.980 4.553 4.0843 4.13517 4.482 4.331 4.050 3.822
## CV residual -0.183 0.495 -0.0711 -0.00201 0.169 0.042 -0.148 -0.183
## 220 222 225 227 230
## Predicted 4.1659 4.00079 4.1754 4.107 4.067
## cvpred 4.1582 3.99064 4.1521 4.099 4.056
## rating 4.0927 3.99636 4.1897 3.770 4.517
## CV residual -0.0656 0.00573 0.0376 -0.329 0.461
##
## Sum of squares = 3.45 Mean square = 0.07 n = 47
##
## fold 5
## Observations in test set: 47
## 1 10 11 12 16 22 28 30 32
## Predicted 4.295 4.2772 4.3689 4.3374 4.251 4.33 4.335 4.151 4.358
## cvpred 4.284 4.2680 4.3492 4.3124 4.247 4.31 4.311 4.140 4.337
## rating 4.606 4.2513 4.2905 4.3969 4.069 4.54 4.467 4.414 4.469
## CV residual 0.322 -0.0166 -0.0588 0.0845 -0.178 0.23 0.156 0.274 0.133
## 38 40 49 52 57 58 61 83 84
## Predicted 4.309 4.427 4.174 4.275 4.28 4.306948 4.2853 4.380 4.2573
## cvpred 4.291 4.387 4.196 4.265 4.25 4.292791 4.2635 4.349 4.2586
## rating 4.523 4.586 3.711 4.587 4.39 4.291828 4.2136 4.586 4.2946
## CV residual 0.232 0.199 -0.485 0.322 0.14 -0.000963 -0.0499 0.237 0.0359
## 87 92 93 94 95 98 104 105 109
## Predicted 4.341 4.291 4.2839 4.181 4.3193 4.331 4.2334 4.2170 4.213
## cvpred 4.315 4.275 4.2708 4.181 4.3066 4.324 4.2228 4.1987 4.211
## rating 4.234 3.822 4.3215 4.293 4.2428 4.494 4.1271 4.2528 4.197
## CV residual -0.081 -0.453 0.0507 0.113 -0.0638 0.171 -0.0957 0.0541 -0.014
## 120 125 127 136 142 143 145 151
## Predicted 4.2140 4.3212 4.263 4.19818 4.1809 4.2084 4.1752 4.2130
## cvpred 4.2150 4.3090 4.242 4.19718 4.1880 4.2173 4.1750 4.1978
## rating 4.1287 4.4067 4.480 4.20020 4.2268 4.2905 4.1942 4.1076
## CV residual -0.0863 0.0977 0.237 0.00302 0.0389 0.0731 0.0192 -0.0902
## 162 168 172 179 186 188 196 207
## Predicted 4.1741 4.204 4.236 4.0401 4.1667 4.200 4.1055 4.1231
## cvpred 4.1691 4.214 4.234 4.0681 4.1806 4.210 4.1108 4.1310
## rating 4.0826 4.382 4.093 4.0236 4.1010 3.902 4.1190 4.0977
## CV residual -0.0864 0.168 -0.142 -0.0445 -0.0796 -0.308 0.0082 -0.0333
## 217 221 223 228
## Predicted 4.132 4.155 4.096 4.199
## cvpred 4.138 4.154 4.120 4.211
## rating 3.782 4.393 4.231 4.449
## CV residual -0.356 0.239 0.112 0.238
##
## Sum of squares = 1.6 Mean square = 0.03 n = 47
##
## Overall (Sum over all 47 folds)
## ms
## 0.0441