# Fetch the data; the path is resolved relative to the working directory.
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as candidate predictors in the regression.
predictors <- c(
    "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
    "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
    "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable. Single-bracket indexing keeps a one-column data
# frame, so the column is still called "rating" after cbind() below.
rating <- qb_stats_w_combine["rating"]

# Generate the clean data set: bind response and predictors, drop every row
# that has a missing value, add 0.1 to all columns and take natural logs.
# NOTE(review): the 0.1 offset presumably keeps zero-valued entries finite
# under log() -- confirm against the data.
data.log.w_combine.for_rating <- data.frame(
    log(na.omit(cbind(rating, college_stats)) + 0.1)
)

# Generate the full linear model: rating against every remaining column.
lm.log.w_combine.rating <- lm(
    formula = rating ~ .,
    data = data.log.w_combine.for_rating
)

# Stepwise model search (terms may be dropped or re-added) driven by AIC.
# stepAIC() is provided by MASS; the call is namespace-qualified so it does
# not depend on MASS having been attached earlier in the session.
step_reg.log.w_combine.rating <- MASS::stepAIC(
    lm.log.w_combine.rating,
    direction = "both"
)
## Start:  AIC=-122.7
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq   RSS  AIC
## - cone         1    0.0000 0.583 -125
## - broad_jump   1    0.0005 0.584 -125
## - c_avg_tds    1    0.0014 0.585 -125
## - shuttle      1    0.0021 0.585 -125
## - c_avg_yds    1    0.0030 0.586 -124
## - c_avg_inter  1    0.0044 0.588 -124
## - c_rate       1    0.0046 0.588 -124
## - height       1    0.0066 0.590 -124
## - wonderlic    1    0.0141 0.597 -124
## <none>                     0.583 -123
## - vert_leap    1    0.0323 0.616 -123
## - X40          1    0.0457 0.629 -122
## - weight       1    0.0489 0.632 -122
## - c_numyrs     1    0.0601 0.643 -121
## - c_avg_att    1    0.0858 0.669 -120
## - c_avg_cmpp   1    0.1074 0.691 -118
## - c_pct        1    0.1092 0.693 -118
## - age          1    0.1217 0.705 -118
## 
## Step:  AIC=-124.7
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq   RSS  AIC
## - broad_jump   1    0.0005 0.584 -127
## - c_avg_tds    1    0.0014 0.585 -127
## - shuttle      1    0.0029 0.586 -126
## - c_avg_yds    1    0.0030 0.586 -126
## - c_rate       1    0.0046 0.588 -126
## - c_avg_inter  1    0.0051 0.589 -126
## - height       1    0.0068 0.590 -126
## - wonderlic    1    0.0171 0.601 -126
## <none>                     0.583 -125
## - vert_leap    1    0.0329 0.616 -125
## - X40          1    0.0459 0.629 -124
## - weight       1    0.0515 0.635 -124
## - c_numyrs     1    0.0614 0.645 -123
## + cone         1    0.0000 0.583 -123
## - c_avg_att    1    0.0858 0.669 -122
## - c_avg_cmpp   1    0.1074 0.691 -120
## - c_pct        1    0.1091 0.693 -120
## - age          1    0.1219 0.705 -120
## 
## Step:  AIC=-126.7
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + shuttle + vert_leap
## 
##               Df Sum of Sq   RSS  AIC
## - c_avg_tds    1    0.0022 0.586 -128
## - shuttle      1    0.0027 0.587 -128
## - c_avg_yds    1    0.0042 0.588 -128
## - c_avg_inter  1    0.0046 0.589 -128
## - c_rate       1    0.0063 0.590 -128
## - height       1    0.0072 0.591 -128
## - wonderlic    1    0.0177 0.602 -128
## <none>                     0.584 -127
## - vert_leap    1    0.0471 0.631 -126
## - weight       1    0.0510 0.635 -126
## - X40          1    0.0537 0.638 -125
## + broad_jump   1    0.0005 0.583 -125
## + cone         1    0.0001 0.584 -125
## - c_numyrs     1    0.0671 0.651 -124
## - c_avg_att    1    0.0873 0.671 -123
## - c_avg_cmpp   1    0.1075 0.691 -122
## - c_pct        1    0.1086 0.693 -122
## - age          1    0.1213 0.705 -122
## 
## Step:  AIC=-128.5
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     shuttle + vert_leap
## 
##               Df Sum of Sq   RSS  AIC
## - c_avg_yds    1    0.0023 0.588 -130
## - shuttle      1    0.0028 0.589 -130
## - c_rate       1    0.0094 0.596 -130
## - height       1    0.0095 0.596 -130
## - c_avg_inter  1    0.0114 0.598 -130
## - wonderlic    1    0.0157 0.602 -130
## <none>                     0.586 -128
## - vert_leap    1    0.0503 0.636 -127
## - weight       1    0.0577 0.644 -127
## - X40          1    0.0583 0.644 -127
## + c_avg_tds    1    0.0022 0.584 -127
## + broad_jump   1    0.0014 0.585 -127
## + cone         1    0.0002 0.586 -126
## - c_numyrs     1    0.0650 0.651 -126
## - c_avg_att    1    0.1080 0.694 -124
## - c_pct        1    0.1086 0.695 -124
## - c_avg_cmpp   1    0.1167 0.703 -124
## - age          1    0.1245 0.711 -123
## 
## Step:  AIC=-130.4
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_numyrs + c_avg_att + X40 + wonderlic + shuttle + 
##     vert_leap
## 
##               Df Sum of Sq   RSS  AIC
## - shuttle      1    0.0049 0.593 -132
## - height       1    0.0091 0.598 -132
## - wonderlic    1    0.0147 0.603 -131
## - c_avg_inter  1    0.0158 0.604 -131
## - c_rate       1    0.0243 0.613 -131
## <none>                     0.588 -130
## - vert_leap    1    0.0484 0.637 -129
## - weight       1    0.0555 0.644 -129
## - X40          1    0.0593 0.648 -129
## - c_numyrs     1    0.0627 0.651 -128
## + c_avg_yds    1    0.0023 0.586 -128
## + broad_jump   1    0.0015 0.587 -128
## + c_avg_tds    1    0.0003 0.588 -128
## + cone         1    0.0001 0.588 -128
## - c_pct        1    0.1101 0.699 -126
## - c_avg_att    1    0.1201 0.709 -125
## - c_avg_cmpp   1    0.1217 0.710 -125
## - age          1    0.1275 0.716 -125
## 
## Step:  AIC=-132.1
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_numyrs + c_avg_att + X40 + wonderlic + vert_leap
## 
##               Df Sum of Sq   RSS  AIC
## - height       1    0.0101 0.604 -133
## - c_avg_inter  1    0.0145 0.608 -133
## - wonderlic    1    0.0197 0.613 -133
## - c_rate       1    0.0231 0.616 -133
## <none>                     0.593 -132
## - X40          1    0.0564 0.650 -131
## - vert_leap    1    0.0582 0.652 -130
## - c_numyrs     1    0.0585 0.652 -130
## + shuttle      1    0.0049 0.588 -130
## + c_avg_yds    1    0.0044 0.589 -130
## + cone         1    0.0013 0.592 -130
## + broad_jump   1    0.0011 0.592 -130
## + c_avg_tds    1    0.0011 0.592 -130
## - weight       1    0.0823 0.676 -129
## - c_pct        1    0.1055 0.699 -128
## - c_avg_att    1    0.1153 0.709 -127
## - c_avg_cmpp   1    0.1169 0.710 -127
## - age          1    0.1368 0.730 -126
## 
## Step:  AIC=-133.4
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_numyrs + c_avg_att + X40 + wonderlic + vert_leap
## 
##               Df Sum of Sq   RSS  AIC
## - c_avg_inter  1    0.0167 0.620 -134
## - c_rate       1    0.0263 0.630 -134
## <none>                     0.604 -133
## - wonderlic    1    0.0357 0.639 -133
## + height       1    0.0101 0.593 -132
## + shuttle      1    0.0059 0.598 -132
## + c_avg_yds    1    0.0041 0.599 -132
## - c_numyrs     1    0.0641 0.668 -132
## + broad_jump   1    0.0023 0.601 -132
## - vert_leap    1    0.0665 0.670 -131
## + c_avg_tds    1    0.0004 0.603 -131
## + cone         1    0.0002 0.603 -131
## - X40          1    0.0809 0.684 -131
## - weight       1    0.0885 0.692 -130
## - c_pct        1    0.1101 0.714 -129
## - c_avg_att    1    0.1220 0.726 -128
## - c_avg_cmpp   1    0.1235 0.727 -128
## - age          1    0.1297 0.733 -128
## 
## Step:  AIC=-134.4
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_numyrs + 
##     c_avg_att + X40 + wonderlic + vert_leap
## 
##               Df Sum of Sq   RSS  AIC
## <none>                     0.620 -134
## + c_avg_inter  1    0.0167 0.604 -133
## - c_numyrs     1    0.0513 0.672 -133
## - wonderlic    1    0.0533 0.674 -133
## + height       1    0.0124 0.608 -133
## - vert_leap    1    0.0579 0.678 -133
## + c_avg_yds    1    0.0089 0.611 -133
## - c_rate       1    0.0631 0.683 -133
## + shuttle      1    0.0044 0.616 -133
## + cone         1    0.0023 0.618 -132
## + broad_jump   1    0.0007 0.620 -132
## + c_avg_tds    1    0.0000 0.620 -132
## - X40          1    0.0714 0.692 -132
## - weight       1    0.0958 0.716 -131
## - c_pct        1    0.1086 0.729 -130
## - age          1    0.1209 0.741 -130
## - c_avg_cmpp   1    0.1252 0.745 -129
## - c_avg_att    1    0.1257 0.746 -129
# Coefficient table and overall fit statistics of the stepwise-selected model.
summary(object = step_reg.log.w_combine.rating)
## 
## Call:
## lm(formula = rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_numyrs + c_avg_att + X40 + wonderlic + vert_leap, data = data.log.w_combine.for_rating)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3132 -0.0839 -0.0043  0.0871  0.3266 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   53.547     23.831    2.25    0.033 *
## weight         1.049      0.513    2.04    0.051 .
## age            1.023      0.446    2.29    0.030 *
## c_avg_cmpp    11.851      5.076    2.33    0.027 *
## c_rate        -0.619      0.374   -1.66    0.109  
## c_pct        -10.714      4.927   -2.17    0.039 *
## c_numyrs       0.206      0.138    1.49    0.147  
## c_avg_att    -11.816      5.051   -2.34    0.027 *
## X40           -1.963      1.114   -1.76    0.089 .
## wonderlic     -0.173      0.113   -1.52    0.139  
## vert_leap     -0.568      0.358   -1.59    0.124  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.152 on 27 degrees of freedom
## Multiple R-squared: 0.478,   Adjusted R-squared: 0.284 
## F-statistic: 2.47 on 10 and 27 DF,  p-value: 0.0302
# Diagnostic plots for the stepwise-selected model (one figure per diagnostic).
plot(x = step_reg.log.w_combine.rating)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

## Warning: NaNs produced
## Warning: NaNs produced

plot of chunk unnamed-chunk-1

# Best-subsets search over the same data, keeping up to 10 candidate models
# for each subset size.
leaps.log.w_combine.rating <- leaps::regsubsets(
    rating ~ .,
    data = data.log.w_combine.for_rating,
    nbest = 10
)

# Plot R-squared for the candidate subsets. By default subsets() places its
# legend interactively with the mouse, which is impossible in a
# non-interactive (knitr / Rscript) run and aborts with
# "invalid coordinate lengths". A fixed legend position avoids that.
car::subsets(
    leaps.log.w_combine.rating,
    statistic = "rsq",
    legend = "topleft"
)
## Error: invalid coordinate lengths

plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model.
# The data frame and the model are passed positionally (first and second
# argument) rather than by keyword.
# NOTE(review): the first argument appears to be named `df` in older DAAG
# releases and `data` in newer ones -- confirm against the installed version.
DAAG::cv.lm(
    data.log.w_combine.for_rating,
    step_reg.log.w_combine.rating,
    m = 5
)
## Analysis of Variance Table
## 
## Response: rating
##            Df Sum Sq Mean Sq F value Pr(>F)   
## weight      1  0.046  0.0463    2.02 0.1670   
## age         1  0.263  0.2631   11.45 0.0022 **
## c_avg_cmpp  1  0.000  0.0001    0.01 0.9373   
## c_rate      1  0.000  0.0005    0.02 0.8863   
## c_pct       1  0.024  0.0243    1.06 0.3130   
## c_numyrs    1  0.007  0.0066    0.29 0.5960   
## c_avg_att   1  0.092  0.0918    4.00 0.0558 . 
## X40         1  0.026  0.0257    1.12 0.2994   
## wonderlic   1  0.051  0.0506    2.20 0.1494   
## vert_leap   1  0.058  0.0579    2.52 0.1240   
## Residuals  27  0.620  0.0230                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                  6      24      26     32      38     46    59
## Predicted   4.3134  4.3782  4.2370  4.444  4.1452 4.1440 4.342
## cvpred      4.3229  4.3591  4.2347  4.474  4.1876 4.1243 4.173
## rating      4.3883  4.2863  4.1415  4.305  4.0943 4.1447 4.467
## CV residual 0.0654 -0.0728 -0.0932 -0.168 -0.0933 0.0204 0.294
## 
## Sum of squares = 0.14    Mean square = 0.02    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                 7      19    21    27    37    43      55    65
## Predicted    4.47  4.3367 4.398 4.254  4.41 4.115  4.2067 4.286
## cvpred       4.38  4.2716 4.368 4.095  9.66 3.968  4.2396 4.132
## rating       4.28  4.2513 4.589 4.414  4.39 4.135  4.1447 4.461
## CV residual -0.10 -0.0203 0.221 0.319 -5.27 0.167 -0.0949 0.329
## 
## Sum of squares = 28.1    Mean square = 3.51    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                 5    12    13      17     39    40     50    56
## Predicted   4.262 4.106 4.280  4.3228  4.182 4.186 4.0948 4.280
## cvpred      4.207 4.017 4.145  4.2994  4.295 4.171 4.1017 4.137
## rating      4.339 4.228 4.438  4.2808  4.093 4.307 4.1125 4.606
## CV residual 0.132 0.211 0.293 -0.0186 -0.203 0.135 0.0108 0.469
## 
## Sum of squares = 0.43    Mean square = 0.05    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                  4     15     18       20     28     52     63      64
## Predicted    4.024  4.472 4.3501  4.30535  4.435  4.274  4.397  4.3256
## cvpred       4.135  4.535 4.3853  4.26513  4.468  4.277  4.487  4.3197
## rating       3.711  4.321 4.4006  4.26409  4.355  4.013  4.359  4.2905
## CV residual -0.424 -0.214 0.0153 -0.00105 -0.112 -0.265 -0.127 -0.0293
## 
## Sum of squares = 0.33    Mean square = 0.04    n = 8 
## 
## fold 5 
## Observations in test set: 7 
##                  1      3    25   30      42     49    61
## Predicted   4.3134  4.094 4.515 4.24  4.3841 4.4786 4.171
## cvpred      4.3027  4.250 4.480 4.19  4.4315 4.4187 3.964
## rating      4.3373  3.968 4.586 4.34  4.3969 4.4694 4.261
## CV residual 0.0346 -0.282 0.106 0.15 -0.0345 0.0506 0.297
## 
## Sum of squares = 0.21    Mean square = 0.03    n = 7 
## 
## Overall (Sum over all 7 folds) 
##    ms 
## 0.768