# Load the quarterback stats-with-combine table
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as model predictors (physical, college-stat and combine
# columns, going by their names)
predictors <- c(
    "height", "weight", "age",
    "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
    "c_numyrs", "c_avg_att",
    "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]

# Response variable, kept as a one-column data frame so the column keeps
# its name when bound to the predictors
rating <- qb_stats_w_combine[, "rating", drop = FALSE]

# Clean data set: drop every row with a missing value, then centre and
# scale all columns (response included)
data.scaled.w_combine.for_rating <- local({
    complete_rows <- na.omit(cbind(rating, college_stats))
    data.frame(scale(complete_rows))
})

# Full linear model: rating regressed on every predictor
lm.scaled.w_combine.rating <- lm(
    formula = rating ~ .,
    data = data.scaled.w_combine.for_rating
)

# Stepwise search (terms may be dropped or re-added) for the lowest-AIC
# model; stepAIC() is assumed to be attached already (it lives in MASS)
step_reg.scaled.w_combine.rating <- stepAIC(
    lm.scaled.w_combine.rating,
    direction = "both"
)
## Start:  AIC=13.81
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - cone         1     0.000 21.2 11.8
## - height       1     0.008 21.2 11.8
## - c_avg_tds    1     0.012 21.2 11.8
## - c_pct        1     0.020 21.2 11.8
## - c_avg_inter  1     0.031 21.2 11.9
## - c_avg_cmpp   1     0.110 21.3 12.0
## - c_rate       1     0.135 21.3 12.1
## - c_avg_yds    1     0.180 21.4 12.1
## - shuttle      1     0.199 21.4 12.2
## - X40          1     0.260 21.4 12.3
## - broad_jump   1     0.339 21.5 12.4
## - c_avg_att    1     0.454 21.6 12.6
## - wonderlic    1     0.704 21.9 13.1
## <none>                     21.2 13.8
## - c_numyrs     1     1.147 22.3 13.8
## - vert_leap    1     1.512 22.7 14.4
## - weight       1     1.773 23.0 14.9
## - age          1     2.156 23.4 15.5
## 
## Step:  AIC=11.81
## rating ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - height       1     0.008 21.2  9.83
## - c_avg_tds    1     0.012 21.2  9.84
## - c_pct        1     0.022 21.2  9.85
## - c_avg_inter  1     0.032 21.2  9.87
## - c_avg_cmpp   1     0.111 21.3 10.01
## - c_rate       1     0.143 21.3 10.07
## - c_avg_yds    1     0.184 21.4 10.14
## - shuttle      1     0.235 21.4 10.23
## - X40          1     0.265 21.5 10.29
## - broad_jump   1     0.341 21.5 10.42
## - c_avg_att    1     0.455 21.6 10.62
## - wonderlic    1     0.837 22.0 11.29
## <none>                     21.2 11.81
## - c_numyrs     1     1.159 22.4 11.84
## - vert_leap    1     1.544 22.7 12.49
## - weight       1     1.842 23.0 12.98
## - age          1     2.160 23.4 13.50
## + cone         1     0.000 21.2 13.81
## 
## Step:  AIC=9.83
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_tds    1     0.009 21.2  7.84
## - c_pct        1     0.029 21.2  7.88
## - c_avg_inter  1     0.031 21.2  7.88
## - c_avg_cmpp   1     0.108 21.3  8.02
## - c_rate       1     0.168 21.4  8.13
## - c_avg_yds    1     0.210 21.4  8.20
## - shuttle      1     0.238 21.4  8.25
## - X40          1     0.294 21.5  8.35
## - broad_jump   1     0.345 21.6  8.44
## - c_avg_att    1     0.492 21.7  8.70
## - wonderlic    1     1.132 22.3  9.80
## <none>                     21.2  9.83
## - c_numyrs     1     1.246 22.4 10.00
## - vert_leap    1     1.551 22.8 10.51
## - age          1     2.216 23.4 11.61
## + height       1     0.008 21.2 11.81
## + cone         1     0.000 21.2 11.83
## - weight       1     3.001 24.2 12.86
## 
## Step:  AIC=7.84
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + shuttle + 
##     vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_inter  1     0.045 21.3  5.93
## - c_pct        1     0.092 21.3  6.01
## - c_avg_cmpp   1     0.118 21.3  6.05
## - shuttle      1     0.236 21.4  6.27
## - c_avg_yds    1     0.257 21.5  6.30
## - X40          1     0.286 21.5  6.35
## - broad_jump   1     0.410 21.6  6.57
## - c_rate       1     0.415 21.6  6.58
## - c_avg_att    1     0.518 21.7  6.76
## <none>                     21.2  7.84
## - wonderlic    1     1.160 22.4  7.87
## - c_numyrs     1     1.249 22.5  8.02
## - vert_leap    1     1.542 22.8  8.51
## - age          1     2.208 23.4  9.61
## + c_avg_tds    1     0.009 21.2  9.83
## + height       1     0.005 21.2  9.84
## + cone         1     0.000 21.2  9.84
## - weight       1     2.994 24.2 10.86
## 
## Step:  AIC=5.93
## rating ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_yds + 
##     c_numyrs + c_avg_att + X40 + wonderlic + shuttle + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_pct        1     0.052 21.3 4.02
## - shuttle      1     0.206 21.5 4.29
## - c_avg_yds    1     0.222 21.5 4.32
## - X40          1     0.347 21.6 4.54
## - c_avg_cmpp   1     0.375 21.6 4.59
## - broad_jump   1     0.375 21.6 4.59
## - c_rate       1     0.380 21.6 4.60
## - wonderlic    1     1.141 22.4 5.91
## <none>                     21.3 5.93
## - vert_leap    1     1.537 22.8 6.58
## - c_numyrs     1     1.591 22.9 6.67
## - c_avg_att    1     1.877 23.1 7.14
## + c_avg_inter  1     0.045 21.2 7.84
## + c_avg_tds    1     0.023 21.2 7.88
## + cone         1     0.007 21.2 7.91
## + height       1     0.003 21.2 7.92
## - age          1     2.366 23.6 7.94
## - weight       1     2.951 24.2 8.86
## 
## Step:  AIC=4.02
## rating ~ weight + age + c_avg_cmpp + c_rate + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - shuttle      1     0.166 21.5 2.31
## - c_avg_yds    1     0.214 21.5 2.40
## - X40          1     0.310 21.6 2.57
## - broad_jump   1     0.381 21.7 2.69
## - wonderlic    1     1.092 22.4 3.92
## <none>                     21.3 4.02
## - c_rate       1     1.374 22.7 4.39
## - vert_leap    1     1.572 22.9 4.72
## - c_numyrs     1     1.737 23.1 5.00
## - c_avg_att    1     1.983 23.3 5.40
## - c_avg_cmpp   1     2.085 23.4 5.57
## + c_avg_tds    1     0.067 21.2 5.90
## + c_pct        1     0.052 21.3 5.93
## + height       1     0.008 21.3 6.00
## + c_avg_inter  1     0.006 21.3 6.01
## + cone         1     0.001 21.3 6.02
## - age          1     2.455 23.8 6.16
## - weight       1     2.914 24.2 6.89
## 
## Step:  AIC=2.31
## rating ~ weight + age + c_avg_cmpp + c_rate + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_yds    1     0.122 21.6 0.53
## - broad_jump   1     0.317 21.8 0.87
## - X40          1     0.705 22.2 1.54
## - wonderlic    1     0.939 22.4 1.94
## <none>                     21.5 2.31
## - c_rate       1     1.238 22.7 2.44
## - vert_leap    1     1.409 22.9 2.73
## - c_numyrs     1     1.657 23.1 3.14
## - c_avg_att    1     1.926 23.4 3.58
## + shuttle      1     0.166 21.3 4.02
## + cone         1     0.036 21.4 4.25
## + c_avg_tds    1     0.025 21.4 4.27
## - age          1     2.363 23.8 4.28
## + c_pct        1     0.012 21.5 4.29
## + height       1     0.008 21.5 4.30
## + c_avg_inter  1     0.004 21.5 4.31
## - c_avg_cmpp   1     2.459 23.9 4.43
## - weight       1     2.838 24.3 5.03
## 
## Step:  AIC=0.53
## rating ~ weight + age + c_avg_cmpp + c_rate + c_numyrs + c_avg_att + 
##     X40 + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - broad_jump   1     0.231 21.8 -1.07
## - wonderlic    1     0.915 22.5  0.10
## - X40          1     0.966 22.6  0.19
## <none>                     21.6  0.53
## - c_rate       1     1.373 23.0  0.87
## - vert_leap    1     1.400 23.0  0.91
## - c_numyrs     1     1.612 23.2  1.26
## - c_avg_att    1     2.179 23.8  2.18
## + c_avg_yds    1     0.122 21.5  2.31
## + shuttle      1     0.074 21.5  2.40
## + cone         1     0.038 21.6  2.46
## + c_pct        1     0.036 21.6  2.46
## + c_avg_inter  1     0.024 21.6  2.49
## + height       1     0.016 21.6  2.50
## + c_avg_tds    1     0.000 21.6  2.53
## - age          1     2.662 24.3  2.94
## - c_avg_cmpp   1     2.823 24.4  3.20
## - weight       1     3.055 24.6  3.56
## 
## Step:  AIC=-1.07
## rating ~ weight + age + c_avg_cmpp + c_rate + c_numyrs + c_avg_att + 
##     X40 + wonderlic + vert_leap
## 
##               Df Sum of Sq  RSS    AIC
## - wonderlic    1      0.82 22.6 -1.660
## - vert_leap    1      1.18 23.0 -1.071
## <none>                     21.8 -1.068
## - c_rate       1      1.19 23.0 -1.046
## - c_numyrs     1      1.40 23.2 -0.706
## - X40          1      1.59 23.4 -0.403
## - c_avg_att    1      2.08 23.9  0.398
## + broad_jump   1      0.23 21.6  0.528
## + shuttle      1      0.06 21.8  0.824
## + cone         1      0.04 21.8  0.856
## + c_avg_yds    1      0.04 21.8  0.870
## + c_avg_tds    1      0.03 21.8  0.879
## + height       1      0.01 21.8  0.913
## + c_pct        1      0.00 21.8  0.927
## + c_avg_inter  1      0.00 21.8  0.931
## - c_avg_cmpp   1      2.70 24.5  1.357
## - age          1      2.89 24.7  1.659
## - weight       1      3.65 25.5  2.808
## 
## Step:  AIC=-1.66
## rating ~ weight + age + c_avg_cmpp + c_rate + c_numyrs + c_avg_att + 
##     X40 + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - c_rate       1      0.98 23.6 -2.05
## - c_numyrs     1      1.09 23.7 -1.87
## <none>                     22.6 -1.66
## - vert_leap    1      1.24 23.9 -1.64
## - c_avg_att    1      1.50 24.1 -1.23
## + wonderlic    1      0.82 21.8 -1.07
## - X40          1      1.85 24.5 -0.68
## - c_avg_cmpp   1      2.02 24.7 -0.41
## + height       1      0.26 22.4 -0.09
## + broad_jump   1      0.14 22.5  0.10
## + cone         1      0.04 22.6  0.27
## + c_avg_yds    1      0.04 22.6  0.28
## + c_pct        1      0.03 22.6  0.29
## + c_avg_inter  1      0.01 22.6  0.32
## + c_avg_tds    1      0.00 22.6  0.33
## + shuttle      1      0.00 22.6  0.34
## - weight       1      5.60 28.2  4.73
## - age          1      6.17 28.8  5.49
## 
## Step:  AIC=-2.05
## rating ~ weight + age + c_avg_cmpp + c_numyrs + c_avg_att + X40 + 
##     vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - c_numyrs     1      0.44 24.1 -3.36
## - c_avg_att    1      0.68 24.3 -2.98
## - c_avg_cmpp   1      1.07 24.7 -2.37
## <none>                     23.6 -2.05
## + c_rate       1      0.98 22.6 -1.66
## + c_pct        1      0.88 22.8 -1.49
## - X40          1      1.81 25.4 -1.25
## - vert_leap    1      1.82 25.4 -1.23
## + wonderlic    1      0.61 23.0 -1.05
## + c_avg_tds    1      0.36 23.3 -0.63
## + height       1      0.26 23.4 -0.47
## + c_avg_yds    1      0.22 23.4 -0.40
## + c_avg_inter  1      0.08 23.6 -0.18
## + cone         1      0.04 23.6 -0.11
## + broad_jump   1      0.03 23.6 -0.09
## + shuttle      1      0.00 23.6 -0.06
## - weight       1      5.56 29.2  3.97
## - age          1      7.29 30.9  6.17
## 
## Step:  AIC=-3.36
## rating ~ weight + age + c_avg_cmpp + c_avg_att + X40 + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_att    1      0.47 24.5 -4.62
## - c_avg_cmpp   1      0.87 24.9 -4.01
## <none>                     24.1 -3.36
## - X40          1      1.60 25.7 -2.91
## - vert_leap    1      1.73 25.8 -2.72
## + wonderlic    1      0.48 23.6 -2.12
## + c_numyrs     1      0.44 23.6 -2.05
## + height       1      0.37 23.7 -1.95
## + c_rate       1      0.32 23.7 -1.87
## + c_avg_tds    1      0.26 23.8 -1.77
## + c_avg_yds    1      0.26 23.8 -1.76
## + c_pct        1      0.13 23.9 -1.57
## + cone         1      0.04 24.0 -1.42
## + shuttle      1      0.02 24.0 -1.39
## + c_avg_inter  1      0.00 24.1 -1.36
## + broad_jump   1      0.00 24.1 -1.36
## - weight       1      5.68 29.7  2.69
## - age          1      7.36 31.4  4.78
## 
## Step:  AIC=-4.62
## rating ~ weight + age + c_avg_cmpp + X40 + vert_leap
## 
##               Df Sum of Sq  RSS   AIC
## <none>                     24.5 -4.62
## - c_avg_cmpp   1      1.41 25.9 -4.50
## - X40          1      1.71 26.3 -4.06
## - vert_leap    1      2.08 26.6 -3.53
## + height       1      0.53 24.0 -3.45
## + c_avg_att    1      0.47 24.1 -3.36
## + c_avg_yds    1      0.46 24.1 -3.33
## + c_avg_inter  1      0.24 24.3 -3.00
## + wonderlic    1      0.23 24.3 -2.98
## + c_numyrs     1      0.23 24.3 -2.98
## + c_avg_tds    1      0.14 24.4 -2.84
## + shuttle      1      0.10 24.4 -2.77
## + c_rate       1      0.05 24.5 -2.70
## + broad_jump   1      0.01 24.5 -2.65
## + c_pct        1      0.00 24.5 -2.63
## + cone         1      0.00 24.5 -2.63
## - weight       1      6.92 31.5  2.82
## - age          1      7.19 31.7  3.14
# Print the coefficient table and fit statistics of the model that the
# stepwise search settled on
summary(step_reg.scaled.w_combine.rating)
## 
## Call:
## lm(formula = rating ~ weight + age + c_avg_cmpp + X40 + vert_leap, 
##     data = data.scaled.w_combine.for_rating)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8369 -0.5566 -0.0951  0.5703  2.3523 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.16e-15   1.42e-01    0.00   1.0000   
## weight       4.87e-01   1.62e-01    3.00   0.0051 **
## age          4.96e-01   1.62e-01    3.06   0.0044 **
## c_avg_cmpp   1.99e-01   1.47e-01    1.36   0.1848   
## X40         -3.32e-01   2.22e-01   -1.50   0.1447   
## vert_leap   -3.67e-01   2.23e-01   -1.65   0.1092   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.876 on 32 degrees of freedom
## Multiple R-squared: 0.337,   Adjusted R-squared: 0.233 
## F-statistic: 3.25 on 5 and 32 DF,  p-value: 0.0174
# Draw the diagnostic plots for the stepwise-selected model
plot(step_reg.scaled.w_combine.rating)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over the same predictors, keeping up to the 10 best
# models of each size (nbest = 10)
leaps.scaled.w_combine.rating <- regsubsets(rating ~ ., data = data.scaled.w_combine.for_rating, 
    nbest = 10)

# Plot R-squared against subset size. By default subsets() positions its
# legend with the mouse via locator(); on a non-interactive device (e.g. a
# knitted report) locator() returns nothing and legend() stops with
# "invalid coordinate lengths". legend = FALSE skips the on-plot legend, and
# the abbreviation key is returned as a data frame instead.
subsets(leaps.scaled.w_combine.rating, statistic = "rsq", legend = FALSE)
## Error: invalid coordinate lengths

plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally rather than as `df =`: DAAG renamed
# the first argument of cv.lm() from `df` to `data` in later releases, so
# the named form fails there, while the positional form works with both.
cv.lm(data.scaled.w_combine.for_rating, step_reg.scaled.w_combine.rating, 
    m = 5)
## Analysis of Variance Table
## 
## Response: rating
##            Df Sum Sq Mean Sq F value Pr(>F)   
## weight      1   1.78    1.78    2.32 0.1378   
## age         1   7.08    7.08    9.24 0.0047 **
## c_avg_cmpp  1   1.41    1.41    1.84 0.1841   
## X40         1   0.11    0.11    0.14 0.7098   
## vert_leap   1   2.08    2.08    2.71 0.1092   
## Residuals  32  24.54    0.77                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                   6     24      26       32     38     46      59
## Predicted   -0.0371  0.775 -0.7961  0.43217 -0.484 -0.746  0.0304
## cvpred      -0.0162  0.894 -0.8552  0.42652 -0.450 -0.656 -0.3169
## rating       0.5204 -0.102 -0.8837  0.00987 -1.115 -0.868  1.0470
## CV residual  0.5366 -0.996 -0.0284 -0.41666 -0.665 -0.211  1.3639
## 
## Sum of squares = 3.8    Mean square = 0.54    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                  7      19    21     27     37     43     55    65
## Predicted    0.525 -0.0601 1.096  0.113 -0.212 -0.767 -0.386 -0.30
## cvpred       0.354 -0.0598 1.168 -0.431 -0.986 -0.595 -0.147 -1.45
## rating      -0.126 -0.3013 1.948  0.688  0.512 -0.916 -0.868  1.01
## CV residual -0.480 -0.2415 0.781  1.119  1.499 -0.320 -0.720  2.46
## 
## Sum of squares = 11.1    Mean square = 1.38    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                  5     12     13       17     39      40     50     56
## Predicted   0.2503 -0.900 0.1243  0.10263 -0.450 -0.6119 -0.446 -0.268
## cvpred      0.1429 -1.205 0.0898  0.00675 -0.163 -0.9399 -0.457 -0.845
## rating      0.2093 -0.429 0.8475 -0.13373 -1.123  0.0178 -1.027  2.084
## CV residual 0.0664  0.776 0.7578 -0.14048 -0.960  0.9578 -0.571  2.929
## 
## Sum of squares = 11.9    Mean square = 1.49    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                  4     15     18     20     28      52     63      64
## Predicted   -1.238  0.692 0.0462  0.195  0.679 -0.0598  0.925 -0.0953
## cvpred      -0.959  0.992 0.2086  0.190  0.763  0.0439  1.448 -0.2195
## rating      -2.639  0.106 0.6002 -0.229  0.313 -1.4900  0.337 -0.0779
## CV residual -1.680 -0.886 0.3917 -0.419 -0.450 -1.5339 -1.112  0.1416
## 
## Sum of squares = 7.75    Mean square = 0.97    n = 8 
## 
## fold 5 
## Observations in test set: 7 
##                    1      3    25     30      42    49     61
## Predicted   0.000154  0.155 1.166 -0.654 0.67691 0.680 -0.156
## cvpred      0.089007  0.189 0.647 -0.765 0.56778 0.507  0.145
## rating      0.201336 -1.681 1.925  0.241 0.57629 1.063 -0.245
## CV residual 0.112328 -1.871 1.278  1.006 0.00851 0.556 -0.391
## 
## Sum of squares = 6.62    Mean square = 0.95    n = 7 
## 
## Overall (Sum over all 7 folds) 
##   ms 
## 1.08