# Fetch Data
qb_stats <- read.csv("../data/qb_stats.csv")

# Grab the college predictors
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[, predictors]

# Set the response variable. Single-bracket indexing keeps a one-column data
# frame, so cbind() below carries the "sacked" column name into the combined
# data set used by the model formula.
sacks <- qb_stats["sacked"]

# Generate clean data set: drop every row that has a missing value, then
# standardize all columns (the response included) with scale()'s defaults.
data.scaled.no_combine.for_sacks <- data.frame(
  scale(na.omit(cbind(sacks, college_stats)))
)

# Generate the linear model: sacks regressed on all college predictors
lm.scaled.no_combine.sacks <- lm(
  formula = sacked ~ .,
  data = data.scaled.no_combine.for_sacks
)

# Find optimum linear regression model for sacks via stepwise AIC search.
# stepAIC() is namespace-qualified so the call does not depend on MASS having
# been attached earlier in the session.
step_reg.scaled.no_combine.sacks <- MASS::stepAIC(
  lm.scaled.no_combine.sacks,
  direction = "both"
)
## Start:  AIC=13.46
## sacked ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - height       1     0.000 194 11.5
## - c_avg_cmpp   1     0.036 194 11.5
## - c_rate       1     0.211 194 11.7
## - c_avg_att    1     0.326 194 11.8
## - weight       1     0.361 194 11.8
## - c_pct        1     0.513 194 12.0
## - c_avg_tds    1     0.793 194 12.3
## - c_numyrs     1     0.989 195 12.5
## - c_avg_inter  1     1.666 195 13.2
## - age          1     1.867 196 13.4
## <none>                     194 13.5
## - c_avg_yds    1     2.043 196 13.6
## 
## Step:  AIC=11.46
## sacked ~ weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## - c_avg_cmpp   1     0.036 194  9.50
## - c_rate       1     0.211 194  9.69
## - c_avg_att    1     0.327 194  9.81
## - c_pct        1     0.515 194 10.01
## - weight       1     0.542 194 10.03
## - c_avg_tds    1     0.793 194 10.30
## - c_numyrs     1     1.028 195 10.54
## - c_avg_inter  1     1.670 195 11.22
## - age          1     1.869 196 11.42
## <none>                     194 11.46
## - c_avg_yds    1     2.044 196 11.61
## + height       1     0.000 194 13.46
## 
## Step:  AIC=9.5
## sacked ~ weight + age + c_rate + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS   AIC
## - c_rate       1     0.177 194  7.69
## - weight       1     0.538 194  8.07
## - c_pct        1     0.711 194  8.25
## - c_avg_tds    1     0.898 195  8.45
## - c_numyrs     1     0.992 195  8.54
## - c_avg_att    1     1.103 195  8.66
## - age          1     1.849 196  9.44
## <none>                     194  9.50
## - c_avg_inter  1     1.983 196  9.58
## - c_avg_yds    1     2.312 196  9.92
## + c_avg_cmpp   1     0.036 194 11.46
## + height       1     0.000 194 11.50
## 
## Step:  AIC=7.69
## sacked ~ weight + age + c_pct + c_avg_inter + c_avg_tds + c_avg_yds + 
##     c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - weight       1     0.510 194 6.23
## - c_pct        1     0.664 195 6.39
## - c_avg_att    1     1.032 195 6.77
## - c_numyrs     1     1.067 195 6.81
## - c_avg_tds    1     1.517 196 7.28
## - age          1     1.731 196 7.50
## - c_avg_inter  1     1.897 196 7.67
## <none>                     194 7.69
## - c_avg_yds    1     2.244 196 8.04
## + c_rate       1     0.177 194 9.50
## + c_avg_cmpp   1     0.002 194 9.69
## + height       1     0.001 194 9.69
## 
## Step:  AIC=6.23
## sacked ~ age + c_pct + c_avg_inter + c_avg_tds + c_avg_yds + 
##     c_numyrs + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_pct        1     0.590 195 4.84
## - c_avg_att    1     1.144 196 5.42
## - c_numyrs     1     1.164 196 5.44
## - age          1     1.389 196 5.68
## - c_avg_tds    1     1.498 196 5.79
## - c_avg_inter  1     1.542 196 5.84
## <none>                     194 6.23
## - c_avg_yds    1     2.228 197 6.55
## + weight       1     0.510 194 7.69
## + height       1     0.160 194 8.06
## + c_rate       1     0.150 194 8.07
## + c_avg_cmpp   1     0.001 194 8.22
## 
## Step:  AIC=4.84
## sacked ~ age + c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_numyrs     1      0.90 196 3.78
## - age          1      1.17 196 4.06
## - c_avg_att    1      1.50 196 4.40
## - c_avg_tds    1      1.69 197 4.60
## <none>                     195 4.84
## - c_avg_inter  1      2.37 197 5.31
## + c_pct        1      0.59 194 6.23
## - c_avg_yds    1      3.41 198 6.38
## + weight       1      0.44 195 6.39
## + c_avg_cmpp   1      0.27 195 6.56
## + c_rate       1      0.12 195 6.72
## + height       1      0.09 195 6.75
## 
## Step:  AIC=3.78
## sacked ~ age + c_avg_inter + c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - age          1      1.05 197 2.87
## - c_avg_tds    1      1.76 198 3.61
## - c_avg_inter  1      1.84 198 3.69
## <none>                     196 3.78
## - c_avg_att    1      2.01 198 3.86
## + c_numyrs     1      0.90 195 4.84
## + weight       1      0.53 195 5.23
## + c_pct        1      0.33 196 5.44
## + c_avg_cmpp   1      0.30 196 5.47
## - c_avg_yds    1      3.76 200 5.66
## + height       1      0.04 196 5.74
## + c_rate       1      0.02 196 5.76
## 
## Step:  AIC=2.87
## sacked ~ c_avg_inter + c_avg_tds + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_avg_tds    1      1.43 198 2.34
## - c_avg_att    1      1.76 199 2.68
## <none>                     197 2.87
## - c_avg_inter  1      1.99 199 2.92
## + age          1      1.05 196 3.78
## + c_numyrs     1      0.78 196 4.06
## - c_avg_yds    1      3.42 200 4.38
## + weight       1      0.22 197 4.65
## + c_pct        1      0.19 197 4.67
## + c_avg_cmpp   1      0.19 197 4.68
## + c_rate       1      0.02 197 4.85
## + height       1      0.01 197 4.86
## 
## Step:  AIC=2.34
## sacked ~ c_avg_inter + c_avg_yds + c_avg_att
## 
##               Df Sum of Sq RSS  AIC
## - c_avg_att    1     1.038 200 1.41
## - c_avg_inter  1     1.406 200 1.78
## <none>                     198 2.34
## - c_avg_yds    1     2.027 200 2.42
## + c_avg_tds    1     1.427 197 2.87
## + c_numyrs     1     0.859 198 3.46
## + age          1     0.714 198 3.61
## + c_pct        1     0.312 198 4.02
## + c_avg_cmpp   1     0.301 198 4.03
## + weight       1     0.240 198 4.10
## + c_rate       1     0.015 198 4.33
## + height       1     0.009 198 4.33
## 
## Step:  AIC=1.41
## sacked ~ c_avg_inter + c_avg_yds
## 
##               Df Sum of Sq RSS  AIC
## <none>                     200 1.41
## - c_avg_yds    1      2.09 202 1.54
## + c_numyrs     1      1.21 198 2.17
## + c_avg_att    1      1.04 198 2.34
## + c_avg_tds    1      0.71 199 2.68
## + age          1      0.62 199 2.77
## - c_avg_inter  1      3.35 203 2.81
## + c_pct        1      0.47 199 2.93
## + weight       1      0.34 199 3.06
## + c_rate       1      0.25 199 3.15
## + c_avg_cmpp   1      0.20 199 3.20
## + height       1      0.02 199 3.39
# Print the coefficient table and fit statistics of the stepwise-selected model
summary(object = step_reg.scaled.no_combine.sacks)
## 
## Call:
## lm(formula = sacked ~ c_avg_inter + c_avg_yds, data = data.scaled.no_combine.for_sacks)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5214 -0.7665 -0.0516  0.5967  2.8683 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -1.52e-17   6.97e-02    0.00    1.000  
## c_avg_inter -1.61e-01   8.75e-02   -1.84    0.068 .
## c_avg_yds    1.27e-01   8.75e-02    1.45    0.148  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.996 on 201 degrees of freedom
## Multiple R-squared: 0.0174,  Adjusted R-squared: 0.00766 
## F-statistic: 1.78 on 2 and 201 DF,  p-value: 0.171
# Draw the diagnostic plots for the stepwise-selected model
plot(x = step_reg.scaled.no_combine.sacks)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over the college predictors, requesting nbest = 10
# candidate models per subset size.
leaps.scaled.no_combine.sacks <- leaps::regsubsets(
  sacked ~ .,
  data = data.scaled.no_combine.for_sacks,
  nbest = 10
)

# Plot R-squared against subset size. The legend position is given explicitly:
# when it is left unspecified, subsets() asks for a mouse click to place the
# legend, which cannot happen in a non-interactive (knitted) run and aborted
# the call with "invalid coordinate lengths".
car::subsets(
  leaps.scaled.no_combine.sacks,
  statistic = "rsq",
  legend = "bottomright"
)

plot of chunk unnamed-chunk-1

# 5 fold cross-validation of the stepwise-selected model.
# The data frame is passed positionally rather than as `df =`: DAAG releases
# differ on whether the first formal is called `df` or `data`, and a positional
# first argument binds correctly under either signature.
DAAG::cv.lm(
  data.scaled.no_combine.for_sacks,
  form.lm = step_reg.scaled.no_combine.sacks,
  m = 5
)
## Analysis of Variance Table
## 
## Response: sacked
##              Df Sum Sq Mean Sq F value Pr(>F)
## c_avg_inter   1    1.4   1.447    1.46   0.23
## c_avg_yds     1    2.1   2.094    2.11   0.15
## Residuals   201  199.5   0.992
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 40 
##                  17       29      30     38        40     45    46      62
## Predicted    0.0125 -0.00280 -0.0888  0.244  0.000743 0.0241 0.171 -0.1220
## cvpred       0.0281  0.00922 -0.0626  0.222  0.029975 0.0466 0.169 -0.0829
## sacked      -0.1800 -1.12636 -1.1264  0.104 -0.937090 0.2932 0.388 -0.8425
## CV residual -0.2081 -1.13558 -1.0638 -0.118 -0.967065 0.2466 0.218 -0.7596
##                   64     72     79     91      97      98    103    115
## Predicted   -0.14145 0.0686 0.0554  0.125 -0.0100  0.0474 0.0390 0.0210
## cvpred      -0.10698 0.0909 0.0696  0.127  0.0222  0.0809 0.0692 0.0462
## sacked       0.00928 1.4288 1.0503 -0.369 -0.4639 -1.0317 1.2396 0.1039
## CV residual  0.11626 1.3379 0.9806 -0.496 -0.4861 -1.1126 1.1703 0.0577
##                  117    125      126     127     134     135     139
## Predicted   -0.08966 0.0551 -0.00144 -0.0541 -0.1082 -0.0975  0.0330
## cvpred      -0.05875 0.0779  0.03105 -0.0148 -0.0652 -0.0688  0.0701
## sacked       0.00928 0.4825 -0.36927 -0.9371 -0.7478 -1.4103 -0.5585
## CV residual  0.06803 0.4046 -0.40031 -0.9223 -0.6826 -1.3414 -0.6286
##                 141     142     144    148     154      156    166     169
## Predicted   -0.0610  0.0271 -0.0176 0.0126  0.0558 -0.14490 0.0279 -0.0508
## cvpred      -0.0289  0.0523  0.0153 0.0282  0.0921 -0.09371 0.0440 -0.0136
## sacked      -0.0854 -0.8425  1.6181 1.9967 -0.3693 -0.08536 0.1986 -0.9371
## CV residual -0.0565 -0.8948  1.6029 1.9685 -0.4614  0.00835 0.1545 -0.9235
##                 174     176    181    183     194    195     197    200
## Predicted   -0.1248 -0.0292 -0.191 -0.277  0.0851 0.0112 -0.0657 -0.111
## cvpred      -0.0851  0.0116 -0.137 -0.218  0.1176 0.0421 -0.0206 -0.069
## sacked       0.3878 -0.8425  1.523 -1.600 -1.3156 0.9556  0.7664 -1.126
## CV residual  0.4729 -0.8540  1.661 -1.382 -1.4332 0.9135  0.7870 -1.057
##                209
## Predicted   0.0572
## cvpred      0.0857
## sacked      0.1986
## CV residual 0.1128
## 
## Sum of squares = 32.4    Mean square = 0.81    n = 40 
## 
## fold 2 
## Observations in test set: 41 
##                 1      4      8      10    11    12    14    42      43
## Predicted   0.217 0.0658  0.162 0.02601 0.270 0.256 0.289 0.170  0.1068
## cvpred      0.162 0.0769  0.117 0.00826 0.261 0.188 0.255 0.115  0.0975
## sacked      0.672 0.8610 -0.180 0.38783 0.672 0.388 0.766 1.713 -1.7888
## CV residual 0.510 0.7841 -0.297 0.37956 0.410 0.200 0.512 1.598 -1.8863
##                 53    54      55      65      68      73      76     88
## Predicted   -0.197 0.248  0.1009  0.0496  0.0989 -0.0358 -0.0231  0.165
## cvpred      -0.233 0.118  0.0478 -0.0342  0.1034 -0.0371 -0.0147  0.107
## sacked       0.482 1.050 -1.2210 -0.1800 -1.6942 -1.4103  1.1449 -0.369
## CV residual  0.715 0.933 -1.2688 -0.1458 -1.7976 -1.3731  1.1596 -0.477
##                 101     102     116    119     120    123    130     133
## Predicted   -0.0234  0.0166 -0.0635 -0.209  0.0354 -0.136 0.0471 -0.0972
## cvpred      -0.1826  0.0634 -0.1452 -0.246  0.0894 -0.200 0.0456 -0.0946
## sacked       0.1039 -0.4639 -0.5585  2.659 -0.1800  1.618 0.6717  0.4825
## CV residual  0.2865 -0.5273 -0.4133  2.905 -0.2694  1.818 0.6261  0.5770
##                  146     147     150     151     159    167    168
## Predicted   -0.00893 -0.0209  0.0202 -0.0669 -0.0996  0.103 0.0885
## cvpred      -0.05244 -0.0130 -0.0209 -0.1888 -0.1087  0.168 0.0420
## sacked       0.29319  1.9967  0.5771  1.2396 -0.1800 -0.369 0.1039
## CV residual  0.34562  2.0096  0.5980  1.4284 -0.0713 -0.537 0.0619
##                  177     186    187    196     198    199    203    205
## Predicted   -0.03445  0.0615 -0.138 -0.377 -0.0959 -0.147 -0.196 -0.228
## cvpred      -0.00467  0.1212 -0.164 -0.486 -0.0812 -0.182 -0.309 -0.252
## sacked       0.00928 -0.1800  0.293 -1.316 -0.9371 -0.559  0.956 -1.126
## CV residual  0.01395 -0.3012  0.457 -0.829 -0.8559 -0.376  1.265 -0.874
##                206
## Predicted   -0.122
## cvpred      -0.163
## sacked      -1.410
## CV residual -1.248
## 
## Sum of squares = 43.1    Mean square = 1.05    n = 41 
## 
## fold 3 
## Observations in test set: 41 
##                  5    16       19      20      25    26        28      37
## Predicted    0.217 0.120  0.05590  0.0167  0.0761 0.125 -0.083375  0.0701
## cvpred       0.274 0.198  0.01595  0.0991 -0.0799 0.159  0.000381  0.1063
## sacked      -0.937 0.672  0.00928 -0.5585  0.5771 0.482 -1.126364 -2.4513
## CV residual -1.211 0.473 -0.00668 -0.6576  0.6570 0.324 -1.126745 -2.5576
##                 41    44     48     52     58      59      60      61
## Predicted    0.310 0.110 -0.219 0.1615  0.212 -0.1056  0.0626 -0.0284
## cvpred       0.453 0.184 -0.214 0.3162  0.256 -0.0507  0.0301 -0.0236
## sacked      -0.464 1.902 -0.653 0.3878 -0.653  0.3878 -0.6532 -0.6532
## CV residual -0.917 1.718 -0.439 0.0716 -0.909  0.4385 -0.6833 -0.6296
##                 67     71      86     89     100    105    118       121
## Predicted    0.034 0.0249 -0.0901 0.0438 -0.0471 -0.114 -0.159  5.83e-02
## cvpred       0.058 0.0248  0.0336 0.0550 -0.0162 -0.140 -0.101 -4.97e-05
## sacked      -1.316 2.2806 -1.2210 1.0503  2.0913 -0.275 -1.126 -5.59e-01
## CV residual -1.374 2.2558 -1.2546 0.9953  2.1075 -0.134 -1.025 -5.58e-01
##                 128     132     140    158      161    162    163    164
## Predicted    0.1108 -0.0774 -0.0411 0.0992 -0.01163 -0.255 -0.321 -0.139
## cvpred       0.0786 -0.0794 -0.0190 0.0317 -0.07728 -0.253 -0.268 -0.113
## sacked      -0.6532 -0.1800 -0.8425 2.5645  0.00928 -1.126 -1.505  1.240
## CV residual -0.7318 -0.1006 -0.8234 2.5328  0.08656 -0.874 -1.237  1.352
##                178     182     185     192    201    202     208    210
## Predicted   -0.235 -0.0891 -0.1738 -0.0758 -0.123 -0.367 -0.0841 -0.129
## cvpred      -0.239 -0.1698 -0.0288 -0.0947 -0.110 -0.278 -0.1664 -0.143
## sacked       0.956  0.1986 -1.8835 -0.6532 -0.275  1.145  0.2932  0.388
## CV residual  1.194  0.3683 -1.8546 -0.5585 -0.164  1.423  0.4595  0.531
##                 211
## Predicted   -0.0616
## cvpred      -0.1286
## sacked       1.0503
## CV residual  1.1788
## 
## Sum of squares = 52.7    Mean square = 1.29    n = 41 
## 
## fold 4 
## Observations in test set: 41 
##                   13       18     21      24      27      33      34
## Predicted    0.02493  0.06365  0.123  0.0478  0.0467  0.0259 -0.0623
## cvpred      -0.00101  0.04284  0.104  0.0264  0.0296  0.0162 -0.0864
## sacked      -0.93709  0.00928 -0.653 -0.8425 -0.2746 -0.6532 -1.3156
## CV residual -0.93608 -0.03356 -0.757 -0.8688 -0.3043 -0.6693 -1.2292
##                   39     47     49      51     56     63     66      70
## Predicted   -0.00534 0.1108 0.2344  0.0252  0.154  0.251 0.1004 -0.0635
## cvpred      -0.02895 0.0999 0.2308  0.0020  0.140  0.236 0.0871 -0.0929
## sacked      -0.08536 0.5771 0.2932 -0.0854 -1.221 -0.369 0.6717  0.7664
## CV residual -0.05641 0.4772 0.0624 -0.0874 -1.361 -0.605 0.5847  0.8593
##                   75    78     80     81    82    83      85     95
## Predicted    0.12181 0.125 0.1105 0.0575 0.161 0.111 -0.0125 0.0925
## cvpred       0.10526 0.111 0.0951 0.0352 0.147 0.090 -0.0334 0.0797
## sacked       0.00928 1.807 2.1859 0.1039 0.956 0.766 -1.4103 1.2396
## CV residual -0.09598 1.696 2.0908 0.0687 0.809 0.676 -1.3768 1.1599
##                  99   106    109      112    113      114     124    131
## Predicted    0.0647 0.164 0.0748  0.00711 0.0418  0.01588  0.0872 0.0401
## cvpred       0.0551 0.150 0.0581 -0.02141 0.0197 -0.00557  0.0720 0.0301
## sacked      -1.1264 1.997 0.8610 -1.78882 1.3342  0.76637 -1.5049 0.9556
## CV residual -1.1815 1.847 0.8029 -1.76741 1.3145  0.77194 -1.5769 0.9255
##                 137     149    153    157      165    170      172    184
## Predicted   -0.0135 -0.0893 -0.134 0.0245  0.00881 -0.101  0.02501 -0.361
## cvpred      -0.0316 -0.1157 -0.168 0.0144 -0.01211 -0.130  0.00903 -0.408
## sacked       0.4825 -2.0727  1.618 0.3878  1.05028  0.104 -0.18000  0.577
## CV residual  0.5141 -1.9571  1.786 0.3734  1.06239  0.234 -0.18903  0.985
##                190     204
## Predicted   -0.508  0.0448
## cvpred      -0.570  0.0269
## sacked       0.199 -0.2746
## CV residual  0.769 -0.3015
## 
## Sum of squares = 44    Mean square = 1.07    n = 41 
## 
## fold 5 
## Observations in test set: 41 
##                 2      3        6     7       9     15    22     31
## Predicted   0.258 0.1522  0.00302 0.128 -0.0270  0.118 0.229 0.0341
## cvpred      0.236 0.1523  0.02322 0.150 -0.0175  0.107 0.216 0.0476
## sacked      1.429 0.1986 -0.08536 0.861  0.9556 -0.275 0.766 0.1039
## CV residual 1.192 0.0463 -0.10858 0.711  0.9732 -0.382 0.551 0.0563
##                  32      35      36    50     57     74      77      87
## Predicted    0.0935  0.0270  0.1169 0.240 -0.169 -0.120  0.0587  0.0492
## cvpred       0.1055  0.0446  0.1264 0.234 -0.138 -0.101  0.0701  0.0777
## sacked      -0.9371 -0.0854  0.1039 0.388 -0.275 -1.126 -1.3156 -1.4103
## CV residual -1.0425 -0.1300 -0.0225 0.154 -0.137 -1.025 -1.3858 -1.4880
##                  90      92     93     94      96    104     107    108
## Predicted    0.1002 -0.0130  0.128 0.0526  0.0489  0.060  0.0639  0.119
## cvpred       0.0994  0.0101  0.140 0.0645  0.0692  0.083  0.0871  0.136
## sacked      -0.6532  0.8610 -0.559 2.4698 -0.6532 -0.180 -0.6532 -1.316
## CV residual -0.7526  0.8509 -0.699 2.4053 -0.7223 -0.263 -0.7403 -1.452
##                  110     122     129      136    143     145     152
## Predicted   -0.02041 -0.0851 -0.0484 -0.13674 0.0540 -0.1235 -0.0721
## cvpred      -0.00573 -0.0533 -0.0253 -0.09914 0.0868 -0.0871 -0.0600
## sacked      -0.93709 -0.6532  0.1039  0.00928 0.1986  0.6717 -0.6532
## CV residual -0.93136 -0.5999  0.1293  0.10841 0.1118  0.7588 -0.5932
##                 160    171     173     175    179     180    188     189
## Predicted   0.00521 -0.200 -0.0881 -0.0637 0.0227 -0.2071 -0.131 -0.1075
## cvpred      0.03679 -0.171 -0.0500 -0.0295 0.0526 -0.1526 -0.087 -0.0652
## sacked      1.99665  0.388 -0.2746 -1.0317 1.0503 -0.0854 -0.180 -1.2210
## CV residual 1.95986  0.559 -0.2247 -1.0022 0.9977  0.0672 -0.093 -1.1558
##                 191    193
## Predicted   -0.1177 -0.206
## cvpred      -0.0747 -0.153
## sacked      -0.1800 -1.126
## CV residual -0.1053 -0.973
## 
## Sum of squares = 31.2    Mean square = 0.76    n = 41 
## 
## Overall (Sum over all 41 folds) 
##    ms 
## 0.997