# Fetch data ----
# Path is relative to the directory the script is run (knitted) from.
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Grab the college predictors ----
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att", "X40",
  "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Set the response variable ----
# Single-bracket indexing keeps a one-column data frame, so the column is
# still called `tds` after cbind() below.
tds <- qb_stats_w_combine["tds"]

# Generate clean data set ----
# 1. cbind(): response first, then the predictors.
# 2. na.omit(): drop every row that has a missing value in any column.
# 3. scale(): centre each column to mean 0 and scale it to sd 1, which
#    returns a matrix, hence the conversion back to a data frame.
data.scaled.w_combine.for_tds <- data.frame(
  scale(na.omit(cbind(tds, college_stats)))
)

# Generate the linear model ----
# Full model: tds regressed on every remaining column.
lm.scaled.w_combine.tds <- lm(
  formula = tds ~ .,
  data = data.scaled.w_combine.for_tds
)

# Find optimum linear regression model for tds ----
# Stepwise AIC search that may both drop and re-add terms at each step.
# stepAIC() is provided by the MASS package, which must already be attached.
step_reg.scaled.w_combine.tds <- stepAIC(
  lm.scaled.w_combine.tds,
  direction = "both"
)
## Start:  AIC=19.32
## tds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - cone         1     0.005 24.5 17.3
## - weight       1     0.007 24.5 17.3
## - shuttle      1     0.013 24.5 17.3
## - c_avg_cmpp   1     0.019 24.5 17.4
## - c_avg_inter  1     0.036 24.5 17.4
## - height       1     0.175 24.7 17.6
## - c_pct        1     0.219 24.7 17.7
## - X40          1     0.299 24.8 17.8
## - c_rate       1     0.596 25.1 18.2
## - c_avg_tds    1     0.638 25.1 18.3
## - vert_leap    1     0.871 25.4 18.6
## - c_avg_att    1     0.885 25.4 18.7
## - c_avg_yds    1     1.295 25.8 19.3
## <none>                     24.5 19.3
## - c_numyrs     1     1.945 26.4 20.2
## - age          1     2.332 26.8 20.8
## - broad_jump   1     2.537 27.0 21.1
## - wonderlic    1     2.784 27.3 21.4
## 
## Step:  AIC=17.33
## tds ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - weight       1     0.003 24.5 15.3
## - shuttle      1     0.008 24.5 15.3
## - c_avg_cmpp   1     0.023 24.5 15.4
## - c_avg_inter  1     0.044 24.6 15.4
## - height       1     0.208 24.7 15.7
## - c_pct        1     0.238 24.7 15.7
## - X40          1     0.294 24.8 15.8
## - c_rate       1     0.626 25.1 16.3
## - c_avg_tds    1     0.747 25.2 16.5
## - c_avg_att    1     0.880 25.4 16.7
## - vert_leap    1     0.905 25.4 16.7
## - c_avg_yds    1     1.293 25.8 17.3
## <none>                     24.5 17.3
## - c_numyrs     1     1.976 26.5 18.3
## - age          1     2.327 26.8 18.8
## - broad_jump   1     2.532 27.0 19.1
## + cone         1     0.005 24.5 19.3
## - wonderlic    1     3.132 27.6 19.9
## 
## Step:  AIC=15.33
## tds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - shuttle      1      0.00 24.5 13.3
## - c_avg_cmpp   1      0.02 24.5 13.4
## - c_avg_inter  1      0.04 24.6 13.4
## - c_pct        1      0.23 24.7 13.7
## - X40          1      0.29 24.8 13.8
## - height       1      0.39 24.9 13.9
## - c_rate       1      0.62 25.1 14.3
## - c_avg_tds    1      0.80 25.3 14.6
## - vert_leap    1      0.91 25.4 14.7
## - c_avg_att    1      0.91 25.4 14.7
## - c_avg_yds    1      1.31 25.8 15.3
## <none>                     24.5 15.3
## - c_numyrs     1      2.11 26.6 16.5
## - broad_jump   1      2.67 27.2 17.3
## + weight       1      0.00 24.5 17.3
## + cone         1      0.00 24.5 17.3
## - age          1      2.80 27.3 17.4
## - wonderlic    1      3.21 27.7 18.0
## 
## Step:  AIC=13.34
## tds ~ height + age + c_avg_cmpp + c_rate + c_pct + c_avg_inter + 
##     c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_cmpp   1      0.02 24.5 11.4
## - c_avg_inter  1      0.05 24.6 11.4
## - c_pct        1      0.23 24.7 11.7
## - X40          1      0.30 24.8 11.8
## - height       1      0.41 24.9 12.0
## - c_rate       1      0.64 25.1 12.3
## - c_avg_tds    1      0.82 25.3 12.6
## - c_avg_att    1      0.90 25.4 12.7
## - vert_leap    1      0.97 25.5 12.8
## <none>                     24.5 13.3
## - c_avg_yds    1      1.54 26.1 13.7
## - c_numyrs     1      2.12 26.6 14.5
## - broad_jump   1      2.71 27.2 15.3
## + shuttle      1      0.00 24.5 15.3
## + weight       1      0.00 24.5 15.3
## + cone         1      0.00 24.5 15.3
## - age          1      2.90 27.4 15.6
## - wonderlic    1      4.33 28.9 17.5
## 
## Step:  AIC=11.37
## tds ~ height + age + c_rate + c_pct + c_avg_inter + c_avg_tds + 
##     c_avg_yds + c_numyrs + c_avg_att + X40 + wonderlic + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_inter  1      0.15 24.7  9.60
## - X40          1      0.29 24.8  9.81
## - height       1      0.40 24.9  9.98
## - c_pct        1      0.64 25.2 10.34
## - vert_leap    1      0.96 25.5 10.82
## - c_avg_tds    1      1.12 25.6 11.06
## <none>                     24.5 11.37
## - c_rate       1      1.58 26.1 11.75
## - c_avg_yds    1      2.24 26.8 12.68
## - c_avg_att    1      2.31 26.8 12.79
## + c_avg_cmpp   1      0.02 24.5 13.34
## + cone         1      0.00 24.5 13.36
## + shuttle      1      0.00 24.5 13.36
## + weight       1      0.00 24.5 13.37
## - broad_jump   1      2.76 27.3 13.42
## - c_numyrs     1      2.78 27.3 13.44
## - age          1      2.91 27.4 13.62
## - wonderlic    1      4.40 28.9 15.64
## 
## Step:  AIC=9.6
## tds ~ height + age + c_rate + c_pct + c_avg_tds + c_avg_yds + 
##     c_numyrs + c_avg_att + X40 + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - height       1      0.34 25.0  8.12
## - X40          1      0.42 25.1  8.23
## - c_pct        1      0.52 25.2  8.39
## - vert_leap    1      0.84 25.5  8.87
## - c_avg_tds    1      0.99 25.7  9.09
## <none>                     24.7  9.60
## - c_rate       1      1.45 26.1  9.77
## - c_avg_yds    1      2.12 26.8 10.73
## + c_avg_inter  1      0.15 24.5 11.37
## + c_avg_cmpp   1      0.12 24.6 11.41
## - c_numyrs     1      2.64 27.3 11.46
## + cone         1      0.03 24.6 11.55
## - c_avg_att    1      2.72 27.4 11.57
## + weight       1      0.00 24.7 11.60
## + shuttle      1      0.00 24.7 11.60
## - broad_jump   1      2.95 27.6 11.89
## - age          1      3.02 27.7 11.98
## - wonderlic    1      4.71 29.4 14.23
## 
## Step:  AIC=8.12
## tds ~ age + c_rate + c_pct + c_avg_tds + c_avg_yds + c_numyrs + 
##     c_avg_att + X40 + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_pct        1      0.33 25.4  6.63
## - vert_leap    1      0.85 25.9  7.38
## - X40          1      0.89 25.9  7.45
## - c_avg_tds    1      0.95 26.0  7.54
## - c_rate       1      1.20 26.2  7.90
## <none>                     25.0  8.12
## - c_avg_yds    1      1.91 26.9  8.91
## + height       1      0.34 24.7  9.60
## - c_avg_att    1      2.48 27.5  9.72
## - c_numyrs     1      2.51 27.5  9.75
## + weight       1      0.14 24.9  9.90
## + c_avg_inter  1      0.09 24.9  9.98
## + c_avg_cmpp   1      0.07 24.9 10.01
## + cone         1      0.06 25.0 10.03
## + shuttle      1      0.03 25.0 10.08
## - age          1      3.25 28.3 10.77
## - broad_jump   1      3.49 28.5 11.08
## - wonderlic    1      4.37 29.4 12.23
## 
## Step:  AIC=6.63
## tds ~ age + c_rate + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_tds    1      0.75 26.1  5.73
## - vert_leap    1      0.91 26.3  5.97
## - X40          1      1.09 26.4  6.22
## <none>                     25.4  6.63
## - c_avg_yds    1      1.57 26.9  6.91
## - c_rate       1      1.76 27.1  7.17
## - c_avg_att    1      2.27 27.6  7.88
## - c_numyrs     1      2.30 27.7  7.93
## + c_pct        1      0.33 25.0  8.12
## + height       1      0.16 25.2  8.39
## + c_avg_cmpp   1      0.14 25.2  8.41
## + weight       1      0.13 25.2  8.43
## + cone         1      0.05 25.3  8.55
## + shuttle      1      0.04 25.3  8.57
## + c_avg_inter  1      0.02 25.3  8.59
## - age          1      2.92 28.3  8.77
## - broad_jump   1      3.50 28.9  9.54
## - wonderlic    1      4.17 29.5 10.41
## 
## Step:  AIC=5.73
## tds ~ age + c_rate + c_avg_yds + c_numyrs + c_avg_att + X40 + 
##     wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - X40          1      0.69 26.8 4.72
## - c_rate       1      1.24 27.3 5.49
## - vert_leap    1      1.35 27.4 5.65
## <none>                     26.1 5.73
## - c_numyrs     1      2.05 28.1 6.60
## + c_avg_tds    1      0.75 25.4 6.63
## + c_avg_cmpp   1      0.28 25.8 7.32
## + weight       1      0.27 25.8 7.34
## - c_avg_att    1      2.69 28.8 7.46
## + height       1      0.19 25.9 7.46
## + c_pct        1      0.13 26.0 7.54
## + cone         1      0.03 26.1 7.69
## + shuttle      1      0.00 26.1 7.73
## + c_avg_inter  1      0.00 26.1 7.73
## - broad_jump   1      3.00 29.1 7.87
## - age          1      3.12 29.2 8.02
## - c_avg_yds    1      3.42 29.5 8.41
## - wonderlic    1      4.63 30.7 9.94
## 
## Step:  AIC=4.72
## tds ~ age + c_rate + c_avg_yds + c_numyrs + c_avg_att + wonderlic + 
##     vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_rate       1      0.87 27.7 3.94
## <none>                     26.8 4.72
## - c_numyrs     1      1.79 28.6 5.18
## + X40          1      0.69 26.1 5.73
## - c_avg_att    1      2.21 29.0 5.74
## - broad_jump   1      2.33 29.1 5.90
## + weight       1      0.53 26.3 5.96
## + height       1      0.48 26.3 6.04
## + c_avg_tds    1      0.35 26.4 6.22
## + c_avg_cmpp   1      0.32 26.5 6.27
## + c_pct        1      0.28 26.5 6.32
## - vert_leap    1      2.70 29.5 6.37
## - age          1      2.77 29.6 6.46
## + shuttle      1      0.14 26.7 6.53
## + cone         1      0.11 26.7 6.56
## - c_avg_yds    1      2.88 29.7 6.60
## + c_avg_inter  1      0.03 26.8 6.68
## - wonderlic    1      4.37 31.2 8.46
## 
## Step:  AIC=3.94
## tds ~ age + c_avg_yds + c_numyrs + c_avg_att + wonderlic + vert_leap + 
##     broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_numyrs     1      0.94 28.6 3.21
## - c_avg_att    1      1.36 29.0 3.77
## <none>                     27.7 3.94
## - broad_jump   1      1.71 29.4 4.23
## + c_rate       1      0.87 26.8 4.72
## - age          1      2.19 29.9 4.83
## - c_avg_yds    1      2.35 30.0 5.05
## - vert_leap    1      2.42 30.1 5.13
## + weight       1      0.47 27.2 5.29
## + height       1      0.41 27.3 5.38
## + c_pct        1      0.38 27.3 5.41
## + X40          1      0.33 27.3 5.49
## + shuttle      1      0.24 27.4 5.61
## + c_avg_tds    1      0.12 27.5 5.78
## + cone         1      0.11 27.6 5.79
## + c_avg_inter  1      0.06 27.6 5.85
## + c_avg_cmpp   1      0.04 27.6 5.89
## - wonderlic    1      4.28 31.9 7.41
## 
## Step:  AIC=3.21
## tds ~ age + c_avg_yds + c_avg_att + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - c_avg_att    1      0.91 29.5 2.41
## - broad_jump   1      1.27 29.9 2.87
## <none>                     28.6 3.21
## - c_avg_yds    1      1.90 30.5 3.66
## - vert_leap    1      2.00 30.6 3.78
## + c_numyrs     1      0.94 27.7 3.94
## - age          1      2.19 30.8 4.02
## + weight       1      0.68 27.9 4.30
## + X40          1      0.37 28.2 4.72
## + height       1      0.33 28.3 4.77
## + shuttle      1      0.29 28.3 4.83
## + c_avg_tds    1      0.20 28.4 4.95
## + cone         1      0.19 28.4 4.97
## + c_rate       1      0.03 28.6 5.18
## + c_avg_cmpp   1      0.02 28.6 5.19
## + c_avg_inter  1      0.00 28.6 5.21
## + c_pct        1      0.00 28.6 5.21
## - wonderlic    1      4.06 32.7 6.25
## 
## Step:  AIC=2.41
## tds ~ age + c_avg_yds + wonderlic + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS  AIC
## - broad_jump   1      1.50 31.0 2.29
## <none>                     29.5 2.41
## - vert_leap    1      2.12 31.6 3.04
## + weight       1      0.92 28.6 3.20
## + c_avg_att    1      0.91 28.6 3.21
## - age          1      2.37 31.9 3.34
## + c_avg_tds    1      0.69 28.8 3.51
## - c_avg_yds    1      2.70 32.2 3.73
## + c_avg_inter  1      0.51 29.0 3.75
## + c_numyrs     1      0.49 29.0 3.77
## + shuttle      1      0.35 29.2 3.96
## + height       1      0.29 29.2 4.04
## + c_avg_cmpp   1      0.27 29.2 4.05
## + X40          1      0.26 29.3 4.07
## + c_rate       1      0.16 29.4 4.21
## + c_pct        1      0.05 29.5 4.34
## + cone         1      0.03 29.5 4.36
## - wonderlic    1      3.93 33.5 5.15
## 
## Step:  AIC=2.29
## tds ~ age + c_avg_yds + wonderlic + vert_leap
## 
##               Df Sum of Sq  RSS  AIC
## - vert_leap    1      0.65 31.7 1.08
## <none>                     31.0 2.29
## + broad_jump   1      1.50 29.5 2.41
## + weight       1      1.38 29.6 2.56
## + c_avg_att    1      1.14 29.9 2.87
## - c_avg_yds    1      2.40 33.4 3.12
## + c_avg_tds    1      0.71 30.3 3.41
## - age          1      2.80 33.8 3.57
## + c_avg_inter  1      0.57 30.5 3.59
## + height       1      0.38 30.6 3.82
## + c_rate       1      0.33 30.7 3.88
## + shuttle      1      0.32 30.7 3.90
## + c_avg_cmpp   1      0.21 30.8 4.03
## + c_numyrs     1      0.16 30.9 4.09
## + c_pct        1      0.16 30.9 4.10
## + X40          1      0.00 31.0 4.29
## + cone         1      0.00 31.0 4.29
## - wonderlic    1      3.94 35.0 4.83
## 
## Step:  AIC=1.08
## tds ~ age + c_avg_yds + wonderlic
## 
##               Df Sum of Sq  RSS  AIC
## <none>                     31.7 1.08
## + weight       1      1.54 30.1 1.19
## + c_avg_att    1      1.06 30.6 1.79
## + c_avg_tds    1      0.93 30.7 1.95
## - c_avg_yds    1      2.51 34.2 1.98
## + shuttle      1      0.85 30.8 2.05
## + c_avg_inter  1      0.76 30.9 2.15
## + vert_leap    1      0.65 31.0 2.29
## - age          1      2.81 34.5 2.30
## + height       1      0.56 31.1 2.41
## + X40          1      0.38 31.3 2.62
## + c_rate       1      0.19 31.5 2.85
## + c_numyrs     1      0.17 31.5 2.88
## + cone         1      0.14 31.5 2.91
## + c_pct        1      0.10 31.6 2.96
## + c_avg_cmpp   1      0.06 31.6 3.01
## + broad_jump   1      0.03 31.6 3.04
## - wonderlic    1      3.63 35.3 3.20
# Coefficient table and overall fit statistics of the AIC-selected model
print(summary(step_reg.scaled.w_combine.tds))
## 
## Call:
## lm(formula = tds ~ age + c_avg_yds + wonderlic, data = data.scaled.w_combine.for_tds)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.502 -0.692 -0.202  0.520  1.806 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -2.79e-16   1.57e-01    0.00    1.000  
## age         -3.17e-01   1.83e-01   -1.74    0.092 .
## c_avg_yds    2.78e-01   1.69e-01    1.64    0.110  
## wonderlic   -3.54e-01   1.80e-01   -1.97    0.057 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.965 on 34 degrees of freedom
## Multiple R-squared: 0.144,   Adjusted R-squared: 0.0685 
## F-statistic: 1.91 on 3 and 34 DF,  p-value: 0.147
# Standard lm diagnostic panels for the selected model: residuals vs fitted,
# normal Q-Q, scale-location and residuals vs leverage (the plot.lm defaults).
plot(step_reg.scaled.w_combine.tds, which = c(1, 2, 3, 5))

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Best-subsets search over all predictors, keeping the 10 best models of each
# subset size.
leaps.scaled.w_combine.tds <- regsubsets(
  tds ~ .,
  data = data.scaled.w_combine.for_tds,
  nbest = 10
)

# Plot R-squared against subset size.
# The legend is switched off on purpose: interactive legend placement waits
# for a mouse click via locator(), which is unavailable when the script is
# knitted or run in batch mode and made the call abort with
# "invalid coordinate lengths". With legend = FALSE the plot is drawn and
# subsets() returns the abbreviation key as a data frame, which is printed
# here in place of the on-plot legend.
subsets(leaps.scaled.w_combine.tds, statistic = "rsq", legend = FALSE)

plot of chunk unnamed-chunk-1

# 5-fold cross-validation of the AIC-selected model on the scaled data
cv.lm(
  df = data.scaled.w_combine.for_tds,
  form.lm = step_reg.scaled.w_combine.tds,
  m = 5
)
## Analysis of Variance Table
## 
## Response: tds
##           Df Sum Sq Mean Sq F value Pr(>F)  
## age        1    0.5    0.45    0.49  0.490  
## c_avg_yds  1    1.2    1.25    1.34  0.255  
## wonderlic  1    3.6    3.63    3.89  0.057 .
## Residuals 34   31.7    0.93                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                  6     21     26     32      38       46     59
## Predicted   0.0551 -0.605  0.222  0.308 -0.4200 -0.11484  0.139
## cvpred      0.0327 -0.781  0.290  0.420 -0.4894 -0.10757  0.278
## tds         1.1019 -0.399 -0.549 -0.399 -0.0987 -0.09874 -0.249
## CV residual 1.0693  0.382 -0.839 -0.819  0.3907  0.00883 -0.526
## 
## Sum of squares = 3.09    Mean square = 0.44    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                  7      18     20    27     37     43     55     65
## Predicted   -0.179 -0.0173 -0.296 0.646 -0.480  0.329 -0.133 -0.576
## cvpred      -0.124 -0.0105 -0.205 0.649 -0.564  0.259 -0.149 -0.611
## tds         -0.549 -0.3989 -0.999 2.453  0.201 -0.849 -1.149  0.802
## CV residual -0.425 -0.3884 -0.794 1.804  0.765 -1.108 -1.000  1.413
## 
## Sum of squares = 9.03    Mean square = 1.13    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                     5     12    13     16     39     40      50       56
## Predicted   -0.000613  0.289 0.313  0.299  0.729 -0.568  0.1238 0.218640
## cvpred      -0.309489  0.124 0.404 -0.020  0.597 -0.654 -0.0997 0.000097
## tds          1.552209 -0.699 1.252  1.402 -0.399 -0.249  0.0513 2.002468
## CV residual  1.861698 -0.823 0.848  1.422 -0.996  0.405  0.1511 2.002371
## 
## Sum of squares = 12.1    Mean square = 1.51    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                   4      15     17      19    28     52     63      64
## Predicted   -0.2476  0.3693 -0.530 -0.3506 0.693 -0.492  0.155  0.3352
## cvpred       0.0633  0.3756 -0.230 -0.1161 0.556 -0.381  0.268  0.3416
## tds         -1.7497 -0.0987 -0.399  0.0513 1.252 -1.149 -0.399 -0.0987
## CV residual -1.8130 -0.4743 -0.169  0.1674 0.696 -0.768 -0.667 -0.4403
## 
## Sum of squares = 5.28    Mean square = 0.66    n = 8 
## 
## fold 5 
## Observations in test set: 7 
##                 1       3     24    30      42      49     61
## Predicted   0.236  0.0307  0.201 0.381 -0.2168 -0.6281 -0.217
## cvpred      0.160  0.1014  0.303 0.433 -0.0266 -0.5155 -0.399
## tds         2.002 -1.4495 -0.699 0.652 -0.5490 -0.5490 -0.549
## CV residual 1.842 -1.5509 -1.003 0.218 -0.5224 -0.0335 -0.150
## 
## Sum of squares = 7.15    Mean square = 1.02    n = 7 
## 
## Overall (Sum over all 7 folds) 
##    ms 
## 0.964