# Load the source table from disk
qb_stats_w_combine <- read.csv(file = "../data/qb_stats_w_combine.csv")

# Candidate explanatory columns (order here fixes the column order of the
# modelling frame below)
predictors <- c(
    "height", "weight", "age",
    "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
    "c_numyrs", "c_avg_att",
    "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[, predictors]

# Response variable, kept as a one-column data frame so that cbind() carries
# the column name "ints" into the combined frame
ints <- qb_stats_w_combine["ints"]

# Modelling frame: response first, then the predictors. Rows containing any NA
# are dropped, then every column is passed through scale().
data.scaled.w_combine.for_ints <- data.frame(
    scale(
        na.omit(
            cbind(ints, college_stats)
        )
    )
)

# Full linear model: ints regressed on every other column of the frame
lm.scaled.w_combine.ints <- lm(
    formula = ints ~ .,
    data = data.scaled.w_combine.for_ints
)

# Stepwise AIC search starting from the full model; direction = "both" lets
# terms be dropped and re-added at each step
step_reg.scaled.w_combine.ints <- stepAIC(
    lm.scaled.w_combine.ints,
    direction = "both"
)
## Start:  AIC=7.24
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_avg_yds + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_yds    1      0.01 18.7  5.25
## - vert_leap    1      0.03 18.7  5.30
## - c_rate       1      0.07 18.7  5.38
## - c_pct        1      0.10 18.8  5.45
## - height       1      0.13 18.8  5.50
## - c_avg_tds    1      0.20 18.9  5.65
## - shuttle      1      0.21 18.9  5.68
## - wonderlic    1      0.22 18.9  5.68
## - weight       1      0.23 18.9  5.72
## - X40          1      0.29 18.9  5.84
## - c_avg_inter  1      0.29 18.9  5.84
## - cone         1      0.40 19.1  6.07
## - c_numyrs     1      0.41 19.1  6.08
## - c_avg_cmpp   1      0.55 19.2  6.37
## - broad_jump   1      0.74 19.4  6.75
## - c_avg_att    1      0.85 19.5  6.98
## <none>                     18.6  7.24
## - age          1      9.12 27.8 20.76
## 
## Step:  AIC=5.25
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + vert_leap + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - vert_leap    1      0.05 18.7  3.34
## - c_rate       1      0.11 18.8  3.48
## - height       1      0.14 18.8  3.55
## - c_pct        1      0.16 18.8  3.59
## - c_avg_tds    1      0.20 18.9  3.66
## - shuttle      1      0.21 18.9  3.68
## - weight       1      0.24 18.9  3.74
## - wonderlic    1      0.29 18.9  3.84
## - c_avg_inter  1      0.29 18.9  3.85
## - X40          1      0.30 19.0  3.87
## - cone         1      0.40 19.1  4.07
## - c_numyrs     1      0.42 19.1  4.12
## - broad_jump   1      0.82 19.5  4.93
## - c_avg_att    1      0.85 19.5  4.98
## - c_avg_cmpp   1      0.91 19.6  5.10
## <none>                     18.7  5.25
## + c_avg_yds    1      0.01 18.6  7.24
## - age          1      9.93 28.6 19.89
## 
## Step:  AIC=3.34
## ints ~ height + weight + age + c_avg_cmpp + c_rate + c_pct + 
##     c_avg_inter + c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + 
##     cone + shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_rate       1      0.10 18.8  1.56
## - height       1      0.15 18.9  1.65
## - c_pct        1      0.16 18.9  1.68
## - c_avg_tds    1      0.18 18.9  1.71
## - c_avg_inter  1      0.27 19.0  1.89
## - shuttle      1      0.27 19.0  1.90
## - wonderlic    1      0.27 19.0  1.90
## - weight       1      0.28 19.0  1.91
## - c_numyrs     1      0.42 19.1  2.21
## - X40          1      0.42 19.1  2.21
## - cone         1      0.43 19.1  2.22
## - broad_jump   1      0.78 19.5  2.94
## - c_avg_att    1      0.87 19.6  3.11
## - c_avg_cmpp   1      0.93 19.6  3.23
## <none>                     18.7  3.34
## + vert_leap    1      0.05 18.7  5.25
## + c_avg_yds    1      0.02 18.7  5.30
## - age          1     10.44 29.1 18.64
## 
## Step:  AIC=1.56
## ints ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter + 
##     c_avg_tds + c_numyrs + c_avg_att + X40 + wonderlic + cone + 
##     shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_avg_tds    1      0.08 18.9 -0.28
## - c_pct        1      0.10 18.9 -0.24
## - c_avg_inter  1      0.18 19.0 -0.07
## - height       1      0.18 19.0 -0.07
## - weight       1      0.32 19.1  0.21
## - c_numyrs     1      0.34 19.1  0.24
## - cone         1      0.36 19.2  0.29
## - shuttle      1      0.37 19.2  0.31
## - wonderlic    1      0.38 19.2  0.33
## - X40          1      0.39 19.2  0.35
## - c_avg_att    1      0.82 19.6  1.22
## - broad_jump   1      0.88 19.7  1.33
## <none>                     18.8  1.56
## - c_avg_cmpp   1      1.08 19.9  1.73
## + c_rate       1      0.10 18.7  3.34
## + vert_leap    1      0.04 18.8  3.48
## + c_avg_yds    1      0.03 18.8  3.50
## - age          1     10.79 29.6 17.24
## 
## Step:  AIC=-0.28
## ints ~ height + weight + age + c_avg_cmpp + c_pct + c_avg_inter + 
##     c_numyrs + c_avg_att + X40 + wonderlic + cone + shuttle + 
##     broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_pct        1      0.14 19.0 -1.99
## - height       1      0.21 19.1 -1.85
## - cone         1      0.32 19.2 -1.62
## - c_avg_inter  1      0.33 19.2 -1.61
## - wonderlic    1      0.34 19.2 -1.58
## - weight       1      0.39 19.3 -1.48
## - c_numyrs     1      0.43 19.3 -1.42
## - X40          1      0.43 19.3 -1.41
## - shuttle      1      0.45 19.3 -1.36
## <none>                     18.9 -0.28
## - c_avg_att    1      1.01 19.9 -0.25
## - broad_jump   1      1.01 19.9 -0.24
## - c_avg_cmpp   1      1.07 19.9 -0.14
## + c_avg_tds    1      0.08 18.8  1.56
## + vert_leap    1      0.03 18.9  1.66
## + c_avg_yds    1      0.00 18.9  1.71
## + c_rate       1      0.00 18.9  1.71
## - age          1     10.73 29.6 15.26
## 
## Step:  AIC=-1.99
## ints ~ height + weight + age + c_avg_cmpp + c_avg_inter + c_numyrs + 
##     c_avg_att + X40 + wonderlic + cone + shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - height       1      0.20 19.2 -3.57
## - c_numyrs     1      0.28 19.3 -3.41
## - cone         1      0.30 19.3 -3.39
## - c_avg_inter  1      0.32 19.3 -3.34
## - wonderlic    1      0.32 19.4 -3.33
## - X40          1      0.35 19.4 -3.28
## - weight       1      0.35 19.4 -3.27
## - shuttle      1      0.49 19.5 -2.99
## - broad_jump   1      0.87 19.9 -2.24
## - c_avg_att    1      0.88 19.9 -2.22
## <none>                     19.0 -1.99
## - c_avg_cmpp   1      1.03 20.1 -1.93
## + c_pct        1      0.14 18.9 -0.28
## + c_rate       1      0.12 18.9 -0.24
## + c_avg_tds    1      0.12 18.9 -0.24
## + vert_leap    1      0.05 19.0 -0.09
## + c_avg_yds    1      0.02 19.0 -0.04
## - age          1     10.71 29.7 13.43
## 
## Step:  AIC=-3.57
## ints ~ weight + age + c_avg_cmpp + c_avg_inter + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - weight       1      0.15 19.4 -5.27
## - wonderlic    1      0.17 19.4 -5.23
## - c_numyrs     1      0.18 19.4 -5.22
## - c_avg_inter  1      0.24 19.5 -5.08
## - cone         1      0.40 19.6 -4.77
## - shuttle      1      0.42 19.6 -4.73
## - X40          1      0.60 19.8 -4.38
## - c_avg_att    1      0.70 19.9 -4.18
## - c_avg_cmpp   1      0.83 20.1 -3.93
## - broad_jump   1      0.99 20.2 -3.61
## <none>                     19.2 -3.57
## + height       1      0.20 19.0 -1.99
## + c_avg_tds    1      0.16 19.1 -1.89
## + c_pct        1      0.14 19.1 -1.85
## + c_rate       1      0.12 19.1 -1.81
## + vert_leap    1      0.05 19.2 -1.67
## + c_avg_yds    1      0.04 19.2 -1.65
## - age          1     10.52 29.8 11.44
## 
## Step:  AIC=-5.27
## ints ~ age + c_avg_cmpp + c_avg_inter + c_numyrs + c_avg_att + 
##     X40 + wonderlic + cone + shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - wonderlic    1      0.11 19.5 -7.05
## - c_numyrs     1      0.14 19.5 -6.99
## - c_avg_inter  1      0.23 19.6 -6.81
## - shuttle      1      0.30 19.7 -6.67
## - cone         1      0.45 19.8 -6.37
## - c_avg_att    1      0.59 20.0 -6.10
## - X40          1      0.59 20.0 -6.09
## - c_avg_cmpp   1      0.70 20.1 -5.88
## - broad_jump   1      0.86 20.2 -5.57
## <none>                     19.4 -5.27
## + c_avg_tds    1      0.20 19.2 -3.66
## + weight       1      0.15 19.2 -3.57
## + c_rate       1      0.10 19.3 -3.47
## + c_pct        1      0.10 19.3 -3.47
## + vert_leap    1      0.08 19.3 -3.42
## + c_avg_yds    1      0.04 19.4 -3.34
## + height       1      0.00 19.4 -3.27
## - age          1     12.03 31.4 11.56
## 
## Step:  AIC=-7.05
## ints ~ age + c_avg_cmpp + c_avg_inter + c_numyrs + c_avg_att + 
##     X40 + cone + shuttle + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## - c_numyrs     1      0.12 19.6 -8.82
## - c_avg_inter  1      0.25 19.7 -8.56
## - shuttle      1      0.37 19.9 -8.32
## - X40          1      0.50 20.0 -8.06
## - c_avg_att    1      0.59 20.1 -7.90
## - cone         1      0.66 20.1 -7.76
## - c_avg_cmpp   1      0.68 20.2 -7.72
## - broad_jump   1      0.88 20.4 -7.32
## <none>                     19.5 -7.05
## + c_avg_tds    1      0.13 19.4 -5.32
## + wonderlic    1      0.11 19.4 -5.27
## + c_pct        1      0.10 19.4 -5.25
## + weight       1      0.09 19.4 -5.23
## + c_rate       1      0.07 19.4 -5.20
## + vert_leap    1      0.05 19.4 -5.15
## + c_avg_yds    1      0.03 19.5 -5.10
## + height       1      0.00 19.5 -5.05
## - age          1     12.77 32.3 10.60
## 
## Step:  AIC=-8.82
## ints ~ age + c_avg_cmpp + c_avg_inter + c_avg_att + X40 + cone + 
##     shuttle + broad_jump
## 
##               Df Sum of Sq  RSS    AIC
## - c_avg_inter  1      0.18 19.8 -10.47
## - shuttle      1      0.32 19.9 -10.19
## - c_avg_att    1      0.48 20.1  -9.89
## - X40          1      0.57 20.2  -9.71
## - c_avg_cmpp   1      0.57 20.2  -9.71
## - cone         1      0.75 20.4  -9.36
## - broad_jump   1      0.89 20.5  -9.09
## <none>                     19.6  -8.82
## + c_avg_tds    1      0.15 19.5  -7.12
## + c_numyrs     1      0.12 19.5  -7.05
## + wonderlic    1      0.09 19.5  -6.99
## + weight       1      0.07 19.5  -6.96
## + c_avg_yds    1      0.05 19.6  -6.92
## + vert_leap    1      0.03 19.6  -6.88
## + c_rate       1      0.01 19.6  -6.84
## + height       1      0.00 19.6  -6.83
## + c_pct        1      0.00 19.6  -6.82
## - age          1     12.96 32.6   8.96
## 
## Step:  AIC=-10.47
## ints ~ age + c_avg_cmpp + c_avg_att + X40 + cone + shuttle + 
##     broad_jump
## 
##               Df Sum of Sq  RSS    AIC
## - shuttle      1      0.23 20.0 -12.03
## - c_avg_att    1      0.35 20.1 -11.80
## - c_avg_cmpp   1      0.41 20.2 -11.68
## - X40          1      0.80 20.6 -10.92
## <none>                     19.8 -10.47
## - cone         1      1.22 21.0 -10.13
## - broad_jump   1      1.47 21.3  -9.68
## + c_avg_tds    1      0.25 19.5  -8.98
## + c_avg_inter  1      0.18 19.6  -8.82
## + wonderlic    1      0.11 19.7  -8.68
## + c_avg_yds    1      0.11 19.7  -8.68
## + weight       1      0.06 19.7  -8.60
## + c_rate       1      0.05 19.7  -8.57
## + c_numyrs     1      0.05 19.7  -8.56
## + vert_leap    1      0.01 19.8  -8.50
## + c_pct        1      0.01 19.8  -8.49
## + height       1      0.01 19.8  -8.49
## - age          1     13.55 33.3   7.87
## 
## Step:  AIC=-12.03
## ints ~ age + c_avg_cmpp + c_avg_att + X40 + cone + broad_jump
## 
##               Df Sum of Sq  RSS    AIC
## - c_avg_att    1      0.38 20.4 -13.29
## - c_avg_cmpp   1      0.45 20.5 -13.16
## <none>                     20.0 -12.03
## - X40          1      1.45 21.5 -11.30
## - broad_jump   1      1.60 21.6 -11.02
## + c_avg_tds    1      0.26 19.8 -10.54
## + shuttle      1      0.23 19.8 -10.47
## + wonderlic    1      0.16 19.8 -10.34
## + c_avg_inter  1      0.08 19.9 -10.19
## + c_rate       1      0.06 20.0 -10.14
## + c_avg_yds    1      0.04 20.0 -10.10
## + vert_leap    1      0.04 20.0 -10.10
## + c_numyrs     1      0.03 20.0 -10.09
## + c_pct        1      0.03 20.0 -10.08
## + weight       1      0.00 20.0 -10.03
## + height       1      0.00 20.0 -10.03
## - cone         1      2.42 22.4  -9.57
## - age          1     14.06 34.1   6.73
## 
## Step:  AIC=-13.29
## ints ~ age + c_avg_cmpp + X40 + cone + broad_jump
## 
##               Df Sum of Sq  RSS    AIC
## - c_avg_cmpp   1      0.10 20.5 -15.10
## <none>                     20.4 -13.29
## - broad_jump   1      1.70 22.1 -12.16
## + c_avg_att    1      0.38 20.0 -12.03
## - X40          1      1.83 22.2 -11.93
## + shuttle      1      0.26 20.1 -11.80
## + wonderlic    1      0.14 20.3 -11.56
## - cone         1      2.07 22.5 -11.52
## + c_avg_yds    1      0.11 20.3 -11.51
## + c_avg_tds    1      0.10 20.3 -11.49
## + vert_leap    1      0.10 20.3 -11.48
## + c_avg_inter  1      0.10 20.3 -11.48
## + c_pct        1      0.03 20.4 -11.34
## + weight       1      0.02 20.4 -11.32
## + c_numyrs     1      0.00 20.4 -11.29
## + c_rate       1      0.00 20.4 -11.29
## + height       1      0.00 20.4 -11.29
## - age          1     14.48 34.9   5.63
## 
## Step:  AIC=-15.1
## ints ~ age + X40 + cone + broad_jump
## 
##               Df Sum of Sq  RSS   AIC
## <none>                     20.5 -15.1
## - broad_jump   1      1.66 22.1 -14.1
## - X40          1      1.87 22.4 -13.7
## + shuttle      1      0.27 20.2 -13.6
## - cone         1      2.02 22.5 -13.4
## + c_pct        1      0.11 20.4 -13.3
## + c_avg_cmpp   1      0.10 20.4 -13.3
## + vert_leap    1      0.09 20.4 -13.3
## + wonderlic    1      0.08 20.4 -13.3
## + c_avg_yds    1      0.06 20.4 -13.2
## + c_rate       1      0.03 20.5 -13.2
## + c_avg_att    1      0.03 20.5 -13.2
## + c_avg_tds    1      0.02 20.5 -13.1
## + weight       1      0.01 20.5 -13.1
## + c_numyrs     1      0.01 20.5 -13.1
## + height       1      0.00 20.5 -13.1
## + c_avg_inter  1      0.00 20.5 -13.1
## - age          1     14.62 35.1   3.9
# Print the coefficient table and fit statistics of the stepwise-selected model
summary(object = step_reg.scaled.w_combine.ints)
## 
## Call:
## lm(formula = ints ~ age + X40 + cone + broad_jump, data = data.scaled.w_combine.for_ints)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3891 -0.5983  0.0201  0.5724  1.8927 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.12e-16   1.24e-01    0.00    1.000    
## age         -6.49e-01   1.32e-01   -4.92  2.1e-05 ***
## X40          3.35e-01   1.90e-01    1.76    0.087 .  
## cone         2.63e-01   1.44e-01    1.83    0.076 .  
## broad_jump   3.11e-01   1.88e-01    1.66    0.106    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.776 on 34 degrees of freedom
## Multiple R-squared: 0.461,   Adjusted R-squared: 0.397 
## F-statistic: 7.26 on 4 and 34 DF,  p-value: 0.000243
# Draw the diagnostic plots for the stepwise-selected model
plot(x = step_reg.scaled.w_combine.ints)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# Enumerate candidate subset models, keeping up to 10 per subset size
leaps.scaled.w_combine.ints <- regsubsets(
    ints ~ .,
    data = data.scaled.w_combine.for_ints,
    nbest = 10
)

# Plot R-squared against subset size.
# By default subsets() positions its legend from a mouse click; in a
# non-interactive render no click is available and the call previously aborted
# with "Error: invalid coordinate lengths". Suppressing the legend lets the
# plot be produced without user interaction.
# NOTE(review): to keep an on-plot legend instead, recent versions of car
# appear to accept a position keyword (e.g. legend = "topleft") -- confirm
# against the installed car version.
subsets(leaps.scaled.w_combine.ints, statistic = "rsq", legend = FALSE)

plot of chunk unnamed-chunk-1

# Cross-validate the stepwise-selected model using m = 5 folds
cv.lm(
    df = data.scaled.w_combine.for_ints,
    form.lm = step_reg.scaled.w_combine.ints,
    m = 5
)
## Analysis of Variance Table
## 
## Response: ints
##            Df Sum Sq Mean Sq F value  Pr(>F)    
## age         1  12.61   12.61   20.92 6.1e-05 ***
## X40         1   1.59    1.59    2.63    0.11    
## cone        1   1.65    1.65    2.75    0.11    
## broad_jump  1   1.66    1.66    2.75    0.11    
## Residuals  34  20.49    0.60                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning:
## 
## As there is >1 explanatory variable, cross-validation predicted values for
## a fold are not a linear function of corresponding overall predicted
## values.  Lines that are shown for the different folds are approximate

plot of chunk unnamed-chunk-1

## 
## fold 1 
## Observations in test set: 7 
##                  3     21      24      40     42     52      61
## Predicted   -0.532 -0.887  0.0399 -0.5491 -1.043 -0.486  0.0319
## cvpred      -0.412 -0.720  0.0902 -0.4347 -0.937 -0.382  0.1445
## ints        -1.166 -1.776 -0.7602 -0.3541 -0.760 -0.760  0.0521
## CV residual -0.755 -1.055 -0.8505  0.0807  0.176 -0.379 -0.0924
## 
## Sum of squares = 2.6    Mean square = 0.37    n = 7 
## 
## fold 2 
## Observations in test set: 8 
##                  6     18     25      37     43    50      55     63
## Predicted   -0.260 -0.257 0.5261  0.1863  0.832 0.986 -0.2234 -1.033
## cvpred      -0.361 -0.239 0.7967  0.0925  1.072 1.069 -0.0951 -1.158
## ints         0.255 -0.963 0.8644  0.0521 -0.557 1.677 -0.9633 -0.354
## CV residual  0.616 -0.724 0.0677 -0.0405 -1.629 0.607 -0.8682  0.804
## 
## Sum of squares = 5.33    Mean square = 0.67    n = 8 
## 
## fold 3 
## Observations in test set: 8 
##                 5      7     16     20      28     32     49     64
## Predicted   0.309 -0.663 -0.605 -1.220  0.2635 -0.645  0.362 -0.379
## cvpred      0.169 -0.410 -0.455 -0.909  0.3201 -0.879  0.847 -0.515
## ints        1.271 -0.760 -1.166 -1.166  0.2551 -0.151 -0.557 -1.166
## CV residual 1.102 -0.350 -0.711 -0.257 -0.0649  0.728 -1.404 -0.652
## 
## Sum of squares = 4.84    Mean square = 0.6    n = 8 
## 
## fold 4 
## Observations in test set: 8 
##                 12     13     26    30    38     39     59      65
## Predicted    0.592 0.8363  0.018 0.234 0.198  1.748 -1.255  0.5822
## cvpred       0.627 1.0288 -0.187 0.112 0.337  2.108 -1.003  0.7679
## ints        -0.557 1.0675  0.661 0.864 0.864  1.271 -1.776  0.0521
## CV residual -1.184 0.0387  0.849 0.752 0.528 -0.838 -0.772 -0.7158
## 
## Sum of squares = 4.78    Mean square = 0.6    n = 8 
## 
## fold 5 
## Observations in test set: 8 
##                  1      4     15    17     19    27    46      56
## Predicted    0.190  0.804 0.1710 -0.37 0.0829 0.786 0.884 -0.2577
## cvpred      -0.394  0.961 0.0542 -0.41 0.1628 0.262 0.629 -0.2842
## ints         2.083 -0.151 0.4582  1.07 0.2551 1.474 1.677 -0.3541
## CV residual  2.477 -1.112 0.4041  1.48 0.0924 1.211 1.048 -0.0698
## 
## Sum of squares = 12.3    Mean square = 1.54    n = 8 
## 
## Overall (Sum over all 8 folds) 
##    ms 
## 0.765