# Load the QB stats-with-combine table from disk
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Names of the columns used as model inputs (labelled the "college
# predictors" by the original author); order here fixes the column order
# of `college_stats`.
predictors <- c(
    "height", "weight", "age",
    "c_avg_cmpp", "c_rate", "c_pct",
    "c_avg_inter", "c_avg_tds", "c_avg_yds",
    "c_numyrs", "c_avg_att",
    "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)

# Keep only the predictor columns, all rows
college_stats <- qb_stats_w_combine[, predictors]

# Misclassification-rate cost function.
#   r  : observed responses
#   pi : predicted values compared against `r` (defaults to 0)
# Returns the fraction of elements where |r - pi| is strictly greater
# than 0.5; a difference of exactly 0.5 is not counted as an error.
cost <- function(r, pi = 0) {
    is_miss <- abs(r - pi) > 0.5
    mean(is_miss)
}

# Build the modelling data set for the binary completion-percentage outcome.
# The outcome is 1 when completion_percentage is at least 60 and 0 when it
# is below 60; missing values stay NA. Single-bracket indexing keeps the
# "completion_percentage" column name on the result, which is the name the
# outcome column carries after cbind().
bin_cpct <- ifelse(qb_stats_w_combine["completion_percentage"] >= 60, 1, 0)

# Attach the predictors and drop every row that has an NA in any column
data.w_combine.for_bin_cpct <- data.frame(
    na.omit(cbind(bin_cpct, college_stats))
)

# Logistic regression of the binary outcome on every other column of the
# cleaned data set.
# NOTE(review): the recorded run below reports non-convergence and fitted
# probabilities of 0 or 1, so the estimates should be read with caution.
glm.w_combine.cpct <- glm(
    completion_percentage ~ .,
    family = binomial(link = "logit"),
    data = data.w_combine.for_bin_cpct
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the odds ratios (exponentiated coefficients) alongside their
# exponentiated 95% confidence bounds
exp(cbind(
    OR = coef(glm.w_combine.cpct),
    confint(glm.w_combine.cpct, level = 0.95)
))
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                     OR      2.5 %     97.5 %
## (Intercept)        Inf  0.000e+00        Inf
## height       2.125e-15  0.000e+00        Inf
## weight       5.835e+02 7.421e-199 2.542e+195
## age          8.384e+15  0.000e+00        Inf
## c_avg_cmpp   1.329e+05  0.000e+00        Inf
## c_rate       9.356e+05  0.000e+00        Inf
## c_pct        1.535e-19  0.000e+00        Inf
## c_avg_inter  1.362e-03  0.000e+00        Inf
## c_avg_tds    6.114e-04  0.000e+00        Inf
## c_avg_yds    6.040e-01  3.276e-12  3.418e+12
## c_numyrs     6.929e+11  0.000e+00        Inf
## c_avg_att    7.351e-02 4.184e-260 1.149e+263
## X40          2.373e+33  0.000e+00        Inf
## wonderlic    1.303e+03 6.275e-275 1.539e+274
## cone        3.688e+111  0.000e+00        Inf
## shuttle     4.516e-254  0.000e+00        Inf
## vert_leap    5.478e-06  0.000e+00        Inf
## broad_jump   3.670e-02  0.000e+00        Inf
# 5-fold cross-validation of the logistic model, scored with `cost`.
# cv.glm lives in the boot package; it is called with an explicit namespace
# so this line does not depend on boot having been attached elsewhere.
# `cost` and `K` are passed by name rather than by position.
# Fold assignment is random: call set.seed() beforehand if the reported
# error needs to be reproducible between runs.
cpct.w_combine.cv <- boot::cv.glm(
    data = data.w_combine.for_bin_cpct,
    glmfit = glm.w_combine.cpct,
    cost = cost,
    K = 5
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Take the second element of the cross-validation `delta` vector (per the
# boot documentation, the bias-adjusted estimate) and report it
cpct.w_combine.cv.error <- cpct.w_combine.cv[["delta"]][2]
cat(
    "Cross Validation Error\n",
    cpct.w_combine.cv.error
)
## Cross Validation Error
##  0.3373