# Load the qb_stats_w_combine table from its CSV file (path is relative to
# the working directory).
qb_stats_w_combine <- read.csv(
  file = "../data/qb_stats_w_combine.csv"
)

# Names of the columns used as predictors; college_stats keeps exactly these
# columns of qb_stats_w_combine, in this order.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]

# Set the response variables
# Ratio of the "tds" column to the "ints" column. Single-bracket indexing
# keeps both operands, and therefore the result, as one-column data frames.
td_int_ratio <- qb_stats_w_combine["tds"] / qb_stats_w_combine["ints"]

# Misclassification cost handed to cv.glm: the share of observations whose
# prediction `pi` lies more than 0.5 away from the observed response `r`.
cost <- function(r, pi = 0) {
  is_wrong <- abs(r - pi) > 0.5
  mean(is_wrong)
}

# Binary response: 1 where rating is at least 85, 0 where it is below 85
# (a missing rating stays NA). The response is then bound to the predictor
# columns and every row containing an NA is dropped.
bin_rating <- ifelse(qb_stats_w_combine["rating"] >= 85, 1, 0)
data.w_combine.for_bin_rating <- data.frame(
  na.omit(
    cbind(bin_rating, college_stats)
  )
)

# Logistic regression of the binary rating indicator on every other column
# of the cleaned data set.
# NOTE(review): the recorded output below reports non-convergence and fitted
# probabilities of 0 or 1, so the estimates should be read with caution.
glm.w_combine.rating <- glm(
  rating ~ .,
  family = binomial(),
  data = data.w_combine.for_bin_rating
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Odds ratios with their confidence bounds: exponentiate the coefficients and
# the confint() limits, then show them side by side.
cbind(
  OR = exp(coef(glm.w_combine.rating)),
  exp(confint(glm.w_combine.rating))
)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                     OR      2.5 %     97.5 %
## (Intercept)  0.000e+00  0.000e+00        Inf
## height       2.911e-02  0.000e+00        Inf
## weight       1.859e+01 1.415e-146 2.442e+148
## age          1.686e+05  0.000e+00        Inf
## c_avg_cmpp   1.340e-09  0.000e+00        Inf
## c_rate       1.222e-11  0.000e+00        Inf
## c_pct        1.102e+36  0.000e+00        Inf
## c_avg_inter  4.070e-27  0.000e+00        Inf
## c_avg_tds    3.136e+10  0.000e+00        Inf
## c_avg_yds    1.718e+00  1.175e-16  1.086e+16
## c_numyrs     4.999e-39  0.000e+00        Inf
## c_avg_att    4.193e+03 1.022e-143 4.102e+153
## X40          2.381e+68  0.000e+00        Inf
## wonderlic    5.560e-02 9.865e-302 8.418e+287
## cone         5.258e+32  0.000e+00        Inf
## shuttle     7.961e-141  0.000e+00        Inf
## vert_leap    4.034e-04  0.000e+00        Inf
## broad_jump   1.137e+03 2.879e-224 4.236e+208
# 5-fold cross-validation of the logistic model, scored with `cost`.
# cv.glm comes from the boot package; qualifying the call means this line no
# longer depends on boot having been attached earlier in the session.
# NOTE(review): the fold assignment is random and no seed is set in the
# visible code, so the estimate can differ between runs.
rating.w_combine.cv <- boot::cv.glm(
  data = data.w_combine.for_bin_rating,
  glmfit = glm.w_combine.rating,
  cost = cost,
  K = 5
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Keep the second element of the delta vector returned by cv.glm (per the
# boot documentation, the adjusted cross-validation estimate) and print it.
rating.w_combine.cv.error <- rating.w_combine.cv[["delta"]][2]
cat("Cross Validation Error\n", rating.w_combine.cv.error, sep = " ")
## Cross Validation Error
##  0.2957