# Load the quarterback statistics table (path is relative to the working
# directory).
qb_stats <- read.csv(file = "../gs/qb_stats_w_draft.csv")

# Columns used as model predictors. Judging by the names, these are body
# measurements, college ("c_"-prefixed) statistics and draft position --
# confirm against the CSV's data dictionary.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "draft_Round", "draft_Player"
)

# Keep only the predictor columns, in the order listed above.
college_stats <- qb_stats[, predictors]

# Cost function handed to cv.glm further down: the misclassification rate.
#   r  - first argument; cv.glm supplies the observed responses here.
#   pi - second argument; cv.glm supplies the predicted values here
#        (defaults to 0 when omitted).
# Returns the proportion of elements where |r - pi| is greater than 0.5.
cost <- function(r, pi = 0) {
  is_miss <- abs(r - pi) > 0.5
  mean(is_miss)
}

# Set the response variables
# Binary outcome: 1 when completion_percentage is at least 60, otherwise 0
# (missing values stay NA). Indexing with [ rather than [[ keeps the
# "completion_percentage" column name on the result, which is the name the
# model formula refers to.
bin_cpct <- ifelse(qb_stats["completion_percentage"] >= 60, 1, 0)

# Modelling data set: outcome plus predictors, with every row that contains
# an NA in any column removed.
data.no_combine.for_bin_cpct <- data.frame(
  na.omit(cbind(bin_cpct, college_stats))
)

# Logistic regression of the binary completion-percentage outcome on every
# other column of the cleaned data set.
glm.no_combine.cpct <- glm(
  formula = completion_percentage ~ .,
  data = data.no_combine.for_bin_cpct,
  family = binomial()
)

# Print the odds-ratio table: coefficients and their confidence limits are
# on the log-odds scale, so both are exponentiated together.
exp(cbind(
  OR = coef(glm.no_combine.cpct),
  confint(glm.no_combine.cpct)
))
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                     OR     2.5 %    97.5 %
## (Intercept)  1.123e-07 1.030e-19 1.369e+04
## height       1.034e+00 7.046e-01 1.545e+00
## weight       1.043e+00 9.954e-01 1.097e+00
## age          1.207e+00 9.462e-01 1.525e+00
## c_avg_cmpp   1.052e+00 9.722e-01 1.134e+00
## c_rate       9.537e-01 8.597e-01 1.029e+00
## c_pct        1.068e+00 8.668e-01 1.408e+00
## c_avg_inter  7.548e-01 5.515e-01 9.966e-01
## c_avg_tds    1.040e+00 8.320e-01 1.311e+00
## c_avg_yds    9.989e-01 9.943e-01 1.004e+00
## c_numyrs     8.579e-01 4.093e-01 1.836e+00
## c_avg_att    9.880e-01 9.452e-01 1.032e+00
## draft_Round  7.075e-01 2.663e-01 1.082e+00
## draft_Player 1.011e+00 9.936e-01 1.041e+00
# Estimate the out-of-sample misclassification rate of the logistic model
# with 5-fold cross-validation.
#
# cv.glm assigns observations to folds at random, so the seed is fixed to make
# the reported error reproducible from run to run. The seed value itself is
# arbitrary; previously recorded output was produced without it and will not
# match exactly.
set.seed(1)

# boot:: is spelled out so the call does not depend on library(boot) having
# been attached earlier in the session; cost and K are passed by name rather
# than by position.
cpct.cv <- boot::cv.glm(
  data = data.no_combine.for_bin_cpct,
  glmfit = glm.no_combine.cpct,
  cost = cost,
  K = 5
)

# delta[1] is the raw cross-validation estimate; delta[2] is the adjusted
# (bias-corrected) estimate, which is the one reported here.
cpct.cv.error <- cpct.cv$delta[2]
cat("Cross Validation Error\n", cpct.cv.error)
## Cross Validation Error
##  0.1749