# Load the quarterback statistics table from disk
qb_stats <- read.csv("../gs/qb_stats_w_draft.csv")

# Columns used as predictors (referred to as the college predictors)
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "draft_Round", "draft_Player"
)

# Keep only the predictor columns, in the order listed above
college_stats <- qb_stats[predictors]

# Cost function: share of observations whose value `r` differs from `pi`
# by more than 0.5 in absolute terms. `pi` defaults to 0.
cost <- function(r, pi = 0) {
  off_by_more_than_half <- abs(r - pi) > 0.5
  mean(off_by_more_than_half)
}

# Set the response variable: 1 when rating is at least 85, otherwise 0.
# Single-bracket indexing is deliberate: the result keeps the column name
# "rating", which becomes the response column name after cbind().
bin_rating <- ifelse(qb_stats["rating"] >= 85, 1, 0)

# Generate the clean data set: response plus predictors, with every row
# that contains a missing value removed
data.no_combine.for_bin_rating <- data.frame(
  na.omit(
    cbind(bin_rating, college_stats)
  )
)

# Logistic regression of the binary rating on all remaining columns
glm.no_combine.rating <- glm(
  rating ~ .,
  family = binomial(),
  data = data.no_combine.for_bin_rating
)

# Odds ratios with their confidence bounds: exponentiate the coefficients
# and the confint() interval, then show them side by side
cbind(
  OR = exp(coef(glm.no_combine.rating)),
  exp(confint(glm.no_combine.rating))
)
## Waiting for profiling to be done...
##                     OR     2.5 %    97.5 %
## (Intercept)  0.0008189 1.103e-13 1.919e+06
## height       0.9591696 6.874e-01 1.351e+00
## weight       1.0295471 9.873e-01 1.074e+00
## age          1.1402678 9.327e-01 1.385e+00
## c_avg_cmpp   1.0290543 9.648e-01 1.094e+00
## c_rate       0.9966562 9.356e-01 1.054e+00
## c_pct        1.0100546 8.649e-01 1.213e+00
## c_avg_inter  0.9804169 7.790e-01 1.220e+00
## c_avg_tds    0.9989394 8.253e-01 1.201e+00
## c_avg_yds    0.9988480 9.950e-01 1.003e+00
## c_numyrs     0.6284896 3.494e-01 1.132e+00
## c_avg_att    0.9934459 9.556e-01 1.034e+00
## draft_Round  0.8307603 5.050e-01 1.081e+00
## draft_Player 1.0082191 9.958e-01 1.025e+00
# 5-fold cross-validation of the logistic model using the cost function
# defined above.
# NOTE(review): cv.glm is presumably provided by the boot package, attached
# outside this section -- confirm.
rating.cv <- cv.glm(
  data = data.no_combine.for_bin_rating,
  glmfit = glm.no_combine.rating,
  cost = cost,
  K = 5
)

# Second element of delta (the adjusted estimate, per the boot docs)
rating.cv.error <- rating.cv[["delta"]][2]
cat("Cross Validation Error\n", rating.cv.error)
## Cross Validation Error
##  0.156