# Fetch Data
qb_stats <- read.csv(file = "../data/qb_stats.csv")

# Column names used as model predictors; college_stats is the slice of
# qb_stats restricted to exactly these columns, in this order.
predictors <- c(
  "height", "weight", "age", "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
college_stats <- qb_stats[, predictors, drop = FALSE]

# Establish the cost function
# Misclassification rate: the share of observations whose observed value `r`
# and fitted value `pi` differ by more than 0.5 (a gap of exactly 0.5 is not
# counted). `pi` defaults to 0 and masks R's built-in constant of the same
# name inside this function only.
cost <- function(r, pi = 0) {
  is_miss <- abs(r - pi) > 0.5
  mean(is_miss)
}

# Set the response variables
# Binary response: 0 where rating is below 85, 1 otherwise. Single-bracket
# indexing is deliberate: it keeps a one-column result whose column name
# "rating" survives cbind() below and is the name the model formula uses.
bin_rating <- ifelse(test = qb_stats["rating"] < 85, yes = 0, no = 1)

# Bind the response to the predictors and keep only rows without any NA.
data.no_combine.for_bin_rating <- local({
  combined <- cbind(bin_rating, college_stats)
  data.frame(na.omit(combined))
})

# Logistic Regression
# Fit a binomial GLM of `rating` on every other column of the cleaned data.
glm.no_combine.rating <- glm(
  formula = rating ~ .,
  data = data.no_combine.for_bin_rating,
  family = binomial()
)

# Print the exponentiated coefficients (column "OR") next to the
# exponentiated confint() bounds for each term.
local({
  odds_ratio <- coef(glm.no_combine.rating)
  conf_bounds <- confint(glm.no_combine.rating)
  exp(cbind(OR = odds_ratio, conf_bounds))
})
## Waiting for profiling to be done...
##                    OR     2.5 %    97.5 %
## (Intercept) 0.0004256 4.846e-13 1.422e+05
## height      0.9829684 7.226e-01 1.346e+00
## weight      1.0188721 9.798e-01 1.059e+00
## age         1.2159525 1.033e+00 1.433e+00
## c_avg_cmpp  1.0391946 9.811e-01 1.101e+00
## c_rate      1.0071943 9.525e-01 1.064e+00
## c_pct       0.9677730 8.328e-01 1.130e+00
## c_avg_inter 0.9167108 7.510e-01 1.108e+00
## c_avg_tds   0.9682451 8.092e-01 1.154e+00
## c_avg_yds   0.9986725 9.952e-01 1.002e+00
## c_numyrs    0.7715008 4.861e-01 1.245e+00
## c_avg_att   0.9926907 9.614e-01 1.025e+00
# 5-fold cross-validation of the fitted model, scored with `cost`.
# NOTE(review): per the boot documentation cv.glm assigns folds at random, so
# this estimate can differ between runs unless a seed is set beforehand.
rating.cv <- cv.glm(
  data = data.no_combine.for_bin_rating,
  glmfit = glm.no_combine.rating,
  cost = cost,
  K = 5
)

# The second element of `delta` is the adjusted cross-validation estimate
# (the first is the raw estimate), per the boot documentation.
rating.cv.error <- rating.cv[["delta"]][2]
cat("Cross Validation Error\n", rating.cv.error)
## Cross Validation Error
##  0.1586