# Load the QB statistics table from the CSV in the sibling data directory.
qb_stats <- read.csv(file = "../data/qb_stats.csv")

# Names of the predictor columns (physical attributes plus the c_* college
# statistics). The order here fixes the order of the model coefficients.
predictors <- c(
    "height", "weight", "age",
    "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter",
    "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
# Keep only those columns, as a data frame.
college_stats <- qb_stats[predictors]

# Set the response variable: share of started games that were won.
# Both operands are kept as one-column data frames, so the result is a
# one-column data frame whose column is still named "wins"; the model formula
# further down refers to the response by that name.
win_pct <- qb_stats[, "wins", drop = FALSE] /
    qb_stats[, "games_started", drop = FALSE]

# Misclassification-rate cost function handed to cv.glm.
#   r  : first argument (cv.glm supplies the observed 0/1 responses here)
#   pi : second argument (cv.glm supplies the fitted probabilities here)
# Returns the fraction of entries where |r - pi| is strictly greater than 0.5,
# i.e. where rounding the prediction at 0.5 would give the wrong class.
cost <- function(r, pi = 0) {
    misclassified <- abs(r - pi) > 0.5
    mean(misclassified)
}

# Build the modelling data set.
# Binarise the response: 1 when the win percentage is at least 0.5, else 0
# (undefined ratios stay NA and are dropped below).
bin_win_pct <- ifelse(win_pct >= 0.5, 1, 0)
# Put the response next to the predictors and discard every row that has a
# missing value in any column.
data.no_combine.for_bin_win_pct <- data.frame(
    na.omit(
        cbind(bin_win_pct, college_stats)
    )
)

# Logistic regression of the binary response (column "wins") on all remaining
# columns of the cleaned data set.
glm.no_combine.win_pct <- glm(
    formula = wins ~ .,
    family = binomial(),
    data = data.no_combine.for_bin_win_pct
)
# Odds ratios with their confidence limits: exponentiate the coefficients and
# the confint() bounds, then show them side by side.
cbind(
    OR = exp(coef(glm.no_combine.win_pct)),
    exp(confint(glm.no_combine.win_pct))
)
## Waiting for profiling to be done...
##                 OR     2.5 %    97.5 %
## (Intercept) 4.1085 2.486e-06 6.729e+06
## height      0.9473 7.593e-01 1.180e+00
## weight      1.0099 9.824e-01 1.038e+00
## age         1.1512 1.018e+00 1.309e+00
## c_avg_cmpp  1.0109 9.650e-01 1.062e+00
## c_rate      0.9969 9.552e-01 1.044e+00
## c_pct       0.9567 8.317e-01 1.079e+00
## c_avg_inter 1.0338 9.008e-01 1.186e+00
## c_avg_tds   0.9949 8.812e-01 1.124e+00
## c_avg_yds   1.0017 9.991e-01 1.004e+00
## c_numyrs    0.9187 6.600e-01 1.281e+00
## c_avg_att   0.9786 9.531e-01 1.003e+00
# 5-fold cross-validation of the logistic model, scored with the
# misclassification `cost` function defined earlier in this script.
# cv.glm is called through the boot namespace so this step does not depend on
# the package having been attached, and cost / K are passed by name rather
# than by position.
# NOTE: the folds are drawn at random, so the estimate changes from run to run
# unless a seed is set beforehand.
win_pct.cv <- boot::cv.glm(
    data = data.no_combine.for_bin_win_pct,
    glmfit = glm.no_combine.win_pct,
    cost = cost,
    K = 5
)
# delta holds two values; the second one (the adjusted cross-validation
# estimate per the boot documentation) is the one reported.
win_pct.cv.error <- win_pct.cv$delta[2]
cat("Cross Validation Error\n", win_pct.cv.error)
## Cross Validation Error
##  0.4185