# Fetch Data
qb_stats <- read.csv(file = "../gs/qb_stats_w_draft.csv")

# Names of the columns used as model predictors. The c_* columns are
# presumably college statistics and the draft_* columns draft position --
# confirm against the CSV schema.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "draft_Round", "draft_Player"
)

# Keep only the predictor columns, in the order listed above.
college_stats <- qb_stats[, predictors]

# Cost function handed to cv.glm: the misclassification rate at a 0.5 cutoff.
#   r  - observed responses
#   pi - predicted values (defaults to 0; this argument shadows the base
#        constant `pi` inside the function only)
# Returns the fraction of cases where |r - pi| is strictly greater than 0.5.
cost <- function(r, pi = 0) {
  misclassified <- abs(r - pi) > 0.5
  mean(misclassified)
}

# Set the response variables
# Touchdown-to-interception ratio. Both operands are one-column data frames,
# so the result is a one-column data frame that keeps the numerator's column
# name, "tds".
td_int_ratio <- qb_stats["tds"] / qb_stats["ints"]

# Binary response: 1 when the ratio is at least 2, 0 otherwise. The "tds"
# column name carries through, and a ratio that is NA/NaN stays NA.
bin_td_int_ratio <- ifelse(td_int_ratio >= 2, 1, 0)

# Generate clean data sets
# Response (column "tds") first, then the predictors; any row containing a
# missing value is dropped.
data.no_combine.for_bin_td_int_ratio <- data.frame(
  na.omit(
    cbind(bin_td_int_ratio, college_stats)
  )
)

# Logistic Regression
# Fit the binary response (held in column `tds` of the cleaned data set) on
# every other column of that data set.
glm.no_combine.td_int_ratio <- glm(
  formula = tds ~ .,
  data = data.no_combine.for_bin_td_int_ratio,
  family = "binomial"
)

# Exponentiate the coefficients and their confidence limits so the table
# reads as odds ratios.
exp(
  cbind(
    OR = coef(glm.no_combine.td_int_ratio),
    confint(glm.no_combine.td_int_ratio)
  )
)
## Waiting for profiling to be done...
##                     OR     2.5 %    97.5 %
## (Intercept)  8931.6872 4.547e-09 1.193e+16
## height          0.7371 4.738e-01 1.135e+00
## weight          1.0220 9.636e-01 1.082e+00
## age             1.1989 9.224e-01 1.532e+00
## c_avg_cmpp      0.9777 8.643e-01 1.066e+00
## c_rate          1.0041 8.947e-01 1.119e+00
## c_pct           1.0438 8.238e-01 1.561e+00
## c_avg_inter     0.8439 6.246e-01 1.112e+00
## c_avg_tds       0.9248 7.453e-01 1.139e+00
## c_avg_yds       1.0017 9.968e-01 1.007e+00
## c_numyrs        0.5376 2.637e-01 1.067e+00
## c_avg_att       1.0147 9.615e-01 1.087e+00
## draft_Round     0.9719 6.075e-01 1.297e+00
## draft_Player    1.0000 9.832e-01 1.018e+00
# 5-fold cross-validation of the fitted logistic model, scored with `cost`
# (misclassification rate at a 0.5 cutoff).
# cv.glm belongs to the boot package; calling it through its namespace means
# this step does not rely on library(boot) having been attached earlier.
# Folds are assigned at random, so the estimate varies from run to run unless
# a seed is set beforehand.
td_int_ratio.cv <- boot::cv.glm(
  data = data.no_combine.for_bin_td_int_ratio,
  glmfit = glm.no_combine.td_int_ratio,
  cost = cost,
  K = 5
)

# delta[1] is the raw cross-validation estimate; delta[2] is the
# bias-adjusted estimate, which is the one reported.
td_int_ratio.cv.error <- td_int_ratio.cv$delta[2]
cat("Cross Validation Error\n", td_int_ratio.cv.error)
## Cross Validation Error
##  0.09366