# Load the quarterback data set from disk
qb_stats <- read.csv("../data/qb_stats.csv")

# Names of the columns used as model predictors: height, weight, age and the
# c_* columns. The order here determines the column order of college_stats.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att"
)

# Keep only the predictor columns (result is a data frame)
college_stats <- qb_stats[predictors]

# Cost function: proportion of elements where the absolute difference between
# `r` and `pi` is strictly greater than 0.5.
#   r  : numeric vector (passed to cv.glm further down as the cost argument,
#        where it receives the observed responses)
#   pi : numeric vector or scalar compared against `r`; defaults to 0. The name
#        shadows the built-in constant `pi` only inside this function.
# Returns a single number in [0, 1].
cost <- function(r, pi = 0) {
  is_miss <- abs(r - pi) > 0.5
  mean(is_miss)
}

# Set the response variables
# Ratio of tds to ints. Both operands are one-column data frames, so the result
# is a one-column data frame that keeps the left operand's column name ("tds").
td_int_ratio <- qb_stats[, "tds", drop = FALSE] /
  qb_stats[, "ints", drop = FALSE]
# Binary indicator: 1 when the ratio is at least 2, 0 when it is below 2;
# NA/NaN ratios stay NA. The comparison yields a matrix, so the result is a
# one-column matrix whose column is still named "tds".
bin_td_int_ratio <- ifelse(td_int_ratio >= 2, 1, 0)

# Build the modelling data set: the binary indicator column-bound with the
# predictor columns, with every row containing a missing value dropped.
data.no_combine.for_bin_td_int_ratio <- data.frame(
  na.omit(
    cbind(bin_td_int_ratio, college_stats)
  )
)

# Logistic regression (binomial family) of the `tds` column on all remaining
# columns of the data set. Here `tds` is the binary indicator built above: it
# carries that name because the ratio inherited it from qb_stats["tds"].
glm.no_combine.td_int_ratio <- glm(
  formula = tds ~ .,
  data = data.no_combine.for_bin_td_int_ratio,
  family = "binomial"
)

# Exponentiate the coefficients and their confidence limits to put them on the
# odds-ratio scale; the table is printed (recorded output follows).
exp(
  cbind(
    OR = coef(glm.no_combine.td_int_ratio),
    confint(glm.no_combine.td_int_ratio)
  )
)
## Waiting for profiling to be done...
##                   OR     2.5 %    97.5 %
## (Intercept) 481.1729 1.897e-09 5.060e+13
## height        0.7387 4.896e-01 1.106e+00
## weight        1.0278 9.747e-01 1.084e+00
## age           1.2530 1.015e+00 1.538e+00
## c_avg_cmpp    0.9899 8.890e-01 1.072e+00
## c_rate        1.0055 9.073e-01 1.110e+00
## c_pct         1.0364 8.342e-01 1.465e+00
## c_avg_inter   0.8681 6.649e-01 1.116e+00
## c_avg_tds     0.9391 7.583e-01 1.156e+00
## c_avg_yds     1.0015 9.969e-01 1.006e+00
## c_numyrs      0.7207 4.039e-01 1.317e+00
## c_avg_att     1.0052 9.600e-01 1.063e+00
# 5-fold cross-validation of the fitted model, scored with `cost`.
# NOTE(review): cv.glm is presumably boot::cv.glm, loaded outside this view;
# its fold assignment is random, so confirm a seed is set earlier if the
# reported error needs to be reproducible.
td_int_ratio.cv <- cv.glm(
  data = data.no_combine.for_bin_td_int_ratio,
  glmfit = glm.no_combine.td_int_ratio,
  cost = cost,
  K = 5
)

# Take the second element of `delta` (per the boot documentation, the
# adjusted cross-validation estimate) and print it.
td_int_ratio.cv.error <- td_int_ratio.cv[["delta"]][2]
cat("Cross Validation Error\n", td_int_ratio.cv.error)
## Cross Validation Error
##  0.08658