# Load the quarterback statistics table from disk
qb_stats <- read.csv("../data/qb_stats.csv")

# Columns used as predictors; the order here fixes the column order of
# college_stats
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct",
  "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att"
)

# Keep only the predictor columns
college_stats <- qb_stats[predictors]

# Misclassification cost: the fraction of elements for which `r` and `pi`
# differ by more than 0.5. It is passed to cv.glm() below, which calls it
# with the observed responses first and the predicted values second.
cost <- function(r, pi = 0) {
  off_by_more_than_half <- abs(r - pi) > 0.5
  mean(off_by_more_than_half)
}

# Set the response variable: 0 when completion_percentage is below 60,
# 1 otherwise (missing values stay NA and are dropped below).
bin_cpct <- ifelse(qb_stats["completion_percentage"] < 60, 0, 1)

# Generate clean data sets. The response column is named explicitly so the
# model formula does not depend on a column name inherited implicitly from
# the one-column matrix that ifelse() returns above. Rows with any missing
# value are removed.
data.no_combine.for_bin_cpct <- data.frame(
  na.omit(cbind(completion_percentage = as.vector(bin_cpct), college_stats))
)

# Logistic Regression: binary completion indicator on every other column
glm.no_combine.cpct <- glm(
  formula = completion_percentage ~ .,
  data = data.no_combine.for_bin_cpct,
  family = binomial()
)

# Exponentiated coefficients (odds ratios) alongside their confidence
# intervals
exp(cbind(OR = coef(glm.no_combine.cpct), confint(glm.no_combine.cpct)))

## Waiting for profiling to be done...

##                    OR     2.5 %  97.5 %
## (Intercept) 4.326e-08 7.338e-19 417.082
## height      1.101e+00 7.789e-01   1.579
## weight      1.023e+00 9.816e-01   1.068
## age         1.280e+00 1.065e+00   1.544
## c_avg_cmpp  1.067e+00 9.985e-01   1.137
## c_rate      9.714e-01 8.968e-01   1.035
## c_pct       9.945e-01 8.433e-01   1.232
## c_avg_inter 7.781e-01 6.087e-01   0.973
## c_avg_tds   1.023e+00 8.422e-01   1.250
## c_avg_yds   9.991e-01 9.953e-01   1.003
## c_numyrs    9.410e-01 5.486e-01   1.662
## c_avg_att   9.795e-01 9.456e-01   1.013

# 5-fold cross-validation of the logistic model using the misclassification
# cost defined above. cv.glm() lives in the boot package, so it is called
# with an explicit namespace instead of relying on boot being attached.
cpct.cv <- boot::cv.glm(
  data = data.no_combine.for_bin_cpct,
  glmfit = glm.no_combine.cpct,
  cost = cost,
  K = 5
)

# delta[2] is the second component of cv.glm's estimate, documented by boot
# as the bias-adjusted cross-validation error.
cpct.cv.error <- cpct.cv$delta[2]
cat("Cross Validation Error\n", cpct.cv.error)

## Cross Validation Error
##  0.1483