# Load the quarterback statistics table
qb_stats <- read.csv("../data/qb_stats.csv")
# Columns used as predictors: height, weight, age and the c_* college stats
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter",
  "c_avg_tds", "c_avg_yds", "c_numyrs", "c_avg_att"
)
# Keep only the predictor columns
college_stats <- qb_stats[, predictors]
# Cost function: the fraction of observations for which the value `pi`
# (0 when not supplied) lies more than 0.5 away from the response `r`,
# i.e. a misclassification rate when `r` is 0/1 and `pi` is a probability
cost <- function(r, pi = 0) {
  is_miss <- abs(r - pi) > 0.5
  mean(is_miss)
}
# Set the response variables
# 1 when completion_percentage is at least 60, 0 when it is below 60.
# Single-bracket indexing keeps a one-column structure, so the result still
# carries the "completion_percentage" column name into the cbind() below.
bin_cpct <- ifelse(qb_stats["completion_percentage"] >= 60, 1, 0)
# Generate clean data sets: bind response and predictors, then drop every
# row that contains a missing value
data.no_combine.for_bin_cpct <- data.frame(
  na.omit(
    cbind(bin_cpct, college_stats)
  )
)
# Logistic regression: model completion_percentage on every other column
# of the cleaned data set using the binomial family
glm.no_combine.cpct <- glm(
  completion_percentage ~ .,
  family = binomial(),
  data = data.no_combine.for_bin_cpct
)
# Odds ratios with confidence bounds: exponentiate the coefficients
# alongside the limits returned by confint()
exp(cbind(
  OR = coef(glm.no_combine.cpct),
  confint(glm.no_combine.cpct)
))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 4.326e-08 7.338e-19 417.082
## height 1.101e+00 7.789e-01 1.579
## weight 1.023e+00 9.816e-01 1.068
## age 1.280e+00 1.065e+00 1.544
## c_avg_cmpp 1.067e+00 9.985e-01 1.137
## c_rate 9.714e-01 8.968e-01 1.035
## c_pct 9.945e-01 8.433e-01 1.232
## c_avg_inter 7.781e-01 6.087e-01 0.973
## c_avg_tds 1.023e+00 8.422e-01 1.250
## c_avg_yds 9.991e-01 9.953e-01 1.003
## c_numyrs 9.410e-01 5.486e-01 1.662
## c_avg_att 9.795e-01 9.456e-01 1.013
# 5-fold cross-validation of the fitted model, scored with `cost`
cpct.cv <- cv.glm(
  data = data.no_combine.for_bin_cpct,
  glmfit = glm.no_combine.cpct,
  cost = cost,
  K = 5
)
# Take the second element of `delta`
# NOTE(review): per the boot documentation this is the adjusted
# cross-validation estimate (the first is the raw one) - confirm
cpct.cv.error <- cpct.cv$delta[2]
cat("Cross Validation Error\n", cpct.cv.error)
## Cross Validation Error
## 0.1483