# Load the quarterback data set (NFL stats joined with combine measurements)
# from the project's data directory; the path is relative to the working dir.
qb_stats_w_combine <- read.csv(file = "../data/qb_stats_w_combine.csv")
# Columns used as predictors: physical attributes, college passing stats
# (the c_* columns) and combine measurements.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
# Keep only the predictor columns, in the order listed above.
college_stats <- qb_stats_w_combine[predictors]
# Misclassification cost handed to cv.glm() further down.
#   r  - first vector (cv.glm supplies the observed 0/1 responses here)
#   pi - second vector (cv.glm supplies the predicted values here); defaults
#        to 0 and shadows the base constant `pi` inside this function only
# Returns the fraction of positions where the two differ by more than 0.5.
cost <- function(r, pi = 0) {
  misclassified <- abs(r - pi) > 0.5
  mean(misclassified)
}
# Build the modelling data set.
# Binary response: 1 when completion_percentage is at least 60, otherwise 0.
# Indexing with ["..."] keeps a one-column shape, so the response column in
# the combined data frame is still called completion_percentage.
bin_cpct <- ifelse(qb_stats_w_combine["completion_percentage"] >= 60, 1, 0)
# Attach the predictors and drop every row that has a missing value.
data.w_combine.for_bin_cpct <- data.frame(
  na.omit(
    cbind(bin_cpct, college_stats)
  )
)
# Logistic regression of the binary completion-percentage indicator on every
# other column of the cleaned data set.
# NOTE(review): the warnings recorded below (non-convergence, fitted
# probabilities of 0 or 1) suggest the classes may be perfectly separable with
# this many predictors -- confirm before interpreting the coefficients.
glm.w_combine.cpct <- glm(
  formula = completion_percentage ~ .,
  family = binomial(link = "logit"),
  data = data.w_combine.for_bin_cpct
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print odds ratios with their confidence limits: exponentiate the
# coefficients and the confint() bounds, then bind them side by side.
cbind(
  OR = exp(coef(glm.w_combine.cpct)),
  exp(confint(glm.w_combine.cpct))
)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## OR 2.5 % 97.5 %
## (Intercept) Inf 0.000e+00 Inf
## height 2.125e-15 0.000e+00 Inf
## weight 5.835e+02 7.421e-199 2.542e+195
## age 8.384e+15 0.000e+00 Inf
## c_avg_cmpp 1.329e+05 0.000e+00 Inf
## c_rate 9.356e+05 0.000e+00 Inf
## c_pct 1.535e-19 0.000e+00 Inf
## c_avg_inter 1.362e-03 0.000e+00 Inf
## c_avg_tds 6.114e-04 0.000e+00 Inf
## c_avg_yds 6.040e-01 3.276e-12 3.418e+12
## c_numyrs 6.929e+11 0.000e+00 Inf
## c_avg_att 7.351e-02 4.184e-260 1.149e+263
## X40 2.373e+33 0.000e+00 Inf
## wonderlic 1.303e+03 6.275e-275 1.539e+274
## cone 3.688e+111 0.000e+00 Inf
## shuttle 4.516e-254 0.000e+00 Inf
## vert_leap 5.478e-06 0.000e+00 Inf
## broad_jump 3.670e-02 0.000e+00 Inf
# 5-fold cross-validation of the logistic model, scored with the
# misclassification cost defined earlier.
# NOTE(review): fold assignment is random and no set.seed() call is visible in
# this part of the script, so the reported error may vary between runs.
cpct.w_combine.cv <- cv.glm(
  data = data.w_combine.for_bin_cpct,
  glmfit = glm.w_combine.cpct,
  cost = cost,
  K = 5
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Keep the second element of delta (per the boot documentation this is the
# bias-adjusted cross-validation estimate) and print it under a heading.
cpct.w_combine.cv.error <- cpct.w_combine.cv$delta[2]
cat(
  "Cross Validation Error\n",
  cpct.w_combine.cv.error,
  sep = " "
)
## Cross Validation Error
## 0.3373