# Load the quarterback data set (path is relative to the working directory).
qb_stats_w_combine <- read.csv("../data/qb_stats_w_combine.csv")

# Columns used as predictors, kept in the order the models see them.
# The `c_` prefix presumably marks college statistics and the trailing group
# looks like combine measurements -- confirm against the data dictionary.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds",
  "c_avg_yds", "c_numyrs", "c_avg_att",
  "X40", "wonderlic", "cone", "shuttle", "vert_leap", "broad_jump"
)
college_stats <- qb_stats_w_combine[predictors]

# Set the response variables.
# Single-bracket indexing keeps both operands as one-column data frames, so
# the ratio is itself a one-column data frame (named after the numerator).
td_int_ratio <- qb_stats_w_combine["tds"] / qb_stats_w_combine["ints"]
# Cost function for cross-validation: the fraction of observations whose
# predicted value `pi` lies more than 0.5 away from the observed response `r`.
# With a 0/1 response and predicted probabilities this is the
# misclassification rate at a 0.5 cut-off.
cost <- function(r, pi = 0) {
  is_wrong <- abs(r - pi) > 0.5
  mean(is_wrong)
}
# Generate clean data sets.
# Binarize the `rating` column: 1 when rating is at least 85, 0 otherwise
# (missing ratings stay NA). The result keeps the column name "rating".
bin_rating <- ifelse(qb_stats_w_combine["rating"] >= 85, 1, 0)

# Put the binary response next to the predictors and keep only rows with no
# missing value in any column.
data.w_combine.for_bin_rating <- data.frame(
  na.omit(
    cbind(bin_rating, college_stats)
  )
)

# Logistic regression of the binary rating on every predictor column.
# NOTE(review): the captured output that follows reports non-convergence and
# fitted probabilities of 0 or 1, so the estimates from this fit should be
# treated with caution.
glm.w_combine.rating <- glm(
  rating ~ .,
  family = binomial(),
  data = data.w_combine.for_bin_rating
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Odds ratios (exponentiated coefficients) alongside the exponentiated
# confidence limits returned by confint() for the same model.
odds_ratio_table <- cbind(
  OR = coef(glm.w_combine.rating),
  confint(glm.w_combine.rating)
)
exp(odds_ratio_table)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## OR 2.5 % 97.5 %
## (Intercept) 0.000e+00 0.000e+00 Inf
## height 2.911e-02 0.000e+00 Inf
## weight 1.859e+01 1.415e-146 2.442e+148
## age 1.686e+05 0.000e+00 Inf
## c_avg_cmpp 1.340e-09 0.000e+00 Inf
## c_rate 1.222e-11 0.000e+00 Inf
## c_pct 1.102e+36 0.000e+00 Inf
## c_avg_inter 4.070e-27 0.000e+00 Inf
## c_avg_tds 3.136e+10 0.000e+00 Inf
## c_avg_yds 1.718e+00 1.175e-16 1.086e+16
## c_numyrs 4.999e-39 0.000e+00 Inf
## c_avg_att 4.193e+03 1.022e-143 4.102e+153
## X40 2.381e+68 0.000e+00 Inf
## wonderlic 5.560e-02 9.865e-302 8.418e+287
## cone 5.258e+32 0.000e+00 Inf
## shuttle 7.961e-141 0.000e+00 Inf
## vert_leap 4.034e-04 0.000e+00 Inf
## broad_jump 1.137e+03 2.879e-224 4.236e+208
# 5-fold cross-validation of the logistic model, scored with `cost`.
# NOTE(review): cv.glm assigns folds at random and no seed is set in the
# visible code, so the estimate can vary from run to run.
rating.w_combine.cv <- cv.glm(
  data = data.w_combine.for_bin_rating,
  glmfit = glm.w_combine.rating,
  cost = cost,
  K = 5
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# `delta` holds two estimates: the raw cross-validation error and an adjusted
# one; the second (adjusted) value is the one reported here.
rating.w_combine.cv.error <- rating.w_combine.cv[["delta"]][2]
cat(
  "Cross Validation Error\n",
  rating.w_combine.cv.error
)
## Cross Validation Error
## 0.2957