# Load the quarterback statistics table from disk.
qb_stats <- read.csv(file = "../gs/qb_stats_w_draft.csv")
# Names of the predictor columns. The order is significant: it fixes the
# column order of `college_stats` and therefore the order of the fitted
# coefficients.
predictors <- c(
  "height", "weight", "age",
  "c_avg_cmpp", "c_rate", "c_pct", "c_avg_inter", "c_avg_tds", "c_avg_yds",
  "c_numyrs", "c_avg_att",
  "draft_Round", "draft_Player"
)
college_stats <- qb_stats[predictors]
# Set the response variable: wins divided by games started. Both operands are
# one-column data frames, so the result is a one-column data frame that keeps
# the name `wins`; the model formula further down refers to it by that name.
win_pct <- qb_stats["wins"] / qb_stats["games_started"]
# Cost function handed to the cross-validation routine: the share of
# observations whose value in `r` differs from the matching value in `pi`
# by more than 0.5.
#   r  - numeric vector of observed values
#   pi - numeric vector (or scalar) of predictions; defaults to 0
# Returns a single number between 0 and 1.
cost <- function(r, pi = 0) {
  missed <- abs(r - pi) > 0.5
  mean(missed)
}
# Build the modelling data set.
# Binarise the response: 1 when the win percentage is at least 0.5, else 0.
# Missing or undefined percentages stay NA and are dropped below.
bin_win_pct <- ifelse(win_pct >= 0.5, 1, 0)
# Attach the predictors and discard every row containing a missing value.
data.no_combine.for_bin_win_pct <- data.frame(
  na.omit(
    cbind(bin_win_pct, college_stats)
  )
)
# Logistic regression of the binary `wins` column on every other column of
# the cleaned data set.
glm.no_combine.win_pct <- glm(
  formula = wins ~ .,
  family = binomial(),
  data = data.no_combine.for_bin_win_pct
)
# Odds ratios and their interval bounds: exponentiate the coefficients and
# the confint() limits, then show them side by side.
cbind(
  OR = exp(coef(glm.no_combine.win_pct)),
  exp(confint(glm.no_combine.win_pct))
)
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 17.0469 2.530e-06 1.261e+08
## height 0.9196 7.213e-01 1.169e+00
## weight 1.0211 9.916e-01 1.052e+00
## age 1.1214 9.693e-01 1.303e+00
## c_avg_cmpp 1.0259 9.763e-01 1.082e+00
## c_rate 0.9954 9.504e-01 1.046e+00
## c_pct 0.9455 8.113e-01 1.076e+00
## c_avg_inter 1.0980 9.418e-01 1.282e+00
## c_avg_tds 1.0068 8.843e-01 1.148e+00
## c_avg_yds 1.0015 9.987e-01 1.004e+00
## c_numyrs 0.8517 5.748e-01 1.261e+00
## c_avg_att 0.9701 9.408e-01 9.982e-01
## draft_Round 0.9347 7.748e-01 1.114e+00
## draft_Player 1.0043 9.962e-01 1.013e+00
# 5-fold cross-validation of the logistic model, scored with `cost`.
# `cv.glm` lives in the boot package; it is called through its namespace so
# this step does not depend on boot having been attached earlier. `cost` and
# `K` are passed by name rather than by position.
# NOTE(review): the fold assignment is random, so the reported error changes
# from run to run unless a seed is set beforehand.
win_pct.cv <- boot::cv.glm(
  data = data.no_combine.for_bin_win_pct,
  glmfit = glm.no_combine.win_pct,
  cost = cost,
  K = 5
)
# Second component of `delta`: the adjusted cross-validation estimate
# (the first component is the raw estimate).
win_pct.cv.error <- win_pct.cv$delta[2]
# A trailing newline keeps later console output from running onto this line.
cat("Cross Validation Error\n", win_pct.cv.error, "\n")
## Cross Validation Error
## 0.4209