library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the three exported sheets from the working directory.
# The two comparison sheets go through base read.csv(); later code refers to
# columns X2B and X3B, which is how read.csv() renames headers that begin with
# a digit (presumably "2B" and "3B" in the sheet -- confirm against the CSV).
player_comp_data <- read.csv(file = "Balego Player Comparisons - Sheet1.csv")
org_comp_data <- read.csv(file = "Balego Org. Comps - Sheet1.csv")
# The projection sheet is read with readr, so model_data is a tibble.
model_data <- read_csv(file = "Cam Balego Projection Data - Sheet1.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## Name = col_character()
## )
## See spec(...) for full column specifications.
# MLB is parsed as a number by read_csv(); convert it to TRUE/FALSE so it can
# serve as the binary response of the logistic model fitted further down.
model_data[["MLB"]] <- as.logical(model_data[["MLB"]])

# Extra-base-hit rate: share of hits that were doubles, triples or home runs.
# NOTE(review): a row with H == 0 yields NaN here -- confirm none exist.
player_comp_data <- mutate(
  player_comp_data,
  XBH_Rate = (X2B + X3B + HR) / H
)
# Line chart of one batting statistic against Age, with one colored line per
# player in player_comp_data. Replaces six copy-pasted ggplot() calls that
# differed only in the column plotted and the label text.
#   stat       - name (string) of the player_comp_data column for the y-axis
#   y_label    - y-axis label
#   title_stat - statistic name as spelled in the title; defaults to y_label
# Returns the ggplot object; called at top level it is printed automatically.
plot_player_comp <- function(stat, y_label, title_stat = y_label) {
  # .data[[stat]] looks the column up by its string name inside aes().
  ggplot(player_comp_data, aes(Age, .data[[stat]], color = Name)) +
    ylab(y_label) +
    geom_line() +
    ggtitle(paste("Cam Balego", title_stat, "Comparisons"))
}

# The axis label and the title spell two of the statistics differently
# ("XBH %" vs "XBH%", "SLG %" vs "SLG%"), hence the third argument.
plot_player_comp("XBH_Rate", "XBH %", "XBH%")
plot_player_comp("AVG", "BA")
plot_player_comp("OPS", "OPS")
plot_player_comp("SO", "Strikeouts")
plot_player_comp("SLG", "SLG %", "SLG%")
plot_player_comp("HR", "HR")
# Recode Level as a factor whose levels run from the lowest listed level to
# the highest. factor() is called without ordered = TRUE, so despite the
# variable name the result is a plain factor with a fixed level order.
# NOTE(review): any Level value not listed below becomes NA -- confirm the
# sheet only contains these five codes.
level <- org_comp_data[["Level"]]
ordered_level <- factor(
  x = level,
  levels = c("R", "SS", "LoA", "HiA", "AA")
)
org_comp_data[["Level"]] <- ordered_level
# Line chart of one batting statistic against Year, with one colored line per
# player in org_comp_data. Replaces four copy-pasted ggplot() calls that
# differed only in the statistic plotted.
#   stat - name (string) of the org_comp_data column; also used verbatim as
#          the y-axis label and as the last word of the title
# Returns the ggplot object; called at top level it is printed automatically.
plot_org_comp <- function(stat) {
  # .data[[stat]] looks the column up by its string name inside aes().
  ggplot(org_comp_data, aes(Year, .data[[stat]], color = Name)) +
    ylab(stat) +
    geom_line() +
    ggtitle(paste("Cam Balego vs Organization Catcher", stat)) +
    # Offer every integer from 0 to 2100 as a break so the x-axis is labelled
    # in whole years rather than fractional ones.
    scale_x_continuous(breaks = 0:2100)
}

plot_org_comp("AVG")
plot_org_comp("OPS")
plot_org_comp("OBP")
plot_org_comp("HR")
# Hold out the first row as the case to predict (presumably Cam Balego's own
# line -- confirm against the sheet) and fit on rows 2 through 69.
# NOTE(review): the 2:69 range is hard-coded; if the sheet has fewer than 69
# rows the extra indices become all-NA rows, and any rows past 69 are ignored.
test <- model_data[1, ]
train <- model_data[2:69, ]

# Logistic regression of reaching MLB on four rate statistics.
# NOTE(review): OPS is by definition OBP + SLG, so these predictors are
# nearly collinear; the very large standard errors on OPS, SLG and OBP in the
# summary are consistent with that. Consider dropping OPS.
# NOTE(review): the object name `log` shadows base::log as a variable.
log <- glm(
  formula = MLB ~ OPS + AVG + SLG + OBP,
  family = "binomial",
  data = train
)
summary(log)
##
## Call:
## glm(formula = MLB ~ OPS + AVG + SLG + OBP, family = "binomial",
## data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9014 -0.9401 -0.4657 0.9874 2.2784
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.996 3.579 -3.072 0.00212 **
## OPS 21.191 516.774 0.041 0.96729
## AVG 14.992 17.501 0.857 0.39164
## SLG -16.791 515.789 -0.033 0.97403
## OBP -5.808 517.009 -0.011 0.99104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 90.468 on 67 degrees of freedom
## Residual deviance: 75.110 on 63 degrees of freedom
## AIC: 85.11
##
## Number of Fisher Scoring iterations: 5
# Predicted probability (type = "response") of MLB for the held-out row.
predict(object = log, newdata = test, type = "response")
## 1
## 0.474364
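Chance of Reaching MLB: 47.4% (the logistic model's predicted probability, 0.474, for the held-out first row). None of the four predictors is individually significant, and the standard errors on OPS, SLG and OBP are very large, so treat this as a rough estimate.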
# Ordinary least squares regression of career earnings on counting and rate
# batting statistics, fitted on the same training rows as the MLB model.
# NOTE(review): OPS appears alongside OBP and SLG here as well, so those three
# coefficients are nearly collinear and hard to interpret individually.
earnings <- lm(
  formula = Career_Earnings ~ AVG + H + HR + SO + BB + OBP + SLG + OPS,
  data = train
)
summary(earnings)
##
## Call:
## lm(formula = Career_Earnings ~ AVG + H + HR + SO + BB + OBP +
## SLG + OPS, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5250215 -1508161 -811676 -212934 25374216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4482515 8429590 -0.532 0.597
## AVG -14649208 51415584 -0.285 0.777
## H 3991 8801 0.453 0.652
## HR 3018 45321 0.067 0.947
## SO 1257 8131 0.155 0.878
## BB -4575 11536 -0.397 0.693
## OBP 414670466 1173832968 0.353 0.725
## SLG 410691338 1171892104 0.350 0.727
## OPS -400705545 1171733503 -0.342 0.734
##
## Residual standard error: 4954000 on 59 degrees of freedom
## Multiple R-squared: 0.09237, Adjusted R-squared: -0.0307
## F-statistic: 0.7505 on 8 and 59 DF, p-value: 0.647
# Point prediction of career earnings for the held-out row.
predict(object = earnings, newdata = test)
## 1
## 1058331
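Predicted Career Earnings: $1,058,331. The regression explains little of the variation in earnings (multiple R-squared 0.092, overall F-test p-value 0.647), so this figure is a rough estimate at best.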