Cam Balego Player Comp

library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the three CSV inputs used below.
# The two comparison sheets go through base read.csv() (data.frame, column
# names made syntactic); the projection sheet goes through readr::read_csv()
# (tibble).
# NOTE: the projection file name contains two consecutive spaces before the
# hyphen; it is reproduced exactly.
player_comp_data <- read.csv(file = "Balego Player Comparisons - Sheet1.csv")
org_comp_data <- read.csv(file = "Balego Org. Comps - Sheet1.csv")
model_data <- read_csv(file = "Cam Balego Projection Data  - Sheet1.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Name = col_character()
## )
## See spec(...) for full column specifications.
# MLB is the response of the logistic model fitted further down; convert the
# column to TRUE/FALSE before modelling.
model_data[["MLB"]] <- as.logical(model_data[["MLB"]])

Player Comp Visualizations

# Add XBH_Rate = (X2B + X3B + HR) / H as a new column.
# X2B / X3B are presumably the sheet's "2B" / "3B" headers as renamed by
# read.csv() -- confirm against the CSV.
player_comp_data <- mutate(
  player_comp_data,
  XBH_Rate = (X2B + X3B + HR) / H
)
# Line chart of one statistic against Age, one coloured line per Name.
#   data    : data frame with Age, Name and the column named by `metric`
#   metric  : name of the column to plot on the y axis (string)
#   y_label : y-axis label
#   title   : plot title
# Returns the ggplot object; calling it at top level prints the plot.
plot_player_comp <- function(data, metric, y_label, title) {
  ggplot(data, aes(x = Age, y = .data[[metric]], colour = Name)) +
    geom_line() +
    labs(y = y_label, title = title)
}

plot_player_comp(
  player_comp_data, "XBH_Rate", "XBH %", "Cam Balego XBH% Comparisons"
)

plot_player_comp(
  player_comp_data, "AVG", "BA", "Cam Balego BA Comparisons"
)

plot_player_comp(
  player_comp_data, "OPS", "OPS", "Cam Balego OPS Comparisons"
)

plot_player_comp(
  player_comp_data, "SO", "Strikeouts", "Cam Balego Strikeouts Comparisons"
)

plot_player_comp(
  player_comp_data, "SLG", "SLG %", "Cam Balego SLG% Comparisons"
)

plot_player_comp(
  player_comp_data, "HR", "HR", "Cam Balego HR Comparisons"
)

Organization Comp Visualizations

# Recode Level as a factor whose levels follow the order listed here.
# Any value not in this list becomes NA in the factor.
level_order <- c("R", "SS", "LoA", "HiA", "AA")
level <- org_comp_data[["Level"]]
ordered_level <- factor(level, levels = level_order)
org_comp_data[["Level"]] <- ordered_level

# Whole-number axis breaks covering the limits handed over by the scale.
# Replaces the previous hard-coded `breaks = 0:2100`, which passed 2,101
# candidate breaks to every plot and relied on ggplot2 discarding the ones
# outside the axis range. Within that range the visible ticks are the same.
# If the limits contain no whole number, seq() counts downwards and returns
# values outside the limits, which ggplot2 drops -- no ticks, as before.
whole_number_breaks <- function(limits) {
  seq(ceiling(limits[1]), floor(limits[2]))
}

# Line chart of one statistic against Year, one coloured line per Name.
#   data   : data frame with Year, Name and the column named by `metric`
#   metric : name of the column to plot (string); also used as the y-axis
#            label and as the last word of the title
# Returns the ggplot object; calling it at top level prints the plot.
plot_org_comp <- function(data, metric) {
  ggplot(data, aes(x = Year, y = .data[[metric]], colour = Name)) +
    geom_line() +
    scale_x_continuous(breaks = whole_number_breaks) +
    labs(
      y = metric,
      title = paste("Cam Balego vs Organization Catcher", metric)
    )
}

plot_org_comp(org_comp_data, "AVG")

plot_org_comp(org_comp_data, "OPS")

plot_org_comp(org_comp_data, "OBP")

plot_org_comp(org_comp_data, "HR")

Career Projections

# Hold out row 1 of model_data as the case to predict and fit on rows 2-69.
# NOTE(review): row 1 is presumably Cam Balego (per the document title), and
# the 2:69 range is hard-coded -- confirm both against the sheet.
test <- model_data[1, ]
train <- model_data[2:69, ]

# Logistic regression of MLB on four rate statistics.
# NOTE(review): the object name `log` is the same as base R's log() function;
# it is kept because later code refers to the model by this name.
# NOTE(review): OPS is conventionally OBP + SLG, so these three predictors are
# presumably near-collinear, which would be consistent with the very large
# standard errors in the printed summary -- confirm before interpreting the
# individual coefficients.
log <- glm(
  formula = MLB ~ OPS + AVG + SLG + OBP,
  family = "binomial",
  data = train
)
summary(log)
## 
## Call:
## glm(formula = MLB ~ OPS + AVG + SLG + OBP, family = "binomial", 
##     data = train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9014  -0.9401  -0.4657   0.9874   2.2784  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -10.996      3.579  -3.072  0.00212 **
## OPS           21.191    516.774   0.041  0.96729   
## AVG           14.992     17.501   0.857  0.39164   
## SLG          -16.791    515.789  -0.033  0.97403   
## OBP           -5.808    517.009  -0.011  0.99104   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 90.468  on 67  degrees of freedom
## Residual deviance: 75.110  on 63  degrees of freedom
## AIC: 85.11
## 
## Number of Fisher Scoring iterations: 5
# Predicted probability (response scale) for the held-out row in `test`.
predict(log, newdata = test, type = "response")
##        1 
## 0.474364

Chance of Reaching MLB: 47.4%

# Ordinary least squares fit of Career_Earnings on eight batting columns,
# using the same training rows as the logistic model.
# NOTE(review): OBP, SLG and OPS enter together here as well; if OPS is
# OBP + SLG they are presumably near-collinear -- confirm before reading the
# individual coefficients.
earnings <- lm(
  formula = Career_Earnings ~ AVG + H + HR + SO + BB + OBP + SLG + OPS,
  data = train
)
summary(earnings)
## 
## Call:
## lm(formula = Career_Earnings ~ AVG + H + HR + SO + BB + OBP + 
##     SLG + OPS, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5250215 -1508161  -811676  -212934 25374216 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -4482515    8429590  -0.532    0.597
## AVG          -14649208   51415584  -0.285    0.777
## H                 3991       8801   0.453    0.652
## HR                3018      45321   0.067    0.947
## SO                1257       8131   0.155    0.878
## BB               -4575      11536  -0.397    0.693
## OBP          414670466 1173832968   0.353    0.725
## SLG          410691338 1171892104   0.350    0.727
## OPS         -400705545 1171733503  -0.342    0.734
## 
## Residual standard error: 4954000 on 59 degrees of freedom
## Multiple R-squared:  0.09237,    Adjusted R-squared:  -0.0307 
## F-statistic: 0.7505 on 8 and 59 DF,  p-value: 0.647
# Point prediction of Career_Earnings for the held-out row in `test`.
predict(earnings, newdata = test)
##       1 
## 1058331

Predicted Career Earnings: $1,058,331.