Batka Projections

library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the comps sheet used for the round and bonus models.
data_1 <- read.csv('Batka Comps  - COMPS DATA.csv')
# Convert ROUND to numeric; entries that are not numbers become NA (hence the
# coercion warning below). Going through as.character() keeps the values
# correct even if read.csv() returned a factor (the default before R 4.0),
# where as.numeric() alone would return the internal level codes.
num_round <- as.numeric(as.character(data_1$ROUND))
## Warning: NAs introduced by coercion
data_1$ROUND <- num_round
# Row 26 is held out as the case to predict; rows 1-25 are the fitting set.
test <- data_1[26, ]
# Keep only the fitting rows that have no missing value in any column.
train <- na.omit(data_1[1:25, ])

Round Projection

# Ordinary least-squares fit of ROUND on the 4-seam, slider and CH columns,
# using the rows in `train`.
round_model <- lm(
  formula = ROUND ~ X4.seam.MPH + X4.seam.VB + X4.seam.HB +
    Slider.VELO + Slider.VB + CH.VB,
  data = train
)
# Print the coefficient table and fit statistics.
summary(round_model)
## 
## Call:
## lm(formula = ROUND ~ X4.seam.MPH + X4.seam.VB + X4.seam.HB + 
##     Slider.VELO + Slider.VB + CH.VB, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.746 -1.495 -0.600  1.503  9.037 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 142.80519   73.79632   1.935   0.0750 .
## X4.seam.MPH  -1.94876    0.71693  -2.718   0.0176 *
## X4.seam.VB    0.06441    0.39854   0.162   0.8741  
## X4.seam.HB    0.39800    0.33371   1.193   0.2543  
## Slider.VELO   0.55159    0.50175   1.099   0.2916  
## Slider.VB     0.20920    0.25723   0.813   0.4307  
## CH.VB        -0.27952    0.27304  -1.024   0.3246  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.478 on 13 degrees of freedom
## Multiple R-squared:  0.452,  Adjusted R-squared:  0.1991 
## F-statistic: 1.787 on 6 and 13 DF,  p-value: 0.1788
# Apply the fitted round model to the held-out row in `test` and show the
# predicted value.
round_pred <- predict(round_model, newdata = test)
round_pred
##       26 
## 4.811033

Projected draft round: 4-5 (model point estimate: 4.81).

Bonus Projection

# Ordinary least-squares fit of BONUS on the 4-seam and slider columns, using
# the rows in `train`.
bonus_model <- lm(
  formula = BONUS ~ X4.seam.MPH + X4.seam.HB + Slider.VELO + Slider.VB,
  data = train
)
# Print the coefficient table and fit statistics.
summary(bonus_model)
## 
## Call:
## lm(formula = BONUS ~ X4.seam.MPH + X4.seam.HB + Slider.VELO + 
##     Slider.VB, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -836174 -343001 -214218  -49834 3267516 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7683521   17006566  -0.452    0.658
## X4.seam.MPH    89269     157388   0.567    0.579
## X4.seam.HB    -53230      70304  -0.757    0.461
## Slider.VELO     1864     107258   0.017    0.986
## Slider.VB      -1864      13620  -0.137    0.893
## 
## Residual standard error: 1042000 on 15 degrees of freedom
## Multiple R-squared:  0.04595,    Adjusted R-squared:  -0.2085 
## F-statistic: 0.1806 on 4 and 15 DF,  p-value: 0.9449
# Apply the fitted bonus model to the held-out row in `test` and show the
# predicted value.
bonus_pred <- predict(bonus_model, newdata = test)
bonus_pred
##     26 
## 625980

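Projected signing bonus: $625,980. Treat this figure as a rough guide only: the bonus model explains very little of the variation among the comps (multiple R-squared 0.046, overall F-test p-value 0.94).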

Visualizations

# Load the comp sheet used for the visualizations; readr reports the column
# types it guessed.
data <- read_csv(file = 'Batka Comp Data - Sheet1.csv')
## Parsed with column specification:
## cols(
##   Name = col_character(),
##   Height = col_double(),
##   Weight = col_double(),
##   FS_velo = col_double(),
##   FS_rpm = col_double(),
##   FS_rel_h = col_double(),
##   FS_rel_v = col_double(),
##   SL_velo = col_double(),
##   SL_rpm = col_double(),
##   SL_rel_h = col_double(),
##   SL_rel_v = col_double(),
##   FB_HB = col_double(),
##   FB_VB = col_double(),
##   SL_HB = col_double(),
##   SL_VB = col_double(),
##   Round = col_double(),
##   Bonus = col_double(),
##   Career_Earnings = col_double()
## )
# Row subsets of the comp sheet. `test` (row 21) is the row highlighted and
# labelled 'Batka' in the plots below. `train`, `kh`, `tt` and `rd` are not
# used by the plots in this section; they are kept in case other code relies
# on them. Note that `train` and `test` reuse names from the modelling
# section and replace those objects.
train <- data[1:19, ]
test <- data[21, ]
kh <- data[16, ]
tt <- data[10, ]
rd <- data[14, ]

# Scatter plot of two columns of `comps`, with the row(s) in `highlight`
# overdrawn in red and labelled.
#
# comps         Data frame supplying the semi-transparent background points.
# highlight     Data frame holding the row(s) to emphasise.
# x_col, y_col  Unquoted column names mapped to the x and y axes.
# title         Plot title.
# x_lab, y_lab  Axis labels.
# label         Text drawn at the highlighted point(s).
#
# Returns the ggplot object, which is drawn when the call is made at top
# level.
plot_vs_comps <- function(comps, highlight, x_col, y_col,
                          title, x_lab, y_lab, label = 'Batka') {
  ggplot(comps, aes({{ x_col }}, {{ y_col }})) +
    geom_point(alpha = .5) +
    geom_point(data = highlight, colour = 'red') +
    geom_text(data = highlight, label = label) +
    ggtitle(title) +
    xlab(x_lab) +
    ylab(y_lab)
}

plot_vs_comps(data, test, FS_velo, FS_rpm,
              title = 'FB Velo vs RPM Against MLB Comps',
              x_lab = 'MPH', y_lab = 'RPM')

plot_vs_comps(data, test, FB_HB, FB_VB,
              title = 'FB Horizontal vs Vertical Break Against MLB Comps',
              x_lab = 'HB', y_lab = 'VB')

plot_vs_comps(data, test, SL_velo, SL_rpm,
              title = 'SL Velo vs RPM Against MLB Comps',
              x_lab = 'MPH', y_lab = 'RPM')