library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the comps data used by the draft-round and signing-bonus models.
data_1 <- read.csv("Batka Comps - COMPS DATA.csv")
# Coerce ROUND to numeric in place. Entries that are not numbers become NA,
# which is what triggers the coercion warning recorded below. Converting via
# as.character() keeps the result correct when read.csv() returns the column
# as a factor (the default before R 4.0): as.numeric() on a factor would
# silently return the internal level codes instead of the round numbers.
data_1$ROUND <- as.numeric(as.character(data_1$ROUND))
## Warning: NAs introduced by coercion
# Hold out row 26 as the case to predict, and fit on the first 25 rows.
# na.omit() drops every row that has an NA in any column, not only in the
# columns used by the model formula.
test <- data_1[26, ]
train <- na.omit(data_1[1:25, ])

# Linear model of draft round (ROUND) on the X4.seam.*, Slider.* and CH.VB
# measurement columns.
round_model <- lm(
  formula = ROUND ~ X4.seam.MPH + X4.seam.VB + X4.seam.HB +
    Slider.VELO + Slider.VB + CH.VB,
  data = train
)
summary(round_model)
##
## Call:
## lm(formula = ROUND ~ X4.seam.MPH + X4.seam.VB + X4.seam.HB +
## Slider.VELO + Slider.VB + CH.VB, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.746 -1.495 -0.600 1.503 9.037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 142.80519 73.79632 1.935 0.0750 .
## X4.seam.MPH -1.94876 0.71693 -2.718 0.0176 *
## X4.seam.VB 0.06441 0.39854 0.162 0.8741
## X4.seam.HB 0.39800 0.33371 1.193 0.2543
## Slider.VELO 0.55159 0.50175 1.099 0.2916
## Slider.VB 0.20920 0.25723 0.813 0.4307
## CH.VB -0.27952 0.27304 -1.024 0.3246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.478 on 13 degrees of freedom
## Multiple R-squared: 0.452, Adjusted R-squared: 0.1991
## F-statistic: 1.787 on 6 and 13 DF, p-value: 0.1788
# Predict ROUND for the held-out row and display the estimate.
round_pred <- predict(round_model, newdata = test)
round_pred
## 26
## 4.811033
Projected draft round: 4-5 (model point estimate: 4.81).
# Linear model of signing bonus (BONUS) on four of the pitch-measurement
# columns, fit on the same `train` rows as the round model.
bonus_model <- lm(
  formula = BONUS ~ X4.seam.MPH + X4.seam.HB + Slider.VELO + Slider.VB,
  data = train
)
summary(bonus_model)
##
## Call:
## lm(formula = BONUS ~ X4.seam.MPH + X4.seam.HB + Slider.VELO +
## Slider.VB, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -836174 -343001 -214218 -49834 3267516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7683521 17006566 -0.452 0.658
## X4.seam.MPH 89269 157388 0.567 0.579
## X4.seam.HB -53230 70304 -0.757 0.461
## Slider.VELO 1864 107258 0.017 0.986
## Slider.VB -1864 13620 -0.137 0.893
##
## Residual standard error: 1042000 on 15 degrees of freedom
## Multiple R-squared: 0.04595, Adjusted R-squared: -0.2085
## F-statistic: 0.1806 on 4 and 15 DF, p-value: 0.9449
# Predict BONUS for the held-out row and display the estimate.
bonus_pred <- predict(bonus_model, newdata = test)
bonus_pred
## 26
## 625980
Projected signing bonus: $625,980.
# Load the second comps sheet as a tibble; readr prints the column
# specification it guessed, since no col_types are supplied.
data <- readr::read_csv("Batka Comp Data - Sheet1.csv")
## Parsed with column specification:
## cols(
## Name = col_character(),
## Height = col_double(),
## Weight = col_double(),
## FS_velo = col_double(),
## FS_rpm = col_double(),
## FS_rel_h = col_double(),
## FS_rel_v = col_double(),
## SL_velo = col_double(),
## SL_rpm = col_double(),
## SL_rel_h = col_double(),
## SL_rel_v = col_double(),
## FB_HB = col_double(),
## FB_VB = col_double(),
## SL_HB = col_double(),
## SL_VB = col_double(),
## Round = col_double(),
## Bonus = col_double(),
## Career_Earnings = col_double()
## )
# Fixed, position-based row selections from the second comps sheet. `train`
# and `test` replace the objects of the same name used by the models above.
# `test` (row 21) is the row highlighted and labelled 'Batka' in the plots;
# row 20 is not selected here.
train <- data[seq_len(19), ]
test <- data[21, ]

# Single-row picks by position.
# NOTE(review): presumably individual comparison players -- confirm against
# the Name column.
kh <- data[16, ]
tt <- data[10, ]
rd <- data[14, ]
# Scatter one pair of columns for every row of `comps` and highlight the rows
# of `target` in red with a text label. Replaces three copies of the same
# ggplot pipeline that differed only in columns and labels.
#
# comps:  data frame drawn as semi-transparent background points.
# target: data frame whose row(s) are drawn in red and labelled.
# x, y:   unquoted column names mapped to the x and y axes.
# title:  plot title.
# x_lab, y_lab: axis labels.
# label:  text drawn at the highlighted point(s).
#
# Returns the ggplot object; it is rendered when printed, which happens
# automatically for the top-level calls below.
plot_vs_comps <- function(comps, target, x, y, title, x_lab, y_lab,
                          label = "Batka") {
  ggplot(comps, aes({{ x }}, {{ y }})) +
    geom_point(alpha = .5) +
    geom_point(data = target, colour = "red") +
    geom_text(data = target, label = label) +
    ggtitle(title) +
    xlab(x_lab) +
    ylab(y_lab)
}

plot_vs_comps(
  data, test, FS_velo, FS_rpm,
  title = "FB Velo vs RPM Against MLB Comps",
  x_lab = "MPH", y_lab = "RPM"
)
plot_vs_comps(
  data, test, FB_HB, FB_VB,
  title = "FB Horizontal vs Vertical Break Against MLB Comps",
  x_lab = "HB", y_lab = "VB"
)
plot_vs_comps(
  data, test, SL_velo, SL_rpm,
  title = "SL Velo vs RPM Against MLB Comps",
  x_lab = "MPH", y_lab = "RPM"
)