To project Callil’s potential draft round and signing bonus, we built statistical models based on the senior seasons of other players who we felt compared to Callil physically. The data can be seen below.
library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the comparison-player data and derive per-plate-appearance walk and
# strikeout rates plus the share of hits that went for extra bases.
data <- read.csv("Callil_Training_Data.csv") %>%
  mutate(
    BB_Rate = BB / PA,
    SO_Rate = SO / PA,
    XBH_Rate = (X2B + X3B + HR) / H
  )

# Bonus values contain thousands separators (commas); strip them so the
# column can be converted to numeric and used as a regression response.
bonus <- data[["Bonus"]]
bonus_new <- gsub(",", "", bonus)
num_bonus <- as.numeric(bonus_new)
data[["Bonus"]] <- num_bonus

# Show the full table, including the derived columns.
data
## Name Age G PA AB R H X2B X3B HR RBI SB BB SO BA OBP
## 1 Angel Camacho 22 58 268 227 38 72 7 0 10 41 2 31 26 0.317 0.407
## 2 McCarthy Tatum 23 57 259 236 54 84 16 2 13 77 5 14 32 0.356 0.397
## 3 Madison Stokes 22 50 220 183 32 59 12 1 11 44 1 29 44 0.322 0.414
## 4 Mike Gretler 22 68 307 262 43 80 21 1 7 51 1 22 46 0.305 0.379
## 5 Jordan Rodgers 22 51 231 205 39 66 10 1 9 35 8 19 30 0.322 0.390
## 6 Brandon Lopez 22 58 259 213 50 80 14 0 2 42 5 23 34 0.376 0.449
## 7 Zach Remillard 22 73 323 290 66 99 17 2 19 72 15 20 82 0.341 0.391
## 8 Conner Hale 22 64 288 263 49 86 17 3 4 56 1 18 34 0.327 0.373
## 9 John Ziznewski 23 51 225 197 50 72 13 2 9 55 17 22 26 0.365 0.431
## 10 Justin Gonzalez 23 56 256 196 43 48 16 2 3 39 3 48 68 0.245 0.410
## 11 Tyler Smith 21 56 260 224 48 69 10 2 2 28 8 24 35 0.308 0.390
## 12 Kyle Farmer 22 52 237 210 26 61 12 2 3 44 0 12 12 0.290 0.315
## 13 Joel Hutter 22 60 271 234 45 79 16 1 11 72 8 22 33 0.338 0.400
## 14 Danny Muno 22 56 260 204 47 71 14 1 3 52 14 46 27 0.348 0.471
## 15 George Callil 22 64 260 196 36 52 4 0 4 40 4 16 48 0.271 0.446
## SLG OPS SEC Round Bonus BB_Rate SO_Rate XBH_Rate
## 1 0.480 0.887 0 8 2500 0.11567164 0.09701493 0.2361111
## 2 0.606 1.003 0 10 5000 0.05405405 0.12355212 0.3690476
## 3 0.579 0.993 1 10 10000 0.13181818 0.20000000 0.4067797
## 4 0.473 0.852 0 10 50000 0.07166124 0.14983713 0.3625000
## 5 0.512 0.902 1 6 5000 0.08225108 0.12987013 0.3030303
## 6 0.469 0.919 0 10 30000 0.08880309 0.13127413 0.2000000
## 7 0.610 1.001 0 10 10000 0.06191950 0.25386997 0.3838384
## 8 0.460 0.833 1 9 10000 0.06250000 0.11805556 0.2790698
## 9 0.589 1.020 0 8 10000 0.09777778 0.11555556 0.3333333
## 10 0.393 0.803 0 9 25000 0.18750000 0.26562500 0.4375000
## 11 0.397 0.787 0 8 20000 0.09230769 0.13461538 0.2028986
## 12 0.410 0.724 1 8 40000 0.05063291 0.05063291 0.2786885
## 13 0.556 0.956 0 10 10000 0.08118081 0.12177122 0.3544304
## 14 0.471 0.942 0 8 10000 0.17692308 0.10384615 0.2535211
## 15 0.354 0.800 1 NA NA 0.06153846 0.18461538 0.1538462
library(leaps)

# Hold out row 15 (George Callil, whose Round and Bonus are NA) as the
# prediction target; rows 1-14 are the drafted comparison players.
train <- data[1:14, ]
test <- data[15, ]

# Forward stepwise search over the candidate predictors of draft round.
# It is fit on the training rows only, so the held-out row cannot take part
# in variable selection.
regfit.fwd <- regsubsets(
  Round ~ H + HR + RBI + SB + BA + OBP + SLG + OPS + SEC +
    XBH_Rate + BB_Rate + SO_Rate,
  data = train,
  method = "forward"
)

# Linear model for draft round on the chosen predictors. Each predictor is
# listed exactly once.
round_model <- lm(
  formula = Round ~ RBI + SB + SO_Rate + BA + SEC + HR + OBP,
  data = train
)
summary(round_model)
##
## Call:
## lm(formula = Round ~ RBI + SB + SO_Rate + BA + SEC + HR + OBP,
## data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8575 -0.3144 0.1052 0.2196 0.8771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.13303 3.01889 0.375 0.7203
## RBI 0.04749 0.01952 2.433 0.0510 .
## SB -0.15786 0.04440 -3.555 0.0120 *
## SO_Rate 15.12911 4.85468 3.116 0.0207 *
## BA 23.84705 10.78781 2.211 0.0691 .
## SEC -0.93371 0.51771 -1.804 0.1214
## HR -0.08279 0.06004 -1.379 0.2172
## OBP -6.76814 8.93594 -0.757 0.4775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6728 on 6 degrees of freedom
## Multiple R-squared: 0.8622, Adjusted R-squared: 0.7015
## F-statistic: 5.365 on 7 and 6 DF, p-value: 0.02879
We then extrapolated from Callil’s previous season and plugged that data into our models.
# Apply the fitted round model to the held-out row (`test`) and print the
# resulting named prediction.
GC_round_pred <- predict(object = round_model, newdata = test)
print(GC_round_pred)
## 15
## 7.373366
# Forward stepwise search over the candidate predictors, with signing bonus
# as the response (this step feeds the bonus model, so the response must be
# Bonus rather than Round). Fit on the training rows only.
regfit <- regsubsets(
  Bonus ~ H + HR + RBI + SB + BA + OBP + SLG + OPS + SEC +
    XBH_Rate + BB_Rate + SO_Rate,
  data = train,
  method = "forward"
)

# Linear model for signing bonus (in the numeric units produced by the
# comma-stripping step) on the chosen predictors.
bonus_model <- lm(
  formula = Bonus ~ RBI + SB + BA + SEC + HR + OBP + BB_Rate,
  data = train
)
summary(bonus_model)
##
## Call:
## lm(formula = Bonus ~ RBI + SB + BA + SEC + HR + OBP + BB_Rate,
## data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13031.0 -8666.9 642.6 7786.3 19543.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96429.87 71085.76 1.357 0.224
## RBI 37.74 438.04 0.086 0.934
## SB -352.60 1003.57 -0.351 0.737
## BA -209340.32 522581.09 -0.401 0.703
## SEC -9325.85 13996.94 -0.666 0.530
## HR -1500.70 1121.99 -1.338 0.230
## OBP 55492.00 586518.53 0.095 0.928
## BB_Rate -198480.65 426510.30 -0.465 0.658
##
## Residual standard error: 14480 on 6 degrees of freedom
## Multiple R-squared: 0.5254, Adjusted R-squared: -0.02823
## F-statistic: 0.949 on 7 and 6 DF, p-value: 0.5337
# Apply the fitted bonus model to the held-out row (`test`) and print the
# resulting named prediction.
GC_bonus <- predict(object = bonus_model, newdata = test)
print(GC_bonus)
## 15
## 37004.6