George Callil Projections

To project the round in which Callil might be drafted and his likely signing bonus, we built statistical models from the senior seasons of other players whom we felt were physically comparable to Callil. The data can be seen below.

library(tidyverse)
## ── Attaching packages ───────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the comparison players' batting lines and derive the rate statistics
# used as model predictors:
#   BB_Rate  - walks per plate appearance
#   SO_Rate  - strikeouts per plate appearance
#   XBH_Rate - share of hits that were doubles, triples or home runs
# Bonus has its commas removed and is converted to numeric in place.
data <- read.csv('Callil_Training_Data.csv') %>%
  mutate(
    BB_Rate = BB / PA,
    SO_Rate = SO / PA,
    XBH_Rate = (X2B + X3B + HR) / H,
    Bonus = as.numeric(gsub(",", "", Bonus))
  )
data
##               Name Age  G  PA  AB  R  H X2B X3B HR RBI SB BB SO    BA   OBP
## 1    Angel Camacho  22 58 268 227 38 72   7   0 10  41  2 31 26 0.317 0.407
## 2   McCarthy Tatum  23 57 259 236 54 84  16   2 13  77  5 14 32 0.356 0.397
## 3   Madison Stokes  22 50 220 183 32 59  12   1 11  44  1 29 44 0.322 0.414
## 4     Mike Gretler  22 68 307 262 43 80  21   1  7  51  1 22 46 0.305 0.379
## 5   Jordan Rodgers  22 51 231 205 39 66  10   1  9  35  8 19 30 0.322 0.390
## 6    Brandon Lopez  22 58 259 213 50 80  14   0  2  42  5 23 34 0.376 0.449
## 7   Zach Remillard  22 73 323 290 66 99  17   2 19  72 15 20 82 0.341 0.391
## 8      Conner Hale  22 64 288 263 49 86  17   3  4  56  1 18 34 0.327 0.373
## 9   John Ziznewski  23 51 225 197 50 72  13   2  9  55 17 22 26 0.365 0.431
## 10 Justin Gonzalez  23 56 256 196 43 48  16   2  3  39  3 48 68 0.245 0.410
## 11     Tyler Smith  21 56 260 224 48 69  10   2  2  28  8 24 35 0.308 0.390
## 12     Kyle Farmer  22 52 237 210 26 61  12   2  3  44  0 12 12 0.290 0.315
## 13     Joel Hutter  22 60 271 234 45 79  16   1 11  72  8 22 33 0.338 0.400
## 14      Danny Muno  22 56 260 204 47 71  14   1  3  52 14 46 27 0.348 0.471
## 15   George Callil  22 64 260 196 36 52   4   0  4  40  4 16 48 0.271 0.446
##      SLG   OPS SEC Round Bonus    BB_Rate    SO_Rate  XBH_Rate
## 1  0.480 0.887   0     8  2500 0.11567164 0.09701493 0.2361111
## 2  0.606 1.003   0    10  5000 0.05405405 0.12355212 0.3690476
## 3  0.579 0.993   1    10 10000 0.13181818 0.20000000 0.4067797
## 4  0.473 0.852   0    10 50000 0.07166124 0.14983713 0.3625000
## 5  0.512 0.902   1     6  5000 0.08225108 0.12987013 0.3030303
## 6  0.469 0.919   0    10 30000 0.08880309 0.13127413 0.2000000
## 7  0.610 1.001   0    10 10000 0.06191950 0.25386997 0.3838384
## 8  0.460 0.833   1     9 10000 0.06250000 0.11805556 0.2790698
## 9  0.589 1.020   0     8 10000 0.09777778 0.11555556 0.3333333
## 10 0.393 0.803   0     9 25000 0.18750000 0.26562500 0.4375000
## 11 0.397 0.787   0     8 20000 0.09230769 0.13461538 0.2028986
## 12 0.410 0.724   1     8 40000 0.05063291 0.05063291 0.2786885
## 13 0.556 0.956   0    10 10000 0.08118081 0.12177122 0.3544304
## 14 0.471 0.942   0     8 10000 0.17692308 0.10384615 0.2535211
## 15 0.354 0.800   1    NA    NA 0.06153846 0.18461538 0.1538462
library(leaps)
# Forward stepwise subset search over the candidate predictors of draft round.
regfit.fwd <- regsubsets(
  Round ~ H + HR + RBI + SB + BA + OBP + SLG + OPS + SEC +
    XBH_Rate + BB_Rate + SO_Rate,
  data = data,
  method = "forward"
)

# Split on whether the draft outcome is known rather than on fixed row
# positions: players with a recorded Round form the training set, and the
# player still to be projected (Round is NA) forms the test set. Row names
# are preserved, so predictions stay labelled by the original row number.
has_outcome <- !is.na(data$Round)
train <- data[has_outcome, ]
test <- data[!has_outcome, ]

# Linear model for draft round, fit on the rows in `train`.
# NOTE(review): SEC appears twice in the formula; the coefficient table printed
# by summary() lists a single SEC term, so the repeat does not change the fit.
# The call is left exactly as written because summary() echoes it verbatim.
round_model <- lm(formula = Round ~ RBI + SB + SO_Rate + BA + SEC + HR + OBP + SEC, data = train)
summary(round_model)
## 
## Call:
## lm(formula = Round ~ RBI + SB + SO_Rate + BA + SEC + HR + OBP + 
##     SEC, data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8575 -0.3144  0.1052  0.2196  0.8771 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.13303    3.01889   0.375   0.7203  
## RBI          0.04749    0.01952   2.433   0.0510 .
## SB          -0.15786    0.04440  -3.555   0.0120 *
## SO_Rate     15.12911    4.85468   3.116   0.0207 *
## BA          23.84705   10.78781   2.211   0.0691 .
## SEC         -0.93371    0.51771  -1.804   0.1214  
## HR          -0.08279    0.06004  -1.379   0.2172  
## OBP         -6.76814    8.93594  -0.757   0.4775  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6728 on 6 degrees of freedom
## Multiple R-squared:  0.8622, Adjusted R-squared:  0.7015 
## F-statistic: 5.365 on 7 and 6 DF,  p-value: 0.02879

We then extrapolated from Callil’s previous season and plugged the resulting data into our models.

Draft round prediction:

# Predicted draft round for the held-out row in `test`, from the round model.
GC_round_pred <- predict(object = round_model, newdata = test)
print(GC_round_pred)
##       15 
## 7.373366
# Forward stepwise subset search over the candidate predictors of signing
# bonus. The response here must be Bonus (not Round) so that the search
# corresponds to the bonus model fitted below; only rows in `train` are used.
regfit <- regsubsets(
  Bonus ~ H + HR + RBI + SB + BA + OBP + SLG + OPS + SEC +
    XBH_Rate + BB_Rate + SO_Rate,
  data = train,
  method = "forward"
)
# Linear model for signing bonus, fit on the rows in `train`. The call is kept
# exactly as written because summary() echoes it verbatim.
bonus_model <- lm(formula = Bonus ~ RBI + SB + BA + SEC + HR + OBP + BB_Rate, data = train)
summary(bonus_model)
## 
## Call:
## lm(formula = Bonus ~ RBI + SB + BA + SEC + HR + OBP + BB_Rate, 
##     data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13031.0  -8666.9    642.6   7786.3  19543.4 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)   96429.87   71085.76   1.357    0.224
## RBI              37.74     438.04   0.086    0.934
## SB             -352.60    1003.57  -0.351    0.737
## BA          -209340.32  522581.09  -0.401    0.703
## SEC           -9325.85   13996.94  -0.666    0.530
## HR            -1500.70    1121.99  -1.338    0.230
## OBP           55492.00  586518.53   0.095    0.928
## BB_Rate     -198480.65  426510.30  -0.465    0.658
## 
## Residual standard error: 14480 on 6 degrees of freedom
## Multiple R-squared:  0.5254, Adjusted R-squared:  -0.02823 
## F-statistic: 0.949 on 7 and 6 DF,  p-value: 0.5337

Signing bonus projection:

# Projected signing bonus for the held-out row in `test`, from the bonus model.
GC_bonus <- predict(object = bonus_model, newdata = test)
GC_bonus
##      15 
## 37004.6