library(baseballr)
## Warning: package 'baseballr' was built under R version 4.2.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
# Load data from project drive
load("G:/My Drive/Baseball/Summer 2023/R Codes and Data/Statcast2022.RData")
load("G:/My Drive/Baseball/Summer 2023/R Codes and Data/Statcast2021.RData")
# Create FB Data for DJ LeMahieu
lemahieu <- playerid_lookup('LeMahieu', 'DJ')
lemahieu_id <- lemahieu$mlbam_id
lemahieu2022 <- Statcast2022 %>%
filter(batter == lemahieu_id)
lemahieu2021 <- Statcast2021 %>%
filter(batter == lemahieu_id)
lemahieu2122 <- rbind(lemahieu2022, lemahieu2021)
## Add true zone column
lemahieu2122$true_zone <- if_else(lemahieu2122$plate_z < lemahieu2122$sz_top &
lemahieu2122$plate_z > lemahieu2122$sz_bot &
lemahieu2122$plate_x < (0.83) &
lemahieu2122$plate_x > (-0.83),
'strike','ball')
## Add edge column
lemahieu2122$edge <- if_else(
(lemahieu2122$plate_z > lemahieu2122$sz_top - 0.24 &
lemahieu2122$plate_z < lemahieu2122$sz_top + 0.24 &
lemahieu2122$plate_x > -1.07 &
lemahieu2122$plate_x < 1.07) |
(lemahieu2122$plate_z > lemahieu2122$sz_bot - 0.24 &
lemahieu2122$plate_z < lemahieu2122$sz_bot + 0.24 &
lemahieu2122$plate_x > -1.07 &
lemahieu2122$plate_x < 1.07) |
(lemahieu2122$plate_z < lemahieu2122$sz_top + 0.24 &
lemahieu2122$plate_z > lemahieu2122$sz_bot - 0.24 &
lemahieu2122$plate_x > -1.07 &
lemahieu2122$plate_x < -0.59) |
(lemahieu2122$plate_z < lemahieu2122$sz_top + 0.24 &
lemahieu2122$plate_z > lemahieu2122$sz_bot - 0.24 &
lemahieu2122$plate_x < 1.07 &
lemahieu2122$plate_x > 0.59), 1,0)
# Separate RHP and LHP Fastball data
lemahieu2122_rh_fb <- lemahieu2122 %>%
filter((pitch_type == 'FF' | pitch_type == 'SI') & p_throws == 'R')
lemahieu2122_lh_fb <- lemahieu2122 %>%
filter((pitch_type == 'FF' | pitch_type == 'SI') & p_throws == 'L')
# Plot FBs
lemahieu2122_rh_fb %>%
ggplot(aes(x = pfx_x*12, y = pfx_z*12))+
geom_point(aes(color = pitch_type)) +
xlab('h_break (in)')+
ylab('v_break (in)')+
geom_vline(xintercept = 0, size = 1) +
geom_hline(yintercept = 0, size = 1) +
xlim(-25, 25) +
ylim(-15, 25) +
ggtitle('RH Fastballs')
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values (`geom_point()`).
h_ride_h_run refers to high ride (vertical movement) and high run (horizontal movement). l_ride_l_run refers to low ride and low run.
# Divide into categories
median_rh_pfx_x <- lemahieu2122_rh_fb %>%
summarize(median_rh_pfx_x = median(pfx_x, na.rm = TRUE)) %>%
pull(median_rh_pfx_x)
median_rh_pfx_z <- lemahieu2122_rh_fb %>%
summarize(median_rh_pfx_z = median(pfx_z, na.rm = TRUE)) %>%
pull(median_rh_pfx_z)
lemahieu2122_rh_fb$fb_type <-
if_else(lemahieu2122_rh_fb$pfx_z > median_rh_pfx_z &
lemahieu2122_rh_fb$pfx_x < median_rh_pfx_x, 'h_ride_h_run',
if_else(lemahieu2122_rh_fb$pfx_z > median_rh_pfx_z &
lemahieu2122_rh_fb$pfx_x > median_rh_pfx_x, 'h_ride_l_run',
if_else(lemahieu2122_rh_fb$pfx_z < median_rh_pfx_z &
lemahieu2122_rh_fb$pfx_x < median_rh_pfx_x, 'l_ride_h_run',
'l_ride_l_run')
))
lemahieu2122_rh_fb <- lemahieu2122_rh_fb[!is.na(lemahieu2122_rh_fb$fb_type), ]
fb_type_counts <- table(lemahieu2122_rh_fb$fb_type)
print(fb_type_counts)
##
## h_ride_h_run h_ride_l_run l_ride_h_run l_ride_l_run
## 227 754 746 252
# Plot by category
lemahieu2122_rh_fb %>%
ggplot(aes(x = pfx_x*12, y = pfx_z*12))+
geom_point(aes(color = fb_type)) +
xlab('h_break (in)')+
ylab('v_break (in)')+
geom_vline(xintercept = 0, size = 1) +
geom_hline(yintercept = 0, size = 1) +
geom_vline(xintercept = median_rh_pfx_x*12, size = 1, color = 'gray') +
geom_hline(yintercept = median_rh_pfx_z*12, size = 1, color = 'gray') +
xlim(-25, 25) +
ylim(-15, 25) +
ggtitle('RH Fastballs')
## Warning: Removed 4 rows containing missing values (`geom_point()`).
# Create Statistics Table
## Create label vectors
swing_vector <- c('hit_into_play', 'swinging_strike_blocked', 'swinging_strike', 'foul_tip', 'foul')
contact_vector <- c('hit_into_play', 'foul')
bip_vector <- c('line_drive', 'ground_ball', 'fly_ball', 'popup')
fb_types <- unique(lemahieu2122_rh_fb$fb_type)
## Create empty data frame for lemahieu_rh_fb_stats
lemahieu_rh_fb_stats <- data.frame()
for (fb in fb_types) {
filtered_data <- lemahieu2122_rh_fb %>%
filter(fb_type == fb)
pitch_count <- nrow(filtered_data)
swing_count <- sum(filtered_data$description %in% swing_vector)
contact_count <- sum(filtered_data$description %in% contact_vector)
bip_count <- sum(filtered_data$bb_type %in% bip_vector)
### Calculate xWOBAsw
xwobasw <- sum(filtered_data$estimated_woba_using_speedangle, na.rm = TRUE) /
swing_count
### Calculate Contact Rate
contact_rate <- contact_count / swing_count
### Calculate Line Drive Rate
ld_rate <- (sum(filtered_data$bb_type == 'line_drive')) / bip_count
### Calculate Fly Ball Rate
fb_rate <- (sum(filtered_data$bb_type == 'fly_ball')) / bip_count
### Calculate Ground Ball Rate
gb_rate <- (sum(filtered_data$bb_type == 'ground_ball')) / bip_count
### Calculate edge ZOsw
filtered_data_edge <- filtered_data %>%
filter(edge == 1)
z_swing_edge <- sum(filtered_data_edge$description %in% swing_vector &
filtered_data_edge$true_zone == 'strike', na.rm = TRUE) /
sum(filtered_data_edge$description %in% swing_vector)
o_swing_edge <- sum(filtered_data_edge$description %in% swing_vector &
filtered_data_edge$true_zone == 'ball', na.rm = TRUE) /
sum(filtered_data_edge$description %in% swing_vector)
zo_swing_edge <- z_swing_edge - o_swing_edge
### Create a new row for each pitch type
new_row <- data.frame(FB_Type = fb,
n = pitch_count,
xWOBAsw = xwobasw,
CON = contact_rate,
FB = fb_rate,
LD = ld_rate,
GB = gb_rate,
ZOedge = zo_swing_edge)
### Append the new row to the data frame
lemahieu_rh_fb_stats <- rbind(lemahieu_rh_fb_stats, new_row)
}
print(lemahieu_rh_fb_stats)
## FB_Type n xWOBAsw CON FB LD GB ZOedge
## 1 l_ride_h_run 746 0.1526475 0.8669065 0.05405405 0.1689189 0.7635135 0.1260504
## 2 h_ride_h_run 227 0.1741705 0.8750000 0.11428571 0.2857143 0.5428571 0.1176471
## 3 h_ride_l_run 754 0.1366157 0.8619403 0.21978022 0.2747253 0.4505495 0.3191489
## 4 l_ride_l_run 252 0.1544107 0.8303571 0.16666667 0.3809524 0.4523810 0.4871795
# Visualize xWOBAsw
lemahieu_rh_fb_stats %>%
ggplot(aes(x = FB_Type, y = xWOBAsw, fill = FB_Type)) +
geom_bar(stat = 'identity')+
ggtitle('xWOBAsw by FB Type')
# Visualize ZOedge
lemahieu_rh_fb_stats %>%
ggplot(aes(x = FB_Type, y = ZOedge, fill = FB_Type)) +
geom_bar(stat = 'identity')+
ggtitle('Plate Discipline by FB Type')
# Visualize FB/GB/LD
lemahieu_rh_fb_stats_long <- lemahieu_rh_fb_stats %>%
select( -c(n, ZOedge, CON)) %>%
gather(key = type, value = value, -FB_Type)
lemahieu_rh_fb_stats_long$type <- factor(lemahieu_rh_fb_stats_long$type,
levels = c("FB", "LD", "GB"))
ggplot(lemahieu_rh_fb_stats_long, aes(x = FB_Type, y = value, fill = type)) +
geom_bar(position = 'dodge', stat = 'identity') +
labs(title = 'Hit Trajectory by FB Type',
x = 'Pitch Type',
y = 'Value',
fill = 'BIP Type') +
scale_fill_manual(values = c('FB' = 'yellow', 'LD' = 'orange', 'GB' = 'red')) +
theme_minimal()
RH Fastballs with low vertical movement and high horizontal movement appear to be most effective at limiting damage against LeMahieu. He also struggles most to recognize this type of pitch at the edges of the zone. Pitchers with high sinker usage should be most effective against him.
This code could be used to identify ideal bullpen matchups against any player. It could also be used to make lineup decisions based on the fastball profile of the opponent’s starting pitcher.