library(baseballr)
## Warning: package 'baseballr' was built under R version 4.2.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
# Load season data. The .RData file is expected to provide the `Statcast2022`
# pitch-level table that the rest of the script reads.
load("G:/My Drive/Baseball/Summer 2023/R Projects/Data/Statcast2022.RData")
# Create player statcast data for subject. ex: Freddy Peralta, RHP, MIL
first_name <- 'Freddy'
last_name <- 'Peralta'
player_id <- playerid_lookup(last_name, first_name)$mlbam_id
# Fail fast unless the lookup resolves to exactly one usable id: a vector of
# several ids would be silently recycled by `pitcher == player_id` below, and
# a missing id would yield an empty subject table.
if (length(player_id) != 1 || is.na(player_id)) {
  stop("Expected exactly one MLBAM id for ", first_name, " ", last_name,
       " but the lookup returned: ",
       paste(player_id, collapse = ", "),
       call. = FALSE)
}
# Every pitch thrown by the subject
player_data <- Statcast2022 %>%
  filter(pitcher == player_id)
# Create usage and pitch profile data for subject
all_pitches <- unique(Statcast2022$pitch_name)

# Build one pitcher's pitch profile.
#
# pitcher_data: the pitch-level rows belonging to a single pitcher
# pitcher_id:   id stored in the `player_id` column of the result
# pitch_names:  pitch names to report; one output row is produced per name,
#               including names the pitcher never threw (count 0, NaN means)
#
# Returns a data frame with columns player_id, pitch, count, usage (share of
# the pitcher's pitches), h_break and v_break (mean pfx_x / pfx_z times 12)
# and velo (mean release_speed), all rounded to 2 decimals.
summarise_arsenal <- function(pitcher_data, pitcher_id, pitch_names) {
  total_count <- nrow(pitcher_data)
  profile_rows <- lapply(pitch_names, function(pitch) {
    pitch_count <- sum(pitcher_data$pitch_name == pitch)
    this_pitch <- pitcher_data %>%
      filter(pitch_name == pitch)
    data.frame(player_id = pitcher_id,
               pitch = pitch,
               count = pitch_count,
               usage = round((pitch_count / total_count), 2),
               h_break = round(mean(this_pitch$pfx_x) * 12, 2),
               v_break = round(mean(this_pitch$pfx_z) * 12, 2),
               velo = round(mean(this_pitch$release_speed), 2),
               stringsAsFactors = FALSE)
  })
  # Bind once at the end instead of growing a data frame row by row
  bind_rows(profile_rows)
}

pitch_usage <- summarise_arsenal(player_data, player_id, all_pitches)

# Create same data for every pitcher that has right-handed (`p_throws == 'R'`)
# rows in the season data
rh_data <- Statcast2022 %>%
  filter(p_throws == 'R')
all_pitcher <- unique(rh_data$pitcher)
league_usage <- bind_rows(lapply(all_pitcher, function(guy) {
  league_data <- Statcast2022 %>%
    filter(pitcher == guy)
  summarise_arsenal(league_data, guy, all_pitches)
}))
# Combine both tables with mlb_stats() table to retrieve name and team
mlb_stats_22 <- mlb_stats(stat_type = 'season', player_pool = 'All',
                          stat_group = 'pitching', season = 2022)

# Add first name, last name and team to a usage table.
#
# usage_tbl: a per-pitch usage table keyed by `player_id`
# stats_tbl: the mlb_stats() table supplying the name, team and position
#
# Returns the rows of `usage_tbl` whose player is listed with position
# 'Pitcher' (position players and unmatched ids are dropped), with the name
# and team columns placed right after `player_id`.
attach_player_info <- function(usage_tbl, stats_tbl) {
  usage_tbl %>%
    left_join(stats_tbl, by = 'player_id') %>%
    filter(position_name == 'Pitcher') %>% # remove position players
    # Keep the original usage columns by name. The previous
    # `everything(<data frame>)` call only worked because the usage columns
    # happen to come first in the joined table.
    select(player_id, player_first_name, player_last_name, team_name,
           all_of(names(usage_tbl)))
}

league_usage <- attach_player_info(league_usage, mlb_stats_22)
pitch_usage <- attach_player_info(pitch_usage, mlb_stats_22)
print(pitch_usage)
## player_id player_first_name player_last_name team_name
## 1 642547 Freddy Peralta Milwaukee Brewers
## 2 642547 Freddy Peralta Milwaukee Brewers
## 3 642547 Freddy Peralta Milwaukee Brewers
## 4 642547 Freddy Peralta Milwaukee Brewers
## 5 642547 Freddy Peralta Milwaukee Brewers
## 6 642547 Freddy Peralta Milwaukee Brewers
## 7 642547 Freddy Peralta Milwaukee Brewers
## 8 642547 Freddy Peralta Milwaukee Brewers
## 9 642547 Freddy Peralta Milwaukee Brewers
## 10 642547 Freddy Peralta Milwaukee Brewers
## 11 642547 Freddy Peralta Milwaukee Brewers
## 12 642547 Freddy Peralta Milwaukee Brewers
## 13 642547 Freddy Peralta Milwaukee Brewers
## 14 642547 Freddy Peralta Milwaukee Brewers
## 15 642547 Freddy Peralta Milwaukee Brewers
## 16 642547 Freddy Peralta Milwaukee Brewers
## pitch count usage h_break v_break velo
## 1 4-Seam Fastball 743 0.55 -8.79 16.08 92.60
## 2 Slider 250 0.18 8.00 3.64 80.26
## 3 Sweeper 0 0.00 NaN NaN NaN
## 4 Sinker 0 0.00 NaN NaN NaN
## 5 Curveball 219 0.16 5.62 -3.66 75.79
## 6 Changeup 141 0.10 -14.69 3.82 85.87
## 7 Cutter 0 0.00 NaN NaN NaN
## 8 Split-Finger 0 0.00 NaN NaN NaN
## 9 Knuckle Curve 0 0.00 NaN NaN NaN
## 10 Slow Curve 0 0.00 NaN NaN NaN
## 11 Slurve 0 0.00 NaN NaN NaN
## 12 Pitch Out 0 0.00 NaN NaN NaN
## 13 Other 0 0.00 NaN NaN NaN
## 14 Eephus 0 0.00 NaN NaN NaN
## 15 0 0.00 NaN NaN NaN
## 16 Knuckleball 0 0.00 NaN NaN NaN
# Preview the first 40 rows of the league-wide usage table
head(league_usage, n = 40)
## player_id player_first_name player_last_name team_name
## 1 506433 Yu Darvish San Diego Padres
## 2 506433 Yu Darvish San Diego Padres
## 3 506433 Yu Darvish San Diego Padres
## 4 506433 Yu Darvish San Diego Padres
## 5 506433 Yu Darvish San Diego Padres
## 6 506433 Yu Darvish San Diego Padres
## 7 506433 Yu Darvish San Diego Padres
## 8 506433 Yu Darvish San Diego Padres
## 9 506433 Yu Darvish San Diego Padres
## 10 506433 Yu Darvish San Diego Padres
## 11 506433 Yu Darvish San Diego Padres
## 12 506433 Yu Darvish San Diego Padres
## 13 506433 Yu Darvish San Diego Padres
## 14 506433 Yu Darvish San Diego Padres
## 15 506433 Yu Darvish San Diego Padres
## 16 506433 Yu Darvish San Diego Padres
## 17 425844 Zack Greinke Kansas City Royals
## 18 425844 Zack Greinke Kansas City Royals
## 19 425844 Zack Greinke Kansas City Royals
## 20 425844 Zack Greinke Kansas City Royals
## 21 425844 Zack Greinke Kansas City Royals
## 22 425844 Zack Greinke Kansas City Royals
## 23 425844 Zack Greinke Kansas City Royals
## 24 425844 Zack Greinke Kansas City Royals
## 25 425844 Zack Greinke Kansas City Royals
## 26 425844 Zack Greinke Kansas City Royals
## 27 425844 Zack Greinke Kansas City Royals
## 28 425844 Zack Greinke Kansas City Royals
## 29 425844 Zack Greinke Kansas City Royals
## 30 425844 Zack Greinke Kansas City Royals
## 31 425844 Zack Greinke Kansas City Royals
## 32 425844 Zack Greinke Kansas City Royals
## 33 641816 Tyler Mahle Minnesota Twins
## 34 641816 Tyler Mahle Minnesota Twins
## 35 641816 Tyler Mahle Minnesota Twins
## 36 641816 Tyler Mahle Minnesota Twins
## 37 641816 Tyler Mahle Minnesota Twins
## 38 641816 Tyler Mahle Minnesota Twins
## 39 641816 Tyler Mahle Minnesota Twins
## 40 641816 Tyler Mahle Minnesota Twins
## pitch count usage h_break v_break velo
## 1 4-Seam Fastball 828 0.25 -7.17 17.08 94.86
## 2 Slider 1037 0.31 5.74 -0.15 85.91
## 3 Sweeper 559 0.17 16.55 -0.07 82.51
## 4 Sinker 300 0.09 -14.05 11.76 94.58
## 5 Curveball 138 0.04 11.53 -14.51 72.54
## 6 Changeup 0 0.00 NaN NaN NaN
## 7 Cutter 145 0.04 2.13 10.60 90.56
## 8 Split-Finger 245 0.07 -6.36 5.06 89.21
## 9 Knuckle Curve 103 0.03 9.52 -10.82 80.30
## 10 Slow Curve 2 0.00 10.32 -16.98 65.55
## 11 Slurve 0 0.00 NaN NaN NaN
## 12 Pitch Out 0 0.00 NaN NaN NaN
## 13 Other 0 0.00 NaN NaN NaN
## 14 Eephus 0 0.00 NaN NaN NaN
## 15 0 0.00 NaN NaN NaN
## 16 Knuckleball 0 0.00 NaN NaN NaN
## 17 4-Seam Fastball 870 0.38 -2.10 15.88 89.15
## 18 Slider 168 0.07 8.38 6.01 81.78
## 19 Sweeper 0 0.00 NaN NaN NaN
## 20 Sinker 51 0.02 -11.74 9.89 89.44
## 21 Curveball 462 0.20 12.54 -11.18 71.96
## 22 Changeup 374 0.16 -12.81 3.80 86.47
## 23 Cutter 347 0.15 6.57 6.08 85.82
## 24 Split-Finger 0 0.00 NaN NaN NaN
## 25 Knuckle Curve 0 0.00 NaN NaN NaN
## 26 Slow Curve 0 0.00 NaN NaN NaN
## 27 Slurve 0 0.00 NaN NaN NaN
## 28 Pitch Out 0 0.00 NaN NaN NaN
## 29 Other 0 0.00 NaN NaN NaN
## 30 Eephus 0 0.00 NaN NaN NaN
## 31 0 0.00 NaN NaN NaN
## 32 Knuckleball 0 0.00 NaN NaN NaN
## 33 4-Seam Fastball 1093 0.52 -10.22 18.07 93.21
## 34 Slider 240 0.11 4.98 1.84 83.86
## 35 Sweeper 0 0.00 NaN NaN NaN
## 36 Sinker 0 0.00 NaN NaN NaN
## 37 Curveball 0 0.00 NaN NaN NaN
## 38 Changeup 0 0.00 NaN NaN NaN
## 39 Cutter 272 0.13 1.75 10.23 85.50
## 40 Split-Finger 496 0.24 -14.32 9.92 85.39
# Difference between every league pitcher and the subject, pitch by pitch.
# The subject's profile is joined onto the league table by pitch name, which
# replaces one filter() per pitcher/pitch pair with a single keyed lookup and
# keeps the row order of `league_usage`.
subject_profile <- pitch_usage %>%
  select(pitch,
         usage_ref = usage,
         h_break_ref = h_break,
         v_break_ref = v_break,
         velo_ref = velo)
diff_table <- league_usage %>%
  # 'many-to-one' makes the join fail loudly if the subject table ever holds
  # more than one row for the same pitch
  left_join(subject_profile, by = 'pitch', relationship = 'many-to-one') %>%
  mutate(usage = usage - usage_ref,
         h_break = h_break - h_break_ref,
         v_break = v_break - v_break_ref,
         velo = velo - velo_ref) %>%
  select(-usage_ref, -h_break_ref, -v_break_ref, -velo_ref)
## Create column for the Euclidean (pythagorean) distance between the two
## break profiles; it is NaN when either pitcher has no data for the pitch
diff_table <- diff_table %>%
  mutate(`break` = round(sqrt(h_break^2 + v_break^2), 2))
head(diff_table, n = 40)
## player_id player_first_name player_last_name team_name
## 1 506433 Yu Darvish San Diego Padres
## 2 506433 Yu Darvish San Diego Padres
## 3 506433 Yu Darvish San Diego Padres
## 4 506433 Yu Darvish San Diego Padres
## 5 506433 Yu Darvish San Diego Padres
## 6 506433 Yu Darvish San Diego Padres
## 7 506433 Yu Darvish San Diego Padres
## 8 506433 Yu Darvish San Diego Padres
## 9 506433 Yu Darvish San Diego Padres
## 10 506433 Yu Darvish San Diego Padres
## 11 506433 Yu Darvish San Diego Padres
## 12 506433 Yu Darvish San Diego Padres
## 13 506433 Yu Darvish San Diego Padres
## 14 506433 Yu Darvish San Diego Padres
## 15 506433 Yu Darvish San Diego Padres
## 16 506433 Yu Darvish San Diego Padres
## 17 425844 Zack Greinke Kansas City Royals
## 18 425844 Zack Greinke Kansas City Royals
## 19 425844 Zack Greinke Kansas City Royals
## 20 425844 Zack Greinke Kansas City Royals
## 21 425844 Zack Greinke Kansas City Royals
## 22 425844 Zack Greinke Kansas City Royals
## 23 425844 Zack Greinke Kansas City Royals
## 24 425844 Zack Greinke Kansas City Royals
## 25 425844 Zack Greinke Kansas City Royals
## 26 425844 Zack Greinke Kansas City Royals
## 27 425844 Zack Greinke Kansas City Royals
## 28 425844 Zack Greinke Kansas City Royals
## 29 425844 Zack Greinke Kansas City Royals
## 30 425844 Zack Greinke Kansas City Royals
## 31 425844 Zack Greinke Kansas City Royals
## 32 425844 Zack Greinke Kansas City Royals
## 33 641816 Tyler Mahle Minnesota Twins
## 34 641816 Tyler Mahle Minnesota Twins
## 35 641816 Tyler Mahle Minnesota Twins
## 36 641816 Tyler Mahle Minnesota Twins
## 37 641816 Tyler Mahle Minnesota Twins
## 38 641816 Tyler Mahle Minnesota Twins
## 39 641816 Tyler Mahle Minnesota Twins
## 40 641816 Tyler Mahle Minnesota Twins
## pitch count usage h_break v_break velo break
## 1 4-Seam Fastball 828 -0.30 1.62 1.00 2.26 1.90
## 2 Slider 1037 0.13 -2.26 -3.79 5.65 4.41
## 3 Sweeper 559 0.17 NaN NaN NaN NaN
## 4 Sinker 300 0.09 NaN NaN NaN NaN
## 5 Curveball 138 -0.12 5.91 -10.85 -3.25 12.36
## 6 Changeup 0 -0.10 NaN NaN NaN NaN
## 7 Cutter 145 0.04 NaN NaN NaN NaN
## 8 Split-Finger 245 0.07 NaN NaN NaN NaN
## 9 Knuckle Curve 103 0.03 NaN NaN NaN NaN
## 10 Slow Curve 2 0.00 NaN NaN NaN NaN
## 11 Slurve 0 0.00 NaN NaN NaN NaN
## 12 Pitch Out 0 0.00 NaN NaN NaN NaN
## 13 Other 0 0.00 NaN NaN NaN NaN
## 14 Eephus 0 0.00 NaN NaN NaN NaN
## 15 0 0.00 NaN NaN NaN NaN
## 16 Knuckleball 0 0.00 NaN NaN NaN NaN
## 17 4-Seam Fastball 870 -0.17 6.69 -0.20 -3.45 6.69
## 18 Slider 168 -0.11 0.38 2.37 1.52 2.40
## 19 Sweeper 0 0.00 NaN NaN NaN NaN
## 20 Sinker 51 0.02 NaN NaN NaN NaN
## 21 Curveball 462 0.04 6.92 -7.52 -3.83 10.22
## 22 Changeup 374 0.06 1.88 -0.02 0.60 1.88
## 23 Cutter 347 0.15 NaN NaN NaN NaN
## 24 Split-Finger 0 0.00 NaN NaN NaN NaN
## 25 Knuckle Curve 0 0.00 NaN NaN NaN NaN
## 26 Slow Curve 0 0.00 NaN NaN NaN NaN
## 27 Slurve 0 0.00 NaN NaN NaN NaN
## 28 Pitch Out 0 0.00 NaN NaN NaN NaN
## 29 Other 0 0.00 NaN NaN NaN NaN
## 30 Eephus 0 0.00 NaN NaN NaN NaN
## 31 0 0.00 NaN NaN NaN NaN
## 32 Knuckleball 0 0.00 NaN NaN NaN NaN
## 33 4-Seam Fastball 1093 -0.03 -1.43 1.99 0.61 2.45
## 34 Slider 240 -0.07 -3.02 -1.80 3.60 3.52
## 35 Sweeper 0 0.00 NaN NaN NaN NaN
## 36 Sinker 0 0.00 NaN NaN NaN NaN
## 37 Curveball 0 -0.16 NaN NaN NaN NaN
## 38 Changeup 0 -0.10 NaN NaN NaN NaN
## 39 Cutter 272 0.13 NaN NaN NaN NaN
## 40 Split-Finger 496 0.24 NaN NaN NaN NaN
Look for players with the smallest differences in usage and break to determine the best matches.
# Per-pitcher totals of the differences from the subject:
#   total_usage: sum of absolute usage differences over all pitches
#   total_break: sum of break distances; missing distances are skipped, so
#                only pitches with a defined distance contribute
totals <- diff_table %>%
  group_by(player_id, player_first_name, player_last_name) %>%
  summarize(
    total_usage = sum(abs(usage)),
    total_break = sum(`break`, na.rm = TRUE),
    # Return an ungrouped table. With the default, the result stays grouped
    # and slice_head() below takes 10 rows *per group*, i.e. every row.
    .groups = 'drop'
  )
# First 10 pitchers of the totals table
slice_head(totals, n = 10)
## # A tibble: 589 × 5
## # Groups: player_id, player_first_name [589]
## player_id player_first_name player_last_name total_usage total_break
## <dbl> <chr> <chr> <dbl> <dbl>
## 1 425794 Adam Wainwright 1.37 40.8
## 2 425844 Zack Greinke 0.55 21.2
## 3 434378 Justin Verlander 0.25 21.7
## 4 434671 Anibal Sanchez 1.22 15.4
## 5 445276 Kenley Jansen 1.73 10.2
## 6 445926 Jesse Chavez 1.74 18.8
## 7 446372 Corey Kluber 1.44 17.6
## 8 450203 Charlie Morton 0.82 19.1
## 9 453178 Ian Kennedy 0.83 13.3
## 10 453268 Daniel Bard 1.52 9.63
## # ℹ 579 more rows
# Select closest matches: keep the subject (642547) and the two chosen
# comparables (680573, 663474), restricted to pitches they actually threw
matches <- filter(
  league_usage,
  count > 0,
  player_id %in% c(642547,680573,663474)
)
print(matches)
## player_id player_first_name player_last_name team_name
## 1 663474 Triston McKenzie Cleveland Guardians
## 2 663474 Triston McKenzie Cleveland Guardians
## 3 663474 Triston McKenzie Cleveland Guardians
## 4 642547 Freddy Peralta Milwaukee Brewers
## 5 642547 Freddy Peralta Milwaukee Brewers
## 6 642547 Freddy Peralta Milwaukee Brewers
## 7 642547 Freddy Peralta Milwaukee Brewers
## 8 680573 Simeon Woods Richardson Minnesota Twins
## 9 680573 Simeon Woods Richardson Minnesota Twins
## 10 680573 Simeon Woods Richardson Minnesota Twins
## 11 680573 Simeon Woods Richardson Minnesota Twins
## pitch count usage h_break v_break velo
## 1 4-Seam Fastball 1676 0.56 -3.01 20.55 92.48
## 2 Slider 668 0.22 5.32 10.18 87.27
## 3 Curveball 637 0.21 7.91 -7.95 79.40
## 4 4-Seam Fastball 743 0.55 -8.79 16.08 92.60
## 5 Slider 250 0.18 8.00 3.64 80.26
## 6 Curveball 219 0.16 5.62 -3.66 75.79
## 7 Changeup 141 0.10 -14.69 3.82 85.87
## 8 4-Seam Fastball 50 0.55 1.71 19.78 90.83
## 9 Slider 17 0.19 6.94 4.86 83.70
## 10 Curveball 13 0.14 0.30 -15.14 74.27
## 11 Changeup 11 0.12 -11.49 16.51 80.38
For Freddy Peralta, Triston McKenzie (CLE) and Simeon Woods Richardson (MIN) were selected as the best matches for comparison. Woods Richardson uses the same four-pitch mix with nearly identical usage rates. McKenzie throws a similarly shaped fastball (FB), slider (SL), and curveball (CB). McKenzie also provides a more prominent comparison with proven success.
# Visualize pitch data: average horizontal vs. vertical break of each pitch,
# coloured by pitcher and shaped by pitch type
matches %>%
  ggplot(aes(x = h_break, y = v_break)) +
  # The point size is a fixed constant, so it is set outside aes(). That also
  # removes the need to hide a size legend with guides().
  geom_point(aes(color = player_last_name, shape = pitch), size = 3) +
  # Gray path linking the same pitch type across the pitchers
  geom_path(aes(group = pitch), color = 'gray', alpha = 1) +
  xlab('h_break (in)') +
  ylab('v_break (in)') +
  # Axes through the origin; `linewidth` replaces the `size` argument that is
  # deprecated for lines as of ggplot2 3.4.0
  geom_vline(xintercept = 0, linewidth = 1) +
  geom_hline(yintercept = 0, linewidth = 1) +
  xlim(-25, 25) +
  ylim(-20, 25) +
  ggtitle('Pitch Mix By Pitcher')
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Visualize usage by pitcher: side-by-side bars of usage share per pitch type,
# one fixed colour per pitcher
ggplot(matches, aes(x = pitch, y = usage, fill = player_last_name)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(
    values = c('McKenzie' = 'red',
               'Peralta' = 'green',
               'Woods Richardson' = 'blue')
  ) +
  labs(
    title = 'Pitch Usage By Pitcher',
    x = 'Pitch Type',
    y = 'Usage',
    fill = 'Pitcher'
  ) +
  theme_minimal()
These three pitchers can confidently look to one another as bases of comparison. Possible applications include advance scouting, player development, and pro scouting evaluation. This project uses Peralta as the subject, but if it were replicated for all three players, the following are examples of applications.
Peralta: Observe how McKenzie approaches certain hitters. With what pitches and sequences has McKenzie found success against Peralta’s upcoming opponents? In McKenzie’s dominant 2022 season, on what pitches and sequences did he rely most?
McKenzie: Unlike McKenzie, Peralta features a changeup (CH). Should McKenzie try to replicate Peralta’s CH?
Woods Richardson: For a young prospect, these comparisons to big leaguers can be especially valuable. Woods Richardson should follow these pitchers closely, particularly Peralta, to mimic aspects of their approach and development.