Data Prep: Statcast data assembled from past work
# Bring the saved Statcast seasons into the session. Each .RData file is
# expected to define an object named after its season (used just below).
load("G:/My Drive/Baseball/R Projects/Data/Statcast2022.RData")
load("G:/My Drive/Baseball/R Projects/Data/Statcast2023.RData")

# Stack the 2022 and 2023 seasons into one table
statcast <- rbind(Statcast2022, Statcast2023)

# Subset the combined table to each hitter of interest
schwarber_data <- filter(statcast, player_name == "Schwarber, Kyle")
turner_data <- filter(statcast, player_name == "Turner, Trea")
harper_data <- filter(statcast, player_name == "Harper, Bryce")
bohm_data <- filter(statcast, player_name == "Bohm, Alec")
Data Prep: Manually create pitcher data from the questionnaire images. H-break and V-break are estimated by eye from the graphic.
# Pitcher A: one vector per attribute, one element per pitch type
A_name <- rep("A", times = 3)
A_pitches <- c("Sinker", "Slider", "Sweeper")
A_usages <- c(69.5, 20.9, 9.6)
A_velocities <- c(96, 87.5, 83.9)
A_hbreak <- c(-15, 3, 17)
A_vbreak <- c(6, -6, -10)
# Assemble the arsenal table with its final column names in one step
A_table <- data.frame(
  Name = A_name,
  Pitch = A_pitches,
  Usage = A_usages,
  Velo = A_velocities,
  Hbreak = A_hbreak,
  Vbreak = A_vbreak
)

# Pitcher B: same layout as Pitcher A
B_name <- rep("B", times = 3)
B_pitches <- c("4-Seam Fastball", "Slider", "Curve")
B_usages <- c(56.2, 36.7, 7.1)
B_velocities <- c(99.3, 89.0, 82.1)
B_hbreak <- c(-4, 5, 4)
B_vbreak <- c(24, 1, -16)
B_table <- data.frame(
  Name = B_name,
  Pitch = B_pitches,
  Usage = B_usages,
  Velo = B_velocities,
  Hbreak = B_hbreak,
  Vbreak = B_vbreak
)

# Display both arsenal tables
A_table
## Name Pitch Usage Velo Hbreak Vbreak
## 1 A Sinker 69.5 96.0 -15 6
## 2 A Slider 20.9 87.5 3 -6
## 3 A Sweeper 9.6 83.9 17 -10
B_table
## Name Pitch Usage Velo Hbreak Vbreak
## 1 B 4-Seam Fastball 56.2 99.3 -4 24
## 2 B Slider 36.7 89.0 5 1
## 3 B Curve 7.1 82.1 4 -16
Matchup table: Create a function that displays a given hitter’s performance against pitches that resemble those of a given pitcher.
QC metric - Quality Contact - the percentage of swings that result in an exit velocity of at least 92 mph. I find this to be a more appropriate measure than average exit velocity (AEV): AEV does not properly account for hitters who whiff a lot but hit the ball hard when they happen to make contact, so QC is a better measure of success against specific pitches. MLB median QC is around 21%.
#' Summarise a hitter's results against pitches resembling a pitcher's arsenal
#'
#' For every row (pitch type) of `pitcher_data`, keeps the rows of
#' `hitter_data` thrown by a `pitcher_hand`-handed pitcher whose velocity and
#' movement fall inside the tolerance windows around that pitch, then reports
#' swing-decision and contact-quality rates on that subset.
#'
#' NOTE: Z-Swing% and O-Swing% are now divided by the number of in-zone and
#' out-of-zone pitches respectively. Earlier versions divided both by total
#' swings, so previously reported `zo_swing` values must be regenerated.
#'
#' @param hitter_data Data frame of pitches seen by the hitter. Must contain
#'   `player_name`, `p_throws`, `release_speed`, `pfx_x`, `pfx_z`,
#'   `description`, `true_zone` (compared against "strike" / "ball") and
#'   `launch_speed`.
#' @param pitcher_data Data frame with one row per pitch type and columns
#'   `Name`, `Pitch`, `Usage`, `Velo`, `Hbreak`, `Vbreak`.
#' @param pitcher_hand Value of `p_throws` to keep (default "R").
#' @param velo_tol Half-width of the velocity window around `Velo`.
#' @param hbreak_tol Half-width of the horizontal-break window around `Hbreak`.
#' @param vbreak_tol Half-width of the vertical-break window around `Vbreak`.
#' @return A data frame with one row per pitch type and columns `pitcher`,
#'   `hitter`, `pitch`, `usage`, `count` (similar pitches seen), `zo_swing`
#'   (Z-Swing% minus O-Swing%) and `qc` (share of swings with
#'   `launch_speed >= 92`), both rounded to 2 decimals. A rate is `NA` when
#'   its denominator is zero. Returns an empty data frame when `pitcher_data`
#'   has no rows.
matchup_table <- function(hitter_data, pitcher_data, pitcher_hand = "R",
                          velo_tol = 3, hbreak_tol = 3, vbreak_tol = 5) {
  # Fail early with a readable message instead of a cryptic error mid-loop
  pitcher_cols <- c("Name", "Pitch", "Usage", "Velo", "Hbreak", "Vbreak")
  hitter_cols <- c("player_name", "p_throws", "release_speed", "pfx_x",
                   "pfx_z", "description", "true_zone", "launch_speed")
  missing_cols <- c(setdiff(pitcher_cols, names(pitcher_data)),
                    setdiff(hitter_cols, names(hitter_data)))
  if (length(missing_cols) > 0) {
    stop("Missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # `description` values that count as a swing
  swing_descriptions <- c('hit_into_play', 'swinging_strike_blocked',
                          'swinging_strike', 'foul_tip', 'foul')
  # Exit-velocity floor for a "quality contact" swing
  qc_min_exit_velo <- 92

  # A rate with an empty denominator is undefined, not NaN/Inf
  safe_ratio <- function(num, den) if (den > 0) num / den else NA_real_
  within_tol <- function(x, centre, tol) x >= centre - tol & x <= centre + tol

  # Loop-invariant pieces of the similarity filter. Movement is multiplied by
  # 12 to compare against Hbreak/Vbreak (presumably feet -> inches; confirm
  # against the data source).
  same_hand <- hitter_data$p_throws == pitcher_hand
  hbreak <- hitter_data$pfx_x * 12
  vbreak <- hitter_data$pfx_z * 12

  rows <- lapply(seq_len(nrow(pitcher_data)), function(i) {
    pitch <- pitcher_data[i, ]

    # which() drops rows where any comparison is NA, like dplyr::filter()
    keep <- which(
      same_hand &
        within_tol(hitter_data$release_speed, pitch[["Velo"]], velo_tol) &
        within_tol(hbreak, pitch[["Hbreak"]], hbreak_tol) &
        within_tol(vbreak, pitch[["Vbreak"]], vbreak_tol)
    )
    pitch_data <- hitter_data[keep, , drop = FALSE]

    is_swing <- pitch_data$description %in% swing_descriptions
    in_zone <- pitch_data$true_zone == 'strike'
    out_zone <- pitch_data$true_zone == 'ball'

    # Z-Swing%: swings at in-zone pitches / in-zone pitches seen
    z_swing <- safe_ratio(sum(is_swing & in_zone, na.rm = TRUE),
                          sum(in_zone, na.rm = TRUE))
    # O-Swing%: swings at out-of-zone pitches / out-of-zone pitches seen
    o_swing <- safe_ratio(sum(is_swing & out_zone, na.rm = TRUE),
                          sum(out_zone, na.rm = TRUE))
    zo_swing <- z_swing - o_swing

    # QC: hard-hit balls per swing
    qc <- safe_ratio(
      sum(pitch_data$launch_speed >= qc_min_exit_velo, na.rm = TRUE),
      sum(is_swing)
    )

    # Keep the hitter label even when no similar pitch was found
    hitter_name <- if (nrow(pitch_data) > 0) {
      pitch_data$player_name[1]
    } else {
      hitter_data$player_name[1]
    }

    data.frame(pitcher = pitch[["Name"]],
               hitter = hitter_name,
               pitch = pitch[["Pitch"]],
               usage = pitch[["Usage"]],
               count = nrow(pitch_data),
               zo_swing = round(zo_swing, 2),
               qc = round(qc, 2))
  })

  if (length(rows) == 0) {
    return(data.frame())
  }
  do.call(rbind, rows)
}
Apply function to each hitter / pitcher matchup
# One matchup table per hitter / pitcher pair
# (arguments are hitter data first, pitcher arsenal second)

# Kyle Schwarber
schwarber_A <- matchup_table(schwarber_data, A_table)
schwarber_B <- matchup_table(schwarber_data, B_table)

# Trea Turner
turner_A <- matchup_table(turner_data, A_table)
turner_B <- matchup_table(turner_data, B_table)

# Bryce Harper
harper_A <- matchup_table(harper_data, A_table)
harper_B <- matchup_table(harper_data, B_table)

# Alec Bohm
bohm_A <- matchup_table(bohm_data, A_table)
bohm_B <- matchup_table(bohm_data, B_table)
Evaluate each matchup
schwarber_A
## pitcher hitter pitch usage count zo_swing qc
## 1 A Schwarber, Kyle Sinker 69.5 162 0.43 0.41
## 2 A Schwarber, Kyle Slider 20.9 47 0.47 0.21
## 3 A Schwarber, Kyle Sweeper 9.6 31 0.50 0.42
SI (most-used pitch) does not match up well against Schwarber at all. Bad matchup.
turner_A
## pitcher hitter pitch usage count zo_swing qc
## 1 A Turner, Trea Sinker 69.5 246 0.26 0.31
## 2 A Turner, Trea Slider 20.9 86 -0.16 0.19
## 3 A Turner, Trea Sweeper 9.6 17 0.00 0.17
Could likely induce chase with SL, but SI would likely get hit. Average matchup.
harper_A
## pitcher hitter pitch usage count zo_swing qc
## 1 A Harper, Bryce Sinker 69.5 103 0.37 0.22
## 2 A Harper, Bryce Slider 20.9 49 -0.08 0.12
## 3 A Harper, Bryce Sweeper 9.6 15 -0.45 0.09
Breaking pitches would likely get chases, with not much hard contact. Good matchup.
bohm_A
## pitcher hitter pitch usage count zo_swing qc
## 1 A Bohm, Alec Sinker 69.5 257 0.51 0.30
## 2 A Bohm, Alec Slider 20.9 54 0.22 0.22
## 3 A Bohm, Alec Sweeper 9.6 26 0.07 0.27
Good contact and discipline against SI. Bad matchup.
schwarber_B
## pitcher hitter pitch usage count zo_swing qc
## 1 B Schwarber, Kyle 4-Seam Fastball 56.2 23 0.57 0.21
## 2 B Schwarber, Kyle Slider 36.7 175 0.48 0.37
## 3 B Schwarber, Kyle Curve 7.1 36 0.43 0.00
Average contact on FB. Would likely rely on CB. Good Matchup
turner_B
## pitcher hitter pitch usage count zo_swing qc
## 1 B Turner, Trea 4-Seam Fastball 56.2 25 0.12 0.06
## 2 B Turner, Trea Slider 36.7 252 0.07 0.17
## 3 B Turner, Trea Curve 7.1 32 -0.25 0.12
Bad contact across all three pitches. Good matchup
harper_B
## pitcher hitter pitch usage count zo_swing qc
## 1 B Harper, Bryce 4-Seam Fastball 56.2 27 0.50 0.06
## 2 B Harper, Bryce Slider 36.7 116 -0.09 0.18
## 3 B Harper, Bryce Curve 7.1 29 -0.25 0.19
Bad contact across the board, particularly with FB. Chase likely on the secondary pitches (SL, CB). Good matchup.
bohm_B
## pitcher hitter pitch usage count zo_swing qc
## 1 B Bohm, Alec 4-Seam Fastball 56.2 23 0.50 0.08
## 2 B Bohm, Alec Slider 36.7 184 0.29 0.19
## 3 B Bohm, Alec Curve 7.1 15 -0.43 0.29
Low contact on FB. Good discipline on SL, so CB would have to be the go-to breaking pitch. Average matchup.