Data Prep: Statcast data assembled from past work

load("G:/My Drive/Baseball/R Projects/Data/Statcast2022.RData")
load("G:/My Drive/Baseball/R Projects/Data/Statcast2023.RData")

# Stack the two loaded seasons (Statcast2022, Statcast2023) into one table
statcast <- rbind(Statcast2022, Statcast2023)

# One subset per hitter, matched on the player_name column
schwarber_data <- filter(statcast, player_name == 'Schwarber, Kyle')
turner_data <- filter(statcast, player_name == 'Turner, Trea')
harper_data <- filter(statcast, player_name == 'Harper, Bryce')
bohm_data <- filter(statcast, player_name == 'Bohm, Alec')

Data Prep: Manually create pitcher data from questionnaire images. H-break and V-break are eye-balled from the graphic.

# Pitcher A
# Values are entered by hand, one element per pitch type, in the same order
# across every vector.

A_pitches <- c("Sinker", "Slider", "Sweeper")
# Tie the label length to the pitch list so the two cannot drift apart
A_name <- rep("A", length(A_pitches))
A_usages <- c(69.5, 20.9, 9.6)
A_velocities <- c(96, 87.5, 83.9)
A_hbreak <- c(-15, 3, 17)
A_vbreak <- c(6, -6, -10)

# Build the table with its final column names directly; this avoids the
# cbind-then-rename round trip and needs nothing beyond base R.
A_table <- data.frame(
  Name = A_name,
  Pitch = A_pitches,
  Usage = A_usages,
  Velo = A_velocities,
  Hbreak = A_hbreak,
  Vbreak = A_vbreak
)

# Repeat for Pitcher B
# Values are entered by hand, one element per pitch type, in the same order
# across every vector.

B_pitches <- c("4-Seam Fastball", "Slider", "Curve")
# Tie the label length to the pitch list so the two cannot drift apart
B_name <- rep("B", length(B_pitches))
B_usages <- c(56.2, 36.7, 7.1)
B_velocities <- c(99.3, 89.0, 82.1)
B_hbreak <- c(-4, 5, 4)
B_vbreak <- c(24, 1, -16)

# Build the table with its final column names directly; this avoids the
# cbind-then-rename round trip and needs nothing beyond base R.
B_table <- data.frame(
  Name = B_name,
  Pitch = B_pitches,
  Usage = B_usages,
  Velo = B_velocities,
  Hbreak = B_hbreak,
  Vbreak = B_vbreak
)
          
A_table
##   Name   Pitch Usage Velo Hbreak Vbreak
## 1    A  Sinker  69.5 96.0    -15      6
## 2    A  Slider  20.9 87.5      3     -6
## 3    A Sweeper   9.6 83.9     17    -10
B_table
##   Name           Pitch Usage Velo Hbreak Vbreak
## 1    B 4-Seam Fastball  56.2 99.3     -4     24
## 2    B          Slider  36.7 89.0      5      1
## 3    B           Curve   7.1 82.1      4    -16

Matchup table: Create a function that displays a given hitter’s performance against pitches that resemble those of a given pitcher.

QC metric - Quality Contact - the percentage of swings that result in an exit velocity of at least 92 mph. I find this to be a more appropriate substitute for average exit velocity (AEV). AEV does not properly account for hitters who whiff a lot but hit the ball hard when they happen to make contact. QC is a better measure of success against specific pitches. MLB median QC is around 21%.

#' Summarise a hitter's results against pitches resembling a pitcher's arsenal
#'
#' For every row (pitch type) of `pitcher_data`, keeps the rows of
#' `hitter_data` whose velocity and movement fall within a tolerance of that
#' pitch, then reports how many such pitches were seen, the zone-minus-chase
#' swing share, and the Quality Contact (QC) rate.
#'
#' @param hitter_data Data frame with columns `player_name`, `p_throws`,
#'   `release_speed`, `pfx_x`, `pfx_z`, `description`, `launch_speed` and
#'   `true_zone`.
#' @param pitcher_data Data frame with columns `Name`, `Pitch`, `Usage`,
#'   `Velo`, `Hbreak` and `Vbreak`; one row per pitch type.
#' @param throws Value(s) of `p_throws` to keep. Defaults to `"R"`.
#' @param velo_tol Half-width of the accepted `release_speed` window around
#'   `Velo`.
#' @param hbreak_tol Half-width of the accepted window around `Hbreak`.
#' @param vbreak_tol Half-width of the accepted window around `Vbreak`.
#' @param qc_min_ev Minimum `launch_speed` counted as quality contact.
#' @return A data frame with one row per pitch type and columns `pitcher`,
#'   `hitter`, `pitch`, `usage`, `count`, `zo_swing` and `qc`. `zo_swing` and
#'   `qc` are `NA` when no swings were found. A zero-row `pitcher_data` yields
#'   an empty data frame.
matchup_table <- function(hitter_data, pitcher_data,
                          throws = "R",
                          velo_tol = 3,
                          hbreak_tol = 3,
                          vbreak_tol = 5,
                          qc_min_ev = 92) {
  # Fail early with a readable message instead of a cryptic lookup error
  hitter_cols <- c("player_name", "p_throws", "release_speed", "pfx_x",
                   "pfx_z", "description", "launch_speed", "true_zone")
  pitcher_cols <- c("Name", "Pitch", "Usage", "Velo", "Hbreak", "Vbreak")
  missing_cols <- c(setdiff(hitter_cols, names(hitter_data)),
                    setdiff(pitcher_cols, names(pitcher_data)))
  if (length(missing_cols) > 0) {
    stop("Missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # `description` values that count as a swing
  swing_labels <- c("hit_into_play", "swinging_strike_blocked",
                    "swinging_strike", "foul_tip", "foul")

  # Inclusive window test: centre - tol <= x <= centre + tol
  within_tol <- function(x, centre, tol) {
    x >= centre - tol & x <= centre + tol
  }

  # Share that is NA (not NaN/Inf) when the denominator is zero
  safe_ratio <- function(num, den) {
    if (den > 0) num / den else NA_real_
  }

  # Loop-invariant: the pitcher-hand mask does not depend on the pitch type
  same_hand <- hitter_data$p_throws %in% throws

  summarise_pitch <- function(i) {
    pitch <- pitcher_data[i, ]

    # Movement columns are scaled by 12 to match the units of Hbreak/Vbreak
    # (presumably feet to inches -- confirm against the data dictionary).
    keep <- same_hand &
      within_tol(hitter_data$release_speed, pitch$Velo, velo_tol) &
      within_tol(hitter_data$pfx_x * 12, pitch$Hbreak, hbreak_tol) &
      within_tol(hitter_data$pfx_z * 12, pitch$Vbreak, vbreak_tol)
    # which() drops NA comparisons, so rows with missing values never match
    pitch_data <- hitter_data[which(keep), , drop = FALSE]

    is_swing <- pitch_data$description %in% swing_labels
    swing_count <- sum(is_swing)

    # NOTE(review): both shares divide by total swings, so they are the
    # fraction of swings taken in / out of the zone rather than per-zone
    # swing rates. Kept as-is; confirm this is the intended definition.
    z_swing <- safe_ratio(
      sum(is_swing & pitch_data$true_zone == "strike", na.rm = TRUE),
      swing_count
    )
    o_swing <- safe_ratio(
      sum(is_swing & pitch_data$true_zone == "ball", na.rm = TRUE),
      swing_count
    )

    # Quality Contact: batted balls at or above the threshold, per swing
    qc <- safe_ratio(
      sum(pitch_data$launch_speed >= qc_min_ev, na.rm = TRUE),
      swing_count
    )

    # Still label the row with the hitter when no similar pitch was seen
    hitter_name <- if (nrow(pitch_data) > 0) {
      pitch_data$player_name[1]
    } else if (nrow(hitter_data) > 0) {
      hitter_data$player_name[1]
    } else {
      NA_character_
    }

    data.frame(pitcher = pitcher_data$Name[1],
               hitter = hitter_name,
               pitch = pitch$Pitch,
               usage = pitch$Usage,
               count = nrow(pitch_data),
               zo_swing = round(z_swing - o_swing, 2),
               qc = round(qc, 2))
  }

  # seq_len() yields nothing for a zero-row table (1:0 would iterate twice)
  rows <- lapply(seq_len(nrow(pitcher_data)), summarise_pitch)
  if (length(rows) == 0) {
    return(data.frame())
  }

  # Bind once at the end instead of growing the result inside the loop
  do.call(rbind, rows)
}

Apply function to each hitter / pitcher matchup

# One matchup table per hitter, first against pitcher B and then pitcher A.

# Kyle Schwarber
schwarber_B <- matchup_table(schwarber_data, B_table)
schwarber_A <- matchup_table(schwarber_data, A_table)

# Trea Turner
turner_B <- matchup_table(turner_data, B_table)
turner_A <- matchup_table(turner_data, A_table)

# Bryce Harper
harper_B <- matchup_table(harper_data, B_table)
harper_A <- matchup_table(harper_data, A_table)

# Alec Bohm
bohm_B <- matchup_table(bohm_data, B_table)
bohm_A <- matchup_table(bohm_data, A_table)

Evaluate each matchup

schwarber_A
##   pitcher          hitter   pitch usage count zo_swing   qc
## 1       A Schwarber, Kyle  Sinker  69.5   162     0.43 0.41
## 2       A Schwarber, Kyle  Slider  20.9    47     0.47 0.21
## 3       A Schwarber, Kyle Sweeper   9.6    31     0.50 0.42

SI (most-used pitch) does not match up well against Schwarber at all. Bad matchup.

turner_A
##   pitcher       hitter   pitch usage count zo_swing   qc
## 1       A Turner, Trea  Sinker  69.5   246     0.26 0.31
## 2       A Turner, Trea  Slider  20.9    86    -0.16 0.19
## 3       A Turner, Trea Sweeper   9.6    17     0.00 0.17

Could likely induce chase with SL, but SI would likely get hit. Average matchup.

harper_A
##   pitcher        hitter   pitch usage count zo_swing   qc
## 1       A Harper, Bryce  Sinker  69.5   103     0.37 0.22
## 2       A Harper, Bryce  Slider  20.9    49    -0.08 0.12
## 3       A Harper, Bryce Sweeper   9.6    15    -0.45 0.09

Breaking pitches would likely get chase, with not much hard contact. Good matchup.

bohm_A
##   pitcher     hitter   pitch usage count zo_swing   qc
## 1       A Bohm, Alec  Sinker  69.5   257     0.51 0.30
## 2       A Bohm, Alec  Slider  20.9    54     0.22 0.22
## 3       A Bohm, Alec Sweeper   9.6    26     0.07 0.27

Good contact and discipline against SI. Bad matchup

Pitcher B

schwarber_B
##   pitcher          hitter           pitch usage count zo_swing   qc
## 1       B Schwarber, Kyle 4-Seam Fastball  56.2    23     0.57 0.21
## 2       B Schwarber, Kyle          Slider  36.7   175     0.48 0.37
## 3       B Schwarber, Kyle           Curve   7.1    36     0.43 0.00

Average contact on FB. Would likely rely on CB. Good Matchup

turner_B
##   pitcher       hitter           pitch usage count zo_swing   qc
## 1       B Turner, Trea 4-Seam Fastball  56.2    25     0.12 0.06
## 2       B Turner, Trea          Slider  36.7   252     0.07 0.17
## 3       B Turner, Trea           Curve   7.1    32    -0.25 0.12

Bad contact across all three pitches. Good matchup

harper_B
##   pitcher        hitter           pitch usage count zo_swing   qc
## 1       B Harper, Bryce 4-Seam Fastball  56.2    27     0.50 0.06
## 2       B Harper, Bryce          Slider  36.7   116    -0.09 0.18
## 3       B Harper, Bryce           Curve   7.1    29    -0.25 0.19

Bad contact across the board, particularly with FB. Chase likely with OFF. Good Matchup.

bohm_B
##   pitcher     hitter           pitch usage count zo_swing   qc
## 1       B Bohm, Alec 4-Seam Fastball  56.2    23     0.50 0.08
## 2       B Bohm, Alec          Slider  36.7   184     0.29 0.19
## 3       B Bohm, Alec           Curve   7.1    15    -0.43 0.29

Low contact on FB. Good discipline on SL, so CB would have to be the go-to breaking pitch. Average matchup.