Kellen Clukey Disaster Zone

# Rectangular strike zone used to grade every call below.
# x-limits bound PlateLocSide and are symmetric about 0; y-limits bound
# PlateLocHeight. Units are presumably feet (the plot labels both axes in feet).
strike_zone_limits <- list(xmin = -0.85, xmax = 0.85, ymin = 1.5, ymax = 3.5)


# Grade every called pitch (ball or strike) against `strike_zone_limits`.
#
# Result: one row per called pitch with the six selected columns plus
#   CorrectCall - "Correct" when the call agrees with the zone, else "Incorrect"
#                 (zone edges are inclusive, i.e. a pitch on the edge is a strike)
#   Distance    - how far outside the zone the pitch crossed, in inches
#                 (location columns are multiplied by 12; the plot labels them
#                 as feet). 0 for called strikes inside the zone, NA for called
#                 balls inside the zone.
#
# Pitches without a recorded plate location are dropped: they cannot be graded,
# and would otherwise fall through to the "Incorrect" default and inflate the
# miss counts in the summaries.
umpire_data <- keene_data %>%
  filter(
    PitchCall %in% c("BallCalled", "StrikeCalled"),
    !is.na(PlateLocSide),
    !is.na(PlateLocHeight)
  ) %>%
  select(Pitcher, Batter, PitchCall, PlateLocHeight, PlateLocSide, PitcherTeam) %>%
  mutate(
    # Temporary helper columns (removed at the end of this mutate).
    # Per-axis overshoot beyond the nearest zone edge; 0 when within the edges.
    MissSide = pmax(
      0,
      strike_zone_limits$xmin - PlateLocSide,
      PlateLocSide - strike_zone_limits$xmax
    ),
    MissHeight = pmax(
      0,
      strike_zone_limits$ymin - PlateLocHeight,
      PlateLocHeight - strike_zone_limits$ymax
    ),
    InZone = between(PlateLocSide, strike_zone_limits$xmin, strike_zone_limits$xmax) &
      between(PlateLocHeight, strike_zone_limits$ymin, strike_zone_limits$ymax),
    CorrectCall = case_when(
      PitchCall == "BallCalled" & !InZone ~ "Correct",
      PitchCall == "StrikeCalled" & InZone ~ "Correct",
      TRUE ~ "Incorrect"
    ),
    # Euclidean distance to the zone rectangle. When only one axis overshoots
    # this reduces to the straight-line miss on that axis, so a single formula
    # covers the side, top/bottom and corner cases.
    Distance = case_when(
      # Called ball inside the zone: no "distance outside" is defined.
      PitchCall == "BallCalled" & InZone ~ NA_real_,
      TRUE ~ sqrt(MissSide^2 + MissHeight^2) * 12
    ),
    MissSide = NULL,
    MissHeight = NULL,
    InZone = NULL
  )

# Scatter of every called pitch by plate location, coloured by the call.
# Correct calls are small filled dots; incorrect calls are larger crosses.
# The `text` aesthetic is not drawn by ggplot2; it is carried along so that
# ggplotly(tooltip = "text") can show it as the tooltip.
plot <- ggplot(
  umpire_data,
  aes(
    x = PlateLocSide,
    y = PlateLocHeight,
    color = PitchCall,
    text = paste("Pitcher: ", Pitcher, "<br>Batter: ", Batter, "<br>Distance: ", round(Distance, 2), " inches")
  )
) +
  geom_point(data = umpire_data %>% filter(CorrectCall == "Correct"), size = 1, shape = 16) + # Correct calls as smaller dots
  geom_point(data = umpire_data %>% filter(CorrectCall == "Incorrect"), size = 2, shape = 4, stroke = 2) + # Incorrect calls as crosses
  # Strike zone box. annotate() draws the rectangle exactly once; a geom_rect()
  # with constant aesthetics inherits the layer data and draws one identical
  # rectangle per pitch (and inherits the tooltip text as well).
  # `size` is kept for older ggplot2; on ggplot2 >= 3.4.0 prefer `linewidth`.
  annotate(
    "rect",
    xmin = strike_zone_limits$xmin,
    xmax = strike_zone_limits$xmax,
    ymin = strike_zone_limits$ymin,
    ymax = strike_zone_limits$ymax,
    fill = NA,
    color = "black",
    linetype = "solid",
    size = 1
  ) +
  scale_x_continuous(limits = c(-2, 2)) +
  scale_y_continuous(limits = c(0, 5)) +
  coord_fixed(ratio = 1) + # One unit on x is drawn the same length as one unit on y
  labs(
    title = "Umpire's Ball and Strike Calls",
    x = "Horizontal Location (feet)",
    y = "Vertical Location (feet)",
    color = "Pitch Call"
  ) +
  scale_color_manual(values = c("BallCalled" = "green", "StrikeCalled" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90"),
    axis.text = element_text(color = "black"),
    axis.title = element_text(color = "black"),
    plot.title = element_text(color = "black"),
    legend.background = element_rect(fill = "white", color = NA),
    legend.key = element_rect(fill = "white", color = NA),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black"),
    plot.margin = unit(c(2, 2, 2, 2), "cm") # Increase plot margins
  )

# Interactive version of `plot`: only the custom `text` aesthetic is used as
# the tooltip. clickmode "event+select" makes a click select the point as well
# as emit a click event.
# NOTE(review): hover tooltips presumably still appear without clicking - confirm.
plotly_plot <- layout(
  ggplotly(plot, tooltip = "text"),
  clickmode = "event+select"
)

# Render the widget.
plotly_plot

# Per-team accuracy of the umpire's calls, one row per Team with columns
#   Correct_Percentage_<PitchCall> - share (0-100) of that call type graded "Correct"
#   Total_<PitchCall>              - number of calls of that type
#
# The percentage is computed directly per (Team, PitchCall) group rather than
# from pivoted Correct/Incorrect count columns, so it still works when one of
# the two categories never occurs (e.g. no incorrect calls at all).
summary_table <- umpire_data %>%
  # NOTE(review): pitches thrown by "VER_MOU" are labelled "Opponent" here. If
  # "VER_MOU" is Vermont's own team code and the table is meant to be read from
  # the pitching team's side, these two labels are swapped - confirm.
  mutate(Team = ifelse(PitcherTeam == "VER_MOU", "Opponent", "Vermont")) %>%
  group_by(Team, PitchCall) %>%
  summarise(
    Total = n(),
    Correct_Percentage = sum(CorrectCall == "Correct") / n() * 100,
    .groups = "drop"
  ) %>%
  select(Team, PitchCall, Correct_Percentage, Total) %>%
  pivot_wider(
    names_from = PitchCall,
    values_from = c(Correct_Percentage, Total),
    # A team with no calls of one type gets 0 / 0 instead of NA.
    values_fill = list(Correct_Percentage = 0, Total = 0)
  )

# Collapse the "Vermont" rows of `summary_table` into a single row.
# Percentages are averaged weighted by the number of calls of that type.
# The Total_* columns are overwritten last on purpose: the percentage
# expressions above them must still see the per-row totals.
vermont_summary <- summarise(
  filter(summary_table, Team == "Vermont"),
  Team = "Vermont",
  Correct_Percentage_BallCalled =
    sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
  Correct_Percentage_StrikeCalled =
    sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
  Total_BallCalled = sum(Total_BallCalled),
  Total_StrikeCalled = sum(Total_StrikeCalled)
)

# Collapse the "Opponent" rows of `summary_table` into a single row.
# Percentages are averaged weighted by the number of calls of that type.
# The Total_* columns are overwritten last on purpose: the percentage
# expressions above them must still see the per-row totals.
opponent_summary <- summarise(
  filter(summary_table, Team == "Opponent"),
  Team = "Opponent",
  Correct_Percentage_BallCalled =
    sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
  Correct_Percentage_StrikeCalled =
    sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
  Total_BallCalled = sum(Total_BallCalled),
  Total_StrikeCalled = sum(Total_StrikeCalled)
)

# Accuracy of the umpire's calls across both teams, as a single "Overall" row
# with the same columns as the per-team summaries.
#
# The percentage is computed directly per PitchCall group, so it does not
# depend on both "Correct" and "Incorrect" being present in the data, and no
# leftover count columns act as accidental id columns when pivoting wide.
overall_summary <- umpire_data %>%
  group_by(PitchCall) %>%
  summarise(
    Total = n(),
    Correct_Percentage = sum(CorrectCall == "Correct") / n() * 100,
    .groups = "drop"
  ) %>%
  select(PitchCall, Correct_Percentage, Total) %>%
  pivot_wider(
    names_from = PitchCall,
    values_from = c(Correct_Percentage, Total),
    values_fill = list(Correct_Percentage = 0, Total = 0)
  ) %>%
  # Same weighted roll-up as the per-team summaries; Total_* columns are
  # overwritten last so the percentage expressions see the original totals.
  summarise(
    Team = "Overall",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

# Stack the Vermont, Opponent and Overall rows into one table
combined_summary <- bind_rows(vermont_summary, opponent_summary, overall_summary)

# Render the table, left-aligned and not stretched to the full page width.
# `FALSE` is spelled out because the shorthand `F` is an ordinary variable
# that can be reassigned.
kable(combined_summary, caption = "Summary of Correct Ball and Strike Calls") %>%
  kable_styling(full_width = FALSE, position = "left")

Summary of Correct Ball and Strike Calls
Team	Correct_Percentage_BallCalled	Correct_Percentage_StrikeCalled	Total_BallCalled	Total_StrikeCalled
Vermont	93.50649	54.16667	77	24
Opponent	98.33333	57.57576	60	33
Overall	95.62044	56.14035	137	57

Correct_Percentage_BallCalled: - Percentage of called balls that were called correctly, from the pitcher’s perspective - The pitching team would prefer a higher percentage - Correct is a pitch outside the zone called a ball - Incorrect is a pitch inside the zone called a ball

Correct_Percentage_StrikeCalled: - Percentage of called strikes that were called correctly, from the pitcher’s perspective - The pitching team would prefer a lower percentage - Correct is a pitch inside the zone called a strike - Incorrect is a pitch outside the zone called a strike

The pitching team would prefer:

A higher percentage of accurate ball calls, because a lower percentage indicates more true strikes were incorrectly called balls.

A lower percentage of accurate strike calls, because this indicates more pitches outside the zone were incorrectly called strikes in the pitcher’s favor.

We were granted more strikes that were balls but got robbed on more true strikes that were called balls.