strike_zone_limits <- list(
  xmin = -0.85,
  xmax = 0.85,
  ymin = 1.5,
  ymax = 3.5
)


umpire_data <- keene_data %>%
  filter(PitchCall %in% c("BallCalled", "StrikeCalled")) %>%
  select(Pitcher, Batter, PitchCall, PlateLocHeight, PlateLocSide, PitcherTeam) %>%
  mutate(
    CorrectCall = case_when(
      PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax | PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ "Correct",
      PitchCall == "StrikeCalled" & (PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax) ~ "Correct",
      TRUE ~ "Incorrect"
    ),
    Distance = case_when(
      PitchCall == "BallCalled" & PlateLocSide < strike_zone_limits$xmin & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (strike_zone_limits$xmin - PlateLocSide) * 12,
      PitchCall == "BallCalled" & PlateLocSide > strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (PlateLocSide - strike_zone_limits$xmax) * 12,
      PitchCall == "BallCalled" & PlateLocHeight < strike_zone_limits$ymin & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (strike_zone_limits$ymin - PlateLocHeight) * 12,
      PitchCall == "BallCalled" & PlateLocHeight > strike_zone_limits$ymax & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (PlateLocHeight - strike_zone_limits$ymax) * 12,
      PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax) & (PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12,
      
      # Strikes called as balls
      PitchCall == "StrikeCalled" ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12
    )
  )

plot <- ggplot(umpire_data, aes(x = PlateLocSide, y = PlateLocHeight, color = PitchCall, text = paste("Pitcher: ", Pitcher, "<br>Batter: ", Batter, "<br>Distance: ", round(Distance, 2), " inches"))) +
  geom_point(data = umpire_data %>% filter(CorrectCall == "Correct"), size = 1, shape = 16) + # Correct calls as smaller dots
  geom_point(data = umpire_data %>% filter(CorrectCall == "Incorrect"), size = 2, shape = 4, stroke = 2) + 
  geom_rect(aes(xmin = strike_zone_limits$xmin, xmax = strike_zone_limits$xmax, ymin = strike_zone_limits$ymin, ymax = strike_zone_limits$ymax), fill = NA, color = "black", linetype = "solid", size = 1) + # Strike zone box
  scale_x_continuous(limits = c(-2, 2)) +
  scale_y_continuous(limits = c(0, 5)) +
  coord_fixed(ratio = 1) + # Adjust ratio to shrink vertical distance
  labs(title = "Umpire's Ball and Strike Calls",
       x = "Horizontal Location (feet)",
       y = "Vertical Location (feet)",
       color = "Pitch Call") +
  scale_color_manual(values = c("BallCalled" = "green", "StrikeCalled" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90"),
    axis.text = element_text(color = "black"),
    axis.title = element_text(color = "black"),
    plot.title = element_text(color = "black"),
    legend.background = element_rect(fill = "white", color = NA),
    legend.key = element_rect(fill = "white", color = NA),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black"),
    plot.margin = unit(c(2, 2, 2, 2), "cm") # Increase plot margins
  )

plotly_plot <- ggplotly(plot, tooltip = "text") %>%
  layout(clickmode = 'event+select') # Require clicking to see the data

plotly_plot
summary_table <- umpire_data %>%
  mutate(Team = ifelse(PitcherTeam == "VER_MOU", "Opponent", "Vermont")) %>%
  group_by(Team, PitchCall, CorrectCall) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
  mutate(Total = Correct + Incorrect,
         Correct_Percentage = Correct / Total * 100) %>%
  select(Team, PitchCall, Correct_Percentage, Total) %>%
  pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0))

vermont_summary <- summary_table %>%
  filter(Team == "Vermont") %>%
  summarise(
    Team = "Vermont",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

opponent_summary <- summary_table %>%
  filter(Team == "Opponent") %>%
  summarise(
    Team = "Opponent",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

overall_summary <- umpire_data %>%
  group_by(PitchCall, CorrectCall) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
  mutate(Total = Correct + Incorrect,
         Correct_Percentage = Correct / Total * 100) %>%
  pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0)) %>%
  summarise(
    Team = "Overall",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

# Combine the summaries
combined_summary <- bind_rows(vermont_summary, opponent_summary, overall_summary)

# Display the summary table
kable(combined_summary, caption = "Summary of Correct Ball and Strike Calls") %>%
  kable_styling(full_width = F, position = "left")
Summary of Correct Ball and Strike Calls
Team Correct_Percentage_BallCalled Correct_Percentage_StrikeCalled Total_BallCalled Total_StrikeCalled
Vermont 92.85714 45.45455 56 22
Opponent 94.11765 56.66667 51 30
Overall 93.45794 51.92308 107 52

Correct_Percentage_BallCalled: - Percentage of balls called correctly from the pitcher’s perspective - The Pitching team would prefer a lower percentage - Correct is a pitch outside the zone called a ball - Incorrect is a pitch outside the zone called as a strike

Correct_Percentage_StrikeCalled - Percentage of strikes called correctly from the pitcher’s perspective - The pitching team would prefer a higher percentage - Correct is a pitch inside the zone called as a strike - Incorrect is a pitch inside the zone called as a ball

The pitching team would prefer:

A lower percentage of accurate ball calls because this indicates more balls are incorrectly called as strikes.

A higher percentage of accurate strike calls because this indicates fewer strikes are incorrectly called as balls.