strike_zone_limits <- list(
  xmin = -0.85,
  xmax = 0.85,
  ymin = 1.5,
  ymax = 3.5
)


umpire_data <- keene_data %>%
  filter(PitchCall %in% c("BallCalled", "StrikeCalled")) %>%
  select(Pitcher, Batter, PitchCall, PlateLocHeight, PlateLocSide, PitcherTeam) %>%
  mutate(
    CorrectCall = case_when(
      PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax | PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ "Correct",
      PitchCall == "StrikeCalled" & (PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax) ~ "Correct",
      TRUE ~ "Incorrect"
    ),
    Distance = case_when(
      PitchCall == "BallCalled" & PlateLocSide < strike_zone_limits$xmin & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (strike_zone_limits$xmin - PlateLocSide) * 12,
      PitchCall == "BallCalled" & PlateLocSide > strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (PlateLocSide - strike_zone_limits$xmax) * 12,
      PitchCall == "BallCalled" & PlateLocHeight < strike_zone_limits$ymin & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (strike_zone_limits$ymin - PlateLocHeight) * 12,
      PitchCall == "BallCalled" & PlateLocHeight > strike_zone_limits$ymax & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (PlateLocHeight - strike_zone_limits$ymax) * 12,
      PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax) & (PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12,
      
      # Strikes called as balls
      PitchCall == "StrikeCalled" ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12
    )
  )

plot <- ggplot(umpire_data, aes(x = PlateLocSide, y = PlateLocHeight, color = PitchCall, text = paste("Pitcher: ", Pitcher, "<br>Batter: ", Batter, "<br>Distance: ", round(Distance, 2), " inches"))) +
  geom_point(data = umpire_data %>% filter(CorrectCall == "Correct"), size = 1, shape = 16) + # Correct calls as smaller dots
  geom_point(data = umpire_data %>% filter(CorrectCall == "Incorrect"), size = 2, shape = 4, stroke = 2) + 
  geom_rect(aes(xmin = strike_zone_limits$xmin, xmax = strike_zone_limits$xmax, ymin = strike_zone_limits$ymin, ymax = strike_zone_limits$ymax), fill = NA, color = "black", linetype = "solid", size = 1) + # Strike zone box
  scale_x_continuous(limits = c(-2, 2)) +
  scale_y_continuous(limits = c(0, 5)) +
  coord_fixed(ratio = 1) + # Adjust ratio to shrink vertical distance
  labs(title = "Umpire's Ball and Strike Calls",
       x = "Horizontal Location (feet)",
       y = "Vertical Location (feet)",
       color = "Pitch Call") +
  scale_color_manual(values = c("BallCalled" = "green", "StrikeCalled" = "red")) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid.major = element_line(color = "grey80"),
    panel.grid.minor = element_line(color = "grey90"),
    axis.text = element_text(color = "black"),
    axis.title = element_text(color = "black"),
    plot.title = element_text(color = "black"),
    legend.background = element_rect(fill = "white", color = NA),
    legend.key = element_rect(fill = "white", color = NA),
    legend.text = element_text(color = "black"),
    legend.title = element_text(color = "black"),
    plot.margin = unit(c(2, 2, 2, 2), "cm") # Increase plot margins
  )

plotly_plot <- ggplotly(plot, tooltip = "text") %>%
  layout(clickmode = 'event+select') # Require clicking to see the data

plotly_plot
summary_table <- umpire_data %>%
  mutate(Team = ifelse(PitcherTeam == "VER_MOU", "Opponent", "Vermont")) %>%
  group_by(Team, PitchCall, CorrectCall) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
  mutate(Total = Correct + Incorrect,
         Correct_Percentage = Correct / Total * 100) %>%
  select(Team, PitchCall, Correct_Percentage, Total) %>%
  pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0))

vermont_summary <- summary_table %>%
  filter(Team == "Vermont") %>%
  summarise(
    Team = "Vermont",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

opponent_summary <- summary_table %>%
  filter(Team == "Opponent") %>%
  summarise(
    Team = "Opponent",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

overall_summary <- umpire_data %>%
  group_by(PitchCall, CorrectCall) %>%
  summarise(Count = n(), .groups = 'drop') %>%
  pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
  mutate(Total = Correct + Incorrect,
         Correct_Percentage = Correct / Total * 100) %>%
  pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0)) %>%
  summarise(
    Team = "Overall",
    Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
    Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
    Total_BallCalled = sum(Total_BallCalled),
    Total_StrikeCalled = sum(Total_StrikeCalled)
  )

# Combine the summaries
combined_summary <- bind_rows(vermont_summary, opponent_summary, overall_summary)

# Display the summary table
kable(combined_summary, caption = "Summary of Correct Ball and Strike Calls") %>%
  kable_styling(full_width = F, position = "left")
Summary of Correct Ball and Strike Calls
Team Correct_Percentage_BallCalled Correct_Percentage_StrikeCalled Total_BallCalled Total_StrikeCalled
Vermont 93.50649 54.16667 77 24
Opponent 98.33333 57.57576 60 33
Overall 95.62044 56.14035 137 57

Correct_Percentage_BallCalled: - Percentage of balls called correctly from the pitcher’s perspective - The Pitching team would prefer a lower percentage - Correct is a pitch outside the zone called a ball - Incorrect is a pitch outside the zone called as a strike

Correct_Percentage_StrikeCalled - Percentage of strikes called correctly from the pitcher’s perspective - The pitching team would prefer a higher percentage - Correct is a pitch inside the zone called as a strike - Incorrect is a pitch inside the zone called as a ball

The pitching team would prefer:

A lower percentage of accurate ball calls because this indicates more balls are incorrectly called as strikes.

A higher percentage of accurate strike calls because this indicates fewer strikes are incorrectly called as balls.

We were granted more strikes that were balls but got robbed on more true strikes that were called balls.