strike_zone_limits <- list(
xmin = -0.85,
xmax = 0.85,
ymin = 1.5,
ymax = 3.5
)
umpire_data <- keene_data %>%
filter(PitchCall %in% c("BallCalled", "StrikeCalled")) %>%
select(Pitcher, Batter, PitchCall, PlateLocHeight, PlateLocSide, PitcherTeam) %>%
mutate(
CorrectCall = case_when(
PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax | PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ "Correct",
PitchCall == "StrikeCalled" & (PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax) ~ "Correct",
TRUE ~ "Incorrect"
),
Distance = case_when(
PitchCall == "BallCalled" & PlateLocSide < strike_zone_limits$xmin & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (strike_zone_limits$xmin - PlateLocSide) * 12,
PitchCall == "BallCalled" & PlateLocSide > strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (PlateLocSide - strike_zone_limits$xmax) * 12,
PitchCall == "BallCalled" & PlateLocHeight < strike_zone_limits$ymin & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (strike_zone_limits$ymin - PlateLocHeight) * 12,
PitchCall == "BallCalled" & PlateLocHeight > strike_zone_limits$ymax & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (PlateLocHeight - strike_zone_limits$ymax) * 12,
PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax) & (PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12,
# Strikes called as balls
PitchCall == "StrikeCalled" ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12
)
)
plot <- ggplot(umpire_data, aes(x = PlateLocSide, y = PlateLocHeight, color = PitchCall, text = paste("Pitcher: ", Pitcher, "<br>Batter: ", Batter, "<br>Distance: ", round(Distance, 2), " inches"))) +
geom_point(data = umpire_data %>% filter(CorrectCall == "Correct"), size = 1, shape = 16) + # Correct calls as smaller dots
geom_point(data = umpire_data %>% filter(CorrectCall == "Incorrect"), size = 2, shape = 4, stroke = 2) +
geom_rect(aes(xmin = strike_zone_limits$xmin, xmax = strike_zone_limits$xmax, ymin = strike_zone_limits$ymin, ymax = strike_zone_limits$ymax), fill = NA, color = "black", linetype = "solid", size = 1) + # Strike zone box
scale_x_continuous(limits = c(-2, 2)) +
scale_y_continuous(limits = c(0, 5)) +
coord_fixed(ratio = 1) + # Adjust ratio to shrink vertical distance
labs(title = "Umpire's Ball and Strike Calls",
x = "Horizontal Location (feet)",
y = "Vertical Location (feet)",
color = "Pitch Call") +
scale_color_manual(values = c("BallCalled" = "green", "StrikeCalled" = "red")) +
theme_minimal() +
theme(
legend.position = "right",
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey90"),
axis.text = element_text(color = "black"),
axis.title = element_text(color = "black"),
plot.title = element_text(color = "black"),
legend.background = element_rect(fill = "white", color = NA),
legend.key = element_rect(fill = "white", color = NA),
legend.text = element_text(color = "black"),
legend.title = element_text(color = "black"),
plot.margin = unit(c(2, 2, 2, 2), "cm") # Increase plot margins
)
plotly_plot <- ggplotly(plot, tooltip = "text") %>%
layout(clickmode = 'event+select') # Require clicking to see the data
plotly_plot
summary_table <- umpire_data %>%
mutate(Team = ifelse(PitcherTeam == "VER_MOU", "Opponent", "Vermont")) %>%
group_by(Team, PitchCall, CorrectCall) %>%
summarise(Count = n(), .groups = 'drop') %>%
pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
mutate(Total = Correct + Incorrect,
Correct_Percentage = Correct / Total * 100) %>%
select(Team, PitchCall, Correct_Percentage, Total) %>%
pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0))
vermont_summary <- summary_table %>%
filter(Team == "Vermont") %>%
summarise(
Team = "Vermont",
Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
Total_BallCalled = sum(Total_BallCalled),
Total_StrikeCalled = sum(Total_StrikeCalled)
)
opponent_summary <- summary_table %>%
filter(Team == "Opponent") %>%
summarise(
Team = "Opponent",
Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
Total_BallCalled = sum(Total_BallCalled),
Total_StrikeCalled = sum(Total_StrikeCalled)
)
overall_summary <- umpire_data %>%
group_by(PitchCall, CorrectCall) %>%
summarise(Count = n(), .groups = 'drop') %>%
pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
mutate(Total = Correct + Incorrect,
Correct_Percentage = Correct / Total * 100) %>%
pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0)) %>%
summarise(
Team = "Overall",
Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
Total_BallCalled = sum(Total_BallCalled),
Total_StrikeCalled = sum(Total_StrikeCalled)
)
# Combine the summaries
combined_summary <- bind_rows(vermont_summary, opponent_summary, overall_summary)
# Display the summary table
kable(combined_summary, caption = "Summary of Correct Ball and Strike Calls") %>%
kable_styling(full_width = F, position = "left")
Team | Correct_Percentage_BallCalled | Correct_Percentage_StrikeCalled | Total_BallCalled | Total_StrikeCalled |
---|---|---|---|---|
Vermont | 97.95918 | 86.95652 | 49 | 23 |
Opponent | 89.18919 | 86.36364 | 37 | 22 |
Overall | 94.18605 | 86.66667 | 86 | 45 |
Correct_Percentage_BallCalled: - Percentage of balls called correctly from the pitcher’s perspective - The Pitching team would prefer a lower percentage - Correct is a pitch outside the zone called a ball - Incorrect is a pitch outside the zone called as a strike
Correct_Percentage_StrikeCalled - Percentage of strikes called correctly from the pitcher’s perspective - The pitching team would prefer a higher percentage - Correct is a pitch inside the zone called as a strike - Incorrect is a pitch inside the zone called as a ball
The pitching team would prefer:
A lower percentage of accurate ball calls because this indicates more balls are incorrectly called as strikes.
A higher percentage of accurate strike calls because this indicates fewer strikes are incorrectly called as balls.