strike_zone_limits <- list(
xmin = -0.85,
xmax = 0.85,
ymin = 1.5,
ymax = 3.5
)
umpire_data <- keene_data %>%
filter(PitchCall %in% c("BallCalled", "StrikeCalled")) %>%
select(Pitcher, Batter, PitchCall, PlateLocHeight, PlateLocSide, PitcherTeam) %>%
mutate(
CorrectCall = case_when(
PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax | PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ "Correct",
PitchCall == "StrikeCalled" & (PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax) ~ "Correct",
TRUE ~ "Incorrect"
),
Distance = case_when(
PitchCall == "BallCalled" & PlateLocSide < strike_zone_limits$xmin & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (strike_zone_limits$xmin - PlateLocSide) * 12,
PitchCall == "BallCalled" & PlateLocSide > strike_zone_limits$xmax & PlateLocHeight >= strike_zone_limits$ymin & PlateLocHeight <= strike_zone_limits$ymax ~ (PlateLocSide - strike_zone_limits$xmax) * 12,
PitchCall == "BallCalled" & PlateLocHeight < strike_zone_limits$ymin & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (strike_zone_limits$ymin - PlateLocHeight) * 12,
PitchCall == "BallCalled" & PlateLocHeight > strike_zone_limits$ymax & PlateLocSide >= strike_zone_limits$xmin & PlateLocSide <= strike_zone_limits$xmax ~ (PlateLocHeight - strike_zone_limits$ymax) * 12,
PitchCall == "BallCalled" & (PlateLocSide < strike_zone_limits$xmin | PlateLocSide > strike_zone_limits$xmax) & (PlateLocHeight < strike_zone_limits$ymin | PlateLocHeight > strike_zone_limits$ymax) ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12,
# Strikes called as balls
PitchCall == "StrikeCalled" ~ sqrt((pmax(0, strike_zone_limits$xmin - PlateLocSide, PlateLocSide - strike_zone_limits$xmax))^2 + (pmax(0, strike_zone_limits$ymin - PlateLocHeight, PlateLocHeight - strike_zone_limits$ymax))^2) * 12
)
)
plot <- ggplot(umpire_data, aes(x = PlateLocSide, y = PlateLocHeight, color = PitchCall, text = paste("Pitcher: ", Pitcher, "<br>Batter: ", Batter, "<br>Distance: ", round(Distance, 2), " inches"))) +
geom_point(data = umpire_data %>% filter(CorrectCall == "Correct"), size = 1, shape = 16) + # Correct calls as smaller dots
geom_point(data = umpire_data %>% filter(CorrectCall == "Incorrect"), size = 2, shape = 4, stroke = 2) +
geom_rect(aes(xmin = strike_zone_limits$xmin, xmax = strike_zone_limits$xmax, ymin = strike_zone_limits$ymin, ymax = strike_zone_limits$ymax), fill = NA, color = "black", linetype = "solid", size = 1) + # Strike zone box
scale_x_continuous(limits = c(-2, 2)) +
scale_y_continuous(limits = c(0, 5)) +
coord_fixed(ratio = 1) + # Adjust ratio to shrink vertical distance
labs(title = "Umpire's Ball and Strike Calls",
x = "Horizontal Location (feet)",
y = "Vertical Location (feet)",
color = "Pitch Call") +
scale_color_manual(values = c("BallCalled" = "green", "StrikeCalled" = "red")) +
theme_minimal() +
theme(
legend.position = "right",
panel.grid.major = element_line(color = "grey80"),
panel.grid.minor = element_line(color = "grey90"),
axis.text = element_text(color = "black"),
axis.title = element_text(color = "black"),
plot.title = element_text(color = "black"),
legend.background = element_rect(fill = "white", color = NA),
legend.key = element_rect(fill = "white", color = NA),
legend.text = element_text(color = "black"),
legend.title = element_text(color = "black"),
plot.margin = unit(c(2, 2, 2, 2), "cm") # Increase plot margins
)
plotly_plot <- ggplotly(plot, tooltip = "text") %>%
layout(clickmode = 'event+select') # Require clicking to see the data
plotly_plot
summary_table <- umpire_data %>%
mutate(Team = ifelse(PitcherTeam == "VER_MOU", "Opponent", "Vermont")) %>%
group_by(Team, PitchCall, CorrectCall) %>%
summarise(Count = n(), .groups = 'drop') %>%
pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
mutate(Total = Correct + Incorrect,
Correct_Percentage = Correct / Total * 100) %>%
select(Team, PitchCall, Correct_Percentage, Total) %>%
pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0))
vermont_summary <- summary_table %>%
filter(Team == "Vermont") %>%
summarise(
Team = "Vermont",
Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
Total_BallCalled = sum(Total_BallCalled),
Total_StrikeCalled = sum(Total_StrikeCalled)
)
opponent_summary <- summary_table %>%
filter(Team == "Opponent") %>%
summarise(
Team = "Opponent",
Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
Total_BallCalled = sum(Total_BallCalled),
Total_StrikeCalled = sum(Total_StrikeCalled)
)
overall_summary <- umpire_data %>%
group_by(PitchCall, CorrectCall) %>%
summarise(Count = n(), .groups = 'drop') %>%
pivot_wider(names_from = CorrectCall, values_from = Count, values_fill = list(Count = 0)) %>%
mutate(Total = Correct + Incorrect,
Correct_Percentage = Correct / Total * 100) %>%
pivot_wider(names_from = PitchCall, values_from = c(Correct_Percentage, Total), values_fill = list(Correct_Percentage = 0, Total = 0)) %>%
summarise(
Team = "Overall",
Correct_Percentage_BallCalled = sum(Correct_Percentage_BallCalled * Total_BallCalled) / sum(Total_BallCalled),
Correct_Percentage_StrikeCalled = sum(Correct_Percentage_StrikeCalled * Total_StrikeCalled) / sum(Total_StrikeCalled),
Total_BallCalled = sum(Total_BallCalled),
Total_StrikeCalled = sum(Total_StrikeCalled)
)
# Combine the summaries
combined_summary <- bind_rows(vermont_summary, opponent_summary, overall_summary)
# Display the summary table
kable(combined_summary, caption = "Summary of Correct Ball and Strike Calls") %>%
kable_styling(full_width = F, position = "left")
Team | Correct_Percentage_BallCalled | Correct_Percentage_StrikeCalled | Total_BallCalled | Total_StrikeCalled |
---|---|---|---|---|
Vermont | 93.50649 | 54.16667 | 77 | 24 |
Opponent | 98.33333 | 57.57576 | 60 | 33 |
Overall | 95.62044 | 56.14035 | 137 | 57 |
Correct_Percentage_BallCalled: - Percentage of balls called correctly from the pitcher’s perspective - The Pitching team would prefer a lower percentage - Correct is a pitch outside the zone called a ball - Incorrect is a pitch outside the zone called as a strike
Correct_Percentage_StrikeCalled - Percentage of strikes called correctly from the pitcher’s perspective - The pitching team would prefer a higher percentage - Correct is a pitch inside the zone called as a strike - Incorrect is a pitch inside the zone called as a ball
The pitching team would prefer:
A lower percentage of accurate ball calls because this indicates more balls are incorrectly called as strikes.
A higher percentage of accurate strike calls because this indicates fewer strikes are incorrectly called as balls.
We were granted more strikes that were balls but got robbed on more true strikes that were called balls.