# Data: MLB.com via {baseballr}
# end_games = data.frame(date = NULL, away_team = NULL, away_score = NULL,
# home_score = NULL, home_team = NULL)
# reading the games already stored for the current season
season_end_games = read_csv(file = "season_end_games.csv", col_types = cols())
# building the list of dates that still need to be fetched: every day from
# opening day through yesterday, minus the all-star break and minus any date
# that is already present in the stored data
asg_dates = seq(as_date("2023-07-10"), as_date("2023-07-13"), by = "day")
loop_dates = seq(as_date("2023-03-30"), Sys.Date() - 1, by = "day")
already_handled = loop_dates %in% asg_dates | loop_dates %in% season_end_games$date
loop_dates = loop_dates[!already_handled]
# fetching each missing date and appending the results to this season's data;
# the per-date frames are collected in a list and bound once, instead of
# re-copying the growing data frame on every iteration
if (length(loop_dates) > 0) {
  # indexing by position (rather than iterating over the dates directly)
  # keeps the Date class on each element
  new_games = lapply(seq_along(loop_dates), function(i) {
    mlb_game_pks(date = loop_dates[i]) |>
      mutate(date = loop_dates[i]) |>
      select(date, away_team = teams.away.team.name, away_score = teams.away.score,
             home_score = teams.home.score, home_team = teams.home.team.name)
  })
  season_end_games = do.call(rbind, c(list(season_end_games), new_games))
}
# updating the folder's season end game data
write_csv(season_end_games, "season_end_games.csv")
# historic_end_games = data.frame(date = NULL, away_team = NULL, away_score = NULL,
# home_score = NULL, home_team = NULL)
#
# for (i in 1901:2022) {
# loop_df = get_retrosheet(type = "game", year = i) |>
# select(date = Date, away_team = VisTm, away_score = VisRuns,
# home_score = HmRuns, home_team = HmTm)
#
# historic_end_games = rbind(historic_end_games, loop_df)
# }
#
# write_csv(historic_end_games, "historic_end_games.csv")
# reading the stored results from earlier seasons
historic_end_games = read_csv(file = "historic_end_games.csv", col_types = cols())
# stacking historic and current-season results, newest games first
end_games = arrange(rbind(historic_end_games, season_end_games), desc(date))
# one row per completed game: its date, the higher score, the lower score and
# a "high-low" label (a tied game keeps the same value on both sides)
all_scores = end_games |>
  filter(!is.na(home_score), !is.na(away_score)) |>
  transmute(date,
            win_score = pmax(home_score, away_score),
            lose_score = pmin(home_score, away_score),
            score = paste0(win_score, "-", lose_score))
# how many times each distinct final score appears in all_scores
scores_counts = count(all_scores, score, name = "occurrences")
# function to convert a date to a more readable format
#
# f_date: a date (or vector of dates)
# returns: a string of the form "<full month name> <day>, <year>"
better_date = function(f_date) {
  # TRUE/FALSE are spelled out because T and F are ordinary variables that
  # can be reassigned elsewhere
  paste0(month(f_date, label = TRUE, abbr = FALSE), " ", day(f_date), ", ", year(f_date))
}
# function to find the most recent date on which a score happened
#
# f_score: a score label in the "high-low" format used by all_scores
# returns: the latest date in all_scores with that score
get_last_occurrence = function(f_score) {
  matching_games = filter(all_scores, score == f_score)
  max(matching_games$date)
}
# function to find the most recent occurrence of a score before yesterday
#
# games dated exactly yesterday (Sys.Date() - 1) are ignored, so for a score
# that was just played this gives the previous time it happened
#
# f_score: a score label in the "high-low" format used by all_scores
# returns: the latest qualifying date, or an NA date when the score has no
#          occurrence other than yesterday's
get_2last_occurrence = function(f_score) {
  prior_dates = all_scores |>
    filter(score == f_score & date != Sys.Date() - 1) |>
    pull(date)
  # max() of an empty vector warns and yields -Inf, which is not a usable
  # date; report "no earlier occurrence" as a missing date instead
  if (length(prior_dates) == 0) {
    return(as.Date(NA_character_))
  }
  max(prior_dates)
}
# function to look up how many times a score has occurred
#
# f_score: a score label in the "high-low" format used by scores_counts
# returns: the stored count for that score (empty if the score is not listed)
get_n_occurrences = function(f_score) {
  hit = which(scores_counts$score == f_score)
  scores_counts$occurrences[hit]
}
# function to describe a score in one sentence: either a scorigami
# announcement (the score appears exactly once in the data) or its total
# count together with the previous date on which it happened
#
# f_score: a score label in the "high-low" format
# returns: a single descriptive string
get_score_info = function(f_score) {
  times_seen = get_n_occurrences(f_score)
  if (times_seen != 1) {
    previous_date = better_date(get_2last_occurrence(f_score))
    return(paste0("The score ", f_score, " has occurred ", times_seen,
                  " times, and last occurred on ", previous_date, "."))
  }
  paste0("Scorigami! The score ", f_score, " has never occurred before.")
}
# adding last occurrence to full score data
#
# the latest date per score is computed in one grouped pass over all_scores
# and joined on, rather than re-filtering all_scores once for every score
scores_full = scores_counts |>
  left_join(all_scores |>
              group_by(score) |>
              summarise(last_occurred = max(date)),
            by = "score")
# score label of the most recent scorigami (a score seen exactly once)
most_recent_score = scores_full |>
  filter(occurrences == 1) |>
  arrange(desc(last_occurred)) |>
  slice(1) |>
  pull(score)
# date of the most recent scorigami
most_recent_date = max(scores_full$last_occurred[which(scores_full$occurrences == 1)])
# interactive grid of every final score: one square per winning/losing score
# pair, shaded by how often it has occurred, with the last occurrence date
# supplied as the text aesthetic; the title reports the latest scorigami
(scores_full |>
  separate(score, into = c("win_score", "lose_score"), sep = "-", remove = FALSE) |>
  mutate(across(c(win_score, lose_score), as.integer)) |>
  rename(`Winning Score` = win_score,
         `Losing Score` = lose_score,
         `Times Occurred` = occurrences) |>
  ggplot(aes(`Winning Score`, `Losing Score`,
             text = paste0("Last Occurred: ", better_date(last_occurred)))) +
  geom_point(aes(col = `Times Occurred`), shape = "square", size = 4, show.legend = FALSE) +
  scale_color_gradient(low = "#BDD2B8", high = "#71896C") +
  scale_x_continuous(breaks = seq(0, 50, by = 1)) +
  scale_y_continuous(breaks = seq(0, 50, by = 1)) +
  labs(title = paste0("Last Scorigami: ", most_recent_score, " on ",
                      better_date(most_recent_date)))) |>
  ggplotly()
# yesterday's completed games with winner/loser details, a description of each
# final score, and a display rank
#
# vapply() is used for the per-score lookups so every derived column has a
# fixed type even on a day with no games (sapply() would return an empty list)
yesterday = season_end_games |>
  filter(date == Sys.Date() - 1 & !is.na(home_score) & !is.na(away_score)) |>
  mutate(win_score = pmax(home_score, away_score),
         lose_score = pmin(home_score, away_score),
         # when the scores are equal the away side is listed as the winner
         win_team = ifelse(home_score > away_score, home_team, away_team),
         lose_team = ifelse(home_score > away_score, away_team, home_team),
         final_score = paste0(win_score, "-", lose_score),
         score_info = vapply(final_score, get_score_info, character(1), USE.NAMES = FALSE),
         n_occur = vapply(final_score, get_n_occurrences, integer(1), USE.NAMES = FALSE),
         # collected as plain numbers (the date class is dropped); only used
         # for sorting below
         last_occur = vapply(final_score, get_2last_occurrence, numeric(1), USE.NAMES = FALSE),
         total_score = home_score + away_score) |>
  arrange(desc(last_occur), desc(n_occur), desc(total_score)) |>
  mutate(row = row_number())
# one line of display text per game from yesterday, in display-rank order
#
# built in a single vectorised step so a day without games gives an empty
# data frame (1:nrow() would count 1, 0 and produce two bogus rows)
ranked_games = yesterday[order(yesterday$row), ]
plot_df = data.frame(
  game = seq_len(nrow(ranked_games)),
  # recycle0 = TRUE keeps the result empty when there are no games, instead of
  # pasting the constant pieces into a single stray string
  text = paste0("⚾︎ ", ranked_games$win_team, " ", ranked_games$win_score, ", ",
                ranked_games$lose_team, " ", ranked_games$lose_score, ": ",
                ranked_games$score_info, recycle0 = TRUE)
)
# drawing the per-game text lines, one per y position, on an otherwise empty
# canvas with a solid background colour
ggplot(plot_df, aes(x = 1, y = game)) +
  geom_text(aes(label = text), size = 4) +
  theme_void() +
  theme(plot.background = element_rect(fill = "#DFDAD1"))