Data: MLB.com via {baseballr}


MLB Scorigami Since 1901

View Code
# end_games = data.frame(date = NULL, away_team = NULL, away_score = NULL,
#                        home_score = NULL, home_team = NULL)

# Current-season results already cached on disk.
season_end_games <- read_csv("season_end_games.csv", col_types = cols())

# Dates still to be fetched: every day from 2023-03-30 through yesterday,
# skipping the all-star break and any date that is already in the cache.
asg_dates <- seq(as_date("2023-07-10"), as_date("2023-07-13"), by = "day")
loop_dates <- seq(as_date("2023-03-30"), Sys.Date() - 1, by = "day")
loop_dates <- loop_dates[
  !(loop_dates %in% asg_dates | loop_dates %in% season_end_games$date)
]

# Fetch the results for every missing date and append them to the season data.
# Each date's games are collected in a list and bound onto the cached table in
# a single rbind(), instead of re-copying the growing table on every iteration.
if (length(loop_dates) > 0) {
  # Index by position so each element keeps its Date class.
  new_games <- lapply(seq_along(loop_dates), function(i) {
    game_date <- loop_dates[i]
    mlb_game_pks(date = game_date) |>
      mutate(date = game_date) |>
      select(date, away_team = teams.away.team.name, away_score = teams.away.score,
             home_score = teams.home.score, home_team = teams.home.team.name)
  })

  season_end_games <- do.call(rbind, c(list(season_end_games), new_games))
}

# Persist the (possibly extended) season data so the next run only fetches
# dates that are not yet in the file.
write_csv(season_end_games, "season_end_games.csv")

# historic_end_games = data.frame(date = NULL, away_team = NULL, away_score = NULL,
#                                 home_score = NULL, home_team = NULL)
# 
# for (i in 1901:2022) {
#   loop_df = get_retrosheet(type = "game", year = i) |>
#     select(date = Date, away_team = VisTm, away_score = VisRuns,
#            home_score = HmRuns, home_team = HmTm)
# 
#   historic_end_games = rbind(historic_end_games, loop_df)
# }
# 
# write_csv(historic_end_games, "historic_end_games.csv")

# Historic results come from a previously saved CSV.
historic_end_games <- read_csv("historic_end_games.csv", col_types = cols())

# Stack historic and current-season results, newest games first.
end_games <- arrange(
  rbind(historic_end_games, season_end_games),
  desc(date)
)

# One row per game that has both scores recorded: the game date, the larger
# and smaller of the two run totals, and a "<win>-<lose>" label built from them.
all_scores <- end_games |>
  filter(!is.na(home_score), !is.na(away_score)) |>
  transmute(
    date,
    win_score = pmax(home_score, away_score),
    lose_score = pmin(home_score, away_score),
    score = paste0(win_score, "-", lose_score)
  )

# How many games ended with each distinct final score.
scores_counts <- count(all_scores, score, name = "occurrences")

# Convert a date to a more readable "<full month name> <day>, <year>" string.
#
# f_date: a date (or vector of dates) understood by lubridate's month(),
#         day() and year().
# Returns a character vector with one formatted string per input date.
better_date <- function(f_date) {
  # TRUE/FALSE are spelled out because T and F are ordinary, reassignable
  # variables.
  paste0(month(f_date, label = TRUE, abbr = FALSE), " ", day(f_date), ", ", year(f_date))
}

# Most recent date on which the final score `f_score` was recorded in
# `all_scores`.
get_last_occurrence <- function(f_score) {
  matching_rows <- which(all_scores$score == f_score)
  max(all_scores$date[matching_rows])
}

# Most recent date, excluding yesterday, on which the final score `f_score`
# was recorded in `all_scores`.
#
# f_score: a single score label such as "5-3".
# Returns the latest matching date before yesterday's games, or an NA Date
# when the score has no occurrence on any other day. Previously that case
# called max() on an empty vector, which raised a warning and returned -Inf.
# NOTE(review): the NA fallback assumes `all_scores$date` is a Date column.
get_2last_occurrence <- function(f_score) {
  is_earlier_match <- all_scores$score == f_score &
    all_scores$date != Sys.Date() - 1
  # which() drops NA comparisons, matching how dplyr::filter() treats them.
  earlier_dates <- all_scores$date[which(is_earlier_match)]

  if (length(earlier_dates) == 0) {
    return(as.Date(NA))
  }
  max(earlier_dates)
}

# Number of games in `scores_counts` that ended with the final score `f_score`.
get_n_occurrences <- function(f_score) {
  hit <- which(scores_counts$score == f_score)
  scores_counts$occurrences[hit]
}

# Build a one-sentence summary for a score from yesterday's games: either a
# "Scorigami!" announcement or how often the score has occurred and when it
# last occurred before yesterday.
#
# f_score: a single score label such as "5-3" that is present in the score
#          counts.
# Returns a length-one character string.
get_score_info <- function(f_score) {
  n_times <- get_n_occurrences(f_score)
  scorigami_text <- paste0("Scorigami! The score ", f_score,
                           " has never occurred before.")

  if (n_times == 1) {
    return(scorigami_text)
  }

  # A brand-new score can be recorded more than once on the same day; then the
  # count is above one but there is still no occurrence before yesterday, so
  # it is still a scorigami rather than a repeat with a missing date.
  previous <- get_2last_occurrence(f_score)
  if (is.na(previous) || is.infinite(previous)) {
    return(scorigami_text)
  }

  paste0("The score ", f_score, " has occurred ", n_times,
         " times, and last occurred on ", better_date(previous), ".")
}

# Add the date each score last occurred to the per-score counts.
# The latest date per score is computed in one grouped pass over `all_scores`
# and joined on, rather than re-filtering the whole table once per score.
scores_full <- scores_counts |>
  left_join(
    all_scores |>
      group_by(score) |>
      summarise(last_occurred = max(date)),
    by = "score"
  )

# Score of the most recent scorigami: among scores that occurred exactly once,
# the one with the latest date.
most_recent_score <- scores_full |>
  filter(occurrences == 1) |>
  arrange(desc(last_occurred)) |>
  slice(1) |>
  pull(score)

# Date of the most recent scorigami.
most_recent_date <- with(
  scores_full,
  max(last_occurred[which(occurrences == 1)])
)

# generating plot with plotly
ggplotly(scores_full |>
  separate(score, into = c("win_score", "lose_score"), sep = "-", remove = F) |>
  mutate(win_score = as.integer(win_score),
         lose_score = as.integer(lose_score)) |>
  rename(`Winning Score` = win_score, `Losing Score` = lose_score, `Times Occurred` = occurrences) |>
  ggplot(aes(`Winning Score`, `Losing Score`,
             text = paste0("Last Occurred: ", better_date(last_occurred)))) +
  geom_point(aes(col = `Times Occurred`), shape = "square", size = 4, show.legend = F) +
  scale_color_gradient(low = "#BDD2B8", high = "#71896C") +
  scale_x_continuous(breaks = seq(0, 50, by = 1)) +
  scale_y_continuous(breaks = seq(0, 50, by = 1)) +
  labs(title = paste0("Last Scorigami: ", most_recent_score, " on ", better_date(most_recent_date))))

Yesterday’s Game Results (Sorted by Date of Last Occurrence)

View Code
# Yesterday's games that have both scores recorded, with winner/loser fields,
# the "<win>-<lose>" label and its history, ordered by the date of the score's
# previous occurrence (most recent first), then by count and total runs.
# vapply() pins each derived column's type, so a day with no games produces
# empty typed columns instead of the empty list sapply() would return.
yesterday <- season_end_games |>
  filter(date == Sys.Date() - 1 & !is.na(home_score) & !is.na(away_score)) |>
  mutate(win_score = ifelse(home_score > away_score, home_score, away_score),
         lose_score = ifelse(home_score > away_score, away_score, home_score),
         win_team = ifelse(home_score > away_score, home_team, away_team),
         lose_team = ifelse(home_score > away_score, away_team, home_team),
         final_score = paste0(win_score, "-", lose_score),
         score_info = vapply(final_score, get_score_info, character(1)),
         n_occur = vapply(final_score, get_n_occurrences, integer(1)),
         # Stored as the date's underlying number, exactly as sapply() did;
         # the column is only used for ordering below.
         last_occur = vapply(final_score, get_2last_occurrence, numeric(1)),
         total_score = home_score + away_score) |>
  arrange(desc(last_occur), desc(n_occur), desc(total_score)) |>
  mutate(row = row_number())

# One line of display text per game from yesterday, in the order of
# `yesterday` (whose `row` column is simply its row number).
# Built in a single vectorised paste0(): seq_len() and recycle0 = TRUE keep
# the result at zero rows when there were no games, where `1:nrow(yesterday)`
# would have iterated over c(1, 0) and produced a garbage row.
plot_df <- data.frame(
  game = seq_len(nrow(yesterday)),
  text = paste0("⚾︎ ", yesterday$win_team, " ", yesterday$win_score, ", ",
                yesterday$lose_team, " ", yesterday$lose_score, ": ",
                yesterday$score_info, recycle0 = TRUE)
)

# Draw each game's summary line as plain text on an otherwise empty canvas
# with a solid background colour.
ggplot(plot_df, aes(x = 1, y = game)) +
  geom_text(aes(label = text), size = 4) +
  theme_void() +
  theme(plot.background = element_rect(fill = "#DFDAD1"))