1) Load ratings

fname <- dplyr::case_when(
  file.exists("MovieRatings.xlsx") ~ "MovieRatings.xlsx",
  file.exists("MovieRatings (1).xlsx") ~ "MovieRatings (1).xlsx",
  file.exists("MovieRatings (2).xlsx") ~ "MovieRatings (2).xlsx",
  TRUE ~ NA_character_
)
stopifnot(!is.na(fname))

sheets <- readxl::excel_sheets(fname)
sheet_use <- if ("MovieRatings" %in% sheets) "MovieRatings" else sheets[1]

ratings_wide <- readxl::read_excel(fname, sheet = sheet_use) |> janitor::clean_names()

ratings <- ratings_wide |>
  tidyr::pivot_longer(-critic, names_to = "movie_id", values_to = "rating") |>
  dplyr::rename(user_id = critic) |>
  dplyr::mutate(rating = as.numeric(rating)) |>
  dplyr::filter(!is.na(rating))

knitr::kable(head(ratings, 8), caption = "Sample ratings (long format)")
Sample ratings (long format)
user_id movie_id rating
Burton jungle_book 4
Burton star_wars_force 4
Charley captain_america 4
Charley deadpool 5
Charley frozen 4
Charley jungle_book 3
Charley pitch_perfect2 2
Charley star_wars_force 3

2) Global Baseline (mu + b_i)

mu <- mean(ratings$rating, na.rm = TRUE)
lambda <- 5  # set to 0 if your spreadsheet says no regularization

item_bias <- ratings |>
  dplyr::group_by(movie_id) |>
  dplyr::summarise(n_i = dplyr::n(), sum_i = sum(rating - mu), .groups = "drop") |>
  dplyr::mutate(b_i = sum_i / (lambda + n_i))

global_baseline <- item_bias |>
  dplyr::mutate(global_score = mu + b_i) |>
  dplyr::arrange(dplyr::desc(global_score))

knitr::kable(global_baseline, digits = 3, caption = "Global Baseline (mu + b_i)")
Global Baseline (mu + b_i)
movie_id n_i sum_i b_i global_score
deadpool 9 4.590 0.328 4.262
captain_america 11 3.721 0.233 4.167
star_wars_force 13 2.852 0.158 4.093
jungle_book 10 -0.344 -0.023 3.911
frozen 11 -2.279 -0.142 3.792
pitch_perfect2 7 -8.541 -0.712 3.223

3) Plot + save outputs

pretty_names <- c(
  captain_america = "Captain America", captainamerica = "Captain America",
  deadpool = "Deadpool", frozen = "Frozen",
  jungle_book = "Jungle Book", junglebook = "Jungle Book",
  pitch_perfect2 = "Pitch Perfect 2",
  star_wars_force = "Star Wars: The Force Awakens", starwarsforce = "Star Wars: The Force Awakens"
)

global_formatted <- global_baseline |>
  dplyr::mutate(movie = dplyr::coalesce(pretty_names[movie_id], 
stringr::str_to_title(gsub("_"," ", movie_id)))) |>
  dplyr::select(movie_id, movie, n_i, b_i, global_score) |>
  dplyr::arrange(dplyr::desc(global_score))

ggplot(global_formatted, aes(x = reorder(movie, global_score), y = global_score)) +
  geom_col() + coord_flip() +
  labs(title = "Global Baseline (mu + b_i) — Non-Personalized Ranking",
       x = "Movie", y = "Baseline Score")

readr::write_csv(global_formatted, "global_baseline_results.csv")
ggplot2::ggsave("global_baseline_bar.png", width = 7, height = 4, dpi = 300)