# Read the wide ratings matrix from the provided spreadsheet
xlsxPath <- "MovieRatings.xlsx"   # keep the file in the same directory as the .Rmd
mr <- read_excel(xlsxPath, sheet = "MovieRatings")

stopifnot("Critic" %in% names(mr))
itemCols <- setdiff(names(mr), "Critic")
mr[itemCols] <- lapply(mr[itemCols], function(col) suppressWarnings(as.numeric(col)))

long <- mr %>%
  pivot_longer(cols = all_of(itemCols), names_to = "movie_name", values_to = "rating") %>%
  rename(friend_name = Critic) %>%
  filter(!is.na(rating))

friends <- long %>%
  distinct(friend_name) %>%
  arrange(friend_name) %>%
  mutate(friend_id = row_number()) %>%
  select(friend_id, friend_name)

movies <- long %>%
  distinct(movie_name) %>%
  arrange(movie_name) %>%
  mutate(movie_id = row_number()) %>%
  select(movie_id, movie_name)

full_ratings <- long %>%
  left_join(friends, by = "friend_name") %>%
  left_join(movies,  by = "movie_name") %>%
  select(friend_id, friend_name, movie_id, movie_name, rating)

kable(head(full_ratings, 8))
friend_id friend_name movie_id movie_name rating
1 Burton 4 JungleBook 4
1 Burton 6 StarWarsForce 4
2 Charley 1 CaptainAmerica 4
2 Charley 2 Deadpool 5
2 Charley 3 Frozen 4
2 Charley 4 JungleBook 3
2 Charley 5 PitchPerfect2 2
2 Charley 6 StarWarsForce 3
clamp <- function(x, lo = 1, hi = 5) pmin(pmax(x, lo), hi)

mu <- mean(full_ratings$rating, na.rm = TRUE)

user_bias <- full_ratings %>%
  group_by(friend_id, friend_name) %>%
  summarise(bu = mean(rating, na.rm = TRUE) - mu, .groups = "drop")

item_bias <- full_ratings %>%
  group_by(movie_id, movie_name) %>%
  summarise(bi = mean(rating, na.rm = TRUE) - mu, .groups = "drop")

predictions <- expand_grid(
    friend_id = unique(full_ratings$friend_id),
    movie_id  = unique(full_ratings$movie_id)
  ) %>%
  left_join(distinct(friends, friend_id, friend_name), by = "friend_id") %>%
  left_join(distinct(movies,  movie_id,  movie_name),  by = "movie_id") %>%
  left_join(select(full_ratings, friend_id, movie_id, rating), by = c("friend_id","movie_id")) %>%
  left_join(user_bias, by = c("friend_id","friend_name")) %>%
  left_join(item_bias, by = c("movie_id","movie_name")) %>%
  mutate(
    bu = ifelse(is.na(bu), 0, bu),
    bi = ifelse(is.na(bi), 0, bi),
    pred = clamp(mu + bu + bi),
    missing = is.na(rating)
  ) %>%
  arrange(friend_name, movie_name)

# Show the core model pieces
kable(tibble(GlobalMean = mu), digits = 3)
GlobalMean
3.934
kable(head(user_bias, 10), caption = "User Bias (b_u)", digits = 3)
User Bias (b_u)
friend_id friend_name bu
1 Burton 0.066
2 Charley -0.434
3 Dan 1.066
4 Dieudonne 0.732
5 Matt -0.684
6 Mauricio -0.434
7 Max -0.601
8 Nathan 0.066
9 Param -0.434
10 Parshu -0.268
kable(head(item_bias, 10), caption = "Item Bias (b_i)", digits = 3)
Item Bias (b_i)
movie_id movie_name bi
1 CaptainAmerica 0.338
2 Deadpool 0.510
3 Frozen -0.207
4 JungleBook -0.034
5 PitchPerfect2 -1.220
6 StarWarsForce 0.219
kable(head(select(predictions, friend_name, movie_name, rating, pred), 12),
      caption = "Sample Predictions", digits = 2)
Sample Predictions
friend_name movie_name rating pred
Burton CaptainAmerica NA 4.34
Burton Deadpool NA 4.51
Burton Frozen NA 3.79
Burton JungleBook 4 3.97
Burton PitchPerfect2 NA 2.78
Burton StarWarsForce 4 4.22
Charley CaptainAmerica 4 3.84
Charley Deadpool 5 4.01
Charley Frozen 4 3.29
Charley JungleBook 3 3.47
Charley PitchPerfect2 2 2.28
Charley StarWarsForce 3 3.72
# Top-N recommendations per friend for unseen movies
recommend_top_n <- function(pred_df, friend_id, n = 3) {
  pred_df %>%
    filter(friend_id == !!friend_id, missing) %>%
    arrange(desc(pred), movie_name) %>%
    slice_head(n = n) %>%
    select(friend_name, movie_name, predicted_rating = pred)
}

# Show Top 3 for each friend in a single table
top3_all <- predictions %>%
  filter(missing) %>%
  group_by(friend_id, friend_name) %>%
  slice_max(order_by = pred, n = 3, with_ties = FALSE) %>%
  ungroup() %>%
  select(friend_name, movie_name, predicted_rating = pred)

kable(top3_all, caption = "Top 3 Recommendations per Friend", digits = 2)
Top 3 Recommendations per Friend
friend_name movie_name predicted_rating
Burton Deadpool 4.51
Burton CaptainAmerica 4.34
Burton Frozen 3.79
Dan CaptainAmerica 5.00
Dan JungleBook 4.97
Dan Frozen 4.79
Dieudonne JungleBook 4.63
Dieudonne Frozen 4.46
Dieudonne PitchPerfect2 3.45
Matt Deadpool 3.76
Matt JungleBook 3.22
Mauricio Deadpool 4.01
Mauricio StarWarsForce 3.72
Nathan Deadpool 4.51
Nathan CaptainAmerica 4.34
Nathan JungleBook 3.97
Param JungleBook 3.47
Param PitchPerfect2 2.28
Prashanth PitchPerfect2 3.58
Shipra Deadpool 4.51
Shipra CaptainAmerica 4.34
Shipra PitchPerfect2 2.78
Steve Deadpool 4.51
Steve JungleBook 3.97
Steve Frozen 3.79
Vuthy StarWarsForce 3.82
Xingjia CaptainAmerica 5.00
Xingjia Deadpool 5.00
Xingjia StarWarsForce 5.00
write.csv(select(predictions, friend_name, movie_name, rating, pred),
           "predictions_global_baseline.csv", row.names = FALSE)
write.csv(top3_all, "top3_per_friend.csv", row.names = FALSE)