# Read the wide ratings matrix from the provided spreadsheet
xlsxPath <- "MovieRatings.xlsx"   # keep the file in the same directory as the .Rmd
mr <- read_excel(xlsxPath, sheet = "MovieRatings")
stopifnot("Critic" %in% names(mr))
itemCols <- setdiff(names(mr), "Critic")
mr[itemCols] <- lapply(mr[itemCols], function(col) suppressWarnings(as.numeric(col)))
long <- mr %>%
  pivot_longer(cols = all_of(itemCols), names_to = "movie_name", values_to = "rating") %>%
  rename(friend_name = Critic) %>%
  filter(!is.na(rating))
friends <- long %>%
  distinct(friend_name) %>%
  arrange(friend_name) %>%
  mutate(friend_id = row_number()) %>%
  select(friend_id, friend_name)
movies <- long %>%
  distinct(movie_name) %>%
  arrange(movie_name) %>%
  mutate(movie_id = row_number()) %>%
  select(movie_id, movie_name)
full_ratings <- long %>%
  left_join(friends, by = "friend_name") %>%
  left_join(movies,  by = "movie_name") %>%
  select(friend_id, friend_name, movie_id, movie_name, rating)
kable(head(full_ratings, 8))
| 1 | 
Burton | 
4 | 
JungleBook | 
4 | 
| 1 | 
Burton | 
6 | 
StarWarsForce | 
4 | 
| 2 | 
Charley | 
1 | 
CaptainAmerica | 
4 | 
| 2 | 
Charley | 
2 | 
Deadpool | 
5 | 
| 2 | 
Charley | 
3 | 
Frozen | 
4 | 
| 2 | 
Charley | 
4 | 
JungleBook | 
3 | 
| 2 | 
Charley | 
5 | 
PitchPerfect2 | 
2 | 
| 2 | 
Charley | 
6 | 
StarWarsForce | 
3 | 
clamp <- function(x, lo = 1, hi = 5) pmin(pmax(x, lo), hi)
mu <- mean(full_ratings$rating, na.rm = TRUE)
user_bias <- full_ratings %>%
  group_by(friend_id, friend_name) %>%
  summarise(bu = mean(rating, na.rm = TRUE) - mu, .groups = "drop")
item_bias <- full_ratings %>%
  group_by(movie_id, movie_name) %>%
  summarise(bi = mean(rating, na.rm = TRUE) - mu, .groups = "drop")
predictions <- expand_grid(
    friend_id = unique(full_ratings$friend_id),
    movie_id  = unique(full_ratings$movie_id)
  ) %>%
  left_join(distinct(friends, friend_id, friend_name), by = "friend_id") %>%
  left_join(distinct(movies,  movie_id,  movie_name),  by = "movie_id") %>%
  left_join(select(full_ratings, friend_id, movie_id, rating), by = c("friend_id","movie_id")) %>%
  left_join(user_bias, by = c("friend_id","friend_name")) %>%
  left_join(item_bias, by = c("movie_id","movie_name")) %>%
  mutate(
    bu = ifelse(is.na(bu), 0, bu),
    bi = ifelse(is.na(bi), 0, bi),
    pred = clamp(mu + bu + bi),
    missing = is.na(rating)
  ) %>%
  arrange(friend_name, movie_name)
# Show the core model pieces
kable(tibble(GlobalMean = mu), digits = 3)
kable(head(user_bias, 10), caption = "User Bias (b_u)", digits = 3)
User Bias (b_u)
| 1 | 
Burton | 
0.066 | 
| 2 | 
Charley | 
-0.434 | 
| 3 | 
Dan | 
1.066 | 
| 4 | 
Dieudonne | 
0.732 | 
| 5 | 
Matt | 
-0.684 | 
| 6 | 
Mauricio | 
-0.434 | 
| 7 | 
Max | 
-0.601 | 
| 8 | 
Nathan | 
0.066 | 
| 9 | 
Param | 
-0.434 | 
| 10 | 
Parshu | 
-0.268 | 
kable(head(item_bias, 10), caption = "Item Bias (b_i)", digits = 3)
Item Bias (b_i)
| 1 | 
CaptainAmerica | 
0.338 | 
| 2 | 
Deadpool | 
0.510 | 
| 3 | 
Frozen | 
-0.207 | 
| 4 | 
JungleBook | 
-0.034 | 
| 5 | 
PitchPerfect2 | 
-1.220 | 
| 6 | 
StarWarsForce | 
0.219 | 
kable(head(select(predictions, friend_name, movie_name, rating, pred), 12),
      caption = "Sample Predictions", digits = 2)
Sample Predictions
| Burton | 
CaptainAmerica | 
NA | 
4.34 | 
| Burton | 
Deadpool | 
NA | 
4.51 | 
| Burton | 
Frozen | 
NA | 
3.79 | 
| Burton | 
JungleBook | 
4 | 
3.97 | 
| Burton | 
PitchPerfect2 | 
NA | 
2.78 | 
| Burton | 
StarWarsForce | 
4 | 
4.22 | 
| Charley | 
CaptainAmerica | 
4 | 
3.84 | 
| Charley | 
Deadpool | 
5 | 
4.01 | 
| Charley | 
Frozen | 
4 | 
3.29 | 
| Charley | 
JungleBook | 
3 | 
3.47 | 
| Charley | 
PitchPerfect2 | 
2 | 
2.28 | 
| Charley | 
StarWarsForce | 
3 | 
3.72 | 
# Top-N recommendations per friend for unseen movies
recommend_top_n <- function(pred_df, friend_id, n = 3) {
  pred_df %>%
    filter(friend_id == !!friend_id, missing) %>%
    arrange(desc(pred), movie_name) %>%
    slice_head(n = n) %>%
    select(friend_name, movie_name, predicted_rating = pred)
}
# Show Top 3 for each friend in a single table
top3_all <- predictions %>%
  filter(missing) %>%
  group_by(friend_id, friend_name) %>%
  slice_max(order_by = pred, n = 3, with_ties = FALSE) %>%
  ungroup() %>%
  select(friend_name, movie_name, predicted_rating = pred)
kable(top3_all, caption = "Top 3 Recommendations per Friend", digits = 2)
Top 3 Recommendations per Friend
| Burton | 
Deadpool | 
4.51 | 
| Burton | 
CaptainAmerica | 
4.34 | 
| Burton | 
Frozen | 
3.79 | 
| Dan | 
CaptainAmerica | 
5.00 | 
| Dan | 
JungleBook | 
4.97 | 
| Dan | 
Frozen | 
4.79 | 
| Dieudonne | 
JungleBook | 
4.63 | 
| Dieudonne | 
Frozen | 
4.46 | 
| Dieudonne | 
PitchPerfect2 | 
3.45 | 
| Matt | 
Deadpool | 
3.76 | 
| Matt | 
JungleBook | 
3.22 | 
| Mauricio | 
Deadpool | 
4.01 | 
| Mauricio | 
StarWarsForce | 
3.72 | 
| Nathan | 
Deadpool | 
4.51 | 
| Nathan | 
CaptainAmerica | 
4.34 | 
| Nathan | 
JungleBook | 
3.97 | 
| Param | 
JungleBook | 
3.47 | 
| Param | 
PitchPerfect2 | 
2.28 | 
| Prashanth | 
PitchPerfect2 | 
3.58 | 
| Shipra | 
Deadpool | 
4.51 | 
| Shipra | 
CaptainAmerica | 
4.34 | 
| Shipra | 
PitchPerfect2 | 
2.78 | 
| Steve | 
Deadpool | 
4.51 | 
| Steve | 
JungleBook | 
3.97 | 
| Steve | 
Frozen | 
3.79 | 
| Vuthy | 
StarWarsForce | 
3.82 | 
| Xingjia | 
CaptainAmerica | 
5.00 | 
| Xingjia | 
Deadpool | 
5.00 | 
| Xingjia | 
StarWarsForce | 
5.00 | 
write.csv(select(predictions, friend_name, movie_name, rating, pred),
           "predictions_global_baseline.csv", row.names = FALSE)
write.csv(top3_all, "top3_per_friend.csv", row.names = FALSE)