# Read the wide ratings matrix from the provided spreadsheet
xlsxPath <- "MovieRatings.xlsx" # keep the file in the same directory as the .Rmd
mr <- read_excel(xlsxPath, sheet = "MovieRatings")
stopifnot("Critic" %in% names(mr))
itemCols <- setdiff(names(mr), "Critic")
mr[itemCols] <- lapply(mr[itemCols], function(col) suppressWarnings(as.numeric(col)))
long <- mr %>%
pivot_longer(cols = all_of(itemCols), names_to = "movie_name", values_to = "rating") %>%
rename(friend_name = Critic) %>%
filter(!is.na(rating))
friends <- long %>%
distinct(friend_name) %>%
arrange(friend_name) %>%
mutate(friend_id = row_number()) %>%
select(friend_id, friend_name)
movies <- long %>%
distinct(movie_name) %>%
arrange(movie_name) %>%
mutate(movie_id = row_number()) %>%
select(movie_id, movie_name)
full_ratings <- long %>%
left_join(friends, by = "friend_name") %>%
left_join(movies, by = "movie_name") %>%
select(friend_id, friend_name, movie_id, movie_name, rating)
kable(head(full_ratings, 8))
1 |
Burton |
4 |
JungleBook |
4 |
1 |
Burton |
6 |
StarWarsForce |
4 |
2 |
Charley |
1 |
CaptainAmerica |
4 |
2 |
Charley |
2 |
Deadpool |
5 |
2 |
Charley |
3 |
Frozen |
4 |
2 |
Charley |
4 |
JungleBook |
3 |
2 |
Charley |
5 |
PitchPerfect2 |
2 |
2 |
Charley |
6 |
StarWarsForce |
3 |
clamp <- function(x, lo = 1, hi = 5) pmin(pmax(x, lo), hi)
mu <- mean(full_ratings$rating, na.rm = TRUE)
user_bias <- full_ratings %>%
group_by(friend_id, friend_name) %>%
summarise(bu = mean(rating, na.rm = TRUE) - mu, .groups = "drop")
item_bias <- full_ratings %>%
group_by(movie_id, movie_name) %>%
summarise(bi = mean(rating, na.rm = TRUE) - mu, .groups = "drop")
predictions <- expand_grid(
friend_id = unique(full_ratings$friend_id),
movie_id = unique(full_ratings$movie_id)
) %>%
left_join(distinct(friends, friend_id, friend_name), by = "friend_id") %>%
left_join(distinct(movies, movie_id, movie_name), by = "movie_id") %>%
left_join(select(full_ratings, friend_id, movie_id, rating), by = c("friend_id","movie_id")) %>%
left_join(user_bias, by = c("friend_id","friend_name")) %>%
left_join(item_bias, by = c("movie_id","movie_name")) %>%
mutate(
bu = ifelse(is.na(bu), 0, bu),
bi = ifelse(is.na(bi), 0, bi),
pred = clamp(mu + bu + bi),
missing = is.na(rating)
) %>%
arrange(friend_name, movie_name)
# Show the core model pieces
kable(tibble(GlobalMean = mu), digits = 3)
kable(head(user_bias, 10), caption = "User Bias (b_u)", digits = 3)
User Bias (b_u)
1 |
Burton |
0.066 |
2 |
Charley |
-0.434 |
3 |
Dan |
1.066 |
4 |
Dieudonne |
0.732 |
5 |
Matt |
-0.684 |
6 |
Mauricio |
-0.434 |
7 |
Max |
-0.601 |
8 |
Nathan |
0.066 |
9 |
Param |
-0.434 |
10 |
Parshu |
-0.268 |
kable(head(item_bias, 10), caption = "Item Bias (b_i)", digits = 3)
Item Bias (b_i)
1 |
CaptainAmerica |
0.338 |
2 |
Deadpool |
0.510 |
3 |
Frozen |
-0.207 |
4 |
JungleBook |
-0.034 |
5 |
PitchPerfect2 |
-1.220 |
6 |
StarWarsForce |
0.219 |
kable(head(select(predictions, friend_name, movie_name, rating, pred), 12),
caption = "Sample Predictions", digits = 2)
Sample Predictions
Burton |
CaptainAmerica |
NA |
4.34 |
Burton |
Deadpool |
NA |
4.51 |
Burton |
Frozen |
NA |
3.79 |
Burton |
JungleBook |
4 |
3.97 |
Burton |
PitchPerfect2 |
NA |
2.78 |
Burton |
StarWarsForce |
4 |
4.22 |
Charley |
CaptainAmerica |
4 |
3.84 |
Charley |
Deadpool |
5 |
4.01 |
Charley |
Frozen |
4 |
3.29 |
Charley |
JungleBook |
3 |
3.47 |
Charley |
PitchPerfect2 |
2 |
2.28 |
Charley |
StarWarsForce |
3 |
3.72 |
# Top-N recommendations per friend for unseen movies
recommend_top_n <- function(pred_df, friend_id, n = 3) {
pred_df %>%
filter(friend_id == !!friend_id, missing) %>%
arrange(desc(pred), movie_name) %>%
slice_head(n = n) %>%
select(friend_name, movie_name, predicted_rating = pred)
}
# Show Top 3 for each friend in a single table
top3_all <- predictions %>%
filter(missing) %>%
group_by(friend_id, friend_name) %>%
slice_max(order_by = pred, n = 3, with_ties = FALSE) %>%
ungroup() %>%
select(friend_name, movie_name, predicted_rating = pred)
kable(top3_all, caption = "Top 3 Recommendations per Friend", digits = 2)
Top 3 Recommendations per Friend
Burton |
Deadpool |
4.51 |
Burton |
CaptainAmerica |
4.34 |
Burton |
Frozen |
3.79 |
Dan |
CaptainAmerica |
5.00 |
Dan |
JungleBook |
4.97 |
Dan |
Frozen |
4.79 |
Dieudonne |
JungleBook |
4.63 |
Dieudonne |
Frozen |
4.46 |
Dieudonne |
PitchPerfect2 |
3.45 |
Matt |
Deadpool |
3.76 |
Matt |
JungleBook |
3.22 |
Mauricio |
Deadpool |
4.01 |
Mauricio |
StarWarsForce |
3.72 |
Nathan |
Deadpool |
4.51 |
Nathan |
CaptainAmerica |
4.34 |
Nathan |
JungleBook |
3.97 |
Param |
JungleBook |
3.47 |
Param |
PitchPerfect2 |
2.28 |
Prashanth |
PitchPerfect2 |
3.58 |
Shipra |
Deadpool |
4.51 |
Shipra |
CaptainAmerica |
4.34 |
Shipra |
PitchPerfect2 |
2.78 |
Steve |
Deadpool |
4.51 |
Steve |
JungleBook |
3.97 |
Steve |
Frozen |
3.79 |
Vuthy |
StarWarsForce |
3.82 |
Xingjia |
CaptainAmerica |
5.00 |
Xingjia |
Deadpool |
5.00 |
Xingjia |
StarWarsForce |
5.00 |
write.csv(select(predictions, friend_name, movie_name, rating, pred),
"predictions_global_baseline.csv", row.names = FALSE)
write.csv(top3_all, "top3_per_friend.csv", row.names = FALSE)