# LOADING ----

# RESHAPING ----

# Read the wide ratings sheet. Every column other than `Critic` is pivoted
# below, so each of those columns is treated as one movie.
# NOTE(review): read_excel() and pivot_longer() need readxl and tidyr attached;
# no library() calls are visible in this part of the script -- confirm they
# are loaded earlier.
ratings_wide <- read_excel("MovieRatings.xlsx", sheet = "MovieRatings")

# Wide -> long: one row per (Critic, Movie) pair that actually has a rating.
# Cells that are NA in the sheet are dropped instead of being kept as NA rows.
ratings_long <- pivot_longer(
  ratings_wide,
  cols = !Critic,
  names_to = "Movie",
  values_to = "Rating",
  values_drop_na = TRUE
)

# Coerce Rating to numeric so the averages computed later operate on numbers.
ratings_long$Rating <- as.numeric(ratings_long$Rating)

# CALCULATING ----

# Grand mean over every observed rating; the movie and critic bias terms
# computed further down are expressed as offsets from this value.
global_avg <- mean(ratings_long$Rating, na.rm = TRUE)

# Report the baseline. The final argument is an explicit "\n" escape so the
# string literal stays on one source line (the previous text had the escape
# mangled into a backslash followed by a physical line break).
cat("Global average rating:", global_avg, "\n")
## Global average rating: 3.934426
# Per-movie bias b_i: how far each movie's mean rating sits above (+) or
# below (-) the global average.
movie_bias <- ratings_long %>%
  group_by(Movie) %>%
  summarise(movie_mean = mean(Rating, na.rm = TRUE)) %>%
  transmute(Movie, b_i = movie_mean - global_avg)

print("Movie biases:")
## [1] "Movie biases:"
print(movie_bias)
## # A tibble: 6 × 2
##   Movie              b_i
##   <chr>            <dbl>
## 1 CaptainAmerica  0.338 
## 2 Deadpool        0.510 
## 3 Frozen         -0.207 
## 4 JungleBook     -0.0344
## 5 PitchPerfect2  -1.22  
## 6 StarWarsForce   0.219
# Per-critic bias b_u: how far each critic's mean rating sits above (+) or
# below (-) the global average.
user_bias <- ratings_long %>%
  group_by(Critic) %>%
  summarise(critic_mean = mean(Rating, na.rm = TRUE)) %>%
  transmute(Critic, b_u = critic_mean - global_avg)

print("User biases:")
## [1] "User biases:"
print(user_bias)
## # A tibble: 16 × 2
##    Critic        b_u
##    <chr>       <dbl>
##  1 Burton     0.0656
##  2 Charley   -0.434 
##  3 Dan        1.07  
##  4 Dieudonne  0.732 
##  5 Matt      -0.684 
##  6 Mauricio  -0.434 
##  7 Max       -0.601 
##  8 Nathan     0.0656
##  9 Param     -0.434 
## 10 Parshu    -0.268 
## 11 Prashanth  0.866 
## 12 Shipra     0.0656
## 13 Sreejaya   0.732 
## 14 Steve      0.0656
## 15 Vuthy     -0.334 
## 16 Xingjia    1.07
# Distinct critics and movies that appear in the observed ratings.
all_users <- unique(ratings_long$Critic)
all_movies <- unique(ratings_long$Movie)

# Every critic paired with every movie, whether or not a rating exists.
all_combinations <- expand.grid(
  Critic = all_users,
  Movie = all_movies,
  stringsAsFactors = FALSE
)

# Attach the observed rating (NA when the critic has not rated the movie)
# together with the critic and movie bias terms.
observed <- select(ratings_long, Critic, Movie, Rating)
all_rec <- all_combinations %>%
  left_join(observed, by = c("Critic", "Movie")) %>%
  left_join(user_bias, by = "Critic") %>%
  left_join(movie_bias, by = "Movie")

# Baseline estimate: global mean plus critic offset plus movie offset.
# NOTE(review): the estimate is not clamped; the recorded output further down
# shows values above 5 (e.g. 5.34) -- confirm that is acceptable for the
# rating scale in use.
all_rec$EstimatedRating <- global_avg + all_rec$b_u + all_rec$b_i

# RECOMMENDING ----

# Keep only critic/movie pairs with no observed rating, rank them by estimated
# rating, and take up to three per critic.
# The sort is done before grouping on purpose: arrange() sorts the whole table
# regardless of groups (its .by_group default is FALSE), and slice_head() then
# takes the first three rows within each critic in that sorted order.
# The result is left grouped by Critic, as the printed header below shows.
recommendations <- all_rec %>%
  filter(is.na(Rating)) %>%
  arrange(desc(EstimatedRating)) %>%
  group_by(Critic) %>%
  slice_head(n = 3)

print("Top 3 movie recommendations for each critic:")
## [1] "Top 3 movie recommendations for each critic:"
print(recommendations)
## # A tibble: 29 × 6
## # Groups:   Critic [12]
##    Critic    Movie          Rating     b_u     b_i EstimatedRating
##    <chr>     <chr>           <dbl>   <dbl>   <dbl>           <dbl>
##  1 Burton    Deadpool           NA  0.0656  0.510             4.51
##  2 Burton    CaptainAmerica     NA  0.0656  0.338             4.34
##  3 Burton    Frozen             NA  0.0656 -0.207             3.79
##  4 Dan       CaptainAmerica     NA  1.07    0.338             5.34
##  5 Dan       JungleBook         NA  1.07   -0.0344            4.97
##  6 Dan       Frozen             NA  1.07   -0.207             4.79
##  7 Dieudonne JungleBook         NA  0.732  -0.0344            4.63
##  8 Dieudonne Frozen             NA  0.732  -0.207             4.46
##  9 Dieudonne PitchPerfect2      NA  0.732  -1.22              3.45
## 10 Matt      Deadpool           NA -0.684   0.510             3.76
## # ℹ 19 more rows