ratings_full <-DBI::dbGetQuery(con, "SELECT r.name AS rater, m.title AS movie, rt.ratingFROM raters AS rCROSS JOIN movies AS mLEFT JOIN ratings AS rt ON rt.rater_id = r.rater_id AND rt.movie_id = m.movie_idORDER BY r.name, m.title; ") |> tibble::as_tibble()glimpse(ratings_full)
# A tibble: 6 × 3
movie n_observed mean_rating
<chr> <int> <dbl>
1 Inside Out 2 4 4.5
2 Spider-Verse 2 4.5
3 Dune: Part Two 3 4.33
4 Oppenheimer 3 4.33
5 Barbie 3 4
6 Top Gun: Maverick 3 3.67
Code
## Discussion: engaging with the data
Code
top_avail <- movie_summary |>slice_max(mean_rating, n =2)top_imp <- movie_summary_imp |>slice_max(mean_rating, n =2)dplyr::bind_rows(Available = top_avail, Imputed = top_imp, .id ="version")
# A tibble: 4 × 6
version movie n_ratings mean_rating sd_rating n_observed
<chr> <chr> <int> <dbl> <dbl> <int>
1 Available Inside Out 2 4 4.5 0.577 NA
2 Available Spider-Verse 2 4.5 0.707 NA
3 Imputed Inside Out 2 NA 4.5 NA 4
4 Imputed Spider-Verse NA 4.5 NA 2
msg <- glue::glue( “Takeaways. By available-case averaging, the top films are”, “{top_avail\(movie[1]} (mean={round(top_avail\)mean_rating[1],2)}, n={top_avail\(n_ratings[1]}) ",
"and {top_avail\)movie[2]} (mean={round(top_avail\(mean_rating[2],2)}, n={top_avail\)n_ratings[2]}).”, “After simple imputation by movie mean, the leaders are”, “{top_imp\(movie[1]} and {top_imp\)movie[2]}, with very similar ordering.”, “The heatmap shows most missingness clustered on a few rater–movie pairs,”, “so imputing with the movie mean didn’t materially shift rankings—our top choice looks robust.” ) cat(msg)