Load the Required Packages

Below, the required database connection and data processing packages are loaded.

library(DBI)
library(dbplyr)
library(tidyverse)
library(RMariaDB)

Connect to the Database

Below, we connect to my database of movie reviews. I collected the data via a Google Form: https://forms.gle/sw3oTb8dZsjXaw8b7. Of the 13 people to whom I sent the form, 12 people responded. Two out of those 12 people hadn’t seen any of the movies on the list. There was also one movie on the list that none of the respondents had seen.

con <- DBI::dbConnect(
  RMariaDB::MariaDB(),
  dbname = "movie_reviews",
  username = "root",
  password = as.character(read.table("pw.txt", header = FALSE)),
  host = "127.0.0.1",
  port = 3306,
)

Get Info About the Database and Read the Tables Into R

Below, we get a character vector of the named tables within the database. Then we read all those named tables into R.

tables <- dbListTables(con)
print(tables)
## [1] "_directors"            "_genres"               "_movies"              
## [4] "_movies_to__directors" "_movies_to__genres"    "_ratings"             
## [7] "_reviewers"
movies_db <- tbl(con, "_movies")
directors_db <- tbl(con, "_directors")
movies_directors_db <- tbl(con, "_movies_to__directors")
genres_db <- tbl(con, "_genres")
movies_genres_db <- tbl(con, "_movies_to__genres")
reviewers_db <- tbl(con, "_reviewers")
ratings_db <- tbl(con, "_ratings")

Run SQL Queries to Present the Information

Lastly, we run some SQL queries on the database tables to present the information in a variety of ways.

First, we look at the average rating each movie received. M3GAN notably has an NA rating because no one reported having seen it. The Barbarian is the film with the best average rating (4.8), but Tár is a close second (4.75).

ratings <- movies_db |>
    rename(Movie_id = id) |>
    left_join(ratings_db |> group_by(Movie_id) |>
        summarize(Average_Rating = mean(Rating, na.rm = TRUE)),
        by = "Movie_id") |>
    left_join(movies_directors_db, by = "Movie_id") |>
    left_join(directors_db |> rename(Director_id = id),
        by = "Director_id") |>
    collect()
ratings <- subset(ratings, select = -Director_id)
as_tibble(ratings)
## # A tibble: 6 × 6
##   Movie_id Movie       Released Length_Minutes Average_Rating Director        
##      <int> <chr>          <int>          <int>          <dbl> <chr>           
## 1        1 M3GAN           2023            102          NA    Gerard Johnstone
## 2        2 The Menu        2022            107           3.67 Mark Mylod      
## 3        3 Barbarian       2022            102           4.8  Zach Cregger    
## 4        4 Glass Onion     2022            139           3.25 Rian Johnson    
## 5        5 Tár             2022            158           4.75 Todd Field      
## 6        6 Aftersun        2022            102           3    Charlotte Wells

Next, we look at the movies by genre.

genres <- genres_db |>
    rename(Genre_id = id) |>
    left_join(movies_genres_db, by = "Genre_id") |>
    left_join(movies_db |> rename(Movie_id = id), by = "Movie_id") |>
    collect()
genres <- subset(genres, select = -c(Movie_id, Released, Length_Minutes))
as_tibble(genres)
## # A tibble: 14 × 3
##    Genre_id Genre    Movie      
##       <int> <chr>    <chr>      
##  1        1 Horror   Barbarian  
##  2        1 Horror   The Menu   
##  3        1 Horror   M3GAN      
##  4        2 Sci-Fi   M3GAN      
##  5        3 Thriller Barbarian  
##  6        3 Thriller The Menu   
##  7        3 Thriller M3GAN      
##  8        4 Mystery  Barbarian  
##  9        5 Comedy   Glass Onion
## 10        6 Crime    Glass Onion
## 11        7 Drama    Aftersun   
## 12        7 Drama    Tár        
## 13        7 Drama    Glass Onion
## 14        8 Music    Tár

Lastly, we look at the movies each reviewer saw and rated. Notably, neither Alex nor Grifin saw any of the movies on the list. (They are married with two small children, so this is not terribly surprising.)

reviewers <- reviewers_db |>
    rename(Reviewer_id = id) |>
    left_join(ratings_db |> filter(!is.null(Rating)) |>
        arrange(Reviewer_id), by = "Reviewer_id") |>
    left_join(movies_db |> rename(Movie_id = id), by = "Movie_id") |>
    collect()
Rating <- reviewers$Rating
reviewers <- subset(reviewers, select = -c(Movie_id, Rating, Released,
    Length_Minutes))
reviewers <- cbind(reviewers, Rating)
print(as_tibble(reviewers), n = Inf)
## # A tibble: 28 × 4
##    Reviewer_id Reviewer  Movie       Rating
##          <int> <chr>     <chr>        <int>
##  1           1 Glen      Aftersun         3
##  2           1 Glen      Tár              5
##  3           1 Glen      Glass Onion      4
##  4           1 Glen      Barbarian        5
##  5           1 Glen      The Menu         4
##  6           2 Sebastian Aftersun         2
##  7           2 Sebastian Tár              4
##  8           2 Sebastian Glass Onion      3
##  9           2 Sebastian Barbarian        5
## 10           2 Sebastian The Menu         4
## 11           3 Alex      <NA>            NA
## 12           4 Victoria  Glass Onion      3
## 13           5 Javin     Glass Onion      3
## 14           5 Javin     Barbarian        4
## 15           5 Javin     The Menu         4
## 16           6 Matt      Aftersun         4
## 17           7 Anne      Tár              5
## 18           7 Anne      Glass Onion      3
## 19           7 Anne      The Menu         4
## 20           8 Grifin    <NA>            NA
## 21           9 Claire    Glass Onion      4
## 22          10 Vicki     Glass Onion      3
## 23          10 Vicki     Barbarian        5
## 24          10 Vicki     The Menu         4
## 25          11 Dan       Barbarian        5
## 26          11 Dan       The Menu         2
## 27          12 Frankie   Tár              5
## 28          12 Frankie   Glass Onion      3