Setup

# If you see “there is no package called …”, install in Console:
# install.packages(c("DBI","RMariaDB","dplyr","readr","ggplot2"))
library(DBI)
library(RMariaDB)
library(dplyr)
library(readr)
library(ggplot2)

Connect to MySQL

## [1] "ratings"

Import (with safe fallback to CSV)

# Try DB import; if it fails during knit, use the CSV.
ratings_df <- tryCatch(
  {
    tbl(con, "ratings") |> collect()
  },
  error = function(e) {
    message("DB import failed (using CSV): ", conditionMessage(e))
    read_csv("ratings_export.csv", show_col_types = FALSE)
  }
)

# Sanity checks / preview
stopifnot(is.data.frame(ratings_df))
nrow(ratings_df)         # expect 30
## [1] 30
glimpse(ratings_df)
## Rows: 30
## Columns: 4
## $ id     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …
## $ name   <chr> "Fedie", "Fedie", "Fedie", "Fedie", "Fedie", "Fedie", "Bishop",…
## $ movie  <chr> "Jurassic World", "The Dark Knight", "The Godfather", "Avatar",…
## $ rating <int> 4, 5, 3, 4, 5, 3, 3, 4, 5, 3, 4, 4, 5, 5, 4, 4, 5, 3, 4, 4, 5, …
head(ratings_df, 10)
## # A tibble: 10 × 4
##       id name   movie             rating
##    <int> <chr>  <chr>              <int>
##  1     1 Fedie  Jurassic World         4
##  2     2 Fedie  The Dark Knight        5
##  3     3 Fedie  The Godfather          3
##  4     4 Fedie  Avatar                 4
##  5     5 Fedie  Avengers: Endgame      5
##  6     6 Fedie  The Old Guard          3
##  7     7 Bishop Jurassic World         3
##  8     8 Bishop The Dark Knight        4
##  9     9 Bishop The Godfather          5
## 10    10 Bishop Avatar                 3

Export CSV

write_csv(ratings_df, "ratings_export.csv")
file.exists("ratings_export.csv")  # should print [1] TRUE
## [1] TRUE

Summaries

# Average rating per movie
ratings_summary <- ratings_df |>
  group_by(movie) |>
  summarize(
    avg_rating = mean(rating),
    n = n(),
    .groups = "drop"
  ) |>
  arrange(desc(avg_rating))

ratings_summary
## # A tibble: 6 × 3
##   movie             avg_rating     n
##   <chr>                  <dbl> <int>
## 1 Avengers: Endgame        4.4     5
## 2 The Dark Knight          4.4     5
## 3 The Godfather            4.2     5
## 4 Avatar                   3.8     5
## 5 Jurassic World           3.8     5
## 6 The Old Guard            3.2     5
# Average rating by rater
ratings_by_rater <- ratings_df |>
  group_by(name) |>
  summarize(
    avg_rating = mean(rating),
    n = n(),
    .groups = "drop"
  ) |>
  arrange(desc(avg_rating))

ratings_by_rater
## # A tibble: 5 × 3
##   name   avg_rating     n
##   <chr>       <dbl> <int>
## 1 Tim          4.33     6
## 2 Chris        4.17     6
## 3 Fedie        4        6
## 4 Bishop       3.83     6
## 5 Cody         3.5      6

Charts

# Bar chart: Average rating by movie
ggplot(ratings_summary, aes(x = reorder(movie, avg_rating), y = avg_rating)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Average Rating by Movie",
    x = "Movie",
    y = "Average Rating (1–5)"
  ) +
  theme_minimal()

# Bar chart: Average rating by rater
ggplot(ratings_by_rater, aes(x = reorder(name, avg_rating), y = avg_rating)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Average Rating by Rater",
    x = "Rater",
    y = "Average Rating (1–5)"
  ) +
  theme_minimal()

Disconnect

dbDisconnect(con)