Setup
# If you see “there is no package called …”, install in Console:
# install.packages(c("DBI","RMariaDB","dplyr","readr","ggplot2"))
library(DBI)
library(RMariaDB)
library(dplyr)
library(readr)
library(ggplot2)
Connect to MySQL
## [1] "ratings"
Import (with safe fallback to CSV)
# Try DB import; if it fails during knit, use the CSV.
ratings_df <- tryCatch(
{
tbl(con, "ratings") |> collect()
},
error = function(e) {
message("DB import failed (using CSV): ", conditionMessage(e))
read_csv("ratings_export.csv", show_col_types = FALSE)
}
)
# Sanity checks / preview
stopifnot(is.data.frame(ratings_df))
nrow(ratings_df) # expect 30
## [1] 30
glimpse(ratings_df)
## Rows: 30
## Columns: 4
## $ id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …
## $ name <chr> "Fedie", "Fedie", "Fedie", "Fedie", "Fedie", "Fedie", "Bishop",…
## $ movie <chr> "Jurassic World", "The Dark Knight", "The Godfather", "Avatar",…
## $ rating <int> 4, 5, 3, 4, 5, 3, 3, 4, 5, 3, 4, 4, 5, 5, 4, 4, 5, 3, 4, 4, 5, …
head(ratings_df, 10)
## # A tibble: 10 × 4
## id name movie rating
## <int> <chr> <chr> <int>
## 1 1 Fedie Jurassic World 4
## 2 2 Fedie The Dark Knight 5
## 3 3 Fedie The Godfather 3
## 4 4 Fedie Avatar 4
## 5 5 Fedie Avengers: Endgame 5
## 6 6 Fedie The Old Guard 3
## 7 7 Bishop Jurassic World 3
## 8 8 Bishop The Dark Knight 4
## 9 9 Bishop The Godfather 5
## 10 10 Bishop Avatar 3
Export CSV
write_csv(ratings_df, "ratings_export.csv")
file.exists("ratings_export.csv") # should print [1] TRUE
## [1] TRUE
Summaries
# Average rating per movie
ratings_summary <- ratings_df |>
group_by(movie) |>
summarize(
avg_rating = mean(rating),
n = n(),
.groups = "drop"
) |>
arrange(desc(avg_rating))
ratings_summary
## # A tibble: 6 × 3
## movie avg_rating n
## <chr> <dbl> <int>
## 1 Avengers: Endgame 4.4 5
## 2 The Dark Knight 4.4 5
## 3 The Godfather 4.2 5
## 4 Avatar 3.8 5
## 5 Jurassic World 3.8 5
## 6 The Old Guard 3.2 5
# Average rating by rater
ratings_by_rater <- ratings_df |>
group_by(name) |>
summarize(
avg_rating = mean(rating),
n = n(),
.groups = "drop"
) |>
arrange(desc(avg_rating))
ratings_by_rater
## # A tibble: 5 × 3
## name avg_rating n
## <chr> <dbl> <int>
## 1 Tim 4.33 6
## 2 Chris 4.17 6
## 3 Fedie 4 6
## 4 Bishop 3.83 6
## 5 Cody 3.5 6
Charts
# Bar chart: Average rating by movie
ggplot(ratings_summary, aes(x = reorder(movie, avg_rating), y = avg_rating)) +
geom_col() +
coord_flip() +
labs(
title = "Average Rating by Movie",
x = "Movie",
y = "Average Rating (1–5)"
) +
theme_minimal()

# Bar chart: Average rating by rater
ggplot(ratings_by_rater, aes(x = reorder(name, avg_rating), y = avg_rating)) +
geom_col() +
coord_flip() +
labs(
title = "Average Rating by Rater",
x = "Rater",
y = "Average Rating (1–5)"
) +
theme_minimal()

Disconnect
dbDisconnect(con)