1. Install and Load Necessary Libraries:
options(repos = c(CRAN = "https://cloud.r-project.org/"))
install.packages(c("DBI", "RMySQL"))
##
## The downloaded binary packages are in
## /var/folders/tb/98qc6stx5v7402cd84rw9rkh0000gn/T//Rtmp1BVovA/downloaded_packages
library(DBI)
library(RMySQL)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(knitr)
library(dplyr)
library(ggplot2)
2. Connect to the MySQL Database:
con <- dbConnect(RMySQL::MySQL(),
host = "localhost",
user = "root",
dbname = "rotten_cucumbers")
3. Fetch Data from rotten_cucumbers DB Tables:
friends_df <- dbGetQuery(con, "SELECT * FROM Friends")
movies_df <- dbGetQuery(con, "SELECT * FROM Movies")
ratings_df <- dbGetQuery(con, "SELECT * FROM Ratings")
rating_desc_df <- dbGetQuery(con, "SELECT * FROM RatingDescription")
4. Close mysql Connection
dbDisconnect(con)
## [1] TRUE
5. Inspect Fetched Data
head(friends_df)
## id name
## 1 1 Alice
## 2 2 Bob
## 3 3 Charlie
## 4 4 Dave
## 5 5 Elly
head(movies_df)
## id title
## 1 1 Barbie
## 2 6 Blue Beetle
## 3 4 Oppenheimer
## 4 2 Rebel Moon
## 5 5 The Flash
## 6 3 The Last Voyage of the Demeter
head(ratings_df)
## id friend_id movie_id rating
## 1 1 1 1 1
## 2 2 1 2 2
## 3 3 1 3 5
## 4 4 2 4 1
## 5 5 2 5 1
## 6 6 2 3 3
head(rating_desc_df)
## rating description
## 1 1 Poor
## 2 2 Below Average
## 3 3 Average
## 4 4 Above Average
## 5 5 Excellent
8. Calculating and visualizing the mean rating of each movive
The movie with the highest average rating is the one most
recommended by friends.
# calculating the mean rating for movies
movies_avg_rating <- ratings_long %>%
group_by(title) %>%
summarize(mean_rating = mean(rating, na.rm = TRUE))
# visualizing the ratings of all movies
ggplot(movies_avg_rating, aes(x = reorder(title, -mean_rating), y = mean_rating)) +
geom_bar(stat = "identity", fill = "skyblue") +
geom_label(aes(label = round(mean_rating, 2)), vjust = -0.5) +
theme_minimal() +
labs(title = "Average Rating for Each Movie", x = "Movie Name", y = "Average Rating") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
