1. Install and Load Necessary Libraries:

options(repos = c(CRAN = "https://cloud.r-project.org/"))

install.packages(c("DBI", "RMySQL"))
## 
## The downloaded binary packages are in
##  /var/folders/tb/98qc6stx5v7402cd84rw9rkh0000gn/T//Rtmp1BVovA/downloaded_packages
library(DBI)
library(RMySQL)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(knitr)
library(dplyr)
library(ggplot2)

2. Connect to the MySQL Database:

con <- dbConnect(RMySQL::MySQL(), 
                 host = "localhost", 
                 user = "root",
                 dbname = "rotten_cucumbers")

3. Fetch Data from rotten_cucumbers DB Tables:

friends_df <- dbGetQuery(con, "SELECT * FROM Friends")
movies_df <- dbGetQuery(con, "SELECT * FROM Movies")
ratings_df <- dbGetQuery(con, "SELECT * FROM Ratings")
rating_desc_df <- dbGetQuery(con, "SELECT * FROM RatingDescription")

4. Close mysql Connection

dbDisconnect(con)
## [1] TRUE

5. Inspect Fetched Data

head(friends_df)
##   id    name
## 1  1   Alice
## 2  2     Bob
## 3  3 Charlie
## 4  4    Dave
## 5  5    Elly
head(movies_df)
##   id                          title
## 1  1                         Barbie
## 2  6                    Blue Beetle
## 3  4                    Oppenheimer
## 4  2                     Rebel Moon
## 5  5                      The Flash
## 6  3 The Last Voyage of the Demeter
head(ratings_df)
##   id friend_id movie_id rating
## 1  1         1        1      1
## 2  2         1        2      2
## 3  3         1        3      5
## 4  4         2        4      1
## 5  5         2        5      1
## 6  6         2        3      3
head(rating_desc_df)
##   rating   description
## 1      1          Poor
## 2      2 Below Average
## 3      3       Average
## 4      4 Above Average
## 5      5     Excellent

6. Data Transformation

Using the merge() function in R to replace foreign keys with their actual values:

# Merge with the Friends table to replace friend_id with name
ratings_transformed <- merge(ratings_df, friends_df, by.x="friend_id", by.y="id")

# Merging with the Movies table to replace movie_id with title
ratings_transformed <- merge(ratings_transformed, movies_df, by.x="movie_id", by.y="id")

# Dropping not needed column
ratings_transformed$movie_id <- NULL
ratings_transformed$friend_id <- NULL

# Reordering columns for clarity
ratings_transformed <- ratings_transformed[, c("name", "title", "rating")]

ordered_ratings <- ratings_transformed %>% 
  spread(key = title, value = rating)

# Printing the ordered dataframe
print(ordered_ratings)
##      name Barbie Blue Beetle Oppenheimer Rebel Moon The Flash
## 1   Alice      1          NA          NA          2        NA
## 2     Bob     NA          NA           1         NA         1
## 3 Charlie      3          NA           1         NA        NA
## 4    Dave      2           2           4         NA         4
## 5    Elly      1           4           3          4        NA
##   The Last Voyage of the Demeter
## 1                              5
## 2                              3
## 3                             NA
## 4                              3
## 5                              3

7. Converting the data to long format to vizualize with ggplot

ratings_long <- ordered_ratings %>%
  gather(key = "title", value = "rating", -name)

# Plot the heatmap
ggplot(ratings_long, aes(x = name, y = title, fill = rating)) +
  geom_tile(color = "white") + 
  scale_fill_gradient2(low = "blue", mid = "white", high = "red", 
                       midpoint = 3, na.value = "grey50", 
                       name = "Rating", 
                       breaks = c(NA, 1, 2, 3, 4, 5), 
                       labels = c("Not Watched", "1", "2", "3", "4", "5")) +
  theme_minimal() +
  labs(title = "Movie Ratings by Friend", x = "Friend Name", y = "Movie Name") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

8. Calculating and visualizing the mean rating of each movive