Libraries

chooseCRANmirror(graphics = FALSE, ind = 1)  # Selects the first mirror
install.packages("DBI")
## 
## The downloaded binary packages are in
##  /var/folders/nz/h7z329n55nxfs2dv7hmbhc400000gn/T//Rtmp3KSQZ2/downloaded_packages
install.packages("RMySQL")
## 
## The downloaded binary packages are in
##  /var/folders/nz/h7z329n55nxfs2dv7hmbhc400000gn/T//Rtmp3KSQZ2/downloaded_packages
library(DBI)
library(RMySQL)

Connect to MySQL DB

connection <- dbConnect(RMySQL::MySQL(),
                 dbname = "aaliyah.john25",
                 host = "cuny607sql.mysql.database.azure.com",
                 user = "aaliyah.john25",
                 password = "97725637f4caf03b",
                 port = 3306)

Create SQL table for Movie Ratings

table <- "
CREATE TABLE IF NOT EXISTS RecentMovieRatings (
    movie VARCHAR(100),
    rater VARCHAR(50),
    rating INT
);"
dbExecute(connection, table)
## [1] 0

Filling the table with movies, the raters and their ratings

ratings <- data.frame(
  movie = rep(c("Wicked", "Mufasa", "One of them days",
            "Den of Thieves 2", "The Fire Inside", "Good Rich"),each=5),
  rater = rep(c("Rhianna", "Lyndsay", "Kristel", "Akil", "Leis"), times=6),
  rating = c(5, 4, 5, 3, 2, 4, NA, 4, 2, NA, 4, 5, 4, 4, 3, 3, NA, NA, 5, 4, 5, 4, 4, 4, 3, 3, 3, 4, NA, 2)
)
print (ratings)
##               movie   rater rating
## 1            Wicked Rhianna      5
## 2            Wicked Lyndsay      4
## 3            Wicked Kristel      5
## 4            Wicked    Akil      3
## 5            Wicked    Leis      2
## 6            Mufasa Rhianna      4
## 7            Mufasa Lyndsay     NA
## 8            Mufasa Kristel      4
## 9            Mufasa    Akil      2
## 10           Mufasa    Leis     NA
## 11 One of them days Rhianna      4
## 12 One of them days Lyndsay      5
## 13 One of them days Kristel      4
## 14 One of them days    Akil      4
## 15 One of them days    Leis      3
## 16 Den of Thieves 2 Rhianna      3
## 17 Den of Thieves 2 Lyndsay     NA
## 18 Den of Thieves 2 Kristel     NA
## 19 Den of Thieves 2    Akil      5
## 20 Den of Thieves 2    Leis      4
## 21  The Fire Inside Rhianna      5
## 22  The Fire Inside Lyndsay      4
## 23  The Fire Inside Kristel      4
## 24  The Fire Inside    Akil      4
## 25  The Fire Inside    Leis      3
## 26        Good Rich Rhianna      3
## 27        Good Rich Lyndsay      3
## 28        Good Rich Kristel      4
## 29        Good Rich    Akil     NA
## 30        Good Rich    Leis      2

Storing the empty entries as NULL in SQL

  for (i in 1:nrow(ratings)) {
  existing <- dbGetQuery(connection, sprintf(
    "SELECT COUNT(*) FROM RecentMovieRatings WHERE movie = '%s' AND rater = '%s';",
    ratings$movie[i], ratings$rater[i]
  ))
  
  if (existing[1,1] == 0) {  # Only insert if the pair does not exist
    table <- sprintf("INSERT INTO RecentMovieRatings (movie, rater, rating) VALUES ('%s', '%s', %s);",
                     ratings$movie[i], ratings$rater[i], 
                     ifelse(is.na(ratings$rating[i]), "NULL", ratings$rating[i]))
    dbExecute(connection, table)
  }
}

Removing all null ratings

If a rater did not see the movie, their missing rating shows a lack of knowledge about the movie. I decided to completely remove it because averaging or inputing a random value could skew future analyses and paint a picture that does not represent the facts.

If most raters who watched a particular movie rated it a 5, and I replaced the missing values with the average of 5 or close to 5 - this would suggest that persons who didn’t see the movie would have rated it highly as well similarly—which may not be true based on preferences etc.

By removing incomplete rows, I’m aiming to ensure that every rating in the dataset is based on actual opinions and the data is credible.

ratings <- na.omit(ratings)
print (ratings)
##               movie   rater rating
## 1            Wicked Rhianna      5
## 2            Wicked Lyndsay      4
## 3            Wicked Kristel      5
## 4            Wicked    Akil      3
## 5            Wicked    Leis      2
## 6            Mufasa Rhianna      4
## 8            Mufasa Kristel      4
## 9            Mufasa    Akil      2
## 11 One of them days Rhianna      4
## 12 One of them days Lyndsay      5
## 13 One of them days Kristel      4
## 14 One of them days    Akil      4
## 15 One of them days    Leis      3
## 16 Den of Thieves 2 Rhianna      3
## 19 Den of Thieves 2    Akil      5
## 20 Den of Thieves 2    Leis      4
## 21  The Fire Inside Rhianna      5
## 22  The Fire Inside Lyndsay      4
## 23  The Fire Inside Kristel      4
## 24  The Fire Inside    Akil      4
## 25  The Fire Inside    Leis      3
## 26        Good Rich Rhianna      3
## 27        Good Rich Lyndsay      3
## 28        Good Rich Kristel      4
## 30        Good Rich    Leis      2
ratings_df <- dbGetQuery(connection, "SELECT * FROM RecentMovieRatings;")
print(ratings_df) 
##               movie   rater rating
## 1            Wicked Rhianna      5
## 2            Wicked Lyndsay      4
## 3            Wicked Kristel      5
## 4            Wicked    Akil      3
## 5            Wicked    Leis      2
## 6            Mufasa Rhianna      4
## 7            Mufasa Lyndsay     NA
## 8            Mufasa Kristel      4
## 9            Mufasa    Akil      2
## 10           Mufasa    Leis     NA
## 11 One of them days Rhianna      4
## 12 One of them days Lyndsay      5
## 13 One of them days Kristel      4
## 14 One of them days    Akil      4
## 15 One of them days    Leis      3
## 16 Den of Thieves 2 Rhianna      3
## 17 Den of Thieves 2 Lyndsay     NA
## 18 Den of Thieves 2 Kristel     NA
## 19 Den of Thieves 2    Akil      5
## 20 Den of Thieves 2    Leis      4
## 21  The Fire Inside Rhianna      5
## 22  The Fire Inside Lyndsay      4
## 23  The Fire Inside Kristel      4
## 24  The Fire Inside    Akil      4
## 25  The Fire Inside    Leis      3
## 26        Good Rich Rhianna      3
## 27        Good Rich Lyndsay      3
## 28        Good Rich Kristel      4
## 29        Good Rich    Akil     NA
## 30        Good Rich    Leis      2