Creating MySQL database

Loading the information from the SQL database into an R dataframe

Installing and Loading Required Packages

if (!requireNamespace("DBI", quietly = TRUE)) install.packages("DBI")
if (!requireNamespace("RMariaDB", quietly = TRUE)) install.packages("RMariaDB")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")

library(DBI)
library(RMariaDB)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Connecting to the MySQL Database and Loading Data

con <- dbConnect(
  RMariaDB::MariaDB(),
  user = 'root',               
  password = Sys.getenv("MYSQL_PWD"),    
  dbname = 'movies_rating2',     
  host = 'localhost',           
  port = 3306                   
)

# Check if the connection is successful
if (!is.null(con)) {
  print("Connected successfully!")
} else {
  print("Connection failed.")
}
## [1] "Connected successfully!"
# Execute the SQL query and fetch the results into an R data frame
query <- "SELECT Critics.CriticName, Movies.MovieName, Ratings.Rating
          FROM Ratings
          JOIN Critics ON Ratings.CriticID = Critics.CriticID
          JOIN Movies ON Ratings.MovieID = Movies.MovieID;"

# Fetch the data
result <- dbGetQuery(con, query)

# Display the results
glimpse (result)
## Rows: 96
## Columns: 3
## $ CriticName <chr> "Burton\r", "Charley\r", "Dan\r", "Dieudonne\r", "Matt\r", …
## $ MovieName  <chr> "CaptainAmerica\r", "CaptainAmerica\r", "CaptainAmerica\r",…
## $ Rating     <int> NA, 4, NA, 5, 4, 4, 4, NA, 4, 4, 5, NA, 5, 4, 4, NA, NA, 5,…
# Close the database connection
dbDisconnect(con)

Global Baseline Estimate Calculation

# Calculating the mean movie rating
mean_movie_rating <- mean(result$Rating, na.rm = TRUE)

# Calculate movie average ratings
movie_avg <- result %>%
  group_by(MovieName) %>%
  summarize(movie_avg = mean(Rating, na.rm = TRUE)) %>%
  ungroup()

# Calculate user average ratings
user_avg <- result %>%
  group_by(CriticName) %>%
  summarize(user_avg = mean(Rating, na.rm = TRUE)) %>%
  ungroup()

# Calculate movie rating relative to average
movie_avg_relative <- movie_avg %>%
  mutate(movie_avg_relative = movie_avg - mean_movie_rating)

# Calculate user rating relative to average
user_avg_relative <- user_avg %>%
  mutate(user_avg_relative = user_avg - mean_movie_rating)

# Merge these data back into the original data frame
result <- result %>%
  left_join(movie_avg_relative, by = "MovieName") %>%
  left_join(user_avg_relative, by = "CriticName")

# Ensure that all necessary columns are numeric
result <- result %>%
  mutate(
    movie_avg = as.numeric(movie_avg),
    user_avg = as.numeric(user_avg),
    movie_avg_relative = as.numeric(movie_avg - mean_movie_rating),
    user_avg_relative = as.numeric(user_avg - mean_movie_rating)
  )

# Calculate the global baseline estimate for each combination
result <- result %>%
  mutate(Global_Baseline_Estimate = mean_movie_rating + movie_avg_relative + user_avg_relative)

glimpse (result)
## Rows: 96
## Columns: 8
## $ CriticName               <chr> "Burton\r", "Charley\r", "Dan\r", "Dieudonne\…
## $ MovieName                <chr> "CaptainAmerica\r", "CaptainAmerica\r", "Capt…
## $ Rating                   <int> NA, 4, NA, 5, 4, 4, 4, NA, 4, 4, 5, NA, 5, 4,…
## $ movie_avg                <dbl> 4.272727, 4.272727, 4.272727, 4.272727, 4.272…
## $ movie_avg_relative       <dbl> 0.3383010, 0.3383010, 0.3383010, 0.3383010, 0…
## $ user_avg                 <dbl> 4.000000, 3.500000, 5.000000, 4.666667, 3.250…
## $ user_avg_relative        <dbl> 0.06557377, -0.43442623, 1.06557377, 0.732240…
## $ Global_Baseline_Estimate <dbl> 4.338301, 3.838301, 5.338301, 5.004968, 3.588…
# Filter to find Param's predicted rating for Pitch Perfect 2
param_pitch_perfect2 <- result %>%
  filter(CriticName == "Param\r" & MovieName == "PitchPerfect2\r")

# Print the result for Param's predicted rating for Pitch Perfect 2
print(param_pitch_perfect2)
##   CriticName       MovieName Rating movie_avg movie_avg_relative user_avg
## 1    Param\r PitchPerfect2\r     NA  2.714286          -1.220141      3.5
##   user_avg_relative Global_Baseline_Estimate
## 1        -0.4344262                 2.279859