Installing and Loading Required Packages
if (!requireNamespace("DBI", quietly = TRUE)) install.packages("DBI")
if (!requireNamespace("RMariaDB", quietly = TRUE)) install.packages("RMariaDB")
if (!requireNamespace("dplyr", quietly = TRUE)) install.packages("dplyr")
library(DBI)
library(RMariaDB)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Connecting to the MySQL Database and Loading Data
con <- dbConnect(
RMariaDB::MariaDB(),
user = 'root',
password = Sys.getenv("MYSQL_PWD"),
dbname = 'movies_rating2',
host = 'localhost',
port = 3306
)
# Check if the connection is successful
if (!is.null(con)) {
print("Connected successfully!")
} else {
print("Connection failed.")
}
## [1] "Connected successfully!"
# Execute the SQL query and fetch the results into an R data frame
query <- "SELECT Critics.CriticName, Movies.MovieName, Ratings.Rating
FROM Ratings
JOIN Critics ON Ratings.CriticID = Critics.CriticID
JOIN Movies ON Ratings.MovieID = Movies.MovieID;"
# Fetch the data
result <- dbGetQuery(con, query)
# Display the results
glimpse (result)
## Rows: 96
## Columns: 3
## $ CriticName <chr> "Burton\r", "Charley\r", "Dan\r", "Dieudonne\r", "Matt\r", …
## $ MovieName <chr> "CaptainAmerica\r", "CaptainAmerica\r", "CaptainAmerica\r",…
## $ Rating <int> NA, 4, NA, 5, 4, 4, 4, NA, 4, 4, 5, NA, 5, 4, 4, NA, NA, 5,…
# Close the database connection
dbDisconnect(con)
Global Baseline Estimate Calculation
# Calculating the mean movie rating
mean_movie_rating <- mean(result$Rating, na.rm = TRUE)
# Calculate movie average ratings
movie_avg <- result %>%
group_by(MovieName) %>%
summarize(movie_avg = mean(Rating, na.rm = TRUE)) %>%
ungroup()
# Calculate user average ratings
user_avg <- result %>%
group_by(CriticName) %>%
summarize(user_avg = mean(Rating, na.rm = TRUE)) %>%
ungroup()
# Calculate movie rating relative to average
movie_avg_relative <- movie_avg %>%
mutate(movie_avg_relative = movie_avg - mean_movie_rating)
# Calculate user rating relative to average
user_avg_relative <- user_avg %>%
mutate(user_avg_relative = user_avg - mean_movie_rating)
# Merge these data back into the original data frame
result <- result %>%
left_join(movie_avg_relative, by = "MovieName") %>%
left_join(user_avg_relative, by = "CriticName")
# Ensure that all necessary columns are numeric
result <- result %>%
mutate(
movie_avg = as.numeric(movie_avg),
user_avg = as.numeric(user_avg),
movie_avg_relative = as.numeric(movie_avg - mean_movie_rating),
user_avg_relative = as.numeric(user_avg - mean_movie_rating)
)
# Calculate the global baseline estimate for each combination
result <- result %>%
mutate(Global_Baseline_Estimate = mean_movie_rating + movie_avg_relative + user_avg_relative)
glimpse (result)
## Rows: 96
## Columns: 8
## $ CriticName <chr> "Burton\r", "Charley\r", "Dan\r", "Dieudonne\…
## $ MovieName <chr> "CaptainAmerica\r", "CaptainAmerica\r", "Capt…
## $ Rating <int> NA, 4, NA, 5, 4, 4, 4, NA, 4, 4, 5, NA, 5, 4,…
## $ movie_avg <dbl> 4.272727, 4.272727, 4.272727, 4.272727, 4.272…
## $ movie_avg_relative <dbl> 0.3383010, 0.3383010, 0.3383010, 0.3383010, 0…
## $ user_avg <dbl> 4.000000, 3.500000, 5.000000, 4.666667, 3.250…
## $ user_avg_relative <dbl> 0.06557377, -0.43442623, 1.06557377, 0.732240…
## $ Global_Baseline_Estimate <dbl> 4.338301, 3.838301, 5.338301, 5.004968, 3.588…
# Filter to find Param's predicted rating for Pitch Perfect 2
param_pitch_perfect2 <- result %>%
filter(CriticName == "Param\r" & MovieName == "PitchPerfect2\r")
# Print the result for Param's predicted rating for Pitch Perfect 2
print(param_pitch_perfect2)
## CriticName MovieName Rating movie_avg movie_avg_relative user_avg
## 1 Param\r PitchPerfect2\r NA 2.714286 -1.220141 3.5
## user_avg_relative Global_Baseline_Estimate
## 1 -0.4344262 2.279859