## url <- "https://raw.githubusercontent.com/Kingtilon1/DATA607/main/assignment2/movies.csv"
## data <- read.csv(url)
## Then you can skip the code blocks from 3 to 5 and start your code from there
## This is how you implement the yaml file to hide your password
## dbname: test
## host: localhost
## port: 3306
## user: root
## password: your mysqlpassword
config <- yaml::read_yaml("config.yaml")
con <- dbConnect(
RMySQL::MySQL(),
dbname = config$dbname,
host = config$host,
port = config$port,
user = config$user,
password = config$password
)
query <- "SELECT * FROM test.movies_faves"
data <- dbGetQuery(con, query)
data$rating <- as.integer(data$rating)
head(data)
## id person movie_title rating
## 1 1 Denilyse Avengers: Endgame 5
## 2 2 Denilyse Black Panther 4
## 3 3 Denilyse Barbie 2
## 4 4 Denilyse Oppenheimer 0
## 5 5 Denilyse Jumanji 4
## 6 6 Denilyse Get Out 3
## making it so that every place where there is a 0 gets replaced
# Assuming 'data' is your movie ratings data frame
# Calculate the average rating for each movie (excluding zeros)
average_ratings <- aggregate(rating ~ movie_title, data = data[data$rating != 0, ], FUN = mean)
## the average excluding the zeros
average_ratings
## movie_title rating
## 1 Avengers: Endgame 4.600000
## 2 Barbie 2.200000
## 3 Black Panther 4.000000
## 4 Get Out 3.500000
## 5 Jumanji 3.600000
## 6 Oppenheimer 2.333333
# Calculate the number of non-zero ratings for each movie
num_non_zero_ratings <- aggregate(rating ~ movie_title, data = data[data$rating != 0, ], FUN = length)
# Merge the average ratings and number of ratings data frames
merged_data <- merge(average_ratings, num_non_zero_ratings, by = "movie_title")
# Calculate the weighted average rating
merged_data$weighted_average_rating <- (merged_data$rating.x * merged_data$rating.y) / (merged_data$rating.y + sum(data$rating == 0))
# Sort movies by weighted average rating in descending order
sorted_movies <- merged_data[order(-merged_data$weighted_average_rating), ]
# Display the top-rated movies
head(sorted_movies, 10)
## movie_title rating.x rating.y weighted_average_rating
## 1 Avengers: Endgame 4.600000 5 2.555556
## 3 Black Panther 4.000000 4 2.000000
## 5 Jumanji 3.600000 5 2.000000
## 4 Get Out 3.500000 4 1.750000
## 2 Barbie 2.200000 5 1.222222
## 6 Oppenheimer 2.333333 3 1.000000
websites that helped me calculate the weighted average how to calculate weighted mean