Using the information you collected on movie ratings, implement a
Global Baseline Estimate recommendation system in R.
Most recommender systems use personalized algorithms like “content
management” and “item-item collaborative filtering.” Sometimes
non-personalized recommenders are also useful or necessary. One of the
best non-personalized recommender system algorithms is the “Global
Baseline Estimate.” The job here is to use the survey data collected and
write the R code that makes a movie recommendation using the Global
Baseline Estimate algorithm.
## Warning: package 'readxl' was built under R version 4.3.3
# Fetch the survey workbook from GitHub and load it into `movie_ratings`.
# GitHub URL of the Excel file (raw file URL)
github_raw_url <- "https://raw.githubusercontent.com/pujaroy280/DATA607Week11RecommenderSystems/main/MovieRatings.xlsx"
# Local path the workbook is saved to
download_location <- "MovieRatings.xlsx"
# Download in binary mode ("wb") because an .xlsx file is a binary archive.
# download.file() returns a non-zero status on failure, so stop early instead
# of handing a missing or partial file to the reader.
if (download.file(url = github_raw_url, destfile = download_location, mode = "wb") != 0) {
  stop("Could not download ", github_raw_url, call. = FALSE)
}
# Read the Excel file. The namespace is spelled out because no library(readxl)
# call is visible in this script.
movie_ratings <- readxl::read_excel(download_location)
# View the data
print(movie_ratings)
## # A tibble: 16 × 7
## Critic CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Burton NA NA NA 4 NA 4
## 2 Charley 4 5 4 3 2 3
## 3 Dan NA 5 NA NA NA 5
## 4 Dieudo… 5 4 NA NA NA 5
## 5 Matt 4 NA 2 NA 2 5
## 6 Mauric… 4 NA 3 3 4 NA
## 7 Max 4 4 4 2 2 4
## 8 Nathan NA NA NA NA NA 4
## 9 Param 4 4 1 NA NA 5
## 10 Parshu 4 3 5 5 2 3
## 11 Prasha… 5 5 5 5 NA 4
## 12 Shipra NA NA 4 5 NA 3
## 13 Sreeja… 5 5 5 4 4 5
## 14 Steve 4 NA NA NA NA 4
## 15 Vuthy 4 5 3 3 3 NA
## 16 Xingjia NA NA 5 5 NA NA
# Keep only the six movie rating columns; the Critic name column is left out.
movies_subset <- movie_ratings[c(
  "CaptainAmerica", "Deadpool", "Frozen",
  "JungleBook", "PitchPerfect2", "StarWarsForce"
)]
# Global average: the mean of every observed rating, with NA (unrated) cells
# ignored.
global_average_rating <- mean(
  unlist(movies_subset, use.names = FALSE),
  na.rm = TRUE
)
# Show the global average rating
print(global_average_rating)
## [1] 3.934426
#' Global Baseline Estimate for every critic/movie pair
#'
#' The estimate for critic i and movie j is
#' `mu + (mean rating of critic i - mu) + (mean rating of movie j - mu)`.
#' It is returned for every cell, including the unrated (NA) ones, because
#' those are the cells a recommender has to predict.
#'
#' @param ratings_matrix Numeric matrix, data frame or tibble with one row per
#'   critic and one column per movie; NA marks a missing rating.
#' @param mu Global average rating (a single number). If it is NA, the mean of
#'   the observed ratings is used instead, with a warning.
#' @return Numeric matrix with the same dimensions and dimnames as
#'   `ratings_matrix`, holding the baseline estimates.
calculate_baseline <- function(ratings_matrix, mu) {
  # Coerce first: indexing one row of a data frame or tibble yields a one-row
  # frame, and mean() on that returns NA with a warning.
  ratings <- as.matrix(ratings_matrix)
  if (!is.numeric(ratings) && !is.logical(ratings)) {
    stop("`ratings_matrix` must contain only numeric rating columns.",
         call. = FALSE)
  }
  if (length(mu) != 1L) {
    stop("`mu` must be a single number.", call. = FALSE)
  }
  if (is.na(mu)) {
    warning("`mu` is NA; using the mean of the observed ratings instead.",
            call. = FALSE)
    mu <- mean(ratings, na.rm = TRUE)
  }

  # Bias = how far each critic / movie average sits from the global average.
  critic_bias <- rowMeans(ratings, na.rm = TRUE) - mu
  movie_bias <- colMeans(ratings, na.rm = TRUE) - mu
  # A critic or movie with no ratings at all has a NaN mean; treat it as
  # unbiased so its estimate is driven by the other term and mu.
  critic_bias[is.nan(critic_bias)] <- 0
  movie_bias[is.nan(movie_bias)] <- 0

  # outer() builds the critic-by-movie grid of bias sums in one step.
  baseline_matrix <- mu + outer(critic_bias, movie_bias, "+")
  dimnames(baseline_matrix) <- dimnames(ratings)
  baseline_matrix
}
# Calculate the global average rating, excluding NA values.
# Only the numeric rating columns are averaged: with the character Critic
# column included, as.matrix() produces a character matrix and mean() returns
# NA with a warning.
is_rating_column <- vapply(movie_ratings, is.numeric, logical(1))
mu <- mean(as.matrix(movie_ratings[is_rating_column]), na.rm = TRUE)
#' Recommend the movie with the highest baseline estimate for each critic
#'
#' @param baseline_matrix Critic-by-movie matrix of baseline estimates. NA
#'   cells are never recommended.
#' @param ratings Optional matrix or data frame of observed ratings with the
#'   same dimensions as `baseline_matrix`. When given, movies a critic has
#'   already rated (non-NA cells) are excluded from that critic's candidates.
#' @return A list with one element per row of `baseline_matrix`: the
#'   recommended movie name, or `character(0)` when the critic has no
#'   candidate movie left.
recommend_movies <- function(baseline_matrix, ratings = NULL) {
  baseline_matrix <- as.matrix(baseline_matrix)

  # Prefer the matrix's own column names. Fall back to the global
  # `movie_ratings` (whose first column is the critic name) only for unnamed
  # matrices, which is what callers of the earlier version relied on.
  movie_names <- colnames(baseline_matrix)
  if (is.null(movie_names)) {
    movie_names <- names(movie_ratings)[-1]
  }

  if (!is.null(ratings)) {
    ratings <- as.matrix(ratings)
    if (!identical(dim(ratings), dim(baseline_matrix))) {
      stop("`ratings` must have the same dimensions as `baseline_matrix`.",
           call. = FALSE)
    }
    # which.max() skips NA, so blanking rated cells removes them as candidates.
    baseline_matrix[!is.na(ratings)] <- NA
  }

  # seq_len() keeps this safe for a zero-row matrix, where 1:nrow() would
  # iterate over c(1, 0).
  lapply(seq_len(nrow(baseline_matrix)), function(i) {
    movie_names[which.max(baseline_matrix[i, ])]
  })
}
# Get movie recommendations.
# `baseline_estimates` is not assigned anywhere in the visible script, so it is
# built here from the rating columns and the global average computed above.
ratings_matrix <- as.matrix(movies_subset)
baseline_estimates <- calculate_baseline(ratings_matrix, global_average_rating)
# Blank out movies a critic has already rated so that only unseen movies can
# be recommended; a critic who rated everything gets no recommendation.
baseline_estimates[!is.na(ratings_matrix)] <- NA
movie_recommendations <- recommend_movies(baseline_estimates)
# Print movie recommendations for each user
for (i in seq_along(movie_recommendations)) {
  cat("Critic", i, ":", movie_recommendations[[i]], "\n")
}
## Critic 1 : CaptainAmerica
## Critic 2 :
## Critic 3 : CaptainAmerica
## Critic 4 : Frozen
## Critic 5 : Deadpool
## Critic 6 : Deadpool
## Critic 7 :
## Critic 8 : CaptainAmerica
## Critic 9 : JungleBook
## Critic 10 :
## Critic 11 : PitchPerfect2
## Critic 12 : CaptainAmerica
## Critic 13 :
## Critic 14 : Deadpool
## Critic 15 : StarWarsForce
## Critic 16 : CaptainAmerica