Loading data
# Download the raw ratings table.
url <- "https://raw.githubusercontent.com/stormwhale/data-mines/refs/heads/main/movieR.csv"
movie <- read.csv(url)
# Keep rows 1-16 and columns 2-7 (column 1 is set aside and re-attached later)
# and coerce each retained column to numeric. The result is a numeric matrix
# whose column names are the original column headers.
movie_ratings <- sapply(movie[1:16, 2:7], as.numeric)
Create a function that calculates the global baseline estimate
# Global baseline estimate for one user/movie pair:
#   global mean + (user mean - global mean) + (movie mean - global mean)
#
# row:     numeric vector holding one user's ratings (NA = missing).
# col:     numeric vector holding one movie's ratings (NA = missing).
# ratings: numeric matrix of all ratings, used only for the global mean.
#          Defaults to `movie_ratings`, so existing two-argument calls keep
#          their original behaviour.
# Returns: a single numeric value, the predicted rating. It is not clamped to
#          the rating scale and is NaN when `row` or `col` has no observed
#          values.
prediction_rating <- function(row, col, ratings = movie_ratings) {
  # Mean of every observed rating in the whole table.
  movie_mean <- mean(as.matrix(ratings), na.rm = TRUE)

  # How far this movie's average sits above or below the global mean.
  re_movie <- mean(col, na.rm = TRUE) - movie_mean

  # How far this user's average sits above or below the global mean.
  re_user <- mean(row, na.rm = TRUE) - movie_mean

  movie_mean + re_user + re_movie
}
# Preview the working matrix to see which cells are NA (missing ratings):
movie_ratings
## CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
## [1,] NA NA NA 4 NA 4
## [2,] 4 5 4 3 2 3
## [3,] NA 5 NA NA NA 5
## [4,] 5 4 NA NA NA 5
## [5,] 4 NA 2 NA 2 5
## [6,] 4 NA 3 3 4 NA
## [7,] 4 4 4 2 2 4
## [8,] NA NA NA NA NA 4
## [9,] 4 4 1 NA NA 5
## [10,] 4 3 5 5 2 3
## [11,] 5 5 5 5 NA 4
## [12,] NA NA 4 5 NA 3
## [13,] 5 5 5 4 4 5
## [14,] 4 NA NA NA NA 4
## [15,] 4 5 3 3 3 NA
## [16,] NA NA 5 5 NA NA
Apply the prediction function to every NA value, then add the critic names
and movie titles back to the data frame:
# Fill every NA cell of a ratings matrix with its global baseline estimate:
#   global mean + (user mean - global mean) + (movie mean - global mean)
#
# All means are computed once from the ratings exactly as supplied, so a
# predicted value never feeds into the row or column mean used for another
# prediction.
#
# ratings: numeric matrix (or all-numeric data frame) with one row per user
#          and one column per movie; NA marks a missing rating.
# Returns: a numeric matrix of the same shape and dimnames, with NA cells
#          replaced by their predictions and every value rounded to one
#          decimal place.
pre_movie <- function(ratings) {
  ratings <- as.matrix(ratings)

  global_mean <- mean(ratings, na.rm = TRUE)
  user_offset <- rowMeans(ratings, na.rm = TRUE) - global_mean
  movie_offset <- colMeans(ratings, na.rm = TRUE) - global_mean

  # A user or movie with no observed ratings has no known deviation from the
  # global mean, so treat its offset as zero instead of propagating NaN.
  user_offset[is.nan(user_offset)] <- 0
  movie_offset[is.nan(movie_offset)] <- 0

  # baseline[i, j] = global_mean + user_offset[i] + movie_offset[j]
  baseline <- global_mean + outer(user_offset, movie_offset, `+`)

  missing <- is.na(ratings)
  ratings[missing] <- baseline[missing]

  round(ratings, 1)
}
# Replace every missing rating in the working matrix with its predicted value.
pre_rating <- pre_movie(ratings = movie_ratings)
Recombine the critic names with the predicted ratings:
# Recombine the critic names with the predicted ratings.
# - Only the name entries that line up with the rating rows are taken, so the
#   name vector and the ratings have the same number of rows (no recycling).
# - A data frame is built instead of cbind()-ing a character vector onto the
#   numeric matrix, which would coerce every rating to a character string.
# - The name column is labelled "Critic" at construction time; cbind() leaves
#   an unnamed vector argument with an empty column name, so renaming a "V1"
#   column afterwards never matches anything.
final_rating <- data.frame(
  Critic = movie[seq_len(nrow(pre_rating)), 1],
  pre_rating,
  check.names = FALSE
)
final_rating
## CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2
## [1,] "Burton" "4.3" "4.6" "4" "4" "3"
## [2,] "Charley" "4" "5" "4" "3" "2"
## [3,] "Dan" "5.3" "5" "4.9" "5" "3.9"
## [4,] "Dieudonne" "5" "4" "4.6" "4.7" "3.6"
## [5,] "Matt" "4" "3.8" "2" "3.5" "2"
## [6,] "Mauricio" "4" "4" "3" "3" "4"
## [7,] "Max" "4" "4" "4" "2" "2"
## [8,] "Nathan" "4.4" "4.6" "4.3" "4.4" "3.4"
## [9,] "Param" "4" "4" "1" "3.6" "2.6"
## [10,] "Parshu" "4" "3" "5" "5" "2"
## [11,] "Prashanth" "5" "5" "5" "5" "3.8"
## [12,] "Shipra" "4.4" "4.6" "4" "5" "3.3"
## [13,] "Sreejaya" "5" "5" "5" "4" "4"
## [14,] "Steve" "4" "4.5" "4.1" "4.2" "3.3"
## [15,] "Vuthy" "4" "5" "3" "3" "3"
## [16,] "Xingjia" "5.4" "5.6" "5" "5" "4.4"
## StarWarsForce
## [1,] "4"
## [2,] "3"
## [3,] "5"
## [4,] "5"
## [5,] "5"
## [6,] "3.8"
## [7,] "4"
## [8,] "4"
## [9,] "5"
## [10,] "3"
## [11,] "4"
## [12,] "3"
## [13,] "5"
## [14,] "4"
## [15,] "3.8"
## [16,] "5.3"