recommender system

Loading data

url<- "https://raw.githubusercontent.com/stormwhale/data-mines/refs/heads/main/movieR.csv"
movie<- read.csv(url)

movie_ratings<- movie[1:16, 2:7] %>% 
  sapply(function(x) as.numeric(x))

Create a function that calculates the global baseline estimate

prediction_rating<- function(row, col){
  #This calculates mean movie ratings
  movie_mean<- mean(movie_ratings, na.rm=TRUE)
  
  #This calculates the relative movie rating by finding the mean of the column 
  # and subtract the mean movie rating
  movie_rating_to_avg<- mean(col, na.rm=TRUE)
  re_movie<- (movie_rating_to_avg - movie_mean)
  
  #This calculates relative movie rating by find the mean of the user's overall
  # rating and subtract the mean movie rating
  user_rating_to_avg<- mean(row, na.rm=TRUE)
  re_user<- (user_rating_to_avg - movie_mean)
  
  return(movie_mean + re_user + re_movie)
}
# preview the working table and observe where the NA values are located:
movie_ratings

##       CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
##  [1,]             NA       NA     NA          4            NA             4
##  [2,]              4        5      4          3             2             3
##  [3,]             NA        5     NA         NA            NA             5
##  [4,]              5        4     NA         NA            NA             5
##  [5,]              4       NA      2         NA             2             5
##  [6,]              4       NA      3          3             4            NA
##  [7,]              4        4      4          2             2             4
##  [8,]             NA       NA     NA         NA            NA             4
##  [9,]              4        4      1         NA            NA             5
## [10,]              4        3      5          5             2             3
## [11,]              5        5      5          5            NA             4
## [12,]             NA       NA      4          5            NA             3
## [13,]              5        5      5          4             4             5
## [14,]              4       NA     NA         NA            NA             4
## [15,]              4        5      3          3             3            NA
## [16,]             NA       NA      5          5            NA            NA

To run the prediction function to all NA values and add the names and titles of the movies back to the data frame:

# This function loops through first rows and then columns to search for any 
# cells that are NA in the data-frame and run the prediction function from 
# above to predict the user rating of the movie if NA is found.

pre_movie<- function(ratings) {
  for (i in 1:nrow(movie_ratings)){
    for (j in 1:ncol(movie_ratings)) {
      if (is.na(movie_ratings[i, j])){
        movie_ratings[i, j]<- prediction_rating(movie_ratings[i, ], movie_ratings[, j])
      }
    }
  } 
  return(round(movie_ratings, 1))
}

pre_rating<- pre_movie(movie_ratings)

Recombine the critic names to the dataframe:

#Recombine the user names with the original data-frame
final_rating<- cbind(movie[, 1], pre_rating)

## Warning in cbind(movie[, 1], pre_rating): number of rows of result is not a
## multiple of vector length (arg 1)

# change column name to Critics
colnames(final_rating)[colnames(final_rating)=="V1"] <-"Critic" # change column name to Critics

final_rating

##                   CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2
##  [1,] "Burton"    "4.3"          "4.6"    "4"    "4"        "3"          
##  [2,] "Charley"   "4"            "5"      "4"    "3"        "2"          
##  [3,] "Dan"       "5.3"          "5"      "4.9"  "5"        "3.9"        
##  [4,] "Dieudonne" "5"            "4"      "4.6"  "4.7"      "3.6"        
##  [5,] "Matt"      "4"            "3.8"    "2"    "3.5"      "2"          
##  [6,] "Mauricio"  "4"            "4"      "3"    "3"        "4"          
##  [7,] "Max"       "4"            "4"      "4"    "2"        "2"          
##  [8,] "Nathan"    "4.4"          "4.6"    "4.3"  "4.4"      "3.4"        
##  [9,] "Param"     "4"            "4"      "1"    "3.6"      "2.6"        
## [10,] "Parshu"    "4"            "3"      "5"    "5"        "2"          
## [11,] "Prashanth" "5"            "5"      "5"    "5"        "3.8"        
## [12,] "Shipra"    "4.4"          "4.6"    "4"    "5"        "3.3"        
## [13,] "Sreejaya"  "5"            "5"      "5"    "4"        "4"          
## [14,] "Steve"     "4"            "4.5"    "4.1"  "4.2"      "3.3"        
## [15,] "Vuthy"     "4"            "5"      "3"    "3"        "3"          
## [16,] "Xingjia"   "5.4"          "5.6"    "5"    "5"        "4.4"        
##       StarWarsForce
##  [1,] "4"          
##  [2,] "3"          
##  [3,] "5"          
##  [4,] "5"          
##  [5,] "5"          
##  [6,] "3.8"        
##  [7,] "4"          
##  [8,] "4"          
##  [9,] "5"          
## [10,] "3"          
## [11,] "4"          
## [12,] "3"          
## [13,] "5"          
## [14,] "4"          
## [15,] "3.8"        
## [16,] "5.3"

recommender system

Chi Hang(Philip) Cheung

2024-11-22

Loading data

Create a function that calculates the global baseline estimate

To run the prediction function to all NA values and add the names and titles of the movies back to the data frame:

Recombine the critic names to the dataframe: