Item-Based Collaborative Filtering

This is based on the article linked below by Muffaddal Qutbuddin.

https://towardsdatascience.com/comprehensive-guide-on-item-based-recommendation-systems-d67e40e2b75d

The data set in this exercise has 20 users and 20 movies.

# Load the rating table from the Excel workbook; later code treats the first
# column as the user id and every other column as one movie's ratings.
data <- read_excel(path = 'IBCF - Movie Ratings.xlsx')

Ratings of 20 Movies by 20 Users

Please refer to the article above to download this sample data set.

Adjust Movie Ratings

Adjust movie ratings by subtracting user’s average rating from every rating for that user. This adjusts for user-bias.

## Build data.adjusted: a copy of `data` in which every rating has the user's
## own average rating subtracted (mean-centring removes user bias).
## A rating of 0 or NA means "not rated"; such cells are stored as 0 and are
## excluded from the user's average.

# Work on a plain matrix of the rating columns (everything except the user id)
# instead of growing a data frame one cell at a time.
rating.matrix <- as.matrix(data[, -1, drop = FALSE])

# TRUE where the user actually rated the movie.
is.rated <- !is.na(rating.matrix) & rating.matrix != 0

# Unrated cells keep the default value 0.
adjusted.matrix <- matrix(0,
                          nrow = nrow(rating.matrix),
                          ncol = ncol(rating.matrix))

for (u in seq_len(nrow(rating.matrix))) {
  rated <- is.rated[u, ]
  user.ratings <- rating.matrix[u, rated]
  # Subtract the user's average rating from each movie the user rated.
  adjusted.matrix[u, rated] <- user.ratings - mean(user.ratings)
}

# Keep the user-id column and column names of `data`; replace only the ratings.
data.adjusted <- data
for (k in seq_len(ncol(adjusted.matrix))) {
  # Rating column k of the matrix is column k + 1 of the data frame.
  data.adjusted[[k + 1]] <- adjusted.matrix[, k]
}

Preview Adjusted Ratings

Create the data.ibs and data.adjusted.ibs data frames, which are data and data.adjusted without the User column. Then replace zero ratings with NA in both the data and data.ibs data frames.

# Item-only views: drop the leading user-id column from both tables.
data.adjusted.ibs <- data.adjusted[, -1]
data.ibs <- data[, -1]

# A stored rating of 0 means "not rated"; mark those cells as missing in both
# the full table and its item-only view.
data.ibs[data.ibs == 0] <- NA
data[data == 0] <- NA

Build Similarity Matrix

Create function calCosine to calculate cosine similarity.

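The function defined below is based on the adjusted cosine similarity formula, where $r_{u,i}$ is user $u$'s rating of item $i$ and $\bar{r}_u$ is user $u$'s average rating (unrated entries contribute 0 to each sum):

$$sim(i, j) = \frac{\sum_u (r_{u,i} - \bar{r}_u)(r_{u,j} - \bar{r}_u)}{\sqrt{\sum_u (r_{u,i} - \bar{r}_u)^2}\,\sqrt{\sum_u (r_{u,j} - \bar{r}_u)^2}}$$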

NOTE: this function was modified so that it is consistent with the formula above. The denominator should use the adjusted ratings as well. This was not the case with the original function.

# Cosine similarity between two equally long numeric vectors of adjusted
# (user-mean-centred) ratings.
#   r_i_adj, r_j_adj: adjusted ratings of item i and item j.
# Returns the dot product divided by the product of the two Euclidean norms
# (NaN when either vector is all zeros).
calCosine <- function(r_i_adj, r_j_adj) {
  dot.product <- sum(r_i_adj * r_j_adj)
  norm.i <- sqrt(sum(r_i_adj * r_i_adj))
  norm.j <- sqrt(sum(r_j_adj * r_j_adj))
  dot.product / (norm.i * norm.j)
}
# Build the item-item similarity table data.ibs.similarity.
# Result: one row per movie; column `items` holds the movie name and each
# remaining column (named after a movie) holds the cosine similarity between
# the row's movie and that column's movie, computed on the adjusted ratings.
item.names <- colnames(data.adjusted.ibs)
n.items <- length(item.names)

# Preallocate the whole matrix rather than growing a data frame cell by cell.
similarity.matrix <- matrix(NA_real_,
                            nrow = n.items,
                            ncol = n.items,
                            dimnames = list(NULL, item.names))

# Fill in the cosine similarity of every pair of movies (columns).
for (i in seq_len(n.items)) {
  ratings.i <- data.adjusted.ibs[[i]]
  for (j in seq_len(n.items)) {
    similarity.matrix[i, j] <- calCosine(ratings.i, data.adjusted.ibs[[j]])
  }
}

# check.names = FALSE keeps the movie titles as column names verbatim (they
# contain spaces and punctuation); `items` is a character column.
data.ibs.similarity <- data.frame(items = item.names,
                                  similarity.matrix,
                                  check.names = FALSE,
                                  stringsAsFactors = FALSE)

Preview Similarity Matrix


Calculate Recommendation Score

Create function calScore to calculate the recommendation score.

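This function is based on the formula below, where $N(i)$ is the set of movies most similar to the target movie $i$, $r_{u,j}$ is user $u$'s rating of movie $j$, and $\bar{r}_j$ is the average rating of movie $j$:

$$score(u, i) = \bar{r}_i + \frac{\sum_{j \in N(i)} sim(i, j)\,(r_{u,j} - \bar{r}_j)}{\sum_{j \in N(i)} sim(i, j)}$$

The function computes the fraction; the target movie's average rating $\bar{r}_i$ is added where the function is called.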

# Similarity-weighted average deviation used for the recommendation score.
#   history:      the user's ratings of the neighbouring movies.
#   similarities: similarity of each neighbouring movie to the target movie.
#   avgRating:    average rating subtracted from `history` (scalar or one
#                 value per neighbour).
# Returns sum((history - avgRating) * similarities) / sum(similarities).
calScore <- function(history, similarities, avgRating) {
  deviations <- history - avgRating
  weighted.total <- sum(deviations * similarities)
  weighted.total / sum(similarities)
}

NOTE: The code below was modified so that the target item’s overall average rating (targetItem.rating.avg) is taken into account instead of the user’s average rating. I think that this modification aligns the calculation with the formula above.

# Compute a recommendation score for every (user, movie) pair.
# Result: data.ibs.user.score has the same columns as `data`; column 1 holds
# the user id and every movie column holds either -1 (the user already rated
# the movie) or the predicted score for that movie.

# Create an empty data frame for the scores.
data.ibs.user.score <- data[FALSE, ]

# Number of most similar movies (neighbours) used for each prediction.
topN.size <- 10

movie.names <- colnames(data)

# Loop through the users (rows).
for (i in seq_len(nrow(data.ibs))) {
  # Get the user id for which to calculate scores.
  users <- as.numeric(data[i, 1])
  data.ibs.user.score[i, 1] <- users

  # Loop through the movies (columns); column 1 is the user id.
  for (j in seq_len(ncol(data))[-1]) {
    item <- movie.names[j]
    user.rating <- data[[j]][i]

    # We do not want to recommend movies the user has already rated,
    # so those cells get -1.
    if (!is.na(user.rating) && user.rating > 0) {
      data.ibs.user.score[i, j] <- -1
    } else {
      # Take the movies most similar to `item`. The target movie is removed by
      # name before ranking: dropping "the first row" after sorting is wrong
      # when its self-similarity is tied with another movie or is NaN.
      is.neighbour <- data.ibs.similarity[["items"]] != item
      candidates <- data.ibs.similarity[is.neighbour, c("items", item)]
      candidates <- candidates[order(candidates[[item]], decreasing = TRUE), ]
      topN <- head(candidates, n = topN.size)
      topN.similarities <- as.numeric(topN[[item]])

      # The user's ratings of those neighbours, read from row i directly.
      topN.userPurchases <- as.numeric(unlist(data[i, topN$items],
                                              use.names = FALSE))

      # Average rating of each neighbour and of the target movie.
      item.rating.avg <- as.numeric(
        colMeans(x = data.ibs[, topN$items, drop = FALSE], na.rm = TRUE)
      )
      targetItem.rating.avg <- colMeans(x = data.ibs[, item, drop = FALSE],
                                        na.rm = TRUE)

      # Replace NA with zero.
      # NOTE(review): an unrated neighbour is therefore scored as a rating of
      # 0 rather than being skipped; confirm that this is intended.
      topN.userPurchases[is.na(topN.userPurchases)] <- 0

      # Score = target movie's average + similarity-weighted deviation.
      data.ibs.user.score[i, j] <- targetItem.rating.avg +
        calScore(similarities = topN.similarities,
                 history = topN.userPurchases,
                 avgRating = item.rating.avg)

      # Keep the neighbours and ratings behind the Toy Story score of
      # user 1577 so that they can be shown as a worked example.
      if (item == '1: Toy Story (1995)' && users == '1577') {
        user1577_ToyStory <- cbind(topN$items,
                                   topN.similarities,
                                   topN.userPurchases,
                                   item.rating.avg,
                                   rep(targetItem.rating.avg,
                                       length(topN.similarities)))
      }
    } # close else statement
  } # end movie for loop
} # end user for loop

User 1577 and Toy Story Top 10 Most Similar Movies

The recommendation score of Toy Story for user 1577 is 1.3441.

The table below provides the information needed to calculate the recommendation score of Toy Story for user 1577.


Recommendation Scores

Remember, movies that were already rated by users receive a score of -1.


Save output

# Persist the item-item similarity table and the per-user recommendation
# scores as CSV files in the working directory.
write.csv(x = data.ibs.similarity, file = 'similarity matrix.csv')
write.csv(x = data.ibs.user.score, file = 'IBCF score.csv')