Introduction

The goal of this assignment is to give you practice working with accuracy and other recommender system metrics.

  1. As in your previous assignments, compare the accuracy of at least two recommender system algorithms against your offline data.
  2. Implement support for at least one business or user experience goal such as increased serendipity, novelty, or diversity.
  3. Compare and report on any change in accuracy before and after you’ve made the change in #2.
  4. As part of your textual conclusion, discuss one or more additional experiments that could be performed and/or metrics that could be evaluated only if online evaluation were possible. Also, briefly propose how you would design a reasonable online evaluation environment.
library(recommenderlab)
library(ggplot2)
library(knitr)
library(kableExtra)
library(devtools)
library(tidyverse)
library(tictoc)
library(dplyr)
library(stats)
library(irlba)
library(rsvd)

Data Setup

Jester5k data set: a sample of 5,000 users from the anonymous ratings data of the Jester Online Joke Recommender System, collected between April 1999 and May 2003.

https://rdrr.io/cran/recommenderlab/man/Jester5k.html

set.seed(612)
data(Jester5k)
jester_df <- as(Jester5k, 'data.frame')

Data Exploration

#Dimension
dim(Jester5k)
## [1] 5000  100
# user and item are character IDs (e.g., "u2841", "j5"), so coercing them to
# numeric yields NAs; only the rating column of the summary below is meaningful
jester_df$user <- as.numeric(jester_df$user)
jester_df$item <- as.numeric(jester_df$item)
summary(jester_df)
##       user             item            rating     
##  Min.   : NA      Min.   : NA      Min.   :-9.95  
##  1st Qu.: NA      1st Qu.: NA      1st Qu.:-3.06  
##  Median : NA      Median : NA      Median : 1.46  
##  Mean   :NaN      Mean   :NaN      Mean   : 0.85  
##  3rd Qu.: NA      3rd Qu.: NA      3rd Qu.: 5.10  
##  Max.   : NA      Max.   : NA      Max.   : 9.90  
##  NA's   :362106   NA's   :362106
#Vector of all ratings and a preview of the first 10 jokes
vector_ratings <- as.vector(Jester5k@data)
head(as(Jester5k,"matrix")[,1:10])
##           j1    j2    j3    j4    j5    j6    j7    j8    j9   j10
## u2841   7.91  9.17  5.34  8.16 -8.74  7.14  8.88 -8.25  5.87  6.21
## u15547 -3.20 -3.50 -9.56 -8.74 -6.36 -3.30  0.78  2.18 -8.40 -8.79
## u15221 -1.70  1.21  1.55  2.77  5.58  3.06  2.72 -4.66  4.51 -3.06
## u15573 -7.38 -8.93 -3.88 -7.23 -4.90  4.13  2.57  3.83  4.37  3.16
## u21505  0.10  4.17  4.90  1.55  5.53  1.50 -3.79  1.94  3.59  4.81
## u15994  0.83 -4.90  0.68 -7.18  0.34 -4.32 -6.17  6.12 -5.58  5.44
#Sparsity Check
# Note: this helper returns the fill rate (the share of cells holding a rating);
# sparsity proper is its complement, 1 - fill rate.
sparsity <- function(ratings){
  nratings(ratings) / (dim(ratings)[1] * dim(ratings)[2])
}
sparsity(Jester5k)
## [1] 0.724212
jester_orig <- as.matrix(Jester5k@data)
length(jester_orig[jester_orig==0]) / (ncol(jester_orig)*nrow(jester_orig))
## [1] 0.277906
# creating matrix replacing zeros with NAs
jester_matrix <- jester_orig
is.na(jester_matrix) <- jester_matrix == 0
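The fill rate above is 0.724, so the sparsity proper is roughly 0.276, slightly below the zero fraction of 0.278 computed from the dense matrix; the small gap suggests a handful of genuine 0.00 ratings are conflated with missing entries in the dense conversion. A one-line sketch:

# Sparsity proper: share of user-joke cells with no stored rating
1 - sparsity(Jester5k)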

ratings_jester <- Jester5k[rowCounts(Jester5k) > 50, colCounts(Jester5k) > 100]
ratings_jester
## 3875 x 100 rating matrix of class 'realRatingMatrix' with 314302 ratings.
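Filtering to users with more than 50 rated jokes (and jokes rated by more than 100 users, which keeps all 100 jokes) trims the user base. A quick sketch of how many users that removes, using only objects already defined:

# Users dropped by the rowCounts(Jester5k) > 50 filter
dim(Jester5k)[1] - dim(ratings_jester)[1]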

Data Evaluation

evaluationScheme() creates an evaluation scheme from a data set; the scheme can be a simple split into training and test data or, as here, 4-fold cross-validation in which each test user has 30 ratings given to the recommender and ratings of 3 or above count as good.

#evaluation
eval_sets <- evaluationScheme(data = ratings_jester, method = "cross-validation", k= 4, given = 30, goodRating = 3)
eval_sets
## Evaluation scheme with 30 items given
## Method: 'cross-validation' with 4 run(s).
## Good ratings: >=3.000000
## Data set: 3875 x 100 rating matrix of class 'realRatingMatrix' with 314302 ratings.
#train
getData(eval_sets, "train")
## 2904 x 100 rating matrix of class 'realRatingMatrix' with 235457 ratings.
#known
getData(eval_sets, "known")
## 971 x 100 rating matrix of class 'realRatingMatrix' with 29130 ratings.
#unknown
getData(eval_sets, "unknown")
## 971 x 100 rating matrix of class 'realRatingMatrix' with 49715 ratings.
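As a quick sanity check on the scheme (a sketch introducing no new objects), the known set should hold exactly 30 ratings per test user, since given = 30:

# With given = 30, each of the 971 test users contributes exactly 30 known ratings
nratings(getData(eval_sets, "known")) == 30 * dim(getData(eval_sets, "known"))[1]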

Model Comparison

Comparing different models: item-based collaborative filtering (IBCF) and user-based collaborative filtering (UBCF), each with cosine and Pearson similarity, plus SVD and a RANDOM baseline.

eval1 <- list(
  IBCF_cos = list(name = "IBCF", param = list(method="cosine")),
  IBCF_pear = list(name = "IBCF", param = list(method="pearson")),
  UBCF_cos = list(name = "UBCF", param = list(method="cosine")),
  UBCF_pear = list(name = "UBCF", param = list(method="pearson")),
  SVD = list(name = "SVD"),
  random = list(name = "RANDOM")
)

inter1 <- evaluate(x = eval_sets, method = eval1, n= c(1, 5, seq(10, 100, 10)))
## IBCF run fold/sample [model time/prediction time]
##   1  [0.26sec/0.24sec] 
##   2  [0.26sec/0.21sec] 
##   3  [0.19sec/0.18sec] 
##   4  [0.24sec/0.15sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.22sec/0.17sec] 
##   2  [0.22sec/0.18sec] 
##   3  [0.19sec/0.2sec] 
##   4  [0.2sec/0.2sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.03sec/4.73sec] 
##   2  [0.04sec/4.16sec] 
##   3  [0.03sec/4.2sec] 
##   4  [0.03sec/4.33sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.03sec/3.63sec] 
##   2  [0.03sec/3.68sec] 
##   3  [0.03sec/3.96sec] 
##   4  [0.03sec/3.86sec] 
## SVD run fold/sample [model time/prediction time]
##   1  [0.1sec/0.14sec] 
##   2  [0.09sec/0.19sec] 
##   3  [0.07sec/0.14sec] 
##   4  [0.11sec/0.14sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0sec/0.37sec] 
##   2  [0sec/0.16sec] 
##   3  [0sec/0.14sec] 
##   4  [0sec/0.17sec]

Interpretation

ROC Curves and Precision-Recall curves

par(mfrow=c(1,2))
plot(inter1, annotate = 1, legend = "bottomright")
title("ROC curve")

plot(inter1, "prec/rec", annotate = 1, legend = "bottomright")
title("Precision-Recall")

ROC Curves and Precision-Recall curves - Nearest Neighbors Threshold

nn_values <- c(5, 10, 20, 30, 40, 50, 60, 70, 80)

eval2 <- lapply(nn_values, function(k){
  list(name = "UBCF", param = list(method = "pearson", nn = k))
})

names(eval2) <- paste0("UBCF_nn_", nn_values)

inter2 <- evaluate(x = eval_sets, method = eval2, n = c(1, 5, seq(10, 100, 10)))
## UBCF run fold/sample [model time/prediction time]
##   1  [0.02sec/3.49sec] 
##   2  [0.03sec/3.43sec] 
##   3  [0.01sec/3.63sec] 
##   4  [0.04sec/3.47sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.02sec/3.62sec] 
##   2  [0.03sec/3.43sec] 
##   3  [0.03sec/3.4sec] 
##   4  [0.02sec/3.32sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.04sec/3.49sec] 
##   2  [0.03sec/3.41sec] 
##   3  [0.03sec/3.56sec] 
##   4  [0.03sec/3.62sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.01sec/3.48sec] 
##   2  [0.03sec/3.66sec] 
##   3  [0.02sec/3.49sec] 
##   4  [0.04sec/3.59sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.03sec/3.74sec] 
##   2  [0.03sec/4.02sec] 
##   3  [0.04sec/4.37sec] 
##   4  [0.03sec/4.33sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.03sec/3.9sec] 
##   2  [0.02sec/3.86sec] 
##   3  [0.03sec/3.66sec] 
##   4  [0.03sec/4.16sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.03sec/4.86sec] 
##   2  [0.03sec/4.1sec] 
##   3  [0.03sec/3.95sec] 
##   4  [0.03sec/3.56sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.02sec/3.66sec] 
##   2  [0.05sec/3.77sec] 
##   3  [0.04sec/4.46sec] 
##   4  [0.03sec/5sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.01sec/3.79sec] 
##   2  [0.03sec/4.07sec] 
##   3  [0.02sec/3.81sec] 
##   4  [0.01sec/3.84sec]
par(mfrow=c(1,2))
plot(inter2, annotate = 1, legend = "bottomright")
title("ROC Curve - UBCF")

plot(inter2, "prec/rec", annotate = 1, legend = "bottomleft")
title("Precision-Recall - UBCF")

RMSE Value

A UBCF model with Pearson similarity and 20 nearest neighbours is fit on the training split and scored on the held-out (unknown) ratings.

rmse1 <- Recommender(getData(eval_sets, "train"), "UBCF", parameter = list(method = "pearson", nn=20))
prediction <- predict(rmse1, getData(eval_sets, "known"), type="ratings", n=10)

calcPredictionAccuracy(prediction, getData(eval_sets, "unknown"))
##      RMSE       MSE       MAE 
##  4.373599 19.128369  3.417372
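For the accuracy comparison required in item 1, the same calculation can be repeated for other candidate models on the identical train/known/unknown split; a sketch (models and accuracy are new names, and the model choices simply mirror the comparison above):

# Fit comparison models on the same training data and score the same held-out ratings
models <- list(
  UBCF_pearson = Recommender(getData(eval_sets, "train"), "UBCF",
                             parameter = list(method = "pearson", nn = 20)),
  IBCF_pearson = Recommender(getData(eval_sets, "train"), "IBCF",
                             parameter = list(method = "pearson")),
  SVD          = Recommender(getData(eval_sets, "train"), "SVD")
)
accuracy <- sapply(models, function(m){
  p <- predict(m, getData(eval_sets, "known"), type = "ratings")
  calcPredictionAccuracy(p, getData(eval_sets, "unknown"))
})
kable(t(round(accuracy, 4)))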

Serendipity

Serendipity has become an increasingly common concern in recommender systems; the term means a lucky find or a pleasant surprise. To introduce some serendipity here, every stored rating is perturbed by a small random amount before training, so the resulting recommendations are no longer driven purely by the recorded preferences and can surface jokes that would not come out of the unmodified model. As expected, the RMSE increases a bit relative to the baseline model above.

set.seed(612)
# Custom infix operator: shifts a rating up or down by a random amount
'%rand%' <- function(x, y){ x + sample(c(1, -1), length(y), replace = TRUE) * y }
# Perturb every stored rating by an offset drawn from U(0, 2)
Jester.random <- Jester5k
Jester.random@data@x <- sapply(Jester.random@data@x, function(x){ x %rand% runif(1, 0, 2) })
# Train a "Popular" recommender on the perturbed ratings
eval_sch <- evaluationScheme(Jester.random, method = "split", train = 0.8, given = 30, goodRating = 3, k = 5)
model2 <- Recommender(getData(eval_sch, "train"), "Popular")
# Predict on a split of the original (unperturbed) ratings
rating1 <- Jester5k
eval3 <- evaluationScheme(rating1, method = "split", train = 0.8, k = 1, given = 10, goodRating = 1)
predict1 <- predict(model2, getData(eval3, "known"), type = "ratings", n = 10)
# Clamp predictions to the valid rating range [-10, 10]
predict1@data@x[predict1@data@x[] < -10] <- -10
predict1@data@x[predict1@data@x[] > 10] <- 10
# RMSE, MSE, and MAE on the held-out ratings
calcPredictionAccuracy(predict1, getData(eval3, "unknown"))
##      RMSE       MSE       MAE 
##  4.559960 20.793234  3.579772
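A side-by-side view of the two accuracy runs (a sketch that reuses the prediction and predict1 objects computed above; acc_baseline and acc_serendipity are new names):

# Baseline UBCF accuracy vs. accuracy after the serendipity perturbation
acc_baseline    <- calcPredictionAccuracy(prediction, getData(eval_sets, "unknown"))
acc_serendipity <- calcPredictionAccuracy(predict1, getData(eval3, "unknown"))
kable(round(rbind(`UBCF (baseline)` = acc_baseline,
                  `Popular + random noise (serendipity)` = acc_serendipity), 4))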

Online Evaluation

In the search for a suitable recommendation algorithm, a sound evaluation methodology is needed so that results can be compared. Online experiments issue recommendations to real users and then observe how those users rate the items; behaviour such as declining to leave a review or rating unfamiliar items can still bias the results. Offline evaluation, as used throughout this report, remains the most common evaluation method in recommender systems research, even though its appropriateness has drawn criticism and has not been discussed thoroughly. Only an online setting would let us measure metrics such as click-through rate, time spent on a recommended joke, or longer-term retention, and test how users actually respond to more serendipitous lists. A reasonable online evaluation environment would randomly assign users to the competing algorithms (an A/B test), serve both arms through the same interface, and log impressions, clicks, and subsequent ratings so the arms can be compared on engagement as well as accuracy.
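To make that proposal concrete, here is a hypothetical sketch of the random assignment step (assign_arm and the logging fields are illustrative assumptions, not recommenderlab functionality):

# Hypothetical A/B assignment: randomly split user ids into two arms
assign_arm <- function(user_ids, p_treatment = 0.5){
  data.frame(
    user = user_ids,
    arm  = ifelse(runif(length(user_ids)) < p_treatment, "serendipity", "baseline"),
    stringsAsFactors = FALSE
  )
}

# Example: assign the Jester5k users and inspect the split
assignments <- assign_arm(rownames(as(Jester5k, "matrix")))
table(assignments$arm)

# In production, each served recommendation would be logged with the user id, arm,
# joke shown, and any click or rating, so click-through and mean rating can be
# compared between arms.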