The goal of this assignment is to give you practice working with accuracy and other recommender system metrics.

  • As in your previous assignments, compare the accuracy of at least two recommender system algorithms against your offline data.
  • Implement support for at least one business or user experience goal such as increased serendipity, novelty, or diversity.
  • Compare and report on any change in accuracy before and after you’ve made the change in #2.
  • As part of your textual conclusion, discuss one or more additional experiments that could be performed and/or metrics that could be evaluated only if online evaluation were possible. Also, briefly propose how you would design a reasonable online evaluation environment.
# Loading required libraries
library(recommenderlab)
library(reshape2)
library(RCurl)
library(dplyr)
library(ggplot2)
library(knitr)
library(tidyverse)

1 Data Loading

set.seed(1)
data_package <- data(package = "recommenderlab")
data_package$results[, "Item"]
## [1] "Jester5k"                    "JesterJokes (Jester5k)"     
## [3] "MSWeb"                       "MovieLense"                 
## [5] "MovieLenseMeta (MovieLense)"

2 Data summary and preprocessing

data("Jester5k")
Jester5k
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.

A summary of the data shows a 5000 by 100 rating matrix (users by jokes) with ratings ranging from approximately -9.95 to +9.90. Only 362,106 of the 500,000 possible user-item pairs are rated, so the matrix is sparse; the long-format data frame below contains no missing values because it stores observed ratings only.
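
Because the rating matrix is sparse, it is worth quantifying how many of the possible user-item pairs actually carry a rating. A minimal sketch, assuming Jester5k is already loaded:

# Density check (sketch): share of the 5000 x 100 user-item pairs that are rated
n_ratings <- sum(rowCounts(Jester5k))   # total stored ratings
n_ratings / prod(dim(Jester5k))         # density of the rating matrix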

# Convert to a long-format data frame (user, item, rating) for a numeric summary
jDF <- as(Jester5k, 'data.frame')
jDF$user <- as.numeric(jDF$user)
jDF$item <- as.numeric(jDF$item)
summary(jDF)
##       user           item            rating     
##  Min.   :   1   Min.   :  1.00   Min.   :-9.95  
##  1st Qu.:1263   1st Qu.: 21.00   1st Qu.:-3.06  
##  Median :2495   Median : 43.00   Median : 1.46  
##  Mean   :2503   Mean   : 43.79   Mean   : 0.85  
##  3rd Qu.:3749   3rd Qu.: 63.00   3rd Qu.: 5.10  
##  Max.   :5000   Max.   :100.00   Max.   : 9.90
similarity_users <- similarity(Jester5k[1:4, ], method ="cosine", which = "users")
as.matrix(similarity_users)
##            u2841      u15547     u15221      u15573
## u2841  0.0000000 0.500128118 0.15871815 0.197523538
## u15547 0.5001281 0.000000000 0.08944693 0.003513988
## u15221 0.1587182 0.089446933 0.00000000 0.133177034
## u15573 0.1975235 0.003513988 0.13317703 0.000000000
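
The same similarity() call also works at the item level; as a small sketch, cosine similarity between the first five jokes:

# Item-item cosine similarity for the first five jokes (sketch)
similarity_items <- similarity(Jester5k[, 1:5], method = "cosine", which = "items")
as.matrix(similarity_items)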

3 Data Exploration and Visualization

image(as.matrix(similarity_users), main = "User similarity")

hist(getRatings(Jester5k), col="turquoise3", main="Distribution of Joke Ratings", xlab="Joke Rating")

image(Jester5k[1:20, 1:50], main = "Jester Sampled Ratings Matrix")

4 Recommendation Models

Let’s check which recommendation models are available for real-valued rating matrices.

recommender_models <- recommenderRegistry$get_entries(dataType ="realRatingMatrix")
names(recommender_models)
## [1] "ALS_realRatingMatrix"          "ALS_implicit_realRatingMatrix"
## [3] "IBCF_realRatingMatrix"         "POPULAR_realRatingMatrix"     
## [5] "RANDOM_realRatingMatrix"       "RERECOMMEND_realRatingMatrix" 
## [7] "SVD_realRatingMatrix"          "SVDF_realRatingMatrix"        
## [9] "UBCF_realRatingMatrix"
slotNames(Jester5k)
## [1] "data"      "normalize"
class(Jester5k@data)
## [1] "dgCMatrix"
## attr(,"package")
## [1] "Matrix"
dim(Jester5k@data)
## [1] 5000  100
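
Each registry entry also lists its tunable parameters. As a small sketch, the defaults for the UBCF method can be inspected from the registry list retrieved above:

# Default tuning parameters stored in the registry entry for UBCF
recommender_models$UBCF_realRatingMatrix$parameters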

We create a subset of the Jester dataset, keeping users with more than 50 ratings and jokes rated more than 100 times.

rating_jokes<- Jester5k[rowCounts(Jester5k) > 50,colCounts(Jester5k) > 100] 
rating_jokes
## 3875 x 100 rating matrix of class 'realRatingMatrix' with 314302 ratings.
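
As a quick sanity check on the subset (a sketch reusing rating_jokes from above), we can look at how many ratings the least active remaining user and the least rated remaining joke still have:

# Ratings held by the least active user and the least rated joke in the subset
min(rowCounts(rating_jokes))
min(colCounts(rating_jokes))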

5 Evaluate and Compare Models

Let’s split the dataset into training and test sets (80/20, with 10 ratings given per test user) and compare the UBCF, IBCF, RANDOM, POPULAR, and SVD models on rating prediction error.

e <- evaluationScheme(rating_jokes, method="split", train=0.8, k=1, given=10, goodRating=1 )
ubcf_rec <- Recommender(getData(e, "train"), "UBCF")
ibcf_rec <- Recommender(getData(e, "train"), "IBCF")
rand_rec <- Recommender(getData(e, "train"), "RANDOM")
pop_rec <- Recommender(getData(e, "train"), "POPULAR")
svd_rec <- Recommender(getData(e, "train"), "SVD")
ubcf_pred <- predict(ubcf_rec, getData(e, "known"), type="ratings")
ibcf_pred <- predict(ibcf_rec, getData(e, "known"), type="ratings")
rand_pred <- predict(rand_rec, getData(e, "known"), type="ratings")
pop_pred <- predict(pop_rec, getData(e, "known"), type="ratings")
svd_pred <- predict(svd_rec, getData(e, "known"), type="ratings")
errs <- rbind(
  ubcf = calcPredictionAccuracy(ubcf_pred, getData(e, "unknown")), 
  ibcf = calcPredictionAccuracy(ibcf_pred, getData(e, "unknown")), 
  rand = calcPredictionAccuracy(rand_pred, getData(e, "unknown")), 
  pop = calcPredictionAccuracy(pop_pred, getData(e, "unknown")),
  svd = calcPredictionAccuracy(svd_pred, getData(e, "unknown"))
)
kable(errs)
          RMSE       MSE       MAE
ubcf  4.628835  21.42612  3.610215
ibcf  5.523931  30.51381  4.248054
rand  6.358415  40.42943  4.904322
pop   4.581196  20.98736  3.579535
svd   4.751079  22.57275  3.738948
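
Since ggplot2 is already loaded, the error table can also be turned into a quick bar chart for visual comparison. A minimal sketch using the errs matrix computed above:

# Bar chart of RMSE by algorithm (sketch, built from the errs matrix above)
errs_df <- as.data.frame(errs)
errs_df$model <- rownames(errs_df)
ggplot(errs_df, aes(x = reorder(model, RMSE), y = RMSE)) +
  geom_col(fill = "turquoise3") +
  labs(x = "Algorithm", y = "RMSE", title = "Rating prediction error by algorithm")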

5.1 Top-N List Evaluation

algorithms <- list("Random" = list(name="RANDOM", param=NULL),
                   "Popular" = list(name="POPULAR", param=NULL),
                   "UBCF"= list(name="UBCF", param=list(nn=50)),
                   "IBCF"= list(name="IBCF", param=list(k=50)),
                   "SVD"= list(name="SVD")
                   )

results <- evaluate(e, algorithms, type = "topNList", n=c(1, 5, 10, 20, 30, 50))
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.04sec/0.43sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0.08sec/1.94sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.07sec/4.25sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.33sec/0.28sec] 
## SVD run fold/sample [model time/prediction time]
##   1  [0.69sec/0.36sec]
plot(results, "prec/rec", annotate=c(1,3), legend="bottomright")

plot(results, "ROC", annotate=c(4, 3))

  • True Positive Rate (TPR): This is the percentage of purchased items that have been recommended. It’s the number of TP divided by the number of purchased items (TP + FN).

  • False Positive Rate (FPR): This is the percentage of not purchased items that have been recommended. It’s the number of FP divided by the number of not purchased items (FP + TN)

Two accuracy metrics are as follows (a short numeric check against these definitions follows the list):

  • Precision: This is the percentage of recommended items that have been purchased. It’s the number of TP divided by the total number of recommended items (TP + FP).

  • Recall: This is the percentage of purchased items that have been recommended. It’s the number of TP divided by the total number of purchases (TP + FN). It’s also equal to the True Positive Rate.
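
As a quick check against these definitions, precision and recall for the SVD run can be recomputed directly from the raw TP/FP/FN counts. A sketch, assuming the results object from the evaluation above:

# Recompute precision and recall from the raw confusion-matrix counts (SVD run)
cm_svd <- getConfusionMatrix(results$SVD)[[1]]
cbind(precision = cm_svd[, "TP"] / (cm_svd[, "TP"] + cm_svd[, "FP"]),
      recall    = cm_svd[, "TP"] / (cm_svd[, "TP"] + cm_svd[, "FN"]))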

kable(head(getConfusionMatrix(results$SVD)[1]))
 n          TP         FP        FN        TN  precision     recall        TPR        FPR
 1   0.6541935  0.3458065  36.45161  52.54839  0.6541935  0.0201930  0.0201930  0.0067701
 5   3.1651613  1.8348387  33.94065  51.05935  0.6330323  0.0957387  0.0957387  0.0352403
10   6.0877419  3.9122581  31.01806  48.98194  0.6087742  0.1804408  0.1804408  0.0729438
20  11.3122581  8.6877419  25.79355  44.20645  0.5656129  0.3294089  0.3294089  0.1594675
30  15.7522581 14.2477419  21.35355  38.64645  0.5250753  0.4460446  0.4460446  0.2610391
50  23.4206452 26.5793548  13.68516  26.31484  0.4684129  0.6451949  0.6451949  0.4909890
kable(head(getConfusionMatrix(results$Popular)[1]))
 n         TP         FP        FN        TN  precision     recall        TPR        FPR
 1   0.763871   0.236129  36.34194  52.65806  0.7638710  0.0235458  0.0235458  0.0040862
 5   3.646452   1.353548  33.45935  51.54065  0.7292903  0.1159756  0.1159756  0.0239099
10   7.109677   2.890323  29.99613  50.00387  0.7109677  0.2187956  0.2187956  0.0503608
20  12.872258   7.127742  24.23355  45.76645  0.6436129  0.3836385  0.3836385  0.1254410
30  17.295484  12.704516  19.81032  40.18968  0.5765161  0.4997104  0.4997104  0.2268141
50  24.254193  25.745807  12.85161  27.14839  0.4850839  0.6731215  0.6731215  0.4674098
kable(head(getConfusionMatrix(results$UBCF)[1]))
 n          TP         FP        FN        TN  precision     recall        TPR        FPR
 1   0.7174194  0.2825806  36.38839  52.61161  0.7174194  0.0214709  0.0214709  0.0049406
 5   3.5974194  1.4025806  33.50839  51.49161  0.7194839  0.1139464  0.1139464  0.0247390
10   6.9664516  3.0335484  30.13935  49.86065  0.6966452  0.2147957  0.2147957  0.0530635
20  12.8219355  7.1780645  24.28387  45.71613  0.6410968  0.3807074  0.3807074  0.1267463
30  17.2167742 12.7832258  19.88903  40.11097  0.5738925  0.4942407  0.4942407  0.2275813
50  24.2412903 25.7587097  12.86452  27.13548  0.4848258  0.6703262  0.6703262  0.4686155

5.2 Evaluation over a Larger Top-N Range

Let’s repeat the evaluation over a wider range of list lengths (n = 10 to 100), wrapping the call in suppressWarnings() to keep the output clean.

eval_results <- suppressWarnings(evaluate(x =e , method = algorithms, n = seq(10, 100, 10)))
## RANDOM run fold/sample [model time/prediction time]
##   1  [0sec/0.34sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0.05sec/2.28sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.05sec/4.24sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.3sec/0.28sec] 
## SVD run fold/sample [model time/prediction time]
##   1  [0.3sec/0.2sec]
plot(eval_results, "prec/rec", annotate = TRUE)
title("Precision-recall")

plot(eval_results, annotate = 1, legend = "topleft") 
title("ROC curve")

6 Artificial variation

Let’s introduce some artificial variation: a second recommender engine is trained on a perturbed copy of the ratings, in which each stored rating is randomly shifted by up to 2 points, as a complementary pairing to the original data set.

set.seed(223)

# Partially randomize the data: shift each value by a uniform(0, 2) amount, added or subtracted at random
'%rand%' <- function(x, y) {x + sample(c(1, -1), length(y), replace = TRUE) * y}
Jester.random <-  Jester5k
Jester.random@data@x<-sapply(Jester.random@data@x, function(x){x %rand% runif(1, 0, 2) })
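
A quick sanity check (sketch) that the perturbation behaves as intended, i.e. that every stored rating moved by at most 2 points in either direction:

# Differences between perturbed and original ratings should lie within [-2, 2]
summary(Jester.random@data@x - Jester5k@data@x)

Note that the perturbed ratings themselves may drift slightly outside the original -10/+10 range, which is why the predicted ratings are clamped to that range below.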

e.2 <- evaluationScheme(Jester.random, method = "split",train = 0.8, given = 30, goodRating = 3, k=5)
Rec.model.2 <- Recommender(getData(e.2, "train"), "POPULAR")

# Make predictions, targeting the known ratings of the original (unperturbed) evaluation scheme e
prediction.2 <- predict(Rec.model.2, getData(e, "known"), type="ratings",n=10)

# set all predictions that fall outside the valid range to the boundary values
prediction.2@data@x[prediction.2@data@x[] < -10] <- -10
prediction.2@data@x[prediction.2@data@x[] > 10] <- 10

calcPredictionAccuracy(prediction.2, getData(e, "unknown"))
##      RMSE       MSE       MAE 
##  4.579972 20.976148  3.580440

7 Comparison

The RMSE of the POPULAR model trained on the perturbed data (4.58) is nearly identical to that of the POPULAR model trained on the original data (4.58), so the injected variation costs essentially nothing in rating accuracy on this dataset.
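
For reference, the two variants can be placed side by side; a minimal sketch reusing objects computed above:

# Side-by-side accuracy: POPULAR trained on the original data vs. on the perturbed copy,
# both scored against the same held-out ratings
rbind(original  = errs["pop", ],
      perturbed = calcPredictionAccuracy(prediction.2, getData(e, "unknown")))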