Evaluate recommender system

Recommender System

CUNY MSDS DATA 643

Date: 2018/07/01
Author: Rose Koh

Introduction

Goal

Accuracy and Beyond

Data

Info

Preprocessing

jester <- read.xlsx2("data/jester-data-2.xls", "jester-data-2-new", header = F, 
                     colClasses='numeric',stringsAsFactors=FALSE)

# remove first column
jester <- jester[ , -1]

# Missing value to NA
jester[jester==99] <- NA

# Check NA table
table(is.na(jester))
## 
##   FALSE    TRUE 
## 1708993  641007
total = nrow(jester) * ncol(jester)
count.nan = sum(is.na(jester))
sparsity = round((total-count.nan)/total,4)

# index
names(jester)[1:100] <- paste("item", 1:100, sep="")

#user <- c(seq(1, length(jester)))
#test <- data.frame(user,jester)

# str(jester)
# value chr -> num
fwrite(jester,"jester")
jester <- fread("jester",colClasses="numeric")
# str(jester)

# Subset and create matrix
sample <- jester[sample.int(nrow(jester), 5000, replace=FALSE), ]
matrix <- as.matrix(sample)
real.rating.mat <- as(matrix, "realRatingMatrix")
real.rating.mat
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362984 ratings.

Sparsity and distribution

Visualization

# Distribution of ratings
hist(getRatings(real.rating.mat), col="skyblue", main="Distribution of Jester Ratings", xlab="Rating")

# Average distribution of ratings
avg.rating <- colMeans(na.omit(sample))
hist(avg.rating, col="skyblue", main="Distribution of Average Jester Ratings", xlab="Average Rating")

Evaluation and Model Selection

Evaluation

items.to.keep <- 15
rating.threshold <- 1
number.of.trial <- 3
method = "split"
eval.method <- evaluationScheme(real.rating.mat, method=method, train=0.75, 
                                k=number.of.trial, 
                                given=items.to.keep, 
                                goodRating=rating.threshold )

# Evaluating a model consists of comparing the recommendations with the unknown purchases. The ratings are between 1 and 5, and we need to define what constitutes good and bad items. For this purpose, we will define a threshold with the minimum rating that is considered good
  
eval.method
## Evaluation scheme with 15 items given
## Method: 'split' with 3 run(s).
## Training set proportion: 0.750
## Good ratings: >=1.000000
## Data set: 5000 x 100 rating matrix of class 'realRatingMatrix' with 362984 ratings.

Model Selection

models <- list(
  UBCF.cos.null = list(name = "UBCF", param = list(method = "cosine", normalize = NULL)),
  UBCF.prs.null = list(name = "UBCF", param = list(method = "pearson", normalize = NULL)),
  UBCF.cos.center = list(name = "UBCF", param = list(method = "cosine", normalize = "center")),
  UBCF.prs.center = list(name = "UBCF", param = list(method = "pearson", normalize = "center")),
  UBCF.cos.z = list(name = "UBCF", param = list(method = "cosine", normalize = "Z-score")),
  UBCF.prs.z = list(name = "UBCF", param = list(method = "pearson", normalize = "Z-score"))
)

eval.results <- suppressWarnings(evaluate(x = eval.method, method = models, n = seq(10, 100, 10)))
## UBCF run fold/sample [model time/prediction time]
##   1  [0.006sec/4.893sec] 
##   2  [0.025sec/4.813sec] 
##   3  [0.001sec/4.377sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.001sec/4.129sec] 
##   2  [0sec/3.954sec] 
##   3  [0.001sec/4.068sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.024sec/4.127sec] 
##   2  [0.021sec/4.454sec] 
##   3  [0.031sec/3.869sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.024sec/3.829sec] 
##   2  [0.022sec/3.464sec] 
##   3  [0.022sec/3.507sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.125sec/4.308sec] 
##   2  [0.122sec/4.255sec] 
##   3  [0.123sec/4.245sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.115sec/3.99sec] 
##   2  [0.117sec/3.369sec] 
##   3  [0.117sec/3.866sec]
plot(eval.results, "prec/rec", annotate = T, main = "UBCF-Precision-recall")
title("UBCF-Precision-recall")

plot(eval.results, annotate = 1, legend = "topleft") 
title("UBCF-ROC curve")

models <- list(
  IBCF.cos.null = list(name = "IBCF", param = list(method = "cosine", normalize = NULL)),
  IBCF.prs.null = list(name = "IBCF", param = list(method = "pearson", normalize = NULL)),
  IBCF.cos.center = list(name = "IBCF", param = list(method = "cosine", normalize = "center")),
  IBCF.prs.center = list(name = "IBCF", param = list(method = "pearson", normalize = "center")),
  IBCF.cos.z = list(name = "IBCF", param = list(method = "cosine", normalize = "Z-score")),
  IBCF.prs.z = list(name = "IBCF", param = list(method = "pearson", normalize = "Z-score"))
)

eval.results <- suppressWarnings(evaluate(x = eval.method, method = models, n = seq(10, 100, 10)))
## IBCF run fold/sample [model time/prediction time]
##   1  [0.153sec/0.211sec] 
##   2  [0.205sec/0.203sec] 
##   3  [0.147sec/0.198sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.233sec/0.215sec] 
##   2  [0.294sec/0.18sec] 
##   3  [0.243sec/0.207sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.178sec/0.256sec] 
##   2  [0.174sec/0.274sec] 
##   3  [0.162sec/0.27sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.253sec/0.261sec] 
##   2  [0.258sec/0.274sec] 
##   3  [0.349sec/0.462sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.296sec/0.28sec] 
##   2  [0.254sec/0.296sec] 
##   3  [0.258sec/0.276sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.33sec/0.287sec] 
##   2  [0.344sec/0.288sec] 
##   3  [0.344sec/0.289sec]
plot(eval.results, "prec/rec", annotate = T, main = "IBCF-Precision-recall")
title("IBCF-Precision-recall")

plot(eval.results, annotate = 1, legend = "topleft") 
title("IBCF-ROC curve")

models <- list(
  POPULAR.cos.null = list(name = "POPULAR", param = list(method = "cosine", normalize = NULL)),
  POPULAR.prs.null = list(name = "POPULAR", param = list(method = "pearson", normalize = NULL)),
  POPULAR.cos.center = list(name = "POPULAR", param = list(method = "cosine", normalize = "center")),
  POPULAR.prs.center = list(name = "POPULAR", param = list(method = "pearson", normalize = "center")),
  POPULAR.cos.z = list(name = "POPULAR", param = list(method = "cosine", normalize = "Z-score")),
  POPULAR.prs.z = list(name = "POPULAR", param = list(method = "pearson", normalize = "Z-score"))
)

eval.results <- suppressWarnings(evaluate(x = eval.method, method = models, n = seq(10, 100, 10)))
## POPULAR run fold/sample [model time/prediction time]
##   1  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.008sec/2.123sec] 
##   2  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.031sec/2.094sec] 
##   3  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.006sec/2.04sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.008sec/2.138sec] 
##   2  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.006sec/2.178sec] 
##   3  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.007sec/1.964sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.039sec/1.865sec] 
##   2  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.023sec/1.781sec] 
##   3  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.024sec/1.761sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.027sec/1.769sec] 
##   2  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.024sec/1.755sec] 
##   3  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.028sec/1.786sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.107sec/1.935sec] 
##   2  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.105sec/1.785sec] 
##   3  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.105sec/1.959sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.133sec/2.162sec] 
##   2  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.121sec/2.077sec] 
##   3  Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
## [0.118sec/2.057sec]
plot(eval.results, "prec/rec", annotate = T, main = "POPULAR-Precision-recall")
title("Popular-Precision-recall")

plot(eval.results, annotate = 1, legend = "topleft") 
title("Popular-ROC curve")

models <- list(
  RANDOM.cos.null = list(name = "RANDOM", param = list(method = "cosine", normalize = NULL)),
  RANDOM.prs.null = list(name = "RANDOM", param = list(method = "pearson", normalize = NULL)),
  RANDOM.cos.center = list(name = "RANDOM", param = list(method = "cosine", normalize = "center")),
  RANDOM.prs.center = list(name = "RANDOM", param = list(method = "pearson", normalize = "center")),
  RANDOM.cos.z = list(name = "RANDOM", param = list(method = "cosine", normalize = "Z-score")),
  RANDOM.prs.z = list(name = "RANDOM", param = list(method = "pearson", normalize = "Z-score"))
)

eval.results <- suppressWarnings(evaluate(x = eval.method, method = models, n = seq(10, 100, 10)))
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.003sec/0.252sec] 
##   2  [0.022sec/0.229sec] 
##   3  [0.002sec/0.23sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.003sec/0.232sec] 
##   2  [0.003sec/0.232sec] 
##   3  [0.003sec/0.232sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.003sec/0.455sec] 
##   2  [0.002sec/0.238sec] 
##   3  [0.003sec/0.226sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.002sec/0.235sec] 
##   2  [0.002sec/0.239sec] 
##   3  [0.002sec/0.223sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.003sec/0.225sec] 
##   2  [0.003sec/0.218sec] 
##   3  [0.002sec/0.199sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.002sec/0.229sec] 
##   2  [0.002sec/0.233sec] 
##   3  [0.002sec/0.223sec]
plot(eval.results, "prec/rec", annotate = T, main = "RANDOM-Precision-recall")
title("Random-Precision-recall")

plot(eval.results, annotate = 1, legend = "topleft") 
title("Random-ROC curve")

Prediction and Accuracy

Prediction

# user base cf
ubcf <- Recommender(getData(eval.method, "train"), "UBCF")

# item base cf
ibcf <- Recommender(getData(eval.method, "train"), "IBCF")

# serendipity and novelty
popular <- Recommender(getData(eval.method, "train"), "POPULAR")

random <- Recommender(getData(eval.method, "train"), "RANDOM")
# user base cf
ubcf.after <- Recommender(getData(eval.method, "train"), "UBCF", parameter = list(method = "pearson", normalize = "Z-score"))

# item base cf
ibcf.after <- Recommender(getData(eval.method, "train"), "IBCF", parameter = list(method = "cosine", normalize = "Z-score"))

# serendipity and novelty
popular.after <- Recommender(getData(eval.method, "train"), "POPULAR", parameter = list(method = "pearson", normalize = "Z-score"))
## Available parameter (with default values):
## normalize     =  center
## aggregationRatings    =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colMeans")
## aggregationPopularity     =  function (x, na.rm = FALSE, dims = 1, ...)  standardGeneric("colSums")
## verbose   =  FALSE
random.after <- Recommender(getData(eval.method, "train"), "RANDOM", parameter = list(method = "pearson", normalize = "Z-score"))
ubcf.pred <- predict(ubcf, getData(eval.method, "known"), type="ratings")
ibcf.pred <- predict(ibcf, getData(eval.method, "known"), type="ratings")
pop.pred <- predict(popular, getData(eval.method, "known"), type="ratings")
rand.pred <- predict(random, getData(eval.method, "known"), type="ratings")

ubcf.pred.after <- predict(ubcf.after, getData(eval.method, "known"), type="ratings")
ibcf.pred.after <- predict(ibcf.after, getData(eval.method, "known"), type="ratings")
pop.pred.after <- predict(popular.after, getData(eval.method, "known"), type="ratings")
rand.pred.after <- predict(random.after, getData(eval.method, "known"), type="ratings")

Accuracy

error <- rbind(
  ubcf = calcPredictionAccuracy(ubcf.pred, getData(eval.method, "unknown")), 
  ubcf.after = calcPredictionAccuracy(ubcf.pred.after, getData(eval.method, "unknown")), 
  ibcf = calcPredictionAccuracy(ibcf.pred, getData(eval.method, "unknown")), 
  ibcf.after = calcPredictionAccuracy(ibcf.pred.after, getData(eval.method, "unknown")), 
  rand = calcPredictionAccuracy(rand.pred, getData(eval.method, "unknown")), 
  rand.after = calcPredictionAccuracy(rand.pred.after, getData(eval.method, "unknown")), 
  pop = calcPredictionAccuracy(pop.pred, getData(eval.method, "unknown")),
  pop.after = calcPredictionAccuracy(pop.pred.after, getData(eval.method, "unknown"))
)

kable(error)
RMSE MSE MAE
ubcf 4.544521 20.65267 3.561756
ubcf.after 4.511355 20.35233 3.505248
ibcf 5.180793 26.84062 4.038888
ibcf.after 5.167722 26.70536 4.033240
rand 6.269908 39.31175 4.832408
rand.after 6.291395 39.58165 4.845250
pop 4.458940 19.88214 3.494865
pop.after 4.455285 19.84957 3.460197

Conclusion

Conclusion