Accuracy and Beyond
- Compare the accuracy of at least two recommender system algorithms against your offline data.
- Implement support for at least one business or user experience goal such as increased serendipity, novelty, or diversity (an illustrative sketch follows this list).
- Compare and report on any change in accuracy before and after you’ve made the change in #2.
- As part of your textual conclusion, discuss one or more additional experiments that could be performed and/or metrics that could be evaluated only if online evaluation were possible. Also, briefly propose how you would design a reasonable online evaluation environment.
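
The following is an illustrative sketch, not the pipeline evaluated below, of one way to serve the novelty goal in the second bullet: over-generate each user's top-N list and then demote very popular titles. It assumes recommenderlab is loaded and that the ratings_movies matrix from the data-selection step below is available; the IBCF model, the candidate pool of 50, and the penalty weight of 0.5 are arbitrary choices made only for illustration.

# Illustrative novelty re-ranking (sketch only, not the evaluated pipeline):
# demote very popular movies in an over-generated top-N list.
rerank_for_novelty <- function(ratings, n = 10, candidates = 50, penalty = 0.5) {
  # movie popularity, rescaled to [0, 1]
  pop <- colCounts(ratings)
  pop <- (pop - min(pop)) / (max(pop) - min(pop))
  rec <- Recommender(ratings, method = "IBCF")
  # over-generate candidates, then score = predicted rating - penalty * popularity
  cand_lists <- getList(predict(rec, ratings, n = candidates, type = "topNList"))
  pred_ratings <- as(predict(rec, ratings, type = "ratings"), "matrix")
  lapply(setNames(names(cand_lists), names(cand_lists)), function(user) {
    items <- cand_lists[[user]]
    score <- pred_ratings[user, items] - penalty * pop[items]
    names(head(sort(score, decreasing = TRUE), n))
  })
}
# novel_lists <- rerank_for_novelty(ratings_movies)
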
library(recommenderlab)
## Loading required package: Matrix
## Loading required package: arules
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##
## as.matrix
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
## Loading required package: registry
library(ggplot2)
data("MovieLense")
library(pander)
# data selection
(ratings_movies <- MovieLense[rowCounts(MovieLense) > 50, colCounts(MovieLense) > 100])
## 560 x 332 rating matrix of class 'realRatingMatrix' with 55298 ratings.
# split data
percentage_training <- .8
# setting parameters
# check the minimum number of items rated per user,
# so no user is left without items for testing the models
min(rowCounts(ratings_movies))
## [1] 18
items_to_keep <- 15
# minimum rating that is considered good
rating_threshold <- 3
# number of times to run the evaluation
n_eval <- 1
eval_sets <- evaluationScheme(
  data = ratings_movies,
  method = 'split',
  train = percentage_training,
  given = items_to_keep,
  goodRating = rating_threshold,
  k = n_eval
)
eval_sets
## Evaluation scheme with 15 items given
## Method: 'split' with 1 run(s).
## Training set proportion: 0.800
## Good ratings: >=3.000000
## Data set: 560 x 332 rating matrix of class 'realRatingMatrix' with 55298 ratings.
getData(eval_sets, 'train')
## 448 x 332 rating matrix of class 'realRatingMatrix' with 45095 ratings.
(nrow(getData(eval_sets, 'train'))/nrow(ratings_movies))
## [1] 0.8
getData(eval_sets, 'known')
## 112 x 332 rating matrix of class 'realRatingMatrix' with 1680 ratings.
getData(eval_sets, 'unknown')
## 112 x 332 rating matrix of class 'realRatingMatrix' with 8523 ratings.
nrow(getData(eval_sets, 'known'))/nrow(ratings_movies)
## [1] 0.2
unique(rowCounts(getData(eval_sets, 'known')))
## [1] 15
qplot(rowCounts(getData(eval_sets, 'unknown'))) +
  geom_histogram(binwidth = 10) +
  ggtitle('Unknown items by the users')
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# k-fold cross-validation: every user ends up in a test set exactly once
n_fold <- 4
eval_sets <- evaluationScheme(
  data = ratings_movies,
  method = 'cross-validation',
  k = n_fold,
  given = items_to_keep,
  goodRating = rating_threshold)
size_sets <- sapply(eval_sets@runsTrain, length)
size_sets
## [1] 420 420 420 420
model_to_evaluate <- "IBCF"
model_parameters <- NULL
## build model
eval_recommender <- Recommender(data = getData(eval_sets, "train"),
                                method = model_to_evaluate,
                                parameter = model_parameters)
items_to_recommend <- 10
## predict
eval_prediction <- predict(object = eval_recommender,
                           newdata = getData(eval_sets, "known"),
                           n = items_to_recommend,
                           type = "ratings")
class(eval_prediction)
## [1] "realRatingMatrix"
## attr(,"package")
## [1] "recommenderlab"
## Distribution of movies per user
qplot(rowCounts(eval_prediction)) +
  geom_histogram(binwidth = 10) +
  ggtitle("Distribution of movies per user")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

eval_accuracy <- calcPredictionAccuracy(
  x = eval_prediction,
  data = getData(eval_sets, "unknown"),
  byUser = TRUE)
pander(head(eval_accuracy))
| user |  RMSE |  MSE   |  MAE   |
|:----:|:-----:|:------:|:------:|
|  2   | 1.338 | 1.789  | 0.8071 |
|  3   | 1.147 | 1.316  | 0.9055 |
|  5   | 1.678 | 2.816  | 1.311  |
|  10  | 0.616 | 0.3795 | 0.441  |
|  14  | 1.141 | 1.302  | 0.9438 |
|  22  | 1.592 | 2.536  | 1.172  |
qplot(eval_accuracy[, "RMSE"]) +
  geom_histogram(binwidth = 0.1) +
  ggtitle("Distribution of the RMSE by user")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

eval_accuracy <- calcPredictionAccuracy(
  x = eval_prediction,
  data = getData(eval_sets, "unknown"),
  byUser = FALSE)
eval_accuracy
## RMSE MSE MAE
## 1.2017507 1.4442047 0.8989494
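
For the first requirement, the same rating-prediction accuracy can be computed for a second algorithm and set side by side with IBCF. A minimal sketch, reusing eval_sets and the getData() calls from above; UBCF is simply an obvious second candidate here, and its numbers are not reproduced in this report.

# sketch: repeat the rating-accuracy calculation for UBCF and compare with IBCF
ubcf_recommender <- Recommender(data = getData(eval_sets, "train"), method = "UBCF")
ubcf_prediction <- predict(object = ubcf_recommender,
                           newdata = getData(eval_sets, "known"),
                           type = "ratings")
ubcf_accuracy <- calcPredictionAccuracy(x = ubcf_prediction,
                                        data = getData(eval_sets, "unknown"),
                                        byUser = FALSE)
rbind(IBCF = eval_accuracy, UBCF = ubcf_accuracy)
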
results <- evaluate(x = eval_sets,
                    method = model_to_evaluate,
                    n = seq(10, 100, 10))
## IBCF run fold/sample [model time/prediction time]
## 1 [1.14sec/0.36sec]
## 2 [0.68sec/0.15sec]
## 3 [0.59sec/0.28sec]
## 4 [0.57sec/0.1sec]
class(results)
## [1] "evaluationResults"
## attr(,"package")
## [1] "recommenderlab"
head(getConfusionMatrix(results)[[1]])
## TP FP FN TN precision recall TPR
## 10 2.807143 7.192857 71.26429 235.7357 0.2807143 0.03868631 0.03868631
## 20 5.357143 14.642857 68.71429 228.2857 0.2678571 0.07522148 0.07522148
## 30 7.650000 22.350000 66.42143 220.5786 0.2550000 0.10726173 0.10726173
## 40 10.328571 29.671429 63.74286 213.2571 0.2582143 0.14290043 0.14290043
## 50 12.850000 37.150000 61.22143 205.7786 0.2570000 0.17838537 0.17838537
## 60 15.450000 44.550000 58.62143 198.3786 0.2575000 0.21785722 0.21785722
## FPR
## 10 0.02927602
## 20 0.06003685
## 30 0.09198549
## 40 0.12181104
## 50 0.15252608
## 60 0.18312719
pander(head(getConfusionMatrix(results)[[1]]))
| n  |  TP   |  FP   |  FN   |  TN   | precision | recall  |   TPR   |   FPR   |
|:--:|:-----:|:-----:|:-----:|:-----:|:---------:|:-------:|:-------:|:-------:|
| 10 | 2.807 | 7.193 | 71.26 | 235.7 |  0.2807   | 0.03869 | 0.03869 | 0.02928 |
| 20 | 5.357 | 14.64 | 68.71 | 228.3 |  0.2679   | 0.07522 | 0.07522 | 0.06004 |
| 30 | 7.65  | 22.35 | 66.42 | 220.6 |   0.255   | 0.1073  | 0.1073  | 0.09199 |
| 40 | 10.33 | 29.67 | 63.74 | 213.3 |  0.2582   | 0.1429  | 0.1429  | 0.1218  |
| 50 | 12.85 | 37.15 | 61.22 | 205.8 |   0.257   | 0.1784  | 0.1784  | 0.1525  |
| 60 | 15.45 | 44.55 | 58.62 | 198.4 |  0.2575   | 0.2179  | 0.2179  | 0.1831  |
columns_to_sum <- c("TP", "FP", "FN", "TN")
indices_summed <- Reduce("+", getConfusionMatrix(results))[, columns_to_sum]
head(indices_summed)
## TP FP FN TN
## 10 10.86429 29.13571 281.3286 946.6714
## 20 20.99286 59.00714 271.2000 916.8000
## 30 30.57857 89.42143 261.6143 886.3857
## 40 40.68571 119.31429 251.5071 856.4929
## 50 50.02857 149.97143 242.1643 825.8357
## 60 60.53571 179.46429 231.6571 796.3429
pander(head(indices_summed))
| n  |  TP   |  FP   |  FN   |  TN   |
|:--:|:-----:|:-----:|:-----:|:-----:|
| 10 | 10.86 | 29.14 | 281.3 | 946.7 |
| 20 | 20.99 | 59.01 | 271.2 | 916.8 |
| 30 | 30.58 | 89.42 | 261.6 | 886.4 |
| 40 | 40.69 | 119.3 | 251.5 | 856.5 |
| 50 | 50.03 |  150  | 242.2 | 825.8 |
| 60 | 60.54 | 179.5 | 231.7 | 796.3 |
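
The summed counts can be folded back into aggregate precision and recall with the usual definitions, precision = TP / (TP + FP) and recall = TP / (TP + FN); a short sketch over the indices_summed matrix above.

# aggregate precision and recall from the summed confusion-matrix counts
precision_summed <- indices_summed[, "TP"] / (indices_summed[, "TP"] + indices_summed[, "FP"])
recall_summed <- indices_summed[, "TP"] / (indices_summed[, "TP"] + indices_summed[, "FN"])
head(cbind(precision = precision_summed, recall = recall_summed))
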
plot(results,
     annotate = TRUE,
     main = "ROC curve")

plot(results, "prec/rec",
     annotate = TRUE,
     main = "Precision-recall")

models_to_evaluate <- list(
  IBCF_cos = list(name = "IBCF", param = list(method = "cosine")),
  IBCF_cor = list(name = "IBCF", param = list(method = "pearson")),
  UBCF_cos = list(name = "UBCF", param = list(method = "cosine")),
  UBCF_cor = list(name = "UBCF", param = list(method = "pearson")),
  random = list(name = "RANDOM", param = NULL)
)
n_recommendations <- c(1, 5, seq(10, 100, 10))
list_results <- evaluate(x = eval_sets,
                         method = models_to_evaluate,
                         n = n_recommendations)
## IBCF run fold/sample [model time/prediction time]
## 1 [0.62sec/0.11sec]
## 2 [0.83sec/0.09sec]
## 3 [0.7sec/0.1sec]
## 4 [0.73sec/0.09sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [0.57sec/0.08sec]
## 2 [0.54sec/0.1sec]
## 3 [0.71sec/0.07sec]
## 4 [0.66sec/0.09sec]
## UBCF run fold/sample [model time/prediction time]
## 1 [0sec/0.39sec]
## 2 [0.05sec/0.38sec]
## 3 [0sec/0.53sec]
## 4 [0sec/0.37sec]
## UBCF run fold/sample [model time/prediction time]
## 1 [0sec/0.53sec]
## 2 [0sec/0.38sec]
## 3 [0.02sec/0.48sec]
## 4 [0sec/0.42sec]
## RANDOM run fold/sample [model time/prediction time]
## 1 [0.01sec/0.11sec]
## 2 [0.05sec/0.11sec]
## 3 [0sec/0.27sec]
## 4 [0sec/0.11sec]
class(list_results)
## [1] "evaluationResultList"
## attr(,"package")
## [1] "recommenderlab"
class(list_results[[1]])
## [1] "evaluationResults"
## attr(,"package")
## [1] "recommenderlab"
sapply(list_results, class) == "evaluationResults"
## IBCF_cos IBCF_cor UBCF_cos UBCF_cor random
## TRUE TRUE TRUE TRUE TRUE
avg_matrices <- lapply(list_results, avg)
pander(head(avg_matrices$IBCF_cos)[, 5:8])
| n  | precision |  recall  |   TPR    |   FPR    |
|:--:|:---------:|:--------:|:--------:|:--------:|
| 1  |  0.3125   | 0.004225 | 0.004225 | 0.002754 |
| 5  |  0.2893   | 0.01986  | 0.01986  | 0.01438  |
| 10 |  0.2716   | 0.03687  | 0.03687  | 0.02958  |
| 20 |  0.2624   | 0.07248  | 0.07248  | 0.06016  |
| 30 |  0.2548   |  0.1053  |  0.1053  | 0.09123  |
| 40 |  0.2543   |  0.1412  |  0.1412  |  0.1218  |
## compare the results of different models
plot(list_results,
     annotate = 1,
     legend = "topleft")
title("ROC curve")
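
To attach a single number to the visual ROC comparison, the area under each averaged curve can be approximated with the trapezoidal rule over its (FPR, TPR) points. A sketch using the avg_matrices list computed above; because the curves only cover the evaluated list lengths, this is a partial AUC rather than the full area over [0, 1].

# approximate (partial) AUC per model via trapezoidal integration of TPR over FPR
partial_auc <- function(cm) {
  ord <- order(cm[, "FPR"])
  fpr <- cm[ord, "FPR"]
  tpr <- cm[ord, "TPR"]
  sum(diff(fpr) * (head(tpr, -1) + tail(tpr, -1)) / 2)
}
sapply(avg_matrices, partial_auc)
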

plot(list_results,
     "prec/rec",
     annotate = 1,
     legend = "bottomright")
title("Precision-recall")

vector_k <- c(5, 10, 20, 30, 40)
models_to_evaluate <- lapply(vector_k, function(k){
  list(name = "IBCF",
       param = list(method = "cosine",
                    k = k))
})
names(models_to_evaluate) <- paste0("IBCF_k_", vector_k)
##
n_recommendations <- c(1, 5, seq(10, 100, 10))
list_results <- evaluate(x = eval_sets,
                         method = models_to_evaluate,
                         n = n_recommendations)
## IBCF run fold/sample [model time/prediction time]
## 1 [0.57sec/0.03sec]
## 2 [0.67sec/0.05sec]
## 3 [0.64sec/0.05sec]
## 4 [0.69sec/0.05sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [0.49sec/0.06sec]
## 2 [0.62sec/0.05sec]
## 3 [0.64sec/0.07sec]
## 4 [0.7sec/0.06sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [0.66sec/0.08sec]
## 2 [0.51sec/0.09sec]
## 3 [0.52sec/0.08sec]
## 4 [0.64sec/0.07sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [0.67sec/0.09sec]
## 2 [0.67sec/0.09sec]
## 3 [0.67sec/0.09sec]
## 4 [0.67sec/0.09sec]
## IBCF run fold/sample [model time/prediction time]
## 1 [0.67sec/0.1sec]
## 2 [0.65sec/0.08sec]
## 3 [0.58sec/0.27sec]
## 4 [0.6sec/0.1sec]
## ROC curves for the different values of k (legend top left)
plot(list_results,
     annotate = 1,
     legend = "topleft")
title("ROC curve")

## precision-recall curves for the different values of k (legend bottom right)
plot(list_results,
     "prec/rec",
     annotate = 1,
     legend = "bottomright")
title("Precision-recall")
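
As a numeric complement to these curves, the averaged precision and recall at a fixed list length, say n = 10, can be pulled out for every candidate k. A sketch, assuming the list_results object from the k sweep above and that the averaged matrices keep the list lengths as row names, as in the earlier printouts.

# averaged precision and recall at n = 10 for each candidate k
avg_k <- lapply(list_results, avg)
t(sapply(avg_k, function(cm) cm["10", c("precision", "recall")]))
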
