We start by loading some libraries and reading in the two data files.
library(recommenderlab)
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
library(DT)
library(mltools)
library(knitr)
library(grid)
library(gridExtra)
library(corrplot)
library(qgraph)
library(methods)
library(Matrix)
Initially I tried to use manually imported CSV files, but I ran into issues either with the conversion from a sparse data table to a realRatingMatrix or with the sparsify function of the mltools package, so the MovieLense data set that ships with recommenderlab is used instead.
# Load the MovieLense rating matrix.
data(MovieLense)

# Hold-out evaluation scheme with an 80% training fraction. Per the
# recommenderlab documentation a negative `given` selects an all-but-x
# protocol (here all-but-1) and `goodRating` is the threshold at which a
# rating counts as good.
e <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.8,
  given = -1,
  goodRating = 3
)

# Fit the SVDF recommender on the training portion of the scheme.
model <- Recommender(data = getData(e, "train"), method = "SVDF")
Let’s test the prediction for a single user.
# Making predictions
# The user is passed as an index and resolved against the `data` argument
# (the training split), so it is a position in that split rather than an
# original user id.
current_user <- 1

# Ask the fitted model for the three top recommendations for that user.
prediction <- predict(
  model,
  current_user,
  data = getData(e, "train"),
  type = "topNList",
  n = 3
)

# Show the recommendations as a plain list of item labels.
as(prediction, "list")
## $`372`
## [1] "Contact (1997)" "Shawshank Redemption, The (1994)"
## [3] "Jerry Maguire (1996)"
Here we are going to compare SVD with SVDF and two additional baseline algorithms: “random” and “popular”. The latter predicts item ratings according to their mean rating.
# Candidate algorithms, keyed by the label used for them in the results.
algorithms <- list(
  random = list(name = "RANDOM", param = NULL),
  popular = list(name = "POPULAR"),
  SVDF = list(name = "SVDF"),
  SVD = list(name = "SVD")
)

# 3-fold cross-validation with 3 ratings given per test user.
# NOTE(review): `train = 0.8` is kept as in the original call; it may have no
# effect for method = "cross-validation" -- confirm against the
# evaluationScheme documentation.
scheme <- evaluationScheme(
  MovieLense,
  method = "cross-validation",
  k = 3,
  train = 0.8,
  given = 3,
  goodRating = 3
)

# Evaluate rating prediction for every algorithm without the progress output.
results <- evaluate(
  scheme,
  algorithms,
  type = "ratings",
  progress = FALSE
)
Preparing output to be displayed in the ggplot chart.
# Pull the per-fold evaluation objects out of every algorithm's result.
tmp <- lapply(results, function(alg_result) slot(alg_result, "results"))

# Collect the RMSE of each fold into one column per algorithm, then reshape
# to long format (one row per algorithm/fold) for plotting.
# `pivot_longer()` replaces the superseded `gather()`.
res <- tmp %>%
  lapply(function(folds) {
    unlist(lapply(folds, function(fold) unlist(fold@cm[, "RMSE"])))
  }) %>%
  as.data.frame() %>%
  pivot_longer(
    cols = everything(),
    names_to = "Algorithm",
    values_to = "RMSE"
  )

# Bar chart of the RMSE per algorithm with error bars. No summary function is
# supplied, so `stat = "summary"` falls back to `mean_se()`.
# `guides(fill = "none")` hides the redundant fill legend; the logical form
# `guides(fill = FALSE)` is deprecated in current ggplot2.
res %>%
  mutate(
    Algorithm = factor(
      Algorithm,
      levels = c("random", "popular", "SVDF", "SVD")
    )
  ) %>%
  ggplot(aes(Algorithm, RMSE, fill = Algorithm)) +
  geom_bar(stat = "summary") +
  geom_errorbar(stat = "summary", width = 0.3, size = 0.8) +
  coord_cartesian(ylim = c(0.6, 1.3)) +
  guides(fill = "none")
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
# ROC and precision-recall curves need TPR/FPR and precision/recall, which
# come from evaluating top-N recommendation lists. `results` was evaluated
# with type = "ratings" and holds only rating-error metrics such as RMSE, so
# it cannot produce these curves. Re-run the same scheme and algorithms as a
# top-N evaluation over several list lengths and plot that instead.
results_topn <- evaluate(
  scheme,
  algorithms,
  type = "topNList",
  n = c(1, 3, 5, 10, 15, 20),
  progress = FALSE
)

# ROC curve (TPR against FPR), one line per algorithm.
plot(results_topn, annotate = 1, legend = "topleft")
title("ROC curve")

# Precision against recall, one line per algorithm.
plot(results_topn, "prec/rec", annotate = 1, legend = "bottomright")
title("Precision-recall")