Project 4 - Alvaro Bueno

Part I: Exploratory Analysis

Read in the data

We start by loading some libraries and reading in the two data files.

library(recommenderlab)
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
library(DT)
library(mltools)
library(knitr)
library(grid)
library(gridExtra)
library(corrplot)
library(qgraph)
library(methods)
library(Matrix)

Using recommenderlab’s eval scheme to build the sets

Initially I tried to use manually imported CSV files, but I ran into issues either with the conversion from a sparse data table to a realRatingMatrix or with the use of the sparsify function from the mltools package, so the MovieLense data set bundled with recommenderlab is used below instead.

Running an algorithm in Recommenderlab

# Load the MovieLense ratings bundled with recommenderlab.
data(MovieLense)

# Split-based evaluation scheme: 80% of the data is used for training,
# all but one rating per test user is given to the recommender (given = -1),
# and ratings of 3 or higher count as good.
e <- evaluationScheme(
  MovieLense,
  method = "split",
  train = 0.8,
  given = -1,
  goodRating = 3
)

# Fit the SVDF recommender on the training portion of the scheme.
model <- Recommender(data = getData(e, "train"), method = "SVDF")

Creating predictions

Let’s test the prediction for a single user.

# Top-3 recommendations for one user, addressed by row index into the
# training ratings that are passed as `data`.
current_user <- 1
prediction <- predict(
  model,
  newdata = current_user,
  data = getData(e, "train"),
  type = "topNList",
  n = 3
)
as(prediction, "list")

## $`372`
## [1] "Contact (1997)"                   "Shawshank Redemption, The (1994)"
## [3] "Jerry Maguire (1996)"

Here we are going to compare SVD with SVDF and two additional baseline algorithms: “random”, and “popular”, which predicts ratings according to their mean rating.

# 3-fold cross-validation scheme: three ratings per test user are given to the
# recommender and ratings of 3 or higher count as good.
# NOTE(review): `train` looks specific to method = "split"; confirm whether it
# has any effect with cross-validation.
scheme <- evaluationScheme(
  MovieLense,
  method = "cross-validation",
  k = 3,
  train = 0.8,
  given = 3,
  goodRating = 3
)

# Recommenders to compare, keyed by the label used in the results.
algorithms <- list(
  random = list(name = "RANDOM", param = NULL),
  popular = list(name = "POPULAR"),
  SVDF = list(name = "SVDF"),
  SVD = list(name = "SVD")
)

# Evaluate the rating predictions of every algorithm under the scheme.
results <- evaluate(
  scheme,
  algorithms,
  type = "ratings",
  progress = FALSE
)

Preparing output to be displayed in the ggplot chart.

# Per-algorithm list of the confusion-matrix objects stored in the "results"
# slot of each evaluation result.
tmp <- lapply(results, slot, name = "results")

# One column of RMSE values per algorithm, reshaped to long format
# (columns: Algorithm, RMSE) for plotting.
res <- tmp %>%
  lapply(function(runs) {
    rmse_per_run <- lapply(runs, function(run) unlist(run@cm[, "RMSE"]))
    unlist(rmse_per_run)
  }) %>%
  as.data.frame() %>%
  gather(key = "Algorithm", value = "RMSE")

# Bar chart of the RMSE per algorithm. Both layers use the default summary
# (mean with standard-error bounds) across the evaluation runs.
res %>%
  mutate(
    Algorithm = factor(Algorithm, levels = c("random", "popular", "SVDF", "SVD"))
  ) %>%
  ggplot(aes(Algorithm, RMSE, fill = Algorithm)) +
  geom_bar(stat = "summary") +
  geom_errorbar(stat = "summary", width = 0.3, size = 0.8) +
  # Zoom the y axis without discarding data, so the summaries are unchanged.
  coord_cartesian(ylim = c(0.6, 1.3)) +
  # The fill legend repeats the x axis, so hide it. "none" replaces the
  # deprecated `FALSE` and is accepted by older ggplot2 releases as well.
  guides(fill = "none")

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

# ROC and precision-recall curves are defined for top-N recommendation lists,
# whereas `results` was evaluated with type = "ratings" and only holds rating
# prediction errors. Evaluate the same scheme and algorithms as top-N lists
# (for several list lengths) and plot the curves from that result.
results_topn <- evaluate(
  scheme,
  algorithms,
  type = "topNList",
  n = c(1, 3, 5, 10, 15, 20),
  progress = FALSE
)

plot(results_topn, annotate = 1, legend = "topleft")
title("ROC curve")

plot(results_topn, "prec/rec", annotate = 1, legend = "bottomright")
title("Precision-recall")