Using data ratings from library SVDApproximation, there are 1M ratings with 6040 users and 3706 movies. The ratings are 1-5 (stars).
library(recommenderlab)
## Loading required package: Matrix
## Loading required package: arules
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##
## as.matrix
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
## Loading required package: registry
library(SVDApproximation)
## Warning: replacing previous import 'data.table::melt' by 'reshape2::melt'
## when loading 'SVDApproximation'
## Warning: replacing previous import 'data.table::dcast' by 'reshape2::dcast'
## when loading 'SVDApproximation'
library(devtools)
install_github(repo = "SVDApproximation", username = "tarashnot")
## Warning: Username parameter is deprecated. Please use tarashnot/
## SVDApproximation
## Skipping install of 'SVDApproximation' from a github remote, the SHA1 (b53f26e5) has not changed since last install.
## Use `force = TRUE` to force installation
library(SVDApproximation)
dim(ratings)
## [1] 1000209 3
head(ratings)
## user item rating
## 1: 1 1 5
## 2: 6 1 4
## 3: 8 1 4
## 4: 9 1 5
## 5: 10 1 5
## 6: 18 1 4
summary(ratings)
## user item rating
## Min. : 1 Min. : 1 Min. :1.000
## 1st Qu.:1506 1st Qu.: 966 1st Qu.:3.000
## Median :3070 Median :1658 Median :4.000
## Mean :3025 Mean :1731 Mean :3.582
## 3rd Qu.:4476 3rd Qu.:2566 3rd Qu.:4.000
## Max. :6040 Max. :3706 Max. :5.000
visualize_ratings(ratings_table = ratings)
# computer item average as missing value
sparse_ratings <- sparseMatrix(i = ratings$user, j = ratings$item, x = ratings$rating,
dims = c(length(unique(ratings$user)), length(unique(ratings$item))),
dimnames = list(paste("u", 1:length(unique(ratings$user)), sep = ""),
paste("m", 1:length(unique(ratings$item)), sep = "")))
sparse_ratings[1:10, 1:10]
## 10 x 10 sparse Matrix of class "dgCMatrix"
## [[ suppressing 10 column names 'm1', 'm2', 'm3' ... ]]
##
## u1 5 . . . . . . . . .
## u2 . . . . . . . . . .
## u3 . . . . . . . . . .
## u4 . . . . . . . . . .
## u5 . . . . . 2 . . . .
## u6 4 . . . . . . . . .
## u7 . . . . . 4 . . . .
## u8 4 . . 3 . . . . . .
## u9 5 . . . . . . . . .
## u10 5 5 . . . . 4 . . .
(real_ratings <- new("realRatingMatrix", data = sparse_ratings))
## 6040 x 3706 rating matrix of class 'realRatingMatrix' with 1000209 ratings.
# create a Recommender model
model <- Recommender(real_ratings, method = "POPULAR", param=list(normalize = "center"))
# prediction of missing values for first 7 users using item average
prediction <- predict(model, real_ratings[1:7], type="ratings")
(as(prediction, "matrix")[,1:7])
## m1 m2 m3 m4 m5 m6 m7
## u1 NA 3.864537 3.748432 3.489864 3.768903 4.469532 4.073769
## u2 4.192675 3.389036 3.272931 3.014363 3.293402 3.994031 3.598268
## u3 4.381458 3.577818 3.461714 3.203145 3.482184 4.182814 3.787050
## u4 4.669973 3.866334 3.750229 3.491661 3.770700 4.471329 4.075566
## u5 3.625962 2.822322 2.706218 2.447649 2.726688 NA 3.031554
## u6 NA 3.577266 3.461161 3.202593 3.481632 4.182261 3.786498
## u7 4.802078 3.998438 3.882334 3.623765 3.902804 NA 4.207670
# model evaluation
# 5 ratings of 20% of users are excluded for testing
set.seed(1)
e <- evaluationScheme(real_ratings, method="split", train=0.8, given=-7)
model <- Recommender(getData(e, "train"), "POPULAR")
prediction <- predict(model, getData(e, "known"), type="ratings")
rmse_popular <- calcPredictionAccuracy(prediction, getData(e, "unknown"))[1]
rmse_popular
## RMSE
## 0.9578232
#Building model
model <- Recommender(real_ratings, method = "UBCF",
param=list(normalize = "center", method="Cosine", nn=50))
#Making predictions
prediction <- predict(model, real_ratings[1:5], type="ratings")
as(prediction, "matrix")[,1:5]
## m1 m2 m3 m4 m5
## u1 NA 4.211122 4.198299 4.188679 4.188679
## u2 3.921419 3.648803 3.677701 3.710468 3.753001
## u3 3.985738 3.799285 3.850174 3.891836 3.861364
## u4 4.167479 4.183525 4.190476 4.190476 4.195312
## u5 3.275539 3.071561 3.122653 3.118225 3.146465
#Estimating RMSE
set.seed(1)
model <- Recommender(getData(e, "train"), method = "UBCF",
param=list(normalize = "center", method="Cosine", nn=50))
prediction <- predict(model, getData(e, "known"), type="ratings")
rmse_ubcf <- calcPredictionAccuracy(prediction, getData(e, "unknown"))[1]
rmse_ubcf
## RMSE
## 0.985047
#Building model
model <- Recommender(real_ratings, method = "IBCF",
param=list(normalize = "center", method="Cosine", k=350))
#Making predictions
prediction <- predict(model, real_ratings[1:5], type="ratings")
as(prediction, "matrix")[,1:8]
## m1 m2 m3 m4 m5 m6 m7 m8
## u1 NA 4.315856 4.077801 4.309387 4.299674 4.419849 4.000000 4.122938
## u2 4.229380 3.890647 3.905199 3.987753 4.061174 3.828620 4.003760 4.487145
## u3 4.004607 4.174743 4.256458 4.214143 4.098748 4.109843 4.147706 4.408627
## u4 4.254916 4.500481 4.083806 4.462319 4.118940 4.404064 4.269858 4.195575
## u5 3.518569 3.804019 3.549567 3.653532 4.137978 NA 3.791349 3.595634
#Estimating RMSE
set.seed(1)
model <- Recommender(getData(e, "train"), method = "IBCF",
param=list(normalize = "center", method="Cosine", k=350))
prediction <- predict(model, getData(e, "known"), type="ratings")
rmse_ibcf <- calcPredictionAccuracy(prediction, getData(e, "unknown"))[1]
rmse_ibcf
## RMSE
## 1.032426
Summary of findings and recommendations Compare the RMSEs of content based model, user-user based model, and item-item based model, they are 0.9578232, 0.985047, 1.032426 respectively. The results show that the model accuracy from high to low is content based model, user-user based model, item-item based model. Generally speaking, the highest performance of these three models is item-item based model, and the content based is the lowest. It looks a conflict of the results with the general model performance. I would consider to rerun the model at least 10 times to get a more accurate result.
Building a Recommendation System with R by Suresh K. Gorakala, Michele Usuelli