Evaluation of Recommender Systems

Import libraries and preprocess the dataset

set.seed(1)
library(magrittr)
library(pander)
library(recommenderlab)
library(ggplot2)
library(tictoc)
ratings <- read.csv('C:\\DATA612\\DATA612ASSINGMENTS-master\\ml-20m\\ratings.csv')
ratings <- ratings[1:600000,]
ratings <- ratings[complete.cases(ratings),]

Convert to realRatingMatrix

# ratingsMatrix <- sparseMatrix(as.integer(ratings$userId), as.integer(ratings$movieId),  x = ratings$rating)
# colnames(ratingsMatrix) <- levels(ratings$movieId)
# rownames(ratingsMatrix) <- levels(ratings$userId)
# mratings <- as(ratingsMatrix, "realRatingMatrix")
tempRatings <- as(ratings, "realRatingMatrix")

Explore

# mratings
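Since the original exploration call is commented out, here is a minimal sketch of the kind of checks that could be run on tempRatings (standard recommenderlab accessors; the particular summaries are my choice):

tempRatings                            # dimensions and number of stored ratings
summary(getRatings(tempRatings))       # distribution of the raw ratings
summary(rowCounts(tempRatings))        # number of ratings per user
summary(colCounts(tempRatings))        # number of ratings per movie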

Select Subset

# (ratingShort <- mratings[rowCounts(mratings) > 200, colCounts(mratings) > 150])
(ratings_movies <- tempRatings[rowCounts(tempRatings)>80, colCounts(tempRatings)>100])
## 1816 x 1429 rating matrix of class 'realRatingMatrix' with 358629 ratings.

Filter Out Users and Movies with Too Few Ratings

# ratingShort <- mratings[, colCounts(mratings) > 200]
# ratingShort <- ratingShort[rowCounts(ratingShort) > 150, ]
# ratingShort
# (ratings_movies <- ratingShort[, colCounts(ratingShort) != 0])
(ratings_movies <- ratings_movies[rowCounts(ratings_movies) >120, colCounts(ratings_movies) > 150])
## 1105 x 882 rating matrix of class 'realRatingMatrix' with 241279 ratings.

Set up One Model and Evaluate

n_fold <- 4
items_to_keep <- 10
rating_threshold <- 3
eval_sets <- evaluationScheme(data = ratings_movies,
                              method = "cross-validation",
                              k = n_fold,
                              given = items_to_keep,
                              goodRating = rating_threshold)
model_to_evaluate <- "IBCF"
model_parameters <- NULL
eval_recommender <- Recommender(data = getData(eval_sets, "train"),
                                method = model_to_evaluate,
                                parameter = model_parameters)
items_to_recommend <- 10
eval_prediction <- predict(object = eval_recommender,
                           newdata = getData(eval_sets, "known"),
                           n = items_to_recommend,
                           type = "ratings")
class(eval_prediction)
## [1] "realRatingMatrix"
## attr(,"package")
## [1] "recommenderlab"

Evaluation of Recommendation Accuracy per User

Evaluation of the ratings

qplot(rowCounts(eval_prediction), binwidth = 10) +
  ggtitle("Distribution of movies per user")

This distribution is roughly normal: the number of movies with predicted ratings per user mostly falls between 40 and 300.

eval_accuracy <- calcPredictionAccuracy(
  x = eval_prediction,
  data = getData(eval_sets, "unknown"),
  byUser = TRUE)
## head(eval_accuracy)
pander(head(eval_accuracy))
| user | RMSE | MSE | MAE |
|------|--------|---------|--------|
| 1 | 0.2866 | 0.08215 | 0.1721 |
| 7 | 1.548 | 2.397 | 1.293 |
| 29 | 1.361 | 1.852 | 1.111 |
| 35 | 0.8752 | 0.766 | 0.6135 |
| 53 | 1.438 | 2.067 | 1 |
| 56 | 2.185 | 4.773 | 2.045 |
qplot(eval_accuracy[, "RMSE"], binwidth = 0.1) +
  ggtitle("Distribution of the RMSE by user")

This distribution is also roughly normal: the per-user RMSE mostly falls between 0.2 and 2.

Overall Recommendation Accuracy

Average evaluation result of ratings

eval_accuracy <- calcPredictionAccuracy(
  x = eval_prediction,
  data = getData(eval_sets, "unknown"),
  byUser = FALSE)
eval_accuracy
##     RMSE      MSE      MAE 
## 1.392838 1.939999 1.030707
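As a sanity check, the overall RMSE can also be recomputed by hand from the prediction errors; a sketch, assuming the same eval_prediction and unknown split as above (the value should closely match the calcPredictionAccuracy() output):

# Dense matrices of predicted and held-out ratings (NA where no value exists)
pred_matrix    <- as(eval_prediction, "matrix")
unknown_matrix <- as(getData(eval_sets, "unknown"), "matrix")
# RMSE over the cells where both a prediction and a held-out rating exist
errors <- pred_matrix - unknown_matrix
sqrt(mean(errors^2, na.rm = TRUE))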
results <- evaluate(x = eval_sets,
                    method = model_to_evaluate,
                    n = seq(10, 100, 10))
## IBCF run fold/sample [model time/prediction time]
##   1  [12.79sec/0.08sec] 
##   2  [14.66sec/0.17sec] 
##   3  [4.61sec/0.12sec] 
##   4  [5.07sec/0.33sec]
class(results)
## [1] "evaluationResults"
## attr(,"package")
## [1] "recommenderlab"
## head(getConfusionMatrix(results)[[1]])

We now compare the recommendations with the purchases that have positive ratings; the goodRating threshold is 3.

First Element of the Confusion Matrix List, Corresponding to One Fold of the Cross-Validation

pander(head(getConfusionMatrix(results)[[1]]))
| n | TP | FP | FN | TN | precision | recall | TPR | FPR |
|----|-------|-------|-------|-------|--------|---------|---------|---------|
| 10 | 2.354 | 7.646 | 166.1 | 695.9 | 0.2354 | 0.01478 | 0.01478 | 0.01087 |
| 20 | 4.516 | 15.48 | 163.9 | 688.1 | 0.2258 | 0.02804 | 0.02804 | 0.02201 |
| 30 | 6.574 | 23.39 | 161.9 | 680.2 | 0.2195 | 0.04156 | 0.04156 | 0.03332 |
| 40 | 8.606 | 31.28 | 159.8 | 672.3 | 0.2159 | 0.05431 | 0.05431 | 0.04457 |
| 50 | 10.42 | 39.33 | 158 | 664.2 | 0.2096 | 0.06506 | 0.06506 | 0.056 |
| 60 | 12.36 | 47.15 | 156.1 | 656.4 | 0.2081 | 0.07648 | 0.07648 | 0.0671 |

Sum of all the Elements in the Confusion Matrix List

columns_to_sum <- c("TP", "FP", "FN", "TN")
indices_summed <- Reduce("+", getConfusionMatrix(results))[, columns_to_sum]
## head(indices_summed)
pander(head(indices_summed))
| n | TP | FP | FN | TN |
|----|-------|-------|-------|------|
| 10 | 9.534 | 30.4 | 697.4 | 2751 |
| 20 | 18.03 | 61.79 | 688.9 | 2719 |
| 30 | 26.73 | 92.78 | 680.2 | 2688 |
| 40 | 35.1 | 123.9 | 671.8 | 2657 |
| 50 | 43.06 | 155.3 | 663.8 | 2626 |
| 60 | 50.92 | 186.2 | 656 | 2595 |

The ROC curve plots the true positive rate (TPR, the percentage of purchased items that have been recommended) against the false positive rate (FPR, the percentage of non-purchased items that have been recommended).
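Both ROC coordinates can be recomputed directly from the summed counts above; a short sketch using the column names returned by getConfusionMatrix():

# True positive rate and false positive rate from the aggregated counts
TPR <- indices_summed[, "TP"] / (indices_summed[, "TP"] + indices_summed[, "FN"])
FPR <- indices_summed[, "FP"] / (indices_summed[, "FP"] + indices_summed[, "TN"])
head(cbind(TPR, FPR))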

plot(results,
     annotate = TRUE,
     main = "ROC curve")

Precision = the percentage of recommended items that have been purchased.
Recall = the percentage of purchased items that have been recommended.

If a small percentage of purchased items are recommended, the precision usually decreases. On the other hand, a higher percentage of purchased items will be recommended so that the recall increases:

Gorakala, Suresh K. Building a Recommendation System with R (p. 91). Packt Publishing. Kindle Edition.
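Precision and recall can likewise be recomputed from the summed counts (a sketch; note that recall is the same quantity as the TPR above):

# Precision and recall from the aggregated counts
precision <- indices_summed[, "TP"] / (indices_summed[, "TP"] + indices_summed[, "FP"])
recall    <- indices_summed[, "TP"] / (indices_summed[, "TP"] + indices_summed[, "FN"])
head(cbind(precision, recall))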

plot(results, "prec/rec",
     annotate = TRUE,
     main = "Precision-recall")

The precision-recall curve shows the expected trade-off: as more items are recommended, recall increases while precision decreases.

Comparison of Different Models of Recommender System

By applying different techniques to the same data, we can compare the recommender models and pick the most appropriate one.

library(pander)
set.seed(1)
library(recommenderlab)
library(ggplot2)
# data(MovieLense)
# ratings_movies <- MovieLense[rowCounts(MovieLense) > 50,
#                              colCounts(MovieLense) > 100]
n_fold <- 4
items_to_keep <- 9
rating_threshold <- 3
eval_sets <- evaluationScheme(data = ratings_movies,
                              method = "cross-validation",
                              k = n_fold,
                              given = items_to_keep,
                              goodRating = rating_threshold)
train <- getData(eval_sets, "train")
known <- getData(eval_sets, "known")
unknown <- getData(eval_sets, "unknown")
## list(name = "IBCF", param = list(k = 20))
models_to_evaluate <- list(
  IBCF_cos = list(name = "IBCF", param = list(method = "cosine")),
  IBCF_cor = list(name = "IBCF", param = list(method = "pearson")),
  UBCF_cos = list(name = "UBCF", param = list(method = "cosine")),
  UBCF_cor = list(name = "UBCF", param = list(method = "pearson")),
  SVD = list(name = "SVD", param = list(k = 50)),
  random = list(name = "RANDOM", param=NULL)
)
n_recommendations <- c(1, 5, seq(10, 100, 10))
list_results <- evaluate(x = eval_sets,
                    method = models_to_evaluate,
                    n = n_recommendations)
## IBCF run fold/sample [model time/prediction time]
##   1  [5.39sec/0.06sec] 
##   2  [5.83sec/0.07sec] 
##   3  [4.84sec/0.08sec] 
##   4  [5.03sec/0.08sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [4.55sec/0.08sec] 
##   2  [4.59sec/0.1sec] 
##   3  [4.88sec/0.06sec] 
##   4  [5.04sec/0.08sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.01sec/1.54sec] 
##   2  [0.01sec/1.32sec] 
##   3  [0.01sec/1.41sec] 
##   4  [0.02sec/1.66sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.01sec/1.13sec] 
##   2  [0.01sec/1.21sec] 
##   3  [0.03sec/1.26sec] 
##   4  [0sec/1.16sec] 
## SVD run fold/sample [model time/prediction time]
##   1  [0.38sec/0.13sec] 
##   2  [0.47sec/0.21sec] 
##   3  [0.39sec/0.14sec] 
##   4  [0.42sec/0.14sec] 
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.02sec/0.18sec] 
##   2  [0sec/0.22sec] 
##   3  [0.01sec/0.2sec] 
##   4  [0.02sec/0.15sec]
class(list_results)
## [1] "evaluationResultList"
## attr(,"package")
## [1] "recommenderlab"
class(list_results[[1]])
## [1] "evaluationResults"
## attr(,"package")
## [1] "recommenderlab"

The evaluation list contains one evaluationResults object per recommender model. We verify this and then average the confusion matrices across the folds.

sapply(list_results, class) == "evaluationResults"
## IBCF_cos IBCF_cor UBCF_cos UBCF_cor      SVD   random 
##     TRUE     TRUE     TRUE     TRUE     TRUE     TRUE
avg_matrices <- lapply(list_results, avg)
## head(avg_matrices$IBCF_cos[, 5:8])
pander(head(avg_matrices$SVD)[, 5:8])
| n | precision | recall | TPR | FPR |
|----|--------|----------|----------|-----------|
| 1 | 0.6146 | 0.003964 | 0.003964 | 0.0005429 |
| 5 | 0.5334 | 0.01693 | 0.01693 | 0.003292 |
| 10 | 0.4958 | 0.03094 | 0.03094 | 0.007097 |
| 20 | 0.4408 | 0.05424 | 0.05424 | 0.01579 |
| 30 | 0.41 | 0.07479 | 0.07479 | 0.02503 |
| 40 | 0.3888 | 0.09406 | 0.09406 | 0.03465 |
plot(list_results,
     annotate = 1,
     legend = "topleft")
title("ROC curve")

plot(list_results,
     "prec/rec",
     annotate = 1,
     legend = "bottomright")
title("Precision-recall")

The UBCF models, and UBCF_cor in particular, perform better than the other models. We can build a hybrid model based on UBCF and SVD.
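One way to back this up numerically is to compare the averaged precision and recall of every model at a fixed list length; a sketch reusing the avg_matrices object computed above (choosing n = 10 is my assumption; the rows of the averaged matrices are named by the list length n):

# Average precision and recall of each model at n = 10 recommendations
sapply(avg_matrices, function(m) m["10", c("precision", "recall")])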

Hybrid Recommender Model

model_method <- "SVD"

# Training
tic()
modelSVD <- Recommender(train, method = model_method, parameter = list(k = 50))
t <- toc(quiet = TRUE)
train_time <- round(t$toc - t$tic, 2)

# Predicting
tic()
predSVD <- predict(modelSVD, newdata = known, type = "ratings")
t <- toc(quiet = TRUE)
predict_time <- round(t$toc - t$tic, 2)

# timing <- rbind(timing, data.frame(Model = as.factor(model_method), Training = as.double(train_time), 
#     Predicting = as.double(predict_time)))

# Accuracy
accSVD <- calcPredictionAccuracy(predSVD, unknown)
# resultsSVD <- evaluate(x = eval_sets, method = model_method, n = c(1, 5, 10, 30, 60))
accSVD
##      RMSE       MSE       MAE 
## 0.9530662 0.9083351 0.7392059
model_method <- "UBCF"
library(tictoc)
# Training
tic()
modelUBCF <- Recommender(train, method = model_method)
t <- toc(quiet = TRUE)
train_time <- round(t$toc - t$tic, 2)

# Predicting
tic()
predUBCF <- predict(modelUBCF, newdata = known, type = "ratings")
t <- toc(quiet = TRUE)
predict_time <- round(t$toc - t$tic, 2)

# timing <- rbind(timing, data.frame(Model = as.factor(model_method), Training = as.double(train_time), 
#     Predicting = as.double(predict_time)))
# Accuracy
accUBCF <- calcPredictionAccuracy(predUBCF, unknown)
resultsUBCF <- evaluate(x = eval_sets, method = model_method, n = c(1, 5, 10, 30, 60))
## UBCF run fold/sample [model time/prediction time]
##   1  [0.01sec/1.12sec] 
##   2  [0.01sec/1.22sec] 
##   3  [0.02sec/2.12sec] 
##   4  [0.01sec/1.64sec]
accUBCF
##      RMSE       MSE       MAE 
## 0.9385449 0.8808665 0.7254522
model_Hybrid <- HybridRecommender(modelUBCF, modelSVD, weights = c(0.99, 0.01))
pred_Hybrid <- predict(model_Hybrid, newdata = known, type = "ratings")
(accHybrid <- calcPredictionAccuracy(pred_Hybrid, unknown))
##      RMSE       MSE       MAE 
## 0.9386135 0.8809953 0.7255295

Implement Support for Business and User Experience Goals

It is always possible to recommend a less popular product to a user and study the user's reaction to that recommendation. This type of recommendation can improve the user experience, broaden the user's preferences, and provide additional knowledge about the user. Another approach to novelty/serendipity could be a weighted hybrid of the top-performing UBCF model with some randomness; a sketch of such a blend follows.
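A minimal sketch of that idea, blending the fitted UBCF model with a RANDOM recommender so that a small share of the suggestions falls outside the user's usual taste (the 0.9/0.1 weights are illustrative assumptions, not tuned values):

# Blend UBCF with random suggestions to inject some novelty/serendipity
modelRandom <- Recommender(train, method = "RANDOM")
model_serendipity <- HybridRecommender(modelUBCF, modelRandom, weights = c(0.9, 0.1))
pred_serendipity <- predict(model_serendipity, newdata = known, type = "ratings")
calcPredictionAccuracy(pred_serendipity, unknown)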

Comparison of the accuracy

The accuracy of the hybrid is essentially the same as that of UBCF alone, which is expected given the 0.99/0.01 weights. It may be better to use a switching approach that chooses the best model for each user or each set of recommendations; a sketch of such a per-user switch follows.
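A minimal sketch of such a per-user switch, simply noting which of the two fitted models has the lower per-user RMSE (in practice the choice should be made on a separate validation split rather than on the test data):

# Per-user RMSE of the UBCF and SVD predictions computed earlier
rmse_ubcf <- calcPredictionAccuracy(predUBCF, unknown, byUser = TRUE)[, "RMSE"]
rmse_svd  <- calcPredictionAccuracy(predSVD, unknown, byUser = TRUE)[, "RMSE"]
# Which model would be preferred for each user
best_model <- ifelse(rmse_ubcf <= rmse_svd, "UBCF", "SVD")
table(best_model)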

Let us look at the top 10 recommendations for the first user in the test set.

pUBCF <- predict(modelUBCF, newdata = known[1], type = "topNList")
pHybrid <- predict(model_Hybrid, newdata = known[1], type = "topNList")
pUBCF@items
## $`1`
##  [1] 584 281 149 271  76 215 289 262 545 269
pHybrid@items
## $`1`
##  [1] 584 281 149 271  76 215 289 262 545 269

With a weight of 0.99 on the UBCF component, the hybrid model returns the same top-10 items, in the same order, as the UBCF model for this user; a larger weight on the SVD component would be needed before the recommended list changes noticeably.

Conclusion

• I used recommenderlab and the MovieLens 20M ratings dataset (the first 600,000 rows) to build this project.

• I first studied the effect of IBCF on individual recommendations, per user and overall. recommenderlab then allowed me to build a list of six different recommender algorithms, compare their accuracy, and finally contrast the SVD, UBCF, and hybrid models directly.

References:

Gorakala, Suresh K. and Michele Usuelli. Building a Recommendation System with R. Packt Publishing.

DATA 612 Project 4 | Accuracy and Beyond (2019-07-01). http://rstudio-pubs-static.s3.amazonaws.com/509782_839ec319a8d24a70a9e2ddd0fefa02d7.html#objective