Overview

This lab explores a popular recommendation system for R. After doing some research online, I found that recommenderlab is a commonly recommended R package for building recommender systems.

Load the Libraries

library(recommenderlab)
## Loading required package: Matrix
## Loading required package: arules
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
## 
##     as.matrix
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix
## Registered S3 methods overwritten by 'registry':
##   method               from 
##   print.registry_field proxy
##   print.registry_entry proxy

Get the Data

The data frame below contains the survey data I collected:

# Survey results: one row per respondent, one column per movie.
# `Id` identifies the respondent; every other column holds that
# respondent's rating for the named movie.
df <- data.frame(
  Id                  = as.numeric(seq_len(5)),
  Barbie              = c(5, 4, 4, 3, 4),
  Oppenheimer         = c(3, 2, 2, 3, 4),
  TopGunMaverick      = c(3, 3, 3, 5, 3),
  AvatarTheWayOfWater = c(2, 5, 5, 4, 4),
  Wicked              = c(4, 3, 4, 3, 4),
  GetOut              = c(5, 5, 4, 4, 5)
)

I then turned the data frame into a matrix:

# Keep only the rating columns. Selecting by name with setdiff() is safe
# even if `Id` were absent, whereas `-which(...)` on an empty match would
# select zero columns. `drop = FALSE` keeps the result two-dimensional.
movie_matrix <- as.matrix(df[, setdiff(names(df), "Id"), drop = FALSE])

# Label each row with its respondent ("u1", "u2", ...), derived from the
# Id column so the labels always match the number of rows.
rownames(movie_matrix) <- paste0("u", df$Id)

# Convert the dense user-by-movie matrix into recommenderlab's rating
# matrix class, then print its summary.
r <- as(movie_matrix, "realRatingMatrix")
r
## 5 x 6 rating matrix of class 'realRatingMatrix' with 30 ratings.
# Inspect the ratings stored inside `r`; they print as a sparse matrix
# with one row per user and one column per movie.
getRatingMatrix(r)
## 5 x 6 sparse Matrix of class "dgCMatrix"
##    Barbie Oppenheimer TopGunMaverick AvatarTheWayOfWater Wicked GetOut
## u1      5           3              3                   2      4      5
## u2      4           2              3                   5      3      5
## u3      4           2              3                   5      4      4
## u4      3           3              5                   4      3      4
## u5      4           4              3                   4      4      5

Creating the Recommender

Create the evaluation scheme using the split method:

# Fix the RNG seed so the random train/test splits (and therefore every
# downstream evaluation result) are reproducible between runs.
set.seed(123)

# Evaluation scheme:
#   method = "split", train = 0.9 : random split with 90% of users for training
#   k = 3                         : repeat the split for 3 runs
#   given = -1                    : all-but-1, i.e. withhold one rating per
#                                   test user
#   goodRating = 5                : ratings >= 5 count as "good" items
#
# Alternative kept for reference (cross-validation instead of a split):
# eval_scheme <- evaluationScheme(r, method = "cross", train = 0.8,
#                                 given = -1, goodRating = 5)
eval_scheme <- evaluationScheme(
  r,
  method = "split",
  train = 0.9,
  k = 3,
  given = -1,
  goodRating = 5
)
eval_scheme
## Evaluation scheme using all-but-1 items
## Method: 'split' with 3 run(s).
## Training set proportion: 0.900
## Good ratings: >=5.000000
## Data set: 5 x 6 rating matrix of class 'realRatingMatrix' with 30 ratings.

Evaluate several recommender algorithms using an evaluation scheme:

# Recommender algorithms to compare. Each entry is labelled with a
# human-readable name and gives the recommenderlab method name plus its
# parameters (NULL means the method's defaults).
# NOTE(review): nn = 50 and k = 50 exceed the size of this 5 x 6 rating
# matrix, and the SVD rank of 3 triggers an irlba warning when run --
# presumably smaller values are more appropriate here; confirm.
algorithms <- list(
  "random items"      = list(name = "RANDOM",  param = NULL),
  "popular items"     = list(name = "POPULAR", param = NULL),
  "user-based CF"     = list(name = "UBCF",    param = list(nn = 50)),
  "item-based CF"     = list(name = "IBCF",    param = list(k = 50)),
  "SVD approximation" = list(name = "SVD",     param = list(k = 3))
)

# Run every algorithm through the evaluation scheme, scoring top-N
# recommendation lists for N = 1, ..., 5.
results <- recommenderlab::evaluate(
  eval_scheme,
  algorithms,
  type = "topNList",
  n = seq_len(5)
)
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.004sec/0.012sec] 
##   2  [0sec/0.011sec] 
##   3  [0sec/0.001sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0.001sec/0.012sec] 
##   2  [0.001sec/0.006sec] 
##   3  [0sec/0.001sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.001sec/0.005sec] 
##   2  [0sec/0.017sec] 
##   3  [0.001sec/0.001sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.002sec/0.002sec] 
##   2  [0.001sec/0.001sec] 
##   3  [0.001sec/0.001sec] 
## SVD run fold/sample [model time/prediction time]
##   1
## Warning in irlba::irlba(m, nv = p$k, maxit = p$maxiter): You're computing too
## large a percentage of total singular values, use a standard svd instead.
## [0.002sec/0.001sec] 
##   2
## Warning in irlba::irlba(m, nv = p$k, maxit = p$maxiter): You're computing too
## large a percentage of total singular values, use a standard svd instead.
## [0.001sec/0.001sec] 
##   3
## Warning in irlba::irlba(m, nv = p$k, maxit = p$maxiter): You're computing too
## large a percentage of total singular values, use a standard svd instead.
## [0sec/0.002sec]
# Print a summary of the evaluation results, one entry per algorithm.
results
## List of evaluation results for 5 recommenders:
## 
## $`random items`
## Evaluation results for 3 folds/samples using method 'RANDOM'.
## 
## $`popular items`
## Evaluation results for 3 folds/samples using method 'POPULAR'.
## 
## $`user-based CF`
## Evaluation results for 3 folds/samples using method 'UBCF'.
## 
## $`item-based CF`
## Evaluation results for 3 folds/samples using method 'IBCF'.
## 
## $`SVD approximation`
## Evaluation results for 3 folds/samples using method 'SVD'.
# Confusion matrix of the first run for the "random items" recommender,
# one row per top-N list length.
getConfusionMatrix(results[["random items"]])[[1]]
##      TP FP FN TN N precision recall TPR FPR n
## [1,]  1  0  0  2 3 1.0000000      1   1 0.0 1
## [2,]  1  1  0  1 3 0.5000000      1   1 0.5 2
## [3,]  1  2  0  0 3 0.3333333      1   1 1.0 3
## [4,]  1  2  0  0 3 0.3333333      1   1 1.0 4
## [5,]  1  2  0  0 3 0.3333333      1   1 1.0 5
# Average the confusion-matrix metrics over all runs, per algorithm.
# NOTE(review): precision/recall/TPR/FPR come out as NaN -- presumably at
# least one run produces an undefined (0/0) ratio on this tiny data set and
# the average propagates it; confirm, and consider using more data.
avg(results)
## $`random items`
##             TP        FP FN        TN        N precision recall TPR FPR n
## [1,] 0.3333333 0.3333333  0 0.6666667 1.333333       NaN    NaN NaN NaN 1
## [2,] 0.3333333 0.6666667  0 0.3333333 1.333333       NaN    NaN NaN NaN 2
## [3,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 3
## [4,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 4
## [5,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 5
## 
## $`popular items`
##             TP        FP FN        TN        N precision recall TPR FPR n
## [1,] 0.3333333 0.3333333  0 0.6666667 1.333333       NaN    NaN NaN NaN 1
## [2,] 0.3333333 0.6666667  0 0.3333333 1.333333       NaN    NaN NaN NaN 2
## [3,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 3
## [4,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 4
## [5,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 5
## 
## $`user-based CF`
##             TP        FP FN        TN        N precision recall TPR FPR n
## [1,] 0.3333333 0.3333333  0 0.6666667 1.333333       NaN    NaN NaN NaN 1
## [2,] 0.3333333 0.6666667  0 0.3333333 1.333333       NaN    NaN NaN NaN 2
## [3,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 3
## [4,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 4
## [5,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 5
## 
## $`item-based CF`
##             TP        FP        FN        TN        N precision recall TPR FPR
## [1,] 0.0000000 0.6666667 0.3333333 0.3333333 1.333333       NaN    NaN NaN NaN
## [2,] 0.3333333 0.6666667 0.0000000 0.3333333 1.333333       NaN    NaN NaN NaN
## [3,] 0.3333333 1.0000000 0.0000000 0.0000000 1.333333       NaN    NaN NaN NaN
## [4,] 0.3333333 1.0000000 0.0000000 0.0000000 1.333333       NaN    NaN NaN NaN
## [5,] 0.3333333 1.0000000 0.0000000 0.0000000 1.333333       NaN    NaN NaN NaN
##      n
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## 
## $`SVD approximation`
##             TP        FP FN        TN        N precision recall TPR FPR n
## [1,] 0.3333333 0.3333333  0 0.6666667 1.333333       NaN    NaN NaN NaN 1
## [2,] 0.3333333 0.6666667  0 0.3333333 1.333333       NaN    NaN NaN NaN 2
## [3,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 3
## [4,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 4
## [5,] 0.3333333 1.0000000  0 0.0000000 1.333333       NaN    NaN NaN NaN 5