The goal of this assignment is to give you practice working with accuracy and other recommender system metrics.

  • As in your previous assignments, compare the accuracy of at least two recommender system algorithms against your offline data.
  • Implement support for at least one business or user experience goal such as increased serendipity, novelty, or diversity.
  • Compare and report on any change in accuracy before and after you’ve made the change in #2.
  • As part of your textual conclusion, discuss one or more additional experiments that could be performed and/or metrics that could be evaluated only if online evaluation were possible. Also, briefly propose how you would design a reasonable online evaluation environment.
# Loading required libraries
library(recommenderlab)
library(reshape2)
library(RCurl)
library(dplyr)
library(ggplot2)
library(knitr)
library(tidyverse)

1 Data Loading

set.seed(1)
data_package <- data(package = "recommenderlab")
data_package$results[, "Item"]
## [1] "Jester5k"                    "JesterJokes (Jester5k)"     
## [3] "MSWeb"                       "MovieLense"                 
## [5] "MovieLenseMeta (MovieLense)"

2 Data summary and preprocessing

data("Jester5k")
Jester5k
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.

A summary of the data shows a 5000 by 100 rating matrix (users by jokes) with ratings ranging from approximately -9.95 to +9.90. Only 362,106 of the 500,000 possible user-item pairs are rated, so the matrix is sparse; the long-format data frame below contains no missing values because it stores observed ratings only.
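
Because the rating matrix is sparse, it is worth quantifying how many of the possible user-item pairs actually carry a rating. A minimal sketch, assuming Jester5k is already loaded:

# Density check (sketch): share of the 5000 x 100 user-item pairs that are rated
n_ratings <- sum(rowCounts(Jester5k))   # total stored ratings
n_ratings / prod(dim(Jester5k))         # density of the rating matrix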

# Convert to a long-format data frame (user, item, rating) for a numeric summary
jDF <- as(Jester5k, 'data.frame')
jDF$user <- as.numeric(jDF$user)
jDF$item <- as.numeric(jDF$item)
summary(jDF)
##       user           item            rating     
##  Min.   :   1   Min.   :  1.00   Min.   :-9.95  
##  1st Qu.:1263   1st Qu.: 21.00   1st Qu.:-3.06  
##  Median :2495   Median : 43.00   Median : 1.46  
##  Mean   :2503   Mean   : 43.79   Mean   : 0.85  
##  3rd Qu.:3749   3rd Qu.: 63.00   3rd Qu.: 5.10  
##  Max.   :5000   Max.   :100.00   Max.   : 9.90
similarity_users <- similarity(Jester5k[1:4, ], method ="cosine", which = "users")
as.matrix(similarity_users)
##            u2841      u15547     u15221      u15573
## u2841  0.0000000 0.500128118 0.15871815 0.197523538
## u15547 0.5001281 0.000000000 0.08944693 0.003513988
## u15221 0.1587182 0.089446933 0.00000000 0.133177034
## u15573 0.1975235 0.003513988 0.13317703 0.000000000
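
The same similarity() call also works at the item level; as a small sketch, cosine similarity between the first five jokes:

# Item-item cosine similarity for the first five jokes (sketch)
similarity_items <- similarity(Jester5k[, 1:5], method = "cosine", which = "items")
as.matrix(similarity_items)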

3 Data Exploration and Visualization

image(as.matrix(similarity_users), main = "User similarity")

hist(getRatings(Jester5k), col="turquoise3", main="Distribution of Joke Ratings", xlab="Joke Rating")

image(Jester5k[1:20, 1:50], main = "Jester Sampled Ratings Matrix")

4 Recommendation Models

Let’s check which recommendation models are available for real-valued rating matrices.

recommender_models <- recommenderRegistry$get_entries(dataType ="realRatingMatrix")
names(recommender_models)
## [1] "ALS_realRatingMatrix"          "ALS_implicit_realRatingMatrix"
## [3] "IBCF_realRatingMatrix"         "POPULAR_realRatingMatrix"     
## [5] "RANDOM_realRatingMatrix"       "RERECOMMEND_realRatingMatrix" 
## [7] "SVD_realRatingMatrix"          "SVDF_realRatingMatrix"        
## [9] "UBCF_realRatingMatrix"
slotNames(Jester5k)
## [1] "data"      "normalize"
class(Jester5k@data)
## [1] "dgCMatrix"
## attr(,"package")
## [1] "Matrix"
dim(Jester5k@data)
## [1] 5000  100
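
Each registry entry also lists its tunable parameters. As a small sketch, the defaults for the UBCF method can be inspected from the registry list retrieved above:

# Default tuning parameters stored in the registry entry for UBCF
recommender_models$UBCF_realRatingMatrix$parameters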

We create a subset of the Jester dataset, keeping users with more than 50 ratings and jokes rated more than 100 times.

rating_jokes<- Jester5k[rowCounts(Jester5k) > 50,colCounts(Jester5k) > 100] 
rating_jokes
## 3875 x 100 rating matrix of class 'realRatingMatrix' with 314302 ratings.
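
As a quick sanity check on the subset (a sketch reusing rating_jokes from above), we can look at how many ratings the least active remaining user and the least rated remaining joke still have:

# Ratings held by the least active user and the least rated joke in the subset
min(rowCounts(rating_jokes))
min(colCounts(rating_jokes))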

5 Evaluate and Compare Models

Let’s split the dataset into training and test sets (80/20, with 10 ratings given per test user) and compare the UBCF, IBCF, RANDOM, POPULAR, and SVD models on rating prediction error.

e <- evaluationScheme(rating_jokes, method="split", train=0.8, k=1, given=10, goodRating=1 )
ubcf_rec <- Recommender(getData(e, "train"), "UBCF")
ibcf_rec <- Recommender(getData(e, "train"), "IBCF")
rand_rec <- Recommender(getData(e, "train"), "RANDOM")
pop_rec <- Recommender(getData(e, "train"), "POPULAR")
svd_rec <- Recommender(getData(e, "train"), "SVD")
ubcf_pred <- predict(ubcf_rec, getData(e, "known"), type="ratings")
ibcf_pred <- predict(ibcf_rec, getData(e, "known"), type="ratings")
rand_pred <- predict(rand_rec, getData(e, "known"), type="ratings")
pop_pred <- predict(pop_rec, getData(e, "known"), type="ratings")
svd_pred <- predict(svd_rec, getData(e, "known"), type="ratings")
errs <- rbind(
  ubcf = calcPredictionAccuracy(ubcf_pred, getData(e, "unknown")), 
  ibcf = calcPredictionAccuracy(ibcf_pred, getData(e, "unknown")), 
  rand = calcPredictionAccuracy(rand_pred, getData(e, "unknown")), 
  pop = calcPredictionAccuracy(pop_pred, getData(e, "unknown")),
  svd = calcPredictionAccuracy(svd_pred, getData(e, "unknown"))
)
kable(errs)
          RMSE       MSE       MAE
ubcf  4.628835  21.42612  3.610215
ibcf  5.523931  30.51381  4.248054
rand  6.358415  40.42943  4.904322
pop   4.581196  20.98736  3.579535
svd   4.751079  22.57275  3.738948
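
Since ggplot2 is already loaded, the error table can also be turned into a quick bar chart for visual comparison. A minimal sketch using the errs matrix computed above:

# Bar chart of RMSE by algorithm (sketch, built from the errs matrix above)
errs_df <- as.data.frame(errs)
errs_df$model <- rownames(errs_df)
ggplot(errs_df, aes(x = reorder(model, RMSE), y = RMSE)) +
  geom_col(fill = "turquoise3") +
  labs(x = "Algorithm", y = "RMSE", title = "Rating prediction error by algorithm")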

5.1 Top-N List Evaluation

algorithms <- list("Random" = list(name="RANDOM", param=NULL),
                   "Popular" = list(name="POPULAR", param=NULL),
                   "UBCF"= list(name="UBCF", param=list(nn=50)),
                   "IBCF"= list(name="IBCF", param=list(k=50)),
                   "SVD"= list(name="SVD")
                   )

results <- evaluate(e, algorithms, type = "topNList", n=c(1, 5, 10, 20, 30, 50))
## RANDOM run fold/sample [model time/prediction time]
##   1  [0.04sec/0.43sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0.08sec/1.94sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.07sec/4.25sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.33sec/0.28sec] 
## SVD run fold/sample [model time/prediction time]
##   1  [0.69sec/0.36sec]
plot(results, "prec/rec", annotate=c(1,3), legend="bottomright")

plot(results, "ROC", annotate=c(4, 3))

  • True Positive Rate (TPR): This is the percentage of purchased items that have been recommended. It’s the number of TP divided by the number of purchased items (TP + FN).

  • False Positive Rate (FPR): This is the percentage of not purchased items that have been recommended. It’s the number of FP divided by the number of not purchased items (FP + TN)

Two accuracy metrics are as follows (a short numeric check against these definitions follows the list):

  • Precision: This is the percentage of recommended items that have been purchased. It’s the number of TP divided by the total number of recommended items (TP + FP).

  • Recall: This is the percentage of purchased items that have been recommended. It’s the number of TP divided by the total number of purchases (TP + FN). It’s also equal to the True Positive Rate.
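
As a quick check against these definitions, precision and recall for the SVD run can be recomputed directly from the raw TP/FP/FN counts. A sketch, assuming the results object from the evaluation above:

# Recompute precision and recall from the raw confusion-matrix counts (SVD run)
cm_svd <- getConfusionMatrix(results$SVD)[[1]]
cbind(precision = cm_svd[, "TP"] / (cm_svd[, "TP"] + cm_svd[, "FP"]),
      recall    = cm_svd[, "TP"] / (cm_svd[, "TP"] + cm_svd[, "FN"]))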

kable(head(getConfusionMatrix(results$SVD)[1]))
 n          TP         FP        FN        TN  precision     recall        TPR        FPR
 1   0.6541935  0.3458065  36.45161  52.54839  0.6541935  0.0201930  0.0201930  0.0067701
 5   3.1651613  1.8348387  33.94065  51.05935  0.6330323  0.0957387  0.0957387  0.0352403
10   6.0877419  3.9122581  31.01806  48.98194  0.6087742  0.1804408  0.1804408  0.0729438
20  11.3122581  8.6877419  25.79355  44.20645  0.5656129  0.3294089  0.3294089  0.1594675
30  15.7522581 14.2477419  21.35355  38.64645  0.5250753  0.4460446  0.4460446  0.2610391
50  23.4206452 26.5793548  13.68516  26.31484  0.4684129  0.6451949  0.6451949  0.4909890
kable(head(getConfusionMatrix(results$Popular)[1]))
 n         TP         FP        FN        TN  precision     recall        TPR        FPR
 1   0.763871   0.236129  36.34194  52.65806  0.7638710  0.0235458  0.0235458  0.0040862
 5   3.646452   1.353548  33.45935  51.54065  0.7292903  0.1159756  0.1159756  0.0239099
10   7.109677   2.890323  29.99613  50.00387  0.7109677  0.2187956  0.2187956  0.0503608
20  12.872258   7.127742  24.23355  45.76645  0.6436129  0.3836385  0.3836385  0.1254410
30  17.295484  12.704516  19.81032  40.18968  0.5765161  0.4997104  0.4997104  0.2268141
50  24.254193  25.745807  12.85161  27.14839  0.4850839  0.6731215  0.6731215  0.4674098
kable(head(getConfusionMatrix(results$UBCF)[1]))
 n          TP         FP        FN        TN  precision     recall        TPR        FPR
 1   0.7174194  0.2825806  36.38839  52.61161  0.7174194  0.0214709  0.0214709  0.0049406
 5   3.5974194  1.4025806  33.50839  51.49161  0.7194839  0.1139464  0.1139464  0.0247390
10   6.9664516  3.0335484  30.13935  49.86065  0.6966452  0.2147957  0.2147957  0.0530635
20  12.8219355  7.1780645  24.28387  45.71613  0.6410968  0.3807074  0.3807074  0.1267463
30  17.2167742 12.7832258  19.88903  40.11097  0.5738925  0.4942407  0.4942407  0.2275813
50  24.2412903 25.7587097  12.86452  27.13548  0.4848258  0.6703262  0.6703262  0.4686155

5.2 Evaluation over a Larger Top-N Range

Let’s repeat the evaluation over a wider range of list lengths (n = 10 to 100), wrapping the call in suppressWarnings() to keep the output clean.

eval_results <- suppressWarnings(evaluate(x =e , method = algorithms, n = seq(10, 100, 10)))
## RANDOM run fold/sample [model time/prediction time]
##   1  [0sec/0.34sec] 
## POPULAR run fold/sample [model time/prediction time]
##   1  [0.05sec/2.28sec] 
## UBCF run fold/sample [model time/prediction time]
##   1  [0.05sec/4.24sec] 
## IBCF run fold/sample [model time/prediction time]
##   1  [0.3sec/0.28sec] 
## SVD run fold/sample [model time/prediction time]
##   1  [0.3sec/0.2sec]
plot(eval_results, "prec/rec", annotate = TRUE)
title("Precision-recall")

plot(eval_results, annotate = 1, legend = "topleft") 
title("ROC curve")

6 Artificial variation

Let’s introduce some artificial variation: a second recommender engine is trained on a perturbed copy of the ratings, in which each stored rating is randomly shifted by up to 2 points, as a complementary pairing to the original data set.

set.seed(223)

# Partially randomize the data: shift each value by a uniform(0, 2) amount, added or subtracted at random
'%rand%' <- function(x, y) {x + sample(c(1, -1), length(y), replace = TRUE) * y}
Jester.random <-  Jester5k
Jester.random@data@x<-sapply(Jester.random@data@x, function(x){x %rand% runif(1, 0, 2) })
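
A quick sanity check (sketch) that the perturbation behaves as intended, i.e. that every stored rating moved by at most 2 points in either direction:

# Differences between perturbed and original ratings should lie within [-2, 2]
summary(Jester.random@data@x - Jester5k@data@x)

Note that the perturbed ratings themselves may drift slightly outside the original -10/+10 range, which is why the predicted ratings are clamped to that range below.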

e.2 <- evaluationScheme(Jester.random, method = "split",train = 0.8, given = 30, goodRating = 3, k=5)
Rec.model.2 <- Recommender(getData(e.2, "train"), "POPULAR")

# Make predictions, targeting the known ratings of the original (unperturbed) evaluation scheme e
prediction.2 <- predict(Rec.model.2, getData(e, "known"), type="ratings",n=10)

# set all predictions that fall outside the valid range to the boundary values
prediction.2@data@x[prediction.2@data@x[] < -10] <- -10
prediction.2@data@x[prediction.2@data@x[] > 10] <- 10

calcPredictionAccuracy(prediction.2, getData(e, "unknown"))
##      RMSE       MSE       MAE 
##  4.579972 20.976148  3.580440

7 Comparison

The RMSE of the POPULAR model trained on the perturbed data (4.58) is nearly identical to that of the POPULAR model trained on the original data (4.58), so the injected variation costs essentially nothing in rating accuracy on this dataset.
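
For reference, the two variants can be placed side by side; a minimal sketch reusing objects computed above:

# Side-by-side accuracy: POPULAR trained on the original data vs. on the perturbed copy,
# both scored against the same held-out ratings
rbind(original  = errs["pop", ],
      perturbed = calcPredictionAccuracy(prediction.2, getData(e, "unknown")))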