Matrix Factorization with GD

# Install recosystem on first use. requireNamespace() checks for this one
# package directly; installed.packages() reads metadata for every installed
# package and its documentation advises against using it for this purpose.
if (!requireNamespace("recosystem", quietly = TRUE)) {
  install.packages("recosystem")
}
library(recosystem)
library(SVDApproximation)
## Warning: replacing previous import 'data.table::melt' by 'reshape2::melt'
## when loading 'SVDApproximation'
## Warning: replacing previous import 'data.table::dcast' by 'reshape2::dcast'
## when loading 'SVDApproximation'
library(data.table)

# Hold out a random subset of the ratings for testing; the seed is fixed so
# the split is reproducible.
set.seed(1)

# Test-set size: five times 20% of the number of distinct users (rounded).
n_test <- round(0.2 * length(unique(ratings$user)), 0) * 5

# Logical mask over rows: TRUE = training row, FALSE = held-out test row.
# seq_len() is used instead of 1:nrow() so an empty table cannot yield c(1, 0).
in_train <- rep(TRUE, nrow(ratings))
in_train[sample(seq_len(nrow(ratings)), size = n_test)] <- FALSE

# NOTE(review): single-index logical subsetting selects rows for a data.table
# but columns for a plain data.frame -- confirm `ratings` is a data.table here.
ratings_train <- ratings[(in_train)]
ratings_test <- ratings[(!in_train)]

# Export both sets as space-separated text without header or row names;
# these files are read back later in the script.
write.table(ratings_train, file = "trainset.txt", sep = " ", row.names = FALSE, col.names = FALSE)
write.table(ratings_test, file = "testset.txt", sep = " ", row.names = FALSE, col.names = FALSE)

# Create the recosystem model object used for training and prediction.
r <- Reco()

# Describe the on-disk train/test files as recosystem data sources.
# index1 = TRUE is passed for both (per the recosystem docs this marks the
# user/item ids as starting at 1 rather than 0).
test_data <- data_file("testset.txt", index1 = TRUE)
train_data <- data_file("trainset.txt", index1 = TRUE)

# Hyper-parameter search, kept for reference. It was run once and its result
# saved to opts.RData (presumably because the grid search is expensive);
# uncomment both statements to regenerate the cache.
# opts <- r$tune(train_data, 
#                 opts = list(dim = c(1:20), 
#                             lrate = c(0.05),
#                             nthread = 4, 
#                             costp_l1 = c(0, 0.1),
#                             costp_l2 = c(0.01, 0.1),
#                             costq_l1 = c(0, 0.1),
#                             costq_l2 = c(0.01, 0.1), 
#                             niter = 200, 
#                             nfold = 10, 
#                             verbose = FALSE))

# save(opts, file = 'opts.RData')

# Restore the cached tuning result. load() assigns `opts` in the current
# environment; attach() would only add the file to the search path, where an
# `opts` already present in the workspace would silently mask the cached one.
load("opts.RData")

# Train with the best parameter set found by tuning (opts$min), adding the
# run-time settings for this fit.
r$train(train_data, opts = c(opts$min, nthread = 4, niter = 500, verbose = FALSE))

# Send predictions for the held-out ratings to a temporary file.
out_pred <- out_file(tempfile())
r$predict(test_data, out_pred)
## prediction output generated at C:\Users\10121760\AppData\Local\Temp\RtmpCKaeXU\file16cc4519856a5

# Predictions are read back from the path held in the output object's `dest`
# slot; observed ratings are the third column of the exported test file.
scores_pred <- scan(out_pred@dest)
scores_real <- read.table("testset.txt", header = FALSE, sep = " ")[["V3"]]

# Root-mean-squared error of the matrix-factorization predictions.
rmse_mf <- sqrt(mean((scores_pred - scores_real)^2))
rmse_mf
## [1] 0.8446387
# Build every (user, movie) combination for ids 1 through 20.
user <- seq_len(20L)
movie <- seq_len(20L)
pred <- expand.grid(user = user, movie = movie)

# Predict a rating for each pair, passing the ids and collecting the result
# in memory instead of going through files.
test_set <- data_memory(pred[["user"]], pred[["movie"]], index1 = TRUE)
pred[["rating"]] <- r$predict(test_set, out_memory())

# Heat map of the predicted ratings: movies on x, users on y, fill = rating.
library(ggplot2)
ggplot(pred, aes(x = movie, y = user, fill = rating)) +
  geom_raster() +
  scale_fill_gradient(name = "Rating", low = "#d6e685", high = "#1e6823") +
  labs(x = "Movie ID", y = "User ID") +
  coord_fixed() +
  theme_bw(base_size = 22)

Slope One

# Install SlopeOne from GitHub on first use.
# - requireNamespace() checks for this one package without scanning the whole
#   library the way installed.packages() does.
# - install_github() is called through its namespace so it works even when
#   devtools is not attached, and the repository is given in the
#   "owner/repo" form; the separate `username` argument is no longer
#   accepted by current devtools releases.
if (!requireNamespace("SlopeOne", quietly = TRUE)) {
  devtools::install_github("tarashnot/SlopeOne")
}
library(SlopeOne)

# Rename the rating columns to user_id / item_id / rating and work on a
# data.table copy.
names(ratings) <- c("user_id", "item_id", "rating")
ratings <- data.table(ratings)

# Store both identifiers as character.
ratings[, user_id := as.character(user_id)]
ratings[, item_id := as.character(item_id)]

# Key (and therefore sort) the table by user and item.
setkey(ratings, user_id, item_id)

# Reproducible train/test split using the same seed and size formula as the
# matrix-factorization section.
# NOTE(review): setkey() has reordered the rows, so the same sampled positions
# need not refer to the same ratings as in the earlier split -- confirm the
# two RMSE figures are meant to be compared across different test sets.
set.seed(1)

# Test-set size: five times 20% of the number of distinct users (rounded).
n_test <- round(0.2 * length(unique(ratings$user_id)), 0) * 5

# seq_len() is used instead of 1:nrow() so an empty table cannot yield c(1, 0).
in_train <- rep(TRUE, nrow(ratings))
in_train[sample(seq_len(nrow(ratings)), size = n_test)] <- FALSE

ratings_train <- ratings[(in_train)]
ratings_test <- ratings[(!in_train)]

# Normalize the training ratings before building the Slope One model.
ratings_train_norm <- normalize_ratings(ratings_train)

# model <- build_slopeone(ratings_train_norm$ratings)
# 
# predictions <- predict_slopeone(model, 
#                                 ratings_test[ , c(1, 2), with = FALSE], 
#                                 ratings_train_norm$ratings)
# unnormalized_predictions <- unnormalize_ratings(normalized = ratings_train_norm, 
#                                                 ratings = predictions)
# 
# rmse_slopeone <- sqrt(mean((unnormalized_predictions$predicted_rating - ratings_test$rating) ^ 2))
# rmse_slopeone

Summary of findings and recommendations

In this project I explored several recommender system algorithms (SVD, ALS, Slope One, and Matrix Factorization with gradient descent (GD)) and compared the results of some of the models.

References:

https://rpubs.com/tarashnot/recommender_comparison

https://www.r-bloggers.com/recosystem-recommender-system-using-parallel-matrix-factorization/