Matrix Factorization with Gradient Descent
# Install recosystem only when it is not already available.
# requireNamespace() looks up this one package directly, whereas
# installed.packages() scans every installed package and is documented as
# unsuitable for checking whether a single named package is present.
if (!requireNamespace("recosystem", quietly = TRUE)) {
  install.packages("recosystem")
}
library(recosystem)
library(SVDApproximation)
## Warning: replacing previous import 'data.table::melt' by 'reshape2::melt'
## when loading 'SVDApproximation'
## Warning: replacing previous import 'data.table::dcast' by 'reshape2::dcast'
## when loading 'SVDApproximation'
# Split `ratings` into a training set and a held-out test set, then write both
# to disk as plain text files.
set.seed(1)  # fixed seed so the random split is reproducible
n_ratings <- nrow(ratings)
# Test-set size: round(20% of the number of distinct users) multiplied by 5.
n_test <- round(0.2 * length(unique(ratings$user)), 0) * 5
in_train <- rep(TRUE, n_ratings)
# seq_len() avoids the 1:0 pitfall of 1:nrow() when the table is empty.
in_train[sample(seq_len(n_ratings), size = n_test)] <- FALSE
# Explicit row indexing (`[rows, ]`) selects rows for both data.table and
# data.frame inputs; a bare `[mask]` would select columns on a data.frame.
ratings_train <- ratings[in_train, ]
ratings_test <- ratings[!in_train, ]
# Space-separated, no header and no row names; these files are read back
# with data_file() further down.
write.table(ratings_train, file = "trainset.txt", sep = " ",
            row.names = FALSE, col.names = FALSE)
write.table(ratings_test, file = "testset.txt", sep = " ",
            row.names = FALSE, col.names = FALSE)
# Recommender object through which the later train/predict calls are made.
r <- Reco()
# Data sources backed by the rating files written to disk;
# index1 = TRUE tells recosystem that user/item ids start at 1.
test_data <- data_file("testset.txt", index1 = TRUE)
train_data <- data_file("trainset.txt", index1 = TRUE)
# Tune the model and select the best tuning parameters.
# The grid search is kept commented out; the tuning result stored in
# "opts.RData" is restored from disk instead.
# opts <- r$tune(train_data,
#                opts = list(dim = c(1:20),
#                            lrate = c(0.05),
#                            nthread = 4,
#                            costp_l1 = c(0, 0.1),
#                            costp_l2 = c(0.01, 0.1),
#                            costq_l1 = c(0, 0.1),
#                            costq_l2 = c(0.01, 0.1),
#                            niter = 200,
#                            nfold = 10,
#                            verbose = FALSE))
# save(opts, file = "opts.RData")
# load() is the counterpart of save(): it restores `opts` into the workspace.
# attach() would only add the file to the search path, where an `opts` that
# already exists in the workspace would silently mask the saved one.
load("opts.RData")
# Fit the model with the selected tuning parameters plus fixed run settings.
train_opts <- c(opts$min, nthread = 4, niter = 500, verbose = FALSE)
r$train(train_data, opts = train_opts)
# Write the test-set predictions to a temporary file.
out_pred <- out_file(tempfile())
r$predict(test_data, out_pred)
## prediction output generated at C:\Users\10121760\AppData\Local\Temp\Rtmp6z72J5\file13d544d775d41
# Root-mean-square error between the third column of the test file and the
# predictions read back from the output file.
scores_pred <- scan(out_pred@dest)
scores_real <- read.table("testset.txt", header = FALSE, sep = " ")[["V3"]]
squared_error <- (scores_real - scores_pred)^2
rmse_mf <- sqrt(mean(squared_error))
rmse_mf
## [1] 0.8452224
# Predict a rating for every pairing of user ids 1-20 with movie ids 1-20.
user <- seq_len(20)
movie <- seq_len(20)
pred <- expand.grid(user = user, movie = movie)
# In-memory data source and in-memory output, so no files are involved here.
test_set <- data_memory(pred$user, pred$movie, index1 = TRUE)
pred$rating <- r$predict(test_set, out_memory())
library(ggplot2)
# Heat map of the predicted ratings: movies on x, users on y, colour = rating.
ggplot(pred, aes(x = movie, y = user, fill = rating)) +
  geom_raster() +
  scale_fill_gradient("Rating", low = "#d6e685", high = "#1e6823") +
  labs(x = "Movie ID", y = "User ID") +
  coord_fixed() +
  theme_bw(base_size = 22)

Summary and Findings:
- Recosystem was introduced by Yu-Chin Juan, Wei-Sheng Chin, Yong Zhuang, Bo-Wen Yuan, Meng-Yuan Yang, and Chih-Jen Lin
- Built on top of LIBMF
- Parallel Matrix Factorization
- Stores the model on the hard disk to reduce memory use
- Create data source with data_file()