# Install recosystem on first use. requireNamespace() checks this one package
# directly instead of building the full installed.packages() matrix.
if (!requireNamespace("recosystem", quietly = TRUE)) {
  install.packages("recosystem")
}
library(recosystem)
library(SVDApproximation)
## Warning: replacing previous import 'data.table::melt' by 'reshape2::melt'
## when loading 'SVDApproximation'
## Warning: replacing previous import 'data.table::dcast' by 'reshape2::dcast'
## when loading 'SVDApproximation'
library(data.table)
# Hold out a random subset of the ratings as a test set for the
# matrix-factorization model. The seed is fixed so the split is reproducible.
set.seed(1)
# Hold-out size: five times 20% of the number of distinct users.
n_test <- round(0.2 * length(unique(ratings$user)), 0) * 5
in_train <- rep(TRUE, nrow(ratings))
# seq_len() stays empty for a zero-row table, where 1:nrow() would be c(1, 0).
in_train[sample(seq_len(nrow(ratings)), size = n_test)] <- FALSE
# Single-argument logical subsetting selects rows here
# (assumes `ratings` is a data.table -- TODO confirm).
ratings_train <- ratings[(in_train)]
ratings_test <- ratings[(!in_train)]
# Export both subsets as space-separated text without headers or row names;
# these files are read back through data_file() further down.
write.table(ratings_train, file = "trainset.txt", sep = " ", row.names = FALSE, col.names = FALSE)
write.table(ratings_test, file = "testset.txt", sep = " ", row.names = FALSE, col.names = FALSE)
# Point recosystem at the exported train/test files (both flagged with
# index1 = TRUE) and create the model object used for training and prediction.
train_data <- data_file("trainset.txt", index1 = TRUE)
test_data <- data_file("testset.txt", index1 = TRUE)
r <- Reco()
# opts <- r$tune(train_data,
# opts = list(dim = c(1:20),
# lrate = c(0.05),
# nthread = 4,
# costp_l1 = c(0, 0.1),
# costp_l2 = c(0.01, 0.1),
# costq_l1 = c(0, 0.1),
# costq_l2 = c(0.01, 0.1),
# niter = 200,
# nfold = 10,
# verbose = FALSE))
# save(opts, file = 'opts.RData')
# Restore the tuning result saved by the commented-out r$tune() step above.
# load() puts `opts` straight into the workspace; attach() would add a
# search-path entry that any existing global `opts` silently masks.
load("opts.RData")
# Train with the best tuned parameters (opts$min) plus fixed run settings.
r$train(train_data, opts = c(opts$min, nthread = 4, niter = 500, verbose = FALSE))
# Score the held-out ratings; predictions are written to a temporary file.
out_pred <- out_file(tempfile())
r$predict(test_data, out_pred)
## prediction output generated at C:\Users\10121760\AppData\Local\Temp\RtmpCKaeXU\file16cc4519856a5
# Observed values are the third column of the exported test file
# (presumably the rating column -- verify against the `ratings` layout).
test_table <- read.table("testset.txt", header = FALSE, sep = " ")
scores_real <- test_table[["V3"]]
scores_pred <- scan(out_pred@dest)
# Root mean squared error between observed and predicted scores.
squared_error <- (scores_real - scores_pred)^2
rmse_mf <- sqrt(mean(squared_error))
rmse_mf
## [1] 0.8446387
# Predict a rating for every (user, movie) combination of ids 1..20,
# keeping the results in memory instead of writing them to a file.
user <- seq_len(20)
movie <- seq_len(20)
pred <- expand.grid(user = user, movie = movie)
test_set <- data_memory(pred[["user"]], pred[["movie"]], index1 = TRUE)
pred[["rating"]] <- r$predict(test_set, out_memory())
library(ggplot2)
# Heat map of the predicted ratings: one tile per (movie, user) pair,
# shaded from light to dark green as the predicted rating increases.
ggplot(pred, aes(x = movie, y = user, fill = rating)) +
  geom_raster() +
  scale_fill_gradient(name = "Rating", low = "#d6e685", high = "#1e6823") +
  labs(x = "Movie ID", y = "User ID") +
  coord_fixed() +
  theme_bw(base_size = 22)
# Install SlopeOne from GitHub on first use, then attach it.
if (!requireNamespace("SlopeOne", quietly = TRUE)) {
  # install_github() comes from devtools, which is not attached in the visible
  # part of this script, so make sure it is installed and call it qualified.
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  # The "owner/repo" form replaces the separate `username` argument, which
  # current devtools releases no longer accept.
  devtools::install_github("tarashnot/SlopeOne")
}
library(SlopeOne)
# Prepare the ratings for SlopeOne: rename the columns, convert the user and
# item ids to character, and key the table on (user_id, item_id).
names(ratings) <- c("user_id", "item_id", "rating")
ratings <- data.table(ratings)
ratings[, user_id := as.character(user_id)]
ratings[, item_id := as.character(item_id)]
setkey(ratings, user_id, item_id)
# Re-draw the train/test split with the same seed and hold-out size rule
# (five times 20% of the number of distinct users) on the keyed table.
set.seed(1)
n_test <- round(0.2 * length(unique(ratings$user_id)), 0) * 5
in_train <- rep(TRUE, nrow(ratings))
# seq_len() stays empty for a zero-row table, where 1:nrow() would be c(1, 0).
in_train[sample(seq_len(nrow(ratings)), size = n_test)] <- FALSE
ratings_train <- ratings[(in_train)]
ratings_test <- ratings[(!in_train)]
# Normalize the training ratings; the result's `$ratings` element is what the
# commented-out model-building code below consumes.
ratings_train_norm <- normalize_ratings(ratings_train)
# model <- build_slopeone(ratings_train_norm$ratings)
#
# predictions <- predict_slopeone(model,
# ratings_test[ , c(1, 2), with = FALSE],
# ratings_train_norm$ratings)
# unnormalized_predictions <- unnormalize_ratings(normalized = ratings_train_norm,
# ratings = predictions)
#
# rmse_slopeone <- sqrt(mean((unnormalized_predictions$predicted_rating - ratings_test$rating) ^ 2))
# rmse_slopeone
# I learned more recommender system algorithms in this project: SVD, ALS, Slope One, and Matrix Factorization with GD. Some results of the models were compared.