First, prepare a data frame on Spark.
# Install sparklyr from the `implicit-als` ref of the chezou/sparklyr GitHub
# repository.
devtools::install_github(repo = "chezou/sparklyr", ref = "implicit-als")
## Skipping install of 'sparklyr' from a github remote, the SHA1 (a7b82d93) has not changed since last install.
## Use `force = TRUE` to force installation
library(sparklyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Open a connection to a local Spark instance.
sc <- spark_connect(master = "local")

# Toy ratings: position i of the three vectors below forms one
# (user, item, rating) row.
user <- c(0, 0, 1, 1, 2, 2)
item <- c(0, 1, 1, 2, 1, 2)
rating <- c(4.0, 2.0, 3.0, 4.0, 1.0, 5.0)
df <- data.frame(user = user, item = item, rating = rating)

# Copy the local data frame into Spark under the name "movie_rating"
# (overwriting a previous copy, if any) and print the Spark-backed table.
movie_ratings <- sdf_copy_to(sc, df, "movie_rating", overwrite = TRUE)
movie_ratings
## Source: query [6 x 3]
## Database: spark connection master=local[8] app=sparklyr local=TRUE
##
## user item rating
## <dbl> <dbl> <dbl>
## 1 0 0 4
## 2 0 1 2
## 3 1 1 3
## 4 1 2 4
## 5 2 1 1
## 6 2 2 5
Then, create an explicit model with ALS.
# Fit an ALS factorization on the explicit ratings (5 iterations,
# regularization 0.01) and list the components of the fitted object.
explicit_model <- ml_als_factorization(
  movie_ratings,
  iter.max = 5,
  regularization.parameter = 0.01
)
summary(explicit_model)
## Length Class Mode
## item.factors 11 data.frame list
## user.factors 11 data.frame list
## data 2 spark_jobj environment
## ml.options 6 ml_options list
## model.parameters 2 -none- list
## .call 4 -none- call
## .model 2 spark_jobj environment
# Score the training ratings: call `transform` on the Spark-side model object
# stored in `$.model`, passing the Spark DataFrame behind `movie_ratings`,
# then pull the result back into R and print it.
predictions <- invoke(
  explicit_model$.model,
  "transform",
  spark_dataframe(movie_ratings)
) %>%
  collect()
predictions
## # A tibble: 6 × 4
## user item rating prediction
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 4 3.994546
## 2 0 1 2 2.000021
## 3 1 1 3 2.995667
## 4 2 1 1 1.002447
## 5 1 2 4 3.998286
## 6 2 2 5 4.994939
Of course, you can also train a model on implicit preferences.
# Fit ALS on the same ratings with `implicit.preferences = TRUE` and
# `alpha = 1.0` (5 iterations, regularization 0.01), then list the components
# of the fitted object.
implicit_model <- ml_als_factorization(
  movie_ratings,
  iter.max = 5,
  regularization.parameter = 0.01,
  implicit.preferences = TRUE,
  alpha = 1.0
)
summary(implicit_model)
## Length Class Mode
## item.factors 11 data.frame list
## user.factors 11 data.frame list
## data 2 spark_jobj environment
## ml.options 6 ml_options list
## model.parameters 2 -none- list
## .call 6 -none- call
## .model 2 spark_jobj environment
# Score the training ratings with the implicit model: call `transform` on the
# Spark-side model object in `$.model`, collect the result into R, and print.
implicit_predictions <- invoke(
  implicit_model$.model,
  "transform",
  spark_dataframe(movie_ratings)
) %>%
  collect()
implicit_predictions
## # A tibble: 6 × 4
## user item rating prediction
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 4 0.9974966
## 2 0 1 2 0.9980122
## 3 1 1 3 0.9990543
## 4 2 1 1 0.9985161
## 5 1 2 4 0.9976761
## 6 2 2 5 0.9979272
If you want non-negative least squares, you can add the `nonnegative = TRUE` option.
# Fit ALS on the same ratings with `nonnegative = TRUE` (5 iterations,
# regularization 0.01), then list the components of the fitted object.
nn_model <- ml_als_factorization(
  movie_ratings,
  iter.max = 5,
  regularization.parameter = 0.01,
  nonnegative = TRUE
)
summary(nn_model)
## Length Class Mode
## item.factors 11 data.frame list
## user.factors 11 data.frame list
## data 2 spark_jobj environment
## ml.options 6 ml_options list
## model.parameters 2 -none- list
## .call 5 -none- call
## .model 2 spark_jobj environment
# Score the training ratings with the non-negative model: call `transform` on
# the Spark-side model object in `$.model`, collect the result into R, and
# print.
nn_predictions <- invoke(
  nn_model$.model,
  "transform",
  spark_dataframe(movie_ratings)
) %>%
  collect()
nn_predictions
## # A tibble: 6 × 4
## user item rating prediction
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 4 3.994892
## 2 0 1 2 2.000874
## 3 1 1 3 2.996330
## 4 2 1 1 1.003765
## 5 1 2 4 3.998448
## 6 2 2 5 4.994483