sparklyr ALS simple examples

Prepare data

First, prepare a data frame on Spark.

# Install sparklyr from the `implicit-als` branch of the chezou/sparklyr fork.
# NOTE(review): presumably this branch provides the `implicit.preferences`
# option used further down; confirm before switching to a released sparklyr.
devtools::install_github(
  repo = "chezou/sparklyr",
  ref = "implicit-als"
)

## Skipping install of 'sparklyr' from a github remote, the SHA1 (a7b82d93) has not changed since last install.
##   Use `force = TRUE` to force installation

library(sparklyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Open a connection to a local Spark instance.
sc <- spark_connect(master = "local")

# Toy ratings data: position i of each vector forms one
# (user, item, rating) triple.
user <- c(0, 0, 1, 1, 2, 2)
item <- c(0, 1, 1, 2, 1, 2)
rating <- c(4.0, 2.0, 3.0, 4.0, 1.0, 5.0)

df <- data.frame(user = user, item = item, rating = rating)

# Copy the local data frame to Spark under the table name "movie_rating"
# (replacing any existing table of that name), then print the Spark table.
movie_ratings <- sdf_copy_to(sc, df, "movie_rating", overwrite = TRUE)
movie_ratings

## Source:   query [6 x 3]
## Database: spark connection master=local[8] app=sparklyr local=TRUE
## 
##    user  item rating
##   <dbl> <dbl>  <dbl>
## 1     0     0      4
## 2     0     1      2
## 3     1     1      3
## 4     1     2      4
## 5     2     1      1
## 6     2     2      5

Then, fit an explicit-feedback model with ALS.

# Fit an ALS factorization on the ratings table with explicit feedback
# (the default), passing iter.max = 5 and a regularization parameter of 0.01,
# then show the structure of the fitted model object.
explicit_model <- ml_als_factorization(
  movie_ratings,
  iter.max = 5,
  regularization.parameter = 0.01
)
summary(explicit_model)

##                  Length Class      Mode       
## item.factors     11     data.frame list       
## user.factors     11     data.frame list       
## data              2     spark_jobj environment
## ml.options        6     ml_options list       
## model.parameters  2     -none-     list       
## .call             4     -none-     call       
## .model            2     spark_jobj environment

# Score the training ratings with the explicit model: invoke the underlying
# Spark model's `transform` method on the Spark DataFrame behind
# `movie_ratings`, then collect the result into R.
predictions <- collect(
  invoke(explicit_model$.model, "transform", spark_dataframe(movie_ratings))
)

predictions

## # A tibble: 6 × 4
##    user  item rating prediction
##   <dbl> <dbl>  <dbl>      <dbl>
## 1     0     0      4   3.994546
## 2     0     1      2   2.000021
## 3     1     1      3   2.995667
## 4     2     1      1   1.002447
## 5     1     2      4   3.998286
## 6     2     2      5   4.994939

Of course, you can also train a model on implicit feedback.

# Fit the same ALS factorization, this time treating the ratings as implicit
# preferences (implicit.preferences = TRUE) with alpha = 1.0, then show the
# structure of the fitted model object.
implicit_model <- ml_als_factorization(
  movie_ratings,
  iter.max = 5,
  regularization.parameter = 0.01,
  implicit.preferences = TRUE,
  alpha = 1.0
)
summary(implicit_model)

##                  Length Class      Mode       
## item.factors     11     data.frame list       
## user.factors     11     data.frame list       
## data              2     spark_jobj environment
## ml.options        6     ml_options list       
## model.parameters  2     -none-     list       
## .call             6     -none-     call       
## .model            2     spark_jobj environment

# Score the training ratings with the implicit model: invoke the underlying
# Spark model's `transform` method on the Spark DataFrame behind
# `movie_ratings`, then collect the result into R.
implicit_predictions <- collect(
  invoke(implicit_model$.model, "transform", spark_dataframe(movie_ratings))
)

implicit_predictions

## # A tibble: 6 × 4
##    user  item rating prediction
##   <dbl> <dbl>  <dbl>      <dbl>
## 1     0     0      4  0.9974966
## 2     0     1      2  0.9980122
## 3     1     1      3  0.9990543
## 4     2     1      1  0.9985161
## 5     1     2      4  0.9976761
## 6     2     2      5  0.9979272

If you want non-negative least squares, add the `nonnegative = TRUE` option.

# Fit the explicit ALS factorization again with nonnegative = TRUE, then show
# the structure of the fitted model object.
nn_model <- ml_als_factorization(
  movie_ratings,
  iter.max = 5,
  regularization.parameter = 0.01,
  nonnegative = TRUE
)
summary(nn_model)

##                  Length Class      Mode       
## item.factors     11     data.frame list       
## user.factors     11     data.frame list       
## data              2     spark_jobj environment
## ml.options        6     ml_options list       
## model.parameters  2     -none-     list       
## .call             5     -none-     call       
## .model            2     spark_jobj environment

# Score the training ratings with the non-negative model: invoke the
# underlying Spark model's `transform` method on the Spark DataFrame behind
# `movie_ratings`, then collect the result into R.
nn_predictions <- nn_model$.model %>%
  invoke("transform", spark_dataframe(movie_ratings)) %>%
  collect()

nn_predictions

## # A tibble: 6 × 4
##    user  item rating prediction
##   <dbl> <dbl>  <dbl>      <dbl>
## 1     0     0      4   3.994892
## 2     0     1      2   2.000874
## 3     1     1      3   2.996330
## 4     2     1      1   1.003765
## 5     1     2      4   3.998448
## 6     2     2      5   4.994483

# Release the Spark connection opened at the start of the example now that
# all results have been collected into R.
spark_disconnect(sc)

sparklyr ALS simple examples

Aki Ariga

3/20/2017

Prepare data