mtcars %>% lm(mpg ~ wt + cyl, .)

# Sample
mtcars %>% dplyr::sample_n(10) %>% lm(mpg ~ wt + cyl, .)

# Profile
profvis::profvis(mtcars %>% lm(mpg ~ wt + cyl, .))

# Scale Up
cloudml::cloudml_train("train.R")

# Scale Out
mtcars_tbl %>% sparklyr::ml_linear_regression(mpg ~ wt + cyl)
Note: There are many more ways to sample, scale up, and scale out; one Spark-side sampling sketch follows below.
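For instance, once the data already lives in Spark, the sample itself can be drawn there rather than in R. A minimal sketch, assuming sc is an open Spark connection and mtcars_tbl <- copy_to(sc, mtcars); the fraction and seed are illustrative:

mtcars_tbl %>%
  sparklyr::sdf_sample(fraction = 0.3, replacement = FALSE, seed = 100) %>%  # Sample inside Spark
  sparklyr::ml_linear_regression(mpg ~ wt + cyl)                             # Fit on the sample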
library(sparklyr)                       # R Interface to Apache Spark
library(dplyr)                          # For summarize() below
library(DBI)                            # For dbGetQuery() below

spark_install()                         # Install Apache Spark
sc <- spark_connect(master = "local")   # Connect to Spark cluster

cars_tbl <- spark_read_csv(sc, "cars", "mtcars/")   # Read data in Spark

summarize(cars_tbl, n = n())                        # Count records with dplyr
dbGetQuery(sc, "SELECT count(*) FROM cars")         # Count records with DBI

ml_linear_regression(cars_tbl, mpg ~ wt + cyl)      # Perform linear regression

pipeline <- ml_pipeline(sc) %>%        # Define Spark pipeline
  ft_r_formula(mpg ~ wt + cyl) %>%     # Add formula transformation
  ml_linear_regression()               # Add model to pipeline

fitted <- ml_fit(pipeline, cars_tbl)   # Fit pipeline

spark_context(sc) %>% invoke("version")   # Extend sparklyr with Scala
spark_apply(cars_tbl, nrow)               # Extend sparklyr with R
cars_str <- stream_read_csv(sc, "mtcars/", "cars")       # Read stream in Spark

out_str <- summarize(cars_str, n = n())                  # Count records with dplyr
out_str <- dbGetQuery(sc, "SELECT count(*) FROM cars")   # Count records with DBI

out_str <- ml_transform(fitted, cars_str)                # Apply pipeline to stream

out_str <- spark_apply(cars_str, nrow)                   # Extend streams with R

stream_write_csv(out_str, "output/")                     # Write as a CSV stream
reactiveSpark(out_str)                                   # Use as a Shiny reactive
Apache Kafka is an open-source stream-processing platform that provides unified, high-throughput, low-latency handling of real-time data feeds.
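A hedged sketch of wiring Kafka into the streams above with sparklyr's stream_read_kafka() and stream_write_kafka(); the broker address and the topic names "cars-in" and "cars-out" are placeholders, not from the deck:

kafka_str <- stream_read_kafka(
  sc,
  options = list(
    kafka.bootstrap.servers = "localhost:9092",   # Placeholder broker address
    subscribe = "cars-in"                         # Placeholder topic to consume
  )
)

stream_write_kafka(
  kafka_str,
  options = list(
    kafka.bootstrap.servers = "localhost:9092",
    topic = "cars-out"                            # Placeholder topic to produce to
  )
)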
Streams, MLeap, Kubernetes and RStudio 1.2 integration.
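As a hedged sketch of the MLeap piece (via the rstudio/mleap package; argument names may differ across versions), the fitted pipeline from earlier could be exported as an MLeap bundle for serving outside Spark:

library(mleap)
ml_write_bundle(fitted, sample_input = cars_tbl, path = "cars_model.zip")   # Export pipeline as an MLeap bundle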
Arrow is a cross-language development platform for in-memory data.
devtools::install_github("apache/arrow", subdir = "r", ref = "dc5df8f")
devtools::install_github("rstudio/sparklyr")

library(arrow)
library(sparklyr)
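A minimal sketch of what this buys, assuming a local connection: with arrow attached, sparklyr routes data transfers such as copy_to(), collect() and spark_apply() through Arrow transparently, which is considerably faster for larger data frames.

sc <- spark_connect(master = "local")                        # Connect as before
big_tbl <- copy_to(sc, data.frame(y = runif(10^6)), "big")   # Transfer serialized via Arrow
spark_apply(big_tbl, nrow)                                   # R closures also move data via Arrow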
sparkxgb is a sparklyr extension that provides an interface to XGBoost on Spark.
devtools::install_github("rstudio/sparkxgb")
library(sparkxgb)
iris_tbl <- sdf_copy_to(sc, iris)

xgb_model <- xgboost_classifier(
  iris_tbl,
  Species ~ .,
  objective = "multi:softprob",
  num_class = 3,
  num_round = 50,
  max_depth = 4
)

xgb_model %>% ml_predict(iris_tbl) %>% glimpse()