# library(reticulate)
library(sparklyr)
##
## Attaching package: 'sparklyr'
## The following object is masked from 'package:stats':
##
## filter
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Installation step for Spark 3.3.0, left commented out (presumably already
# run once on this machine — uncomment if no local install exists).
# spark_install(version = "3.3.0")
# Open a connection to a local Spark master, requesting the same Spark version
# as the install line above, and keep the handle in `sc` for the later calls.
sc <- spark_connect(master = "local", version = "3.3.0")
## Warning in sprintf(versions$pattern, version$spark, version$hadoop): 2 arguments
## not used by format 'spark-3.3.0-bin-hadoop2'
# Alternative connection targets (other `master` values), kept for reference
# and disabled here; only the local connection above is actually used.
# sc <- spark_connect(master = "yarn")
# sc <- spark_connect(master = "mesos://host:port")
# sc <- spark_connect(master = "k8s://https://server")
# sc <- spark_connect(master = "http://server/livy", method = "livy", version = "2.4.3")
# sc <- spark_connect(master = "spark://master-url:7077")
# Check that the connection in `sc` is open before using it.
connection_is_open(sc)
## [1] TRUE
# Copy the `mtcars` data frame into Spark under the table name "tbl_mtcars";
# `overwrite = TRUE` lets the script be re-run when that table already exists.
tbl_mtcars <- copy_to(
  dest = sc,
  df = mtcars,
  name = "tbl_mtcars",
  overwrite = TRUE
)
# Print the remote table reference returned by `copy_to()`.
tbl_mtcars
## Warning in sdf_collect_static(object, impl, ...): NAs introduced by coercion to
## integer range
# Derive `gear_ind` by binarizing `gear` at a threshold of 4, then keep only
# the columns needed to compare the input with the new indicator.
# NOTE(review): Spark's Binarizer is documented to map values strictly greater
# than the threshold to 1, so gear == 4 would yield 0 — confirm this is intended.
tbl_mtcars_new <- tbl_mtcars %>%
  ft_binarizer(
    input_col = "gear",
    output_col = "gear_ind",
    threshold = 4
  ) %>%
  select(mpg, cyl, gear, gear_ind)
## Warning: `as_integer()` is deprecated as of rlang 0.4.0
## Warning: `as_double()` is deprecated as of rlang 0.4.0
# Print the transformed table so `gear` can be inspected next to `gear_ind`.
tbl_mtcars_new
## Warning in sdf_collect_static(object, impl, ...): NAs introduced by coercion to
## integer range
# Close the Spark connection held in `sc` now that the script is finished.
spark_disconnect(sc)