#install.packages("dplyr")
#install.packages("devtools")
#install.packages("sparklyr")
#devtools::install_github("rstudio/sparklyr")
# Set the required system environment variables.
# Add the required entries to the system PATH variable.
# Install Spark from http://spark.apache.org/downloads.html, then untar it
# and place it in C:.
# Change permissions on the winutils tmp folder: winutils chmod 777 /tmp/hive
# Put winutils.exe into /bin.
library(sparklyr)
## Warning: package 'sparklyr' was built under R version 3.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Create a Spark configuration object so memory settings can be tuned.
config <- spark_config()

# Request 4 GB of memory for the Spark driver.
config$`sparklyr.shell.driver-memory` <- "4G"

# Connect to a local Spark instance using the custom configuration.
sc <- spark_connect(master = "local", config = config)