# Import MySQL database to R
library(RMySQL)
# Connect to the MySQL database
conn <- dbConnect(
  RMySQL::MySQL(), 
  dbname='global-warming',
  host='127.0.0.1',
  port= 3306,
  user='root', 
  password='TempPassword')
# Importtable into R data.frames
df <- dbGetQuery(conn, "select * from co2;")
# Disconnect from the database
dbDisconnect(conn)
[1] TRUE
# Install and import Spark
library(sparklyr) # Connect R to Spark
spark_install(version='2.0.2') # Install spark (if not already installed)

# Create local spark cluster
sc <- spark_connect(master = "local", spark_home=spark_home_dir(version = "2.0.2"))

# Copy data frames into local Spark cluster tables 
library(dplyr)
df_tbl <- copy_to(sc, df, overwrite = TRUE)
# Display the data structures of the data stored in native R vs Spark
class(df) # intial data imported into R
[1] "data.frame"
class(df_tbl) # data converted to tbl_spark
[1] "tbl_spark" "tbl_sql"   "tbl_lazy"  "tbl"      
# Display the Spark Table
head(df_tbl)
# Summarise the Spark table to display total_co2 and count of co2 records per country
df_tbl %>%
  group_by(country_name) %>%
  summarize(total_co2 = sum(co2_emissions), count = n())
Missing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warningMissing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warningMissing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warningMissing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warningMissing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warningMissing values are always removed in SQL.
Use `SUM(x, na.rm = TRUE)` to silence this warning
# Disconnect from spark
spark_disconnect(sc)
LS0tCnRpdGxlOiAiU3BhcmsgRXhhbXBsZSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiAjIEltcG9ydCBNeVNRTCBkYXRhYmFzZSB0byBSCmxpYnJhcnkoUk15U1FMKQoKIyBDb25uZWN0IHRvIHRoZSBNeVNRTCBkYXRhYmFzZQpjb25uIDwtIGRiQ29ubmVjdCgKICBSTXlTUUw6Ok15U1FMKCksIAogIGRibmFtZT0nZ2xvYmFsLXdhcm1pbmcnLAogIGhvc3Q9JzEyNy4wLjAuMScsCiAgcG9ydD0gMzMwNiwKICB1c2VyPSdyb290JywgCiAgcGFzc3dvcmQ9J1RlbXBQYXNzd29yZCcpCgojIEltcG9ydHRhYmxlIGludG8gUiBkYXRhLmZyYW1lcwpkZiA8LSBkYkdldFF1ZXJ5KGNvbm4sICJzZWxlY3QgKiBmcm9tIGNvMjsiKQoKIyBEaXNjb25uZWN0IGZyb20gdGhlIGRhdGFiYXNlCmRiRGlzY29ubmVjdChjb25uKQpgYGAKCgpgYGB7cn0KIyBJbnN0YWxsIGFuZCBpbXBvcnQgU3BhcmsKbGlicmFyeShzcGFya2x5cikgIyBDb25uZWN0IFIgdG8gU3BhcmsKc3BhcmtfaW5zdGFsbCh2ZXJzaW9uPScyLjAuMicpICMgSW5zdGFsbCBzcGFyayAoaWYgbm90IGFscmVhZHkgaW5zdGFsbGVkKQoKIyBDcmVhdGUgbG9jYWwgc3BhcmsgY2x1c3RlcgpzYyA8LSBzcGFya19jb25uZWN0KG1hc3RlciA9ICJsb2NhbCIsIHNwYXJrX2hvbWU9c3BhcmtfaG9tZV9kaXIodmVyc2lvbiA9ICIyLjAuMiIpKQoKIyBDb3B5IGRhdGEgZnJhbWVzIGludG8gbG9jYWwgU3BhcmsgY2x1c3RlciB0YWJsZXMgCmxpYnJhcnkoZHBseXIpCmRmX3RibCA8LSBjb3B5X3RvKHNjLCBkZiwgb3ZlcndyaXRlID0gVFJVRSkKYGBgCgoKYGBge3J9CiMgRGlzcGxheSB0aGUgZGF0YSBzdHJ1Y3R1cmVzIG9mIHRoZSBkYXRhIHN0b3JlZCBpbiBuYXRpdmUgUiB2cyBTcGFyawpjbGFzcyhkZikgIyBpbnRpYWwgZGF0YSBpbXBvcnRlZCBpbnRvIFIKY2xhc3MoZGZfdGJsKSAjIGRhdGEgY29udmVydGVkIHRvIHRibF9zcGFyawpgYGAKCgoKYGBge3J9CiMgRGlzcGxheSB0aGUgU3BhcmsgVGFibGUKaGVhZChkZl90YmwpCmBgYAoKYGBge3J9CiMgU3VtbWFyaXNlIHRoZSBTcGFyayB0YWJsZSB0byBkaXNwbGF5IHRvdGFsX2NvMiBhbmQgY291bnQgb2YgY28yIHJlY29yZHMgcGVyIGNvdW50cnkKZGZfdGJsICU+JQogIGdyb3VwX2J5KGNvdW50cnlfbmFtZSkgJT4lCiAgc3VtbWFyaXplKHRvdGFsX2NvMiA9IHN1bShjbzJfZW1pc3Npb25zKSwgY291bnQgPSBuKCkpCmBgYAoKCmBgYHtyfQojIERpc2Nvbm5lY3QgZnJvbSBzcGFyawpzcGFya19kaXNjb25uZWN0KHNjKQpgYGAKCg==