For the NoSQL database, you may use MongoDB (which we introduced in week 7), Neo4j, or another NoSQL database of your choosing.

library(RMySQL)
## Warning: package 'RMySQL' was built under R version 3.3.3
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.3.3
library(RNeo4j)
## Warning: package 'RNeo4j' was built under R version 3.3.3

load Data from SQL to R

mydb <- dbConnect(MySQL(), user="root", dbname="flights", password = "hild1024", host="localhost")

airlines <- dbGetQuery(conn = mydb, statement = 
                          "SELECT * FROM airlines")
airports <- dbGetQuery(conn = mydb, statement = 
                          "SELECT * FROM airports")
flights <- dbGetQuery(conn = mydb, statement = 
                          "SELECT * FROM flights")

# Decrease the sample loading to 2000
flights <- flights[1:2000,]
airports <- airports[airports$faa %in% flights$dest | airports$faa %in% flights$origin, ]

dbDisconnect(mydb)
## [1] TRUE

load data from R to Neo4j

graph = startGraph("http://localhost:7474/db/data/", username = "neo4j", password = "password")
clear(graph, input = FALSE)

Setting Node for Airport

query1 <- "CREATE (a:Airport {faa: {faa}, name: {name}})"

t = newTransaction(graph)

for (i in 1:nrow(airports)) {
  faa = airports$faa[i]
  name = airports$name[i]

  appendCypher(t, 
               query1, 
               faa = faa, 
               name = name)
}
commit(t)

Setting Node for Flight

query2 <- "CREATE (b:Flight {name: {name}, carrier: {carrier}, origin: {origin}, dest: {dest}})"

t = newTransaction(graph)

for (i in 1:nrow(flights)) {
  name = flights$flight[i]
  carrier = flights$carrier[i]
  origin = flights$origin[i]
  dest = flights$dest[i]

  appendCypher(t, 
               query2, 
               name = name, 
               carrier = carrier,
               origin = origin,
               dest = dest)
}
commit(t)

Setting relationship between node

query3 <- '
MATCH (a:Flight) WHERE a.dest={dest} AND a.name={fnum}
MATCH (b:Airport) WHERE b.faa={dest}
CREATE (a) -[r:Destination {arr_time:{arr_time}}] -> (b)
'

t = newTransaction(graph)

for (i in 1:nrow(flights)) {
  dest = flights$dest[i]
  fnum = flights$flight[i]
  arr_time = flights$arr_time[i]

  appendCypher(t, 
               query3, 
               fnum = fnum,
               dest = dest, 
               arr_time = arr_time)
}
commit(t)

Visualization

picture

picture

advantages and disadvantages of storing the data in a relational database vs. NoSQL database

A NoSQL database lets you build an application without having to define the schema first unlike relational databases which make you define your schema before you can add any data to the system.

Relational databases were built in an era where data was fairly structured and clearly defined by their relationships.

NoSQL databases are open source whereas relational databases typically are closed source with licensing fees baked into the use of their software.