For the NoSQL database, you may use MongoDB (which we introduced in week 7), Neo4j, or another NoSQL database of your choosing.
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 3.3.3
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 3.3.3
library(RNeo4j)
## Warning: package 'RNeo4j' was built under R version 3.3.3
mydb <- dbConnect(MySQL(), user="root", dbname="flights", password = "hild1024", host="localhost")
airlines <- dbGetQuery(conn = mydb, statement =
"SELECT * FROM airlines")
airports <- dbGetQuery(conn = mydb, statement =
"SELECT * FROM airports")
flights <- dbGetQuery(conn = mydb, statement =
"SELECT * FROM flights")
# Decrease the sample loading to 2000
flights <- flights[1:2000,]
airports <- airports[airports$faa %in% flights$dest | airports$faa %in% flights$origin, ]
dbDisconnect(mydb)
## [1] TRUE
graph = startGraph("http://localhost:7474/db/data/", username = "neo4j", password = "password")
clear(graph, input = FALSE)
query1 <- "CREATE (a:Airport {faa: {faa}, name: {name}})"
t = newTransaction(graph)
for (i in 1:nrow(airports)) {
faa = airports$faa[i]
name = airports$name[i]
appendCypher(t,
query1,
faa = faa,
name = name)
}
commit(t)
query2 <- "CREATE (b:Flight {name: {name}, carrier: {carrier}, origin: {origin}, dest: {dest}})"
t = newTransaction(graph)
for (i in 1:nrow(flights)) {
name = flights$flight[i]
carrier = flights$carrier[i]
origin = flights$origin[i]
dest = flights$dest[i]
appendCypher(t,
query2,
name = name,
carrier = carrier,
origin = origin,
dest = dest)
}
commit(t)
query3 <- '
MATCH (a:Flight) WHERE a.dest={dest} AND a.name={fnum}
MATCH (b:Airport) WHERE b.faa={dest}
CREATE (a) -[r:Destination {arr_time:{arr_time}}] -> (b)
'
t = newTransaction(graph)
for (i in 1:nrow(flights)) {
dest = flights$dest[i]
fnum = flights$flight[i]
arr_time = flights$arr_time[i]
appendCypher(t,
query3,
fnum = fnum,
dest = dest,
arr_time = arr_time)
}
commit(t)
Visualization
picture
A NoSQL database lets you build an application without having to define the schema first unlike relational databases which make you define your schema before you can add any data to the system.
Relational databases were built in an era where data was fairly structured and clearly defined by their relationships.
NoSQL databases are open source whereas relational databases typically are closed source with licensing fees baked into the use of their software.