This assignment is to take a relational data base and convert it to a No SQL database using either MongoDB or Neo4j. I have chosen Neo4j.
if(!require ("RMySQL")) install.packages('RMySQL')
## Loading required package: RMySQL
## Loading required package: DBI
if(!require ("RNeo4j")) install.packages('RNeo4j')
## Loading required package: RNeo4j
mydb <- dbConnect(MySQL(), user='root',password='test123', dbname='flights',host='localhost')
st1 <- "SELECT * from flights.airlines"
airlines <- dbGetQuery(mydb,st1 )
st2 <- "SELECT * from flights.airports"
airports <- dbGetQuery(mydb,st2 )
st3 <- "select * from flights.flights where year=2013 and month=1 and day =1"
flights <- dbGetQuery(mydb,st3 )
st4 <- "SELECT * FROM planes"
planes <- dbGetQuery(mydb,st4 )
st5 <- "SELECT * FROM weather"
weather <- dbGetQuery(mydb,st5 )
dbDisconnect(mydb)
## [1] TRUE
flights <- flights[flights$dep_time > 400 & flights$dep_time < 700, ]
airports <- airports[airports$faa %in% flights$dest,]
airlines <- airlines[airlines$carrier %in% flights$carrier,]
graph = startGraph("http://localhost:7474/db/data/",username="neo4j",password="neo5j")
clear(graph, input = FALSE)
#Clear Existing nodes and relationship from database
#MATCH (p)-[r]-() delete p,r;
addConstraint(graph, "Airline" , "carrier")
q1 <- "CREATE (a:Airline {carrier: {carrier}, name: {name}})"
t = newTransaction(graph)
for (i in 1:nrow(airlines)) {
carrier = airlines$carrier[i]
name = airlines$name[i]
appendCypher( t, q1, carrier = carrier, name=name)
}
commit(t)
q2 <- "CREATE (b:Airport {faa: {faa}, name: {name}})"
t = newTransaction(graph)
for (i in 1:nrow(airports)) {
faa = airports$faa[i]
name = airports$name[i]
appendCypher( t, q2, faa = faa, name=name)
}
commit(t)
q3 <- "CREATE (c:Flights {name: {name}, carrier: {carrier},origin: {origin}, dest:{dest} })"
t = newTransaction(graph)
for (i in 1:nrow(flights)) {
name = flights$flight[i]
carrier = flights$carrier[i]
origin = flights$origin[i]
dest = flights$dest[i]
appendCypher( t, q3, name=name,carrier = carrier,origin=origin, dest=dest)
}
commit(t)
query1 <- '
MATCH (a:Flights),(b:Airline)
WHERE a.carrier={carrier} AND b.carrier={carrier}
CREATE (a) -[r:Serves] -> (b)
'
t = newTransaction(graph)
for (i in 1:nrow(flights)) {
carrier = flights$carrier[i]
fnum = flights$flight[i]
appendCypher(t,
query1,
fnum = fnum,
carrier = carrier)
}
commit(t)
query2 <- '
MATCH (a:Flights) ,(c:Airport)
WHERE a.origin={origin} AND c.faa={origin}
CREATE (c) -[o:Origin] -> (d)
'
t = newTransaction(graph)
for (i in 1:nrow(flights)) {
origin = flights$origin[i]
fnum = flights$flight[i]
dep_time = flights$dep_time[i]
appendCypher(t,
query2,
origin = origin,
fnum = fnum,
dep_time = dep_time)
}
commit(t)
library(png)
img <- readPNG('c:/cuny/graph_q2.PNG')
grid::grid.raster(img)
RDBMS is used in ERP environment by various industries and integrated with other tools designed for analytics. SQL Statements are complex and large to link multiple tables. RDBMS follow ACID(Atomicity, Consistency, isolation, Durablity) principles. RBBMS is around for long, mature and proven.
NoSQL is easy to install, create graphs, load files in many formats and create queries that are shorter to link many tables. This also use ACID principles.