library(RCurl)
## Loading required package: bitops
myfile_flight <- getURL('https://raw.githubusercontent.com/yuenwong2sps/Data607/master/Week12/neo4j-flight-lab-data.csv',
ssl.verifyhost=FALSE, ssl.verifypeer=FALSE)
myfile_airport <- getURL('https://raw.githubusercontent.com/yuenwong2sps/Data607/master/Week12/neo4j-airport-csv-raw.csv',
ssl.verifyhost=FALSE, ssl.verifypeer=FALSE)
mydat_airport <- read.csv(textConnection(myfile_airport), header=T)
mydat_flight <- read.csv(textConnection(myfile_flight), header=T)
library(RNeo4j)
graph = startGraph("http://localhost:7474/db/data/",
username = Neo4J_Username,
password = Neo4J_Password)
clear(graph, input = FALSE)
#unqiue airport data
addConstraint(graph, "Airport","label")
for(i in 1:length(mydat_airport$label))
{
createNode(graph, "Airport",
label = mydat_airport[i,]$label,
code=mydat_airport[i,]$label,
city=mydat_airport[i,]$city,
state = mydat_airport[i,]$state
)
}
for(i in 1:length(mydat_flight$flight))
{
createNode(graph, "flights",
flight = mydat_flight[i,]$flight,
airline = mydat_flight[i,]$airline,
capacity = mydat_flight[i,]$capacity
)
}
query = "match (a:Airport {code:{airport_code}}), (b:flights {flight:{flight_num}}) create (b) -[r:Arrive {landing:{landing_time}}]->(a)"
t = newTransaction(graph)
for(i in 1:length(mydat_flight$flight))
{
v_airport_code = mydat_flight[i,]$arrive
v_flight_num = mydat_flight[i,]$flight
v_landing_time = mydat_flight[i,]$landing
appendCypher(t,
query,
airport_code = v_airport_code,
flight_num = v_flight_num,
landing_time = v_landing_time
)
}
commit(t)
query = "match (a:Airport {code:{airport_code}}), (b:flights {flight:{flight_num}}) create (a) -[r:Depart {takeoff:{takeoff_time}}]->(b)"
t = newTransaction(graph)
for(i in 1:length(mydat_flight$flight))
{
v_airport_code = mydat_flight[i,]$depart
v_flight_num = mydat_flight[i,]$flight
v_takeoff_time = mydat_flight[i,]$takeoff
appendCypher(t,
query,
airport_code = v_airport_code,
flight_num = v_flight_num,
takeoff_time = v_takeoff_time
)
}
commit(t)
#we need to specific the field to query data instead of just "n"
query = "MATCH (d:Airport)-[t:Depart]->(f:flights)-[l:Arrive]->(a:Airport)
RETURN d.label as FromAirport,t.takeoff as TakeOff, f.airline as AirLine, f.flight as FlightNum,l.landing as Landing, a.label as ToAirport"
match_n_return_n = cypher(graph, query)
kable(match_n_return_n)
FromAirport | TakeOff | AirLine | FlightNum | Landing | ToAirport |
---|---|---|---|---|---|
PIT | 1100 | Delta | 44 | 1203 | ATL |
BOS | 1315 | American | 1258 | 1455 | ATL |
DTW | 1310 | American | 1257 | 1455 | ATL |
DTW | 914 | Delta | 24 | 1123 | ATL |
BOS | 816 | Delta | 38 | 1040 | ATL |
PIT | 955 | Delta | 46 | 1103 | BOS |
DTW | 945 | Delta | 27 | 1210 | BOS |
ATL | 1201 | Delta | 37 | 1444 | BOS |
DTW | 1300 | American | 1231 | 1520 | BOS |
ATL | 1340 | American | 1292 | 1545 | BOS |
DTW | 1600 | Southwest | 101 | 1810 | BOS |
BOS | 1310 | American | 1277 | 1530 | DTW |
ATL | 926 | Delta | 23 | 1109 | DTW |
BOS | 1009 | Delta | 28 | 1228 | DTW |
BOS | 1605 | Southwest | 102 | 1815 | DTW |
PIT | 1117 | Delta | 36 | 1219 | DTW |
PIT | 1620 | Southwest | 104 | 1735 | DTW |
ATL | 1330 | American | 1291 | 1530 | DTW |
PIT | 1400 | American | 1278 | 1505 | DTW |
BOS | 744 | Delta | 45 | 855 | PIT |
DTW | 1305 | American | 1232 | 1410 | PIT |
DTW | 955 | Delta | 35 | 1106 | PIT |
DTW | 1615 | Southwest | 103 | 1730 | PIT |
ATL | 901 | Delta | 43 | 1001 | PIT |
Now that we have some data loaded, let’s answer some basic questions. Write Cypher queries to answer each of the following:
query = "MATCH (d:Airport)-->(f:flights)-->(a:Airport) where d.code = 'DTW' and a.code = 'ATL'
RETURN d.label as FromAirport, f.airline as AirLine, f.flight as FlightNum, a.label as ToAirport"
flights_From_DTW_To_ATL = cypher(graph, query)
kable(flights_From_DTW_To_ATL)
FromAirport | AirLine | FlightNum | ToAirport |
---|---|---|---|
DTW | American | 1257 | ATL |
DTW | Delta | 24 | ATL |
query = "MATCH (d:Airport)-[t:Depart]->(f:flights)-->(a:Airport) where t.takeoff < 1100
RETURN d.label as FromAirport,t.takeoff as TakeOff, f.airline as AirLine, f.flight as FlightNum, a.label as ToAirport"
flights_Before11AM = cypher(graph, query)
kable(flights_Before11AM)
FromAirport | TakeOff | AirLine | FlightNum | ToAirport |
---|---|---|---|---|
ATL | 901 | Delta | 43 | PIT |
ATL | 926 | Delta | 23 | DTW |
BOS | 744 | Delta | 45 | PIT |
BOS | 816 | Delta | 38 | ATL |
BOS | 1009 | Delta | 28 | DTW |
DTW | 955 | Delta | 35 | PIT |
DTW | 914 | Delta | 24 | ATL |
DTW | 945 | Delta | 27 | BOS |
PIT | 955 | Delta | 46 | BOS |
query = "MATCH (d:Airport)-[t:Depart]->(f:flights)-[l:Arrive]->(a:Airport) where f.capacity > 150
RETURN d.label as FromAirport,t.takeoff as TakeOff, f.airline as AirLine, f.flight as FlightNum,l.landing as Landing, a.label as ToAirport, f.capacity as Capacity"
flights_LargerThan150 = cypher(graph, query)
kable(flights_LargerThan150)
FromAirport | TakeOff | AirLine | FlightNum | Landing | ToAirport | Capacity |
---|---|---|---|---|---|---|
DTW | 1305 | American | 1232 | 1410 | PIT | 160 |
BOS | 1315 | American | 1258 | 1455 | ATL | 160 |
PIT | 1100 | Delta | 44 | 1203 | ATL | 160 |
DTW | 914 | Delta | 24 | 1123 | ATL | 160 |
BOS | 1009 | Delta | 28 | 1228 | DTW | 160 |
BOS | 816 | Delta | 38 | 1040 | ATL | 160 |
PIT | 955 | Delta | 46 | 1103 | BOS | 160 |
ATL | 926 | Delta | 23 | 1109 | DTW | 160 |
DTW | 945 | Delta | 27 | 1210 | BOS | 160 |
ATL | 1201 | Delta | 37 | 1444 | BOS | 160 |
ATL | 901 | Delta | 43 | 1001 | PIT | 160 |
BOS | 744 | Delta | 45 | 855 | PIT | 160 |
DTW | 1300 | American | 1231 | 1520 | BOS | 160 |
BOS | 1310 | American | 1277 | 1530 | DTW | 160 |
PIT | 1400 | American | 1278 | 1505 | DTW | 160 |
ATL | 1340 | American | 1292 | 1545 | BOS | 160 |
query = "MATCH (d:Airport)-[t:Depart]->(f:flights)-[l:Arrive]->(a:Airport) where f.airline ='Delta' and a.code = 'BOS'
RETURN d.label as FromAirport,t.takeoff as TakeOff, f.airline as AirLine, f.flight as FlightNum,l.landing as Landing, a.label as ToAirport"
flights_Delta_BOS = cypher(graph, query)
kable(flights_Delta_BOS)
FromAirport | TakeOff | AirLine | FlightNum | Landing | ToAirport |
---|---|---|---|---|---|
PIT | 955 | Delta | 46 | 1103 | BOS |
DTW | 945 | Delta | 27 | 1210 | BOS |
ATL | 1201 | Delta | 37 | 1444 | BOS |
Let’s make a few simple changes to our data. Perform the following tasks:
Pittsburge_node = getNodes(graph, "Match (a:Airport) where a.code = 'PIT' return a ")
#updateProp expect a node, but getNodes return a list
#https://tutel.me/c/programming/questions/35876340/cannot+use+method+39updateprop39+in+rneo4j+package
Pittsburge_node = Pittsburge_node[[1]] #fix the issue
Before update:
(Pittsburge_node)
## < Node >
## Airport
##
## $code
## [1] "PIT"
##
## $city
## [1] "Pittsburgh"
##
## $state
## [1] "Pennsylvania"
##
## $label
## [1] "PIT"
After update:
Pittsburge_node = updateProp(Pittsburge_node, code = 'PGH', label= 'PGH')
(Pittsburge_node)
## < Node >
## Airport
##
## $code
## [1] "PGH"
##
## $city
## [1] "Pittsburgh"
##
## $state
## [1] "Pennsylvania"
##
## $label
## [1] "PGH"
delta_node = getNodes(graph, "match (f:flights) where f.airline='Delta' and f.flight=28 return f")
delta_node = delta_node[[1]] #fix the issue
Before udpate:
(delta_node)
## < Node >
## flights
##
## $flight
## [1] 28
##
## $airline
## [1] "Delta"
##
## $capacity
## [1] 160
After update:
delta_node = updateProp(delta_node, flight = 29)
(delta_node)
## < Node >
## flights
##
## $flight
## [1] 29
##
## $airline
## [1] "Delta"
##
## $capacity
## [1] 160
query = "match (fa:Airport)-[d:Depart]->(f:flights)-[l:Arrive]->(ta:Airport) where (fa.code = 'DTW' or fa.code = 'BOS') and (ta.code = 'BOS' or ta.code = 'DTW') and f.airline = 'Southwest' delete d, f, l"
cypher(graph, query)
Re-run records to verify the result
query = "MATCH (d:Airport)-[t:Depart]->(f:flights)-[l:Arrive]->(a:Airport)
RETURN d.label as FromAirport,t.takeoff as TakeOff, f.airline as AirLine, f.flight as FlightNum,l.landing as Landing, a.label as ToAirport"
match_n_return_n = cypher(graph, query)
kable(match_n_return_n)
FromAirport | TakeOff | AirLine | FlightNum | Landing | ToAirport |
---|---|---|---|---|---|
PGH | 1100 | Delta | 44 | 1203 | ATL |
BOS | 1315 | American | 1258 | 1455 | ATL |
DTW | 1310 | American | 1257 | 1455 | ATL |
DTW | 914 | Delta | 24 | 1123 | ATL |
BOS | 816 | Delta | 38 | 1040 | ATL |
PGH | 955 | Delta | 46 | 1103 | BOS |
DTW | 945 | Delta | 27 | 1210 | BOS |
ATL | 1201 | Delta | 37 | 1444 | BOS |
DTW | 1300 | American | 1231 | 1520 | BOS |
ATL | 1340 | American | 1292 | 1545 | BOS |
BOS | 1310 | American | 1277 | 1530 | DTW |
ATL | 926 | Delta | 23 | 1109 | DTW |
BOS | 1009 | Delta | 29 | 1228 | DTW |
PGH | 1117 | Delta | 36 | 1219 | DTW |
PGH | 1620 | Southwest | 104 | 1735 | DTW |
ATL | 1330 | American | 1291 | 1530 | DTW |
PGH | 1400 | American | 1278 | 1505 | DTW |
BOS | 744 | Delta | 45 | 855 | PGH |
DTW | 1305 | American | 1232 | 1410 | PGH |
DTW | 955 | Delta | 35 | 1106 | PGH |
DTW | 1615 | Southwest | 103 | 1730 | PGH |
ATL | 901 | Delta | 43 | 1001 | PGH |
Just run cypher to delete all records.
query = "match (a) optional match (a)-[r]-() delete a,r"
cypher(graph, query)
Both offer advantage and disadvantage.
NoSQL database provides dynamaic data structure and schema-less design but it doesn’t now have stored procedures. Also, Relational database is a more matured technology that there are many expert developers/adminstorator. NoSQL is relatively new technology.