suppressMessages(library(RMySQL))
suppressMessages(library(RNeo4j))
con<-dbConnect(MySQL(),user='root',password='',dbname='flights', host='localhost')
sql<-"select * from flights"
res<- dbGetQuery(con,sql)
df<-data.frame(res)
write.csv(df,"flights.csv",row.names=F)
dbDisconnect(con)
## [1] TRUE
for Neo4j visualization, I just use the first 10 rows in the table
file<-file("flights.csv",open="r")
data<-read.csv(file,header=TRUE,stringsAsFactors =FALSE )
close(file)
df<-data.frame(data)
df<-df[1:10,]
df
## year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1 2013 1 1 517 2 830 11 UA N14228
## 2 2013 1 1 533 4 850 20 UA N24211
## 3 2013 1 1 542 2 923 33 AA N619AA
## 4 2013 1 1 544 -1 1004 -18 B6 N804JB
## 5 2013 1 1 554 -6 812 -25 DL N668DN
## 6 2013 1 1 554 -4 740 12 UA N39463
## 7 2013 1 1 555 -5 913 19 B6 N516JB
## 8 2013 1 1 557 -3 709 -14 EV N829AS
## 9 2013 1 1 557 -3 838 -8 B6 N593JB
## 10 2013 1 1 558 -2 753 8 AA N3ALAA
## flight origin dest air_time distance hour minute
## 1 1545 EWR IAH 227 1400 5 17
## 2 1714 LGA IAH 227 1416 5 33
## 3 1141 JFK MIA 160 1089 5 42
## 4 725 JFK BQN 183 1576 5 44
## 5 461 LGA ATL 116 762 6 54
## 6 1696 EWR ORD 150 719 6 54
## 7 507 EWR FLL 158 1065 6 55
## 8 5708 LGA IAD 53 229 6 57
## 9 79 JFK MCO 140 944 6 57
## 10 301 LGA ORD 138 733 6 58
graph=startGraph("http://localhost:7474/db/data/")
#clear(graph)
query="MERGE (origin:Airport {name:{origin}})
MERGE (destination:Airport {name:{dest}})
CREATE (origin)<-[:ORIGIN]-(:Flight {number:{flight_num},date:{flight_date},arrival:{arr_time},depart:{dep_time}})-[:DESTINATION]->(destination)"
t<- newTransaction(graph)
for (i in 1:nrow(df))
{
origin<-df[i,]$origin
dest<-df[i,]$dest
flight<-paste(df[i,]$carrier,df[i,]$flight)
flight_date<-paste(df[i,]$year,'/',df[i,]$month,'/',df[i,]$day,sep='')
arr_time<- df[i,]$arr_time
dep_time<-df[i,]$dep_time
appendCypher(t,query,origin=origin,dest=dest,flight_date=flight_date,arr_time=arr_time,dep_time=dep_time,flight_num=flight)
}
commit(t)
cypher(graph,"MATCH (o:Airport)<-[:ORIGIN]-(f:Flight)-[:DESTINATION]->(d:Airport) RETURN o.name,f.number,f.date,f.arrival,f.depart,d.name")
## o.name f.number f.date f.arrival f.depart d.name
## 1 EWR UA 1545 2013/1/1 830 517 IAH
## 2 JFK B6 725 2013/1/1 1004 544 BQN
## 3 JFK AA 1141 2013/1/1 923 542 MIA
## 4 LGA UA 1714 2013/1/1 850 533 IAH
## 5 LGA EV 5708 2013/1/1 709 557 IAD
## 6 EWR B6 507 2013/1/1 913 555 FLL
## 7 EWR UA 1696 2013/1/1 740 554 ORD
## 8 LGA DL 461 2013/1/1 812 554 ATL
## 9 LGA AA 301 2013/1/1 753 558 ORD
## 10 JFK B6 79 2013/1/1 838 557 MCO
With the graph we can see the relationship between airports more clearly and we can also see mutiple relationships between airports which is not that obverious in table view. We also store the flight information and airport information into seperate node which makes it more easier to visualize it.