Output sql table into CSV

suppressMessages(library(RMySQL))
suppressMessages(library(RNeo4j))
con<-dbConnect(MySQL(),user='root',password='',dbname='flights', host='localhost')
sql<-"select * from flights"
res<- dbGetQuery(con,sql)
df<-data.frame(res)
write.csv(df,"flights.csv",row.names=F)
dbDisconnect(con)
## [1] TRUE

Read csv into R

for Neo4j visualization, I just use the first 10 rows in the table

file<-file("flights.csv",open="r")
data<-read.csv(file,header=TRUE,stringsAsFactors =FALSE )
close(file)
df<-data.frame(data)
df<-df[1:10,]
df
##    year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1  2013     1   1      517         2      830        11      UA  N14228
## 2  2013     1   1      533         4      850        20      UA  N24211
## 3  2013     1   1      542         2      923        33      AA  N619AA
## 4  2013     1   1      544        -1     1004       -18      B6  N804JB
## 5  2013     1   1      554        -6      812       -25      DL  N668DN
## 6  2013     1   1      554        -4      740        12      UA  N39463
## 7  2013     1   1      555        -5      913        19      B6  N516JB
## 8  2013     1   1      557        -3      709       -14      EV  N829AS
## 9  2013     1   1      557        -3      838        -8      B6  N593JB
## 10 2013     1   1      558        -2      753         8      AA  N3ALAA
##    flight origin dest air_time distance hour minute
## 1    1545    EWR  IAH      227     1400    5     17
## 2    1714    LGA  IAH      227     1416    5     33
## 3    1141    JFK  MIA      160     1089    5     42
## 4     725    JFK  BQN      183     1576    5     44
## 5     461    LGA  ATL      116      762    6     54
## 6    1696    EWR  ORD      150      719    6     54
## 7     507    EWR  FLL      158     1065    6     55
## 8    5708    LGA  IAD       53      229    6     57
## 9      79    JFK  MCO      140      944    6     57
## 10    301    LGA  ORD      138      733    6     58

Create graph

graph=startGraph("http://localhost:7474/db/data/")
#clear(graph)

Create Neo4j Schema using RNeo4j

query="MERGE (origin:Airport {name:{origin}})
MERGE (destination:Airport {name:{dest}})
CREATE (origin)<-[:ORIGIN]-(:Flight {number:{flight_num},date:{flight_date},arrival:{arr_time},depart:{dep_time}})-[:DESTINATION]->(destination)"

t<- newTransaction(graph)
for (i in 1:nrow(df))
{
  origin<-df[i,]$origin
  dest<-df[i,]$dest
  flight<-paste(df[i,]$carrier,df[i,]$flight)
  flight_date<-paste(df[i,]$year,'/',df[i,]$month,'/',df[i,]$day,sep='')
  arr_time<- df[i,]$arr_time
  dep_time<-df[i,]$dep_time
  appendCypher(t,query,origin=origin,dest=dest,flight_date=flight_date,arr_time=arr_time,dep_time=dep_time,flight_num=flight)
  
}
commit(t)

Table view of Neo4j database in R

cypher(graph,"MATCH (o:Airport)<-[:ORIGIN]-(f:Flight)-[:DESTINATION]->(d:Airport) RETURN o.name,f.number,f.date,f.arrival,f.depart,d.name")
##    o.name f.number   f.date f.arrival f.depart d.name
## 1     EWR  UA 1545 2013/1/1       830      517    IAH
## 2     JFK   B6 725 2013/1/1      1004      544    BQN
## 3     JFK  AA 1141 2013/1/1       923      542    MIA
## 4     LGA  UA 1714 2013/1/1       850      533    IAH
## 5     LGA  EV 5708 2013/1/1       709      557    IAD
## 6     EWR   B6 507 2013/1/1       913      555    FLL
## 7     EWR  UA 1696 2013/1/1       740      554    ORD
## 8     LGA   DL 461 2013/1/1       812      554    ATL
## 9     LGA   AA 301 2013/1/1       753      558    ORD
## 10    JFK    B6 79 2013/1/1       838      557    MCO

With the graph we can see the relationship between airports more clearly and we can also see mutiple relationships between airports which is not that obverious in table view. We also store the flight information and airport information into seperate node which makes it more easier to visualize it.