## Warning: package 'dbConnect' was built under R version 3.4.4
## Warning: package 'RMySQL' was built under R version 3.4.4
## Warning: package 'gWidgets' was built under R version 3.4.4
## Warning: changing locked binding for 'length.path' in 'httr' whilst loading
## 'RNeo4j'
## Warning: package 'shiny' was built under R version 3.4.4
## Warning: package 'miniUI' was built under R version 3.4.4

Shiny function to store password

get_password <- function() {
 ui <- miniPage(
   gadgetTitleBar("Please enter your password"),
   miniContentPanel(
     passwordInput("password", "")
   )
 )

 server <- function(input, output) {
   observeEvent(input$done, {
     stopApp(input$password)
   })
   observeEvent(input$cancel, {
     stopApp(stop("No password.", call. = FALSE))
   })
 }

 runGadget(ui, server, viewer = dialogViewer("Password", height = 200))
}

Access local MySQL

pw <- get_password()
## 
## Listening on http://127.0.0.1:5448
db <- dbConnect(MySQL(), user='root',password=pw, host='localhost', dbname='flights')
dbListTables(db)
## [1] "airlines" "airports" "flights"  "planes"   "weather"

Sql code query within R

  • I ended up only using the flights table, other tables are commented out
  • Write flights2 to csv and reload that csv from github for reproducibility
flights2 <- dbGetQuery(db,"select * from flights;" )
#airlines <- dbGetQuery(db,"select * from airlines;" )
#airports <- dbGetQuery(db,"select * from airports;" )
#planes <- dbGetQuery(db,"select * from planes;" )
#kable(head(airports))
#kable(head(airlines))
kable(head(flights2))
year month day dep_time dep_delay arr_time arr_delay carrier tailnum flight origin dest air_time distance hour minute
2013 1 1 517 2 830 11 UA N14228 1545 EWR IAH 227 1400 5 17
2013 1 1 533 4 850 20 UA N24211 1714 LGA IAH 227 1416 5 33
2013 1 1 542 2 923 33 AA N619AA 1141 JFK MIA 160 1089 5 42
2013 1 1 544 -1 1004 -18 B6 N804JB 725 JFK BQN 183 1576 5 44
2013 1 1 554 -6 812 -25 DL N668DN 461 LGA ATL 116 762 6 54
2013 1 1 554 -4 740 12 UA N39463 1696 EWR ORD 150 719 6 54
dim(flights2)
## [1] 336776     16
## Cut the file down to first 1000 rows from over 300k observations
flights2<- flights2[1:1000,]


## Reduce Airport and airline tables to subsetted data only
##my_airports <- airports[airports$faa %in% flights$origin | airports$faa %in% flights$dest , ]
##airlines <- airlines[airlines$carrier %in% flights$carrier,]

write.csv(flights2, file = "Myflights.csv")

RNeo4j

flights <- read.csv("https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/week%2013%20nosql/Myflights.csv")


graph = startGraph("http://localhost:7474/db/data/", username="neo4j", password="12345")
clear(graph, input = FALSE)

Make query to display a relationship of departures and arrivals

query = "MERGE (origin:Airport {name:{origin_name}, carrier:{carrier}})
MERGE (destination:Airport {name:{dest_name},carrier:{carrier}})
CREATE (origin)<-[:ORIGIN]-(:Flight{number:{flight_num},carrier:{carrier}})-[:DESTINATION]->
(destination)"

t = newTransaction(graph)

for (i in 1:nrow(flights)) {
  origin_name = flights[i, ]$origin
  dest_name = flights[i, ]$dest
  flight_num = flights[i, ]$flight
  carrier=flights[i,]$carrier
  appendCypher(t, 
               query, 
               origin_name = origin_name, 
               dest_name = dest_name, 
               flight_num = flight_num,
               carrier = carrier)
}

commit(t)

my_neo <- cypher(graph, "MATCH (origin:Airport)<-[:ORIGIN]-(f:Flight)-[:DESTINATION]->(dest_Airport:Airport)
               RETURN origin.name as Airport, f.number as flight,f.carrier as carrier, dest_Airport.name as destination")

kable(head(my_neo,20))
Airport flight carrier destination
LGA 983 B6 TPA
JFK 21 B6 TPA
JFK 71 B6 TPA
LGA 985 B6 TPA
JFK 63 B6 TPA
JFK 163 B6 TPA
EWR 537 B6 TPA
JFK 27 B6 TPA
JFK 59 B6 TPA
LGA 983 B6 TPA
JFK 71 B6 TPA
EWR 1676 UA LAX
JFK 1141 UA LAX
EWR 387 UA LAX
EWR 1482 UA LAX
EWR 342 UA LAX
JFK 535 UA LAX
EWR 1165 UA LAX
EWR 1600 UA LAX
JFK 530 UA LAX
Graph of arrival/takeoff with airline

Graph of arrival/takeoff with airline

Relational databases versus neo4j