## Warning: package 'dbConnect' was built under R version 3.4.4
## Warning: package 'RMySQL' was built under R version 3.4.4
## Warning: package 'gWidgets' was built under R version 3.4.4
## Warning: changing locked binding for 'length.path' in 'httr' whilst loading
## 'RNeo4j'
## Warning: package 'shiny' was built under R version 3.4.4
## Warning: package 'miniUI' was built under R version 3.4.4
Shiny function to store password
get_password <- function() {
ui <- miniPage(
gadgetTitleBar("Please enter your password"),
miniContentPanel(
passwordInput("password", "")
)
)
server <- function(input, output) {
observeEvent(input$done, {
stopApp(input$password)
})
observeEvent(input$cancel, {
stopApp(stop("No password.", call. = FALSE))
})
}
runGadget(ui, server, viewer = dialogViewer("Password", height = 200))
}
Access local MySQL
##
## Listening on http://127.0.0.1:5448
db <- dbConnect(MySQL(), user='root',password=pw, host='localhost', dbname='flights')
dbListTables(db)
## [1] "airlines" "airports" "flights" "planes" "weather"
Sql code query within R
- I ended up only using the flights table, other tables are commented out
- Write flights2 to csv and reload that csv from github for reproducibility
flights2 <- dbGetQuery(db,"select * from flights;" )
#airlines <- dbGetQuery(db,"select * from airlines;" )
#airports <- dbGetQuery(db,"select * from airports;" )
#planes <- dbGetQuery(db,"select * from planes;" )
#kable(head(airports))
#kable(head(airlines))
kable(head(flights2))
| 2013 |
1 |
1 |
517 |
2 |
830 |
11 |
UA |
N14228 |
1545 |
EWR |
IAH |
227 |
1400 |
5 |
17 |
| 2013 |
1 |
1 |
533 |
4 |
850 |
20 |
UA |
N24211 |
1714 |
LGA |
IAH |
227 |
1416 |
5 |
33 |
| 2013 |
1 |
1 |
542 |
2 |
923 |
33 |
AA |
N619AA |
1141 |
JFK |
MIA |
160 |
1089 |
5 |
42 |
| 2013 |
1 |
1 |
544 |
-1 |
1004 |
-18 |
B6 |
N804JB |
725 |
JFK |
BQN |
183 |
1576 |
5 |
44 |
| 2013 |
1 |
1 |
554 |
-6 |
812 |
-25 |
DL |
N668DN |
461 |
LGA |
ATL |
116 |
762 |
6 |
54 |
| 2013 |
1 |
1 |
554 |
-4 |
740 |
12 |
UA |
N39463 |
1696 |
EWR |
ORD |
150 |
719 |
6 |
54 |
## [1] 336776 16
## Cut the file down to first 1000 rows from over 300k observations
flights2<- flights2[1:1000,]
## Reduce Airport and airline tables to subsetted data only
##my_airports <- airports[airports$faa %in% flights$origin | airports$faa %in% flights$dest , ]
##airlines <- airlines[airlines$carrier %in% flights$carrier,]
write.csv(flights2, file = "Myflights.csv")
RNeo4j
- Please add your own user name password if needed to graph call
flights <- read.csv("https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/week%2013%20nosql/Myflights.csv")
graph = startGraph("http://localhost:7474/db/data/", username="neo4j", password="12345")
clear(graph, input = FALSE)
Make query to display a relationship of departures and arrivals
query = "MERGE (origin:Airport {name:{origin_name}, carrier:{carrier}})
MERGE (destination:Airport {name:{dest_name},carrier:{carrier}})
CREATE (origin)<-[:ORIGIN]-(:Flight{number:{flight_num},carrier:{carrier}})-[:DESTINATION]->
(destination)"
t = newTransaction(graph)
for (i in 1:nrow(flights)) {
origin_name = flights[i, ]$origin
dest_name = flights[i, ]$dest
flight_num = flights[i, ]$flight
carrier=flights[i,]$carrier
appendCypher(t,
query,
origin_name = origin_name,
dest_name = dest_name,
flight_num = flight_num,
carrier = carrier)
}
commit(t)
my_neo <- cypher(graph, "MATCH (origin:Airport)<-[:ORIGIN]-(f:Flight)-[:DESTINATION]->(dest_Airport:Airport)
RETURN origin.name as Airport, f.number as flight,f.carrier as carrier, dest_Airport.name as destination")
kable(head(my_neo,20))
| LGA |
983 |
B6 |
TPA |
| JFK |
21 |
B6 |
TPA |
| JFK |
71 |
B6 |
TPA |
| LGA |
985 |
B6 |
TPA |
| JFK |
63 |
B6 |
TPA |
| JFK |
163 |
B6 |
TPA |
| EWR |
537 |
B6 |
TPA |
| JFK |
27 |
B6 |
TPA |
| JFK |
59 |
B6 |
TPA |
| LGA |
983 |
B6 |
TPA |
| JFK |
71 |
B6 |
TPA |
| EWR |
1676 |
UA |
LAX |
| JFK |
1141 |
UA |
LAX |
| EWR |
387 |
UA |
LAX |
| EWR |
1482 |
UA |
LAX |
| EWR |
342 |
UA |
LAX |
| JFK |
535 |
UA |
LAX |
| EWR |
1165 |
UA |
LAX |
| EWR |
1600 |
UA |
LAX |
| JFK |
530 |
UA |
LAX |
Relational databases versus neo4j
- Relational databases
- structured
- Computational and memory-intensive
- Use of foreign keys to create connections
- good for data that doesn’t require exploring relationships
- Neo4j
- flexible
- Built in relationships
- good for data that has many interconnecting relationships that need to be explored