MySQL to CSV

First we will take the Flights Database from MySQL and transfer it to a .csv file with the help of https://www.r-bloggers.com/dump-mysql-to-csv-using-r/

tables <- dbListTables(db)
tables
## [1] "airlines" "airports" "cities"   "flights"  "ontime"   "planes"  
## [7] "weather"
for (i in 1 : length(tables)){
  temp<-(dbReadTable(db, tables[i]))
  write.table(temp, tables[i], row.names=F, sep = ',')
}
flights <- data.frame(read.csv("~\\Dataset\\flights", stringsAsFactors = FALSE))
head(flights)
##   year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1 2013     1   1      517         2      830        11      UA  N14228
## 2 2013     1   1      533         4      850        20      UA  N24211
## 3 2013     1   1      542         2      923        33      AA  N619AA
## 4 2013     1   1      544        -1     1004       -18      B6  N804JB
## 5 2013     1   1      554        -6      812       -25      DL  N668DN
## 6 2013     1   1      554        -4      740        12      UA  N39463
##   flight origin dest air_time distance hour minute
## 1   1545    EWR  IAH      227     1400    5     17
## 2   1714    LGA  IAH      227     1416    5     33
## 3   1141    JFK  MIA      160     1089    5     42
## 4    725    JFK  BQN      183     1576    5     44
## 5    461    LGA  ATL      116      762    6     54
## 6   1696    EWR  ORD      150      719    6     54
ontime <- data.frame(read.csv("~\\Dataset\\ontime", stringsAsFactors = FALSE))
head(ontime)
##   city_id air_line arrived   n
## 1       1   Alaska  ontime 497
## 2       1   Alaska delayed  62
## 3       2   Alaska  ontime 221
## 4       2   Alaska delayed  12
## 5       3   Alaska  ontime 212
## 6       3   Alaska delayed  20
planes <- data.frame(read.csv("~\\Dataset\\planes", stringsAsFactors = FALSE))
head(planes)
##   tailnum year                    type     manufacturer     model engines
## 1  N10156 2004 Fixed wing multi engine          EMBRAER EMB-145XR       2
## 2  N102UW 1998 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 3  N103US 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 4  N104UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 5  N10575 2002 Fixed wing multi engine          EMBRAER EMB-145LR       2
## 6  N105UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
##   seats speed    engine
## 1    55    NA Turbo-fan
## 2   182    NA Turbo-fan
## 3   182    NA Turbo-fan
## 4   182    NA Turbo-fan
## 5    55    NA Turbo-fan
## 6   182    NA Turbo-fan
weather <- data.frame(read.csv("~\\Dataset\\weather", stringsAsFactors = FALSE))
head(weather)
##   origin year month day hour  temp  dewp humid wind_dir wind_speed
## 1    EWR 2013     1   1    0 37.04 21.92 53.97      230   10.35702
## 2    EWR 2013     1   1    1 37.04 21.92 53.97      230   13.80936
## 3    EWR 2013     1   1    2 37.94 21.92 52.09      230   12.65858
## 4    EWR 2013     1   1    3 37.94 23.00 54.51      230   13.80936
## 5    EWR 2013     1   1    4 37.94 24.08 57.04      240   14.96014
## 6    EWR 2013     1   1    6 39.02 26.06 59.37      270   10.35702
##   wind_gust precip pressure visib
## 1  11.91865      0   1013.9    10
## 2  15.89154      0   1013.0    10
## 3  14.56724      0   1012.6    10
## 4  15.89154      0   1012.7    10
## 5  17.21583      0   1012.8    10
## 6  11.91865      0   1012.0    10
airlines <- data.frame(read.csv("~\\Dataset\\airlines", stringsAsFactors = FALSE))
head(airlines)
##   carrier                       name
## 1      9E        Endeavor Air Inc.\n
## 2      AA   American Airlines Inc.\n
## 3      AS     Alaska Airlines Inc.\n
## 4      B6          JetBlue Airways\n
## 5      DL     Delta Air Lines Inc.\n
## 6      EV ExpressJet Airlines Inc.\n
airports <- data.frame(read.csv("~\\Dataset\\airports", stringsAsFactors = FALSE))
head(airports)
##   faa                           name      lat       lon  alt tz dst
## 1 04G              Lansdowne Airport 41.13047 -80.61958 1044 -5   A
## 2 06A  Moton Field Municipal Airport 32.46057 -85.68003  264 -5   A
## 3 06C            Schaumburg Regional 41.98934 -88.10124  801 -6   A
## 4 06N                Randall Airport 41.43191 -74.39156  523 -5   A
## 5 09J          Jekyll Island Airport 31.07447 -81.42778   11 -4   A
## 6 0A9 Elizabethton Municipal Airport 36.37122 -82.17342 1593 -4   A
cities <- data.frame(read.csv("~\\Dataset\\cities", stringsAsFactors = FALSE))
head(cities)
##   city_id          city
## 1       1   Los Angeles
## 2       2       Phoenix
## 3       3     San Diego
## 4       4 San Francisco
## 5       5        Seatle

From dataframe to MongoDB

Now we will use the dataframes to populate the MongoDB.

suppressMessages(suppressWarnings(library(rmongodb)))
mongo = mongo.create(host = "localhost")
mongo.is.connected(mongo)
## [1] TRUE
b1=mongo.bson.from.df(flights)
b2=mongo.bson.from.df(ontime)
b3=mongo.bson.from.df(planes)
b4=mongo.bson.from.df(weather)
b5=mongo.bson.from.df(airlines)
b6=mongo.bson.from.df(airports)
b7=mongo.bson.from.df(cities)
b1[[1]]
##  year : 16    2013
##  month : 16   1
##  day : 16     1
##  dep_time : 16    517
##  dep_delay : 16   2
##  arr_time : 16    830
##  arr_delay : 16   11
##  carrier : 2      UA
##  tailnum : 2      N14228
##  flight : 16      1545
##  origin : 2   EWR
##  dest : 2     IAH
##  air_time : 16    227
##  distance : 16    1400
##  hour : 16    5
##  minute : 16      17
b2[[1]]
##  city_id : 16     1
##  air_line : 2     Alaska
##  arrived : 2      ontime
##  n : 16   497
b3[[1]]
##  tailnum : 2      N10156
##  year : 16    2004
##  type : 2     Fixed wing multi engine
##  manufacturer : 2     EMBRAER
##  model : 2    EMB-145XR
##  engines : 16     2
##  seats : 16   55
##  speed : 10   BSON_NULL
##  engine : 2   Turbo-fan
b4[[1]]
##  origin : 2   EWR
##  year : 16    2013
##  month : 16   1
##  day : 16     1
##  hour : 16    0
##  temp : 1     37.040000
##  dewp : 1     21.920000
##  humid : 1    53.970000
##  wind_dir : 16    230
##  wind_speed : 1   10.357020
##  wind_gust : 1    11.918651
##  precip : 1   0.000000
##  pressure : 1     1013.900000
##  visib : 16   10
b5[[1]]
##  carrier : 2      9E
##  name : 2     Endeavor Air Inc.
b6[[1]]
##  faa : 2      04G
##  name : 2     Lansdowne Airport
##  lat : 1      41.130472
##  lon : 1      -80.619583
##  alt : 16     1044
##  tz : 16      -5
##  dst : 2      A
b7[[1]]
##  city_id : 16     1
##  city : 2     Los Angeles
mongo.insert(mongo,"flights",b1)
## [1] FALSE
mongo.insert(mongo,"ontime",b2)
## [1] FALSE
mongo.insert(mongo,"planes",b3)
## [1] FALSE
mongo.insert(mongo,"weather",b4)
## [1] FALSE
mongo.insert(mongo,"airlines",b5)
## [1] FALSE
mongo.insert(mongo,"airports",b6)
## [1] FALSE
mongo.insert(mongo,"cities",b7)
## [1] FALSE

From csv file to Neo4J

First I established a database called ‘Flights’ using the Neo4J desktop app. Neo4J assigned this database to an http port of 11001. To access this database in Chrome, I entered http:\\localhost:11001 note this is different than the default port of 7474.

To use the ‘LOAD CSV WITH HEADERS’ cypher command I had to move the csv files into Neo4J desktop import file or the following warning would appear “Couldn’t load the external resource at: file:/C:/Users/Nate/AppData/Roaming/Neo4j%20Desktop/Application/neo4jDatabases/database-c2becb82-5c85-430d-bb42-4ad6749f1f0b/current/import/airlines

The successfull Cypher queries were as follows:

load csv with headers from “file:///airlines” as airlines create (n:Airlines {carrier: airlines.carrier, name: airlines.name})

MATCH (n:Airlines) RETURN n LIMIT 1

returned: { “name”: “Endeavor Air Inc.”, “carrier”: “9E” }

load csv with headers from “file:///airports” as airports create (n:Airports {faa: airports.faa, name: airports.name, lat: airports.lat, lon: airports.lon, alt: airports.alt, tz: airports.tz, dst: airports.dst})

MATCH (n:Airports) RETURN n LIMIT 1

returned:

{ “dst”: “A”, “tz”: “-5”, “faa”: “04G”, “alt”: “1044”, “name”: “Lansdowne Airport”, “lon”: “-80.6195833”, “lat”: “41.1304722” }

load csv with headers from “file:///planes” as planes create (n:Planes {tailnum: planes.tailnum, year: planes.year, type: planes.type, manufacturer: planes.manufacturer, model: planes.model,engines: planes.engines, seats: planes.seats, speed: planes.speed. engine: planes.engine})

MATCH (n:Planes) RETURN n LIMIT 1

returned:

{ “tailnum”: “N10156”, “year”: “2004”, “engine”: “Turbo-fan”, “engines”: “2”, “model”: “EMB-145XR”, “type”: “Fixed wing multi engine”, “seats”: “55”, “speed”: “NA”, “manufacturer”: “EMBRAER” }

load csv with headers from “file:///weather” as weather create (n:Weather {origin:weather.origin, year:weather.year, month:weather.month, day:weather.day, hour:weather.hour, temp:weather.temp, dewp:weather.dewp, humid:weather.humid, wind_dir:weather.wind_dir, wind_speed:weather.wind_speed, wind_gust:weather.wind_gust, precip:weather.precip, pressure:weather.pressure, visib:weather.visib})

MATCH (n:Weather) RETURN n LIMIT 1

returned:

{ “temp”: “37.04”, “year”: “2013”, “origin”: “EWR”, “visib”: “10”, “humid”: “53.97”, “wind_dir”: “230”, “pressure”: “1013.9”, “wind_gust”: “11.9186514756”, “precip”: “0”, “month”: “1”, “dewp”: “21.92”, “hour”: “0”, “wind_speed”: “10.35702”, “day”: “1” }

load csv with headers from “file:///flights” as flights create (n:Flights {year:flights.year, month:flights.month, day:flights.day, dep_time:flights.dep_time, dep_delay:flights.dep_delay, arr_time:flights.arr_time, arr_delay:flights.arr_delay, carrier:flights.carrier, tailnum:flights.tailnum, flight:flights.flight, origin:flights.origin, dest:flights.dest, air_time:flights.air_time, distance:flights.distance, hour:flights.hour, minute:flights.minute})

Note the retured values once completed: Added 336776 labels, created 336776 nodes, set 5388416 properties, completed after 14772 ms.

This required me to allot 8 Gb of RAM to Neo4J to complete.

MATCH (n:Flights) RETURN n LIMIT 1

{ “flight”: “1545”, “arr_delay”: “11”, “distance”: “1400”, “tailnum”: “N14228”, “year”: “2013”, “origin”: “EWR”, “dep_time”: “517”, “dep_delay”: “2”, “dest”: “IAH”, “minute”: “17”, “carrier”: “UA”, “month”: “1”, “hour”: “5”, “arr_time”: “830”, “air_time”: “227”, “day”: “1” }

These were from the Alsaka Air vs. AM West assignment:

load csv with headers from “file:///cities” as cities create (n:Cities {city_id:cities.city_id, city:cities.city})

MATCH (n:Cities) RETURN n LIMIT 1

{ “city”: “Los Angeles”, “city_id”: “1” }

load csv with headers from “file:///ontime” as ontime create (n:Ontime {city_id:ontime.city_id, air_line:ontime.air_line, arrived:ontime.arrived, n:ontime.n})

MATCH (n:Ontime) RETURN n LIMIT 1

{ “arrived”: “ontime”, “n”: “497”, “air_line”: “Alaska”, “city_id”: “1” }