library(dbConnect)
library(jsonlite)
library(mongolite)
setwd("~/Google Drive/CUNY/git/DATA607/Project4")

Import data from MySQL

After loading the flights database into a local MySQL database, I then imported each of the tables into R data frames. Since I’m building the data around the flights table, I only selected those flights originating from EWR since the weather data was really only available for EWR. I also limited it to the first 100 flights since it othewise takes a significant amount of time to format the data and import it into MongoDB.

myDb = dbConnect(MySQL(), user=username, password=password, dbname=dbname, host=host)
flightsQry = dbSendQuery(myDb, 'SELECT * FROM flights WHERE origin = "EWR" LIMIT 100;')
flights = fetch(flightsQry, n = -1)
airportsQry = dbSendQuery(myDb, 'SELECT * FROM airports;')
airports = fetch(airportsQry, n = -1)
airlinesQry = dbSendQuery(myDb, 'SELECT * FROM airlines;')
airlines = fetch(airlinesQry, n = -1)
planesQry = dbSendQuery(myDb, 'SELECT * FROM planes;')
planes = fetch(planesQry, n = -1)
weatherQry = dbSendQuery(myDb, 'SELECT * FROM weather;')
weather = fetch(weatherQry, n = -1)

Convert MySQL tables into nested data frames

Then, in order to nest the airport, airline, plane and weather details, I looped through the flights data frame and nested the aforementioned details as embedded data frames. To verify that this nesting process would produce the JSON format I wanted, I used the prettify and toJSON functions to confirm.

I initially tried and failed to apply a vectorized approach to this procedure. Furthermore, I also tried and failed to use the apply family of functions to accomplish this. Any help would be much appreciated regarding this, as I seem to often struggle to find more efficient approaches to these types of challenges.

for (i in 1:nrow(flights)) {
  flights$origin_detail[[i]] = airports[airports$faa == flights$origin[i],]
  flights$dest_detail[[i]] = airports[airports$faa == flights$dest[i],]
  flights$airline_detail[[i]] = airlines[airlines$carrier == flights$carrier[i],]
  flights$plane_detail[[i]] = planes[planes$tailnum == flights$tailnum[i],]
  # check if weather row is NA
  weather_row = weather[weather$origin == flights$origin[i] & 
                                   weather$year == flights$year[i] &
                                   weather$month == flights$month[i] &
                                   weather$day == flights$day[i] &
                                   weather$hour == flights$hour[i]
                                   , ][1,]
  if (!is.na(weather_row[1])) {
    flights$weather_detail[[i]] = weather[weather$origin == flights$origin[i] & 
                                   weather$year == flights$year[i] &
                                   weather$month == flights$month[i] &
                                   weather$day == flights$day[i] &
                                   weather$hour == flights$hour[i]
                                   , ][1,]
  } 
}
# check JSON validity
prettify(toJSON(flights[1:3,]))
[
    {
        "year": 2013,
        "month": 1,
        "day": 1,
        "dep_time": 517,
        "dep_delay": 2,
        "arr_time": 830,
        "arr_delay": 11,
        "carrier": "UA",
        "tailnum": "N14228",
        "flight": 1545,
        "origin": "EWR",
        "dest": "IAH",
        "air_time": 227,
        "distance": 1400,
        "hour": 5,
        "minute": 17,
        "origin_detail": [
            {
                "faa": "EWR",
                "name": "Newark Liberty Intl",
                "lat": 40.6925,
                "lon": -74.1687,
                "alt": 18,
                "tz": -5,
                "dst": "A"
            }
        ],
        "dest_detail": [
            {
                "faa": "IAH",
                "name": "George Bush Intercontinental",
                "lat": 29.9844,
                "lon": -95.3414,
                "alt": 97,
                "tz": -6,
                "dst": "A"
            }
        ],
        "airline_detail": [
            {
                "carrier": "UA",
                "name": "United Air Lines Inc.\r"
            }
        ],
        "plane_detail": [
            {
                "tailnum": "N14228",
                "year": 1999,
                "type": "Fixed wing multi engine",
                "manufacturer": "BOEING",
                "model": "737-824",
                "engines": 2,
                "seats": 149,
                "engine": "Turbo-fan"
            }
        ],
        "weather_detail": {

        }
    },
    {
        "year": 2013,
        "month": 1,
        "day": 1,
        "dep_time": 554,
        "dep_delay": -4,
        "arr_time": 740,
        "arr_delay": 12,
        "carrier": "UA",
        "tailnum": "N39463",
        "flight": 1696,
        "origin": "EWR",
        "dest": "ORD",
        "air_time": 150,
        "distance": 719,
        "hour": 6,
        "minute": 54,
        "origin_detail": [
            {
                "faa": "EWR",
                "name": "Newark Liberty Intl",
                "lat": 40.6925,
                "lon": -74.1687,
                "alt": 18,
                "tz": -5,
                "dst": "A"
            }
        ],
        "dest_detail": [
            {
                "faa": "ORD",
                "name": "Chicago Ohare Intl",
                "lat": 41.9786,
                "lon": -87.9048,
                "alt": 668,
                "tz": -6,
                "dst": "A"
            }
        ],
        "airline_detail": [
            {
                "carrier": "UA",
                "name": "United Air Lines Inc.\r"
            }
        ],
        "plane_detail": [
            {
                "tailnum": "N39463",
                "year": 2012,
                "type": "Fixed wing multi engine",
                "manufacturer": "BOEING",
                "model": "737-924ER",
                "engines": 2,
                "seats": 191,
                "engine": "Turbo-fan"
            }
        ],
        "weather_detail": [
            {
                "origin": "EWR",
                "year": 2013,
                "month": 1,
                "day": 1,
                "hour": 6,
                "temp": 39.02,
                "dewp": 26.06,
                "humid": 59.37,
                "wind_dir": 270,
                "wind_speed": 10.357,
                "wind_gust": 11.9187,
                "precip": 0,
                "pressure": 1012,
                "visib": 10
            }
        ]
    },
    {
        "year": 2013,
        "month": 1,
        "day": 1,
        "dep_time": 555,
        "dep_delay": -5,
        "arr_time": 913,
        "arr_delay": 19,
        "carrier": "B6",
        "tailnum": "N516JB",
        "flight": 507,
        "origin": "EWR",
        "dest": "FLL",
        "air_time": 158,
        "distance": 1065,
        "hour": 6,
        "minute": 55,
        "origin_detail": [
            {
                "faa": "EWR",
                "name": "Newark Liberty Intl",
                "lat": 40.6925,
                "lon": -74.1687,
                "alt": 18,
                "tz": -5,
                "dst": "A"
            }
        ],
        "dest_detail": [
            {
                "faa": "FLL",
                "name": "Fort Lauderdale Hollywood Intl",
                "lat": 26.0726,
                "lon": -80.1527,
                "alt": 9,
                "tz": -5,
                "dst": "A"
            }
        ],
        "airline_detail": [
            {
                "carrier": "B6",
                "name": "JetBlue Airways\r"
            }
        ],
        "plane_detail": [
            {
                "tailnum": "N516JB",
                "year": 2000,
                "type": "Fixed wing multi engine",
                "manufacturer": "AIRBUS INDUSTRIE",
                "model": "A320-232",
                "engines": 2,
                "seats": 200,
                "engine": "Turbo-fan"
            }
        ],
        "weather_detail": [
            {
                "origin": "EWR",
                "year": 2013,
                "month": 1,
                "day": 1,
                "hour": 6,
                "temp": 39.02,
                "dewp": 26.06,
                "humid": 59.37,
                "wind_dir": 270,
                "wind_speed": 10.357,
                "wind_gust": 11.9187,
                "precip": 0,
                "pressure": 1012,
                "visib": 10
            }
        ]
    }
]
 

Import JSON file into MongoDB

Having confirmed that the JSON output was valid, I then researched potential MongoDB packages for R and settled on mongolite as it builds on jsonlite, which I found to be more user-friendly than the alternatives. I also installed MongoDB locally and started a mongod service in order to import data and interact with them. Lastly, I run a few simple queries and then finally drop the collection.

m = mongo(collection = 'flights')
m$insert(flights)

Complete! Processed total of 100 rows.
$nInserted
[1] 100

$nMatched
[1] 0

$nRemoved
[1] 0

$nUpserted
[1] 0

$writeErrors
list()
# check record counts
m$count()
[1] 100
# perform query and retrieve data
m$find( '{ "carrier": "UA" }' )

 Found 51 records...
 Imported 51 records. Simplifying into dataframe...
m$find( ' { "plane_detail.manufacturer" : "BOEING" } ' )

 Found 44 records...
 Imported 44 records. Simplifying into dataframe...
# list distinct values
m$distinct("plane_detail.manufacturer")
[1] "BOEING"             "AIRBUS INDUSTRIE"   "AIRBUS"             "EMBRAER"           
[5] "HURLEY JAMES LARRY" "BOMBARDIER INC"    
# remove the collection
m$drop()
[1] TRUE
LS0tCnRpdGxlOiAiSGFvLVByb2plY3Q0IgpvdXRwdXQ6IAogIGh0bWxfbm90ZWJvb2s6CiAgICB0aGVtZTogeWV0aQogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZm9sZGluZzogc2hvdwotLS0KCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9VFJVRX0KbGlicmFyeShkYkNvbm5lY3QpCmxpYnJhcnkoanNvbmxpdGUpCmxpYnJhcnkobW9uZ29saXRlKQoKc2V0d2QoIn4vR29vZ2xlIERyaXZlL0NVTlkvZ2l0L0RBVEE2MDcvUHJvamVjdDQiKQpgYGAKCgpgYGB7ciBlY2hvPUZBTFNFfQp1c2VybmFtZSA9ICdyb290JwpwYXNzd29yZCA9ICdwYXNzd29yZCcKZGJuYW1lID0gJ2ZsaWdodHMnCmhvc3QgPSAnMTI3LjAuMC4xJwpgYGAKCiNJbXBvcnQgZGF0YSBmcm9tIE15U1FMIyAgCgpBZnRlciBsb2FkaW5nIHRoZSBmbGlnaHRzIGRhdGFiYXNlIGludG8gYSBsb2NhbCBNeVNRTCBkYXRhYmFzZSwgSSB0aGVuIGltcG9ydGVkIGVhY2ggb2YgdGhlIHRhYmxlcyBpbnRvIFIgZGF0YSBmcmFtZXMuIFNpbmNlIEknbSBidWlsZGluZyB0aGUgZGF0YSBhcm91bmQgdGhlIGZsaWdodHMgdGFibGUsIEkgb25seSBzZWxlY3RlZCB0aG9zZSBmbGlnaHRzIG9yaWdpbmF0aW5nIGZyb20gRVdSIHNpbmNlIHRoZSB3ZWF0aGVyIGRhdGEgd2FzIHJlYWxseSBvbmx5IGF2YWlsYWJsZSBmb3IgRVdSLiBJIGFsc28gbGltaXRlZCBpdCB0byB0aGUgZmlyc3QgMTAwIGZsaWdodHMgc2luY2UgaXQgb3RoZXdpc2UgdGFrZXMgYSBzaWduaWZpY2FudCBhbW91bnQgb2YgdGltZSB0byBmb3JtYXQgdGhlIGRhdGEgYW5kIGltcG9ydCBpdCBpbnRvIE1vbmdvREIuICAKCmBgYHtyfQpteURiID0gZGJDb25uZWN0KE15U1FMKCksIHVzZXI9dXNlcm5hbWUsIHBhc3N3b3JkPXBhc3N3b3JkLCBkYm5hbWU9ZGJuYW1lLCBob3N0PWhvc3QpCgpmbGlnaHRzUXJ5ID0gZGJTZW5kUXVlcnkobXlEYiwgJ1NFTEVDVCAqIEZST00gZmxpZ2h0cyBXSEVSRSBvcmlnaW4gPSAiRVdSIiBMSU1JVCAxMDA7JykKZmxpZ2h0cyA9IGZldGNoKGZsaWdodHNRcnksIG4gPSAtMSkKCmFpcnBvcnRzUXJ5ID0gZGJTZW5kUXVlcnkobXlEYiwgJ1NFTEVDVCAqIEZST00gYWlycG9ydHM7JykKYWlycG9ydHMgPSBmZXRjaChhaXJwb3J0c1FyeSwgbiA9IC0xKQoKYWlybGluZXNRcnkgPSBkYlNlbmRRdWVyeShteURiLCAnU0VMRUNUICogRlJPTSBhaXJsaW5lczsnKQphaXJsaW5lcyA9IGZldGNoKGFpcmxpbmVzUXJ5LCBuID0gLTEpCgpwbGFuZXNRcnkgPSBkYlNlbmRRdWVyeShteURiLCAnU0VMRUNUICogRlJPTSBwbGFuZXM7JykKcGxhbmVzID0gZmV0Y2gocGxhbmVzUXJ5LCBuID0gLTEpCgp3ZWF0aGVyUXJ5ID0gZGJTZW5kUXVlcnkobXlEYiwgJ1NFTEVDVCAqIEZST00gd2VhdGhlcjsnKQp3ZWF0aGVyID0gZmV0Y2god2VhdGhlclFyeSwgbiA9IC0xKQpgYGAKCiNDb252ZXJ0IE15U1FMIHRhYmxlcyBpbnRvIG5lc3RlZCBkYXRhIGZyYW1lcyMgIAoKVGhlbiwgaW4gb3JkZXIgdG8gbmVzdCB0aGUgYWlycG9ydCwgYWlybGluZSwgcGxhbmUgYW5kIHdlYXRoZXIgZGV0YWlscywgSSBsb29wZWQgdGhyb3VnaCB0aGUgZmxpZ2h0cyBkYXRhIGZyYW1lIGFuZCBuZXN0ZWQgdGhlIGFmb3JlbWVudGlvbmVkIGRldGFpbHMgYXMgZW1iZWRkZWQgZGF0YSBmcmFtZXMuIFRvIHZlcmlmeSB0aGF0IHRoaXMgbmVzdGluZyBwcm9jZXNzIHdvdWxkIHByb2R1Y2UgdGhlIEpTT04gZm9ybWF0IEkgd2FudGVkLCBJIHVzZWQgdGhlIGBwcmV0dGlmeWAgYW5kIGB0b0pTT05gIGZ1bmN0aW9ucyB0byBjb25maXJtLiAgCgpJIGluaXRpYWxseSB0cmllZCBhbmQgZmFpbGVkIHRvIGFwcGx5IGEgdmVjdG9yaXplZCBhcHByb2FjaCB0byB0aGlzIHByb2NlZHVyZS4gRnVydGhlcm1vcmUsIEkgYWxzbyB0cmllZCBhbmQgZmFpbGVkIHRvIHVzZSB0aGUgYGFwcGx5YCBmYW1pbHkgb2YgZnVuY3Rpb25zIHRvIGFjY29tcGxpc2ggdGhpcy4gQW55IGhlbHAgd291bGQgYmUgbXVjaCBhcHByZWNpYXRlZCByZWdhcmRpbmcgdGhpcywgYXMgSSBzZWVtIHRvIG9mdGVuIHN0cnVnZ2xlIHRvIGZpbmQgbW9yZSBlZmZpY2llbnQgYXBwcm9hY2hlcyB0byB0aGVzZSB0eXBlcyBvZiBjaGFsbGVuZ2VzLiAgCgpgYGB7cn0KZm9yIChpIGluIDE6bnJvdyhmbGlnaHRzKSkgewogIGZsaWdodHMkb3JpZ2luX2RldGFpbFtbaV1dID0gYWlycG9ydHNbYWlycG9ydHMkZmFhID09IGZsaWdodHMkb3JpZ2luW2ldLF0KICBmbGlnaHRzJGRlc3RfZGV0YWlsW1tpXV0gPSBhaXJwb3J0c1thaXJwb3J0cyRmYWEgPT0gZmxpZ2h0cyRkZXN0W2ldLF0KICBmbGlnaHRzJGFpcmxpbmVfZGV0YWlsW1tpXV0gPSBhaXJsaW5lc1thaXJsaW5lcyRjYXJyaWVyID09IGZsaWdodHMkY2FycmllcltpXSxdCiAgZmxpZ2h0cyRwbGFuZV9kZXRhaWxbW2ldXSA9IHBsYW5lc1twbGFuZXMkdGFpbG51bSA9PSBmbGlnaHRzJHRhaWxudW1baV0sXQogICMgY2hlY2sgaWYgd2VhdGhlciByb3cgaXMgTkEKICB3ZWF0aGVyX3JvdyA9IHdlYXRoZXJbd2VhdGhlciRvcmlnaW4gPT0gZmxpZ2h0cyRvcmlnaW5baV0gJiAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB3ZWF0aGVyJHllYXIgPT0gZmxpZ2h0cyR5ZWFyW2ldICYKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB3ZWF0aGVyJG1vbnRoID09IGZsaWdodHMkbW9udGhbaV0gJgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHdlYXRoZXIkZGF5ID09IGZsaWdodHMkZGF5W2ldICYKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB3ZWF0aGVyJGhvdXIgPT0gZmxpZ2h0cyRob3VyW2ldCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgLCBdWzEsXQogIGlmICghaXMubmEod2VhdGhlcl9yb3dbMV0pKSB7CiAgICBmbGlnaHRzJHdlYXRoZXJfZGV0YWlsW1tpXV0gPSB3ZWF0aGVyW3dlYXRoZXIkb3JpZ2luID09IGZsaWdodHMkb3JpZ2luW2ldICYgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgd2VhdGhlciR5ZWFyID09IGZsaWdodHMkeWVhcltpXSAmCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgd2VhdGhlciRtb250aCA9PSBmbGlnaHRzJG1vbnRoW2ldICYKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB3ZWF0aGVyJGRheSA9PSBmbGlnaHRzJGRheVtpXSAmCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgd2VhdGhlciRob3VyID09IGZsaWdodHMkaG91cltpXQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICwgXVsxLF0KICB9IAp9CgojIGNoZWNrIEpTT04gdmFsaWRpdHkKcHJldHRpZnkodG9KU09OKGZsaWdodHNbMTozLF0pKQpgYGAKCiNJbXBvcnQgSlNPTiBmaWxlIGludG8gTW9uZ29EQiMgIAoKSGF2aW5nIGNvbmZpcm1lZCB0aGF0IHRoZSBKU09OIG91dHB1dCB3YXMgdmFsaWQsIEkgdGhlbiByZXNlYXJjaGVkIHBvdGVudGlhbCBNb25nb0RCIHBhY2thZ2VzIGZvciBSIGFuZCBzZXR0bGVkIG9uIG1vbmdvbGl0ZSBhcyBpdCBidWlsZHMgb24ganNvbmxpdGUsIHdoaWNoIEkgZm91bmQgdG8gYmUgbW9yZSB1c2VyLWZyaWVuZGx5IHRoYW4gdGhlIGFsdGVybmF0aXZlcy4gSSBhbHNvIGluc3RhbGxlZCBNb25nb0RCIGxvY2FsbHkgYW5kIHN0YXJ0ZWQgYSBtb25nb2Qgc2VydmljZSBpbiBvcmRlciB0byBpbXBvcnQgZGF0YSBhbmQgaW50ZXJhY3Qgd2l0aCB0aGVtLiBMYXN0bHksIEkgcnVuIGEgZmV3IHNpbXBsZSBxdWVyaWVzIGFuZCB0aGVuIGZpbmFsbHkgZHJvcCB0aGUgY29sbGVjdGlvbi4gIAoKYGBge3J9Cm0gPSBtb25nbyhjb2xsZWN0aW9uID0gJ2ZsaWdodHMnKQptJGluc2VydChmbGlnaHRzKQojIGNoZWNrIHJlY29yZCBjb3VudHMKbSRjb3VudCgpCgojIHBlcmZvcm0gcXVlcnkgYW5kIHJldHJpZXZlIGRhdGEKbSRmaW5kKCAneyAiY2FycmllciI6ICJVQSIgfScgKQptJGZpbmQoICcgeyAicGxhbmVfZGV0YWlsLm1hbnVmYWN0dXJlciIgOiAiQk9FSU5HIiB9ICcgKQoKIyBsaXN0IGRpc3RpbmN0IHZhbHVlcwptJGRpc3RpbmN0KCJwbGFuZV9kZXRhaWwubWFudWZhY3R1cmVyIikKCiMgcmVtb3ZlIHRoZSBjb2xsZWN0aW9uCm0kZHJvcCgpCmBgYAo=