Load MySQL Flights database tables to R dataframes

#install DBI
#install.packages("RMySQL")
#instal.pakages("rmongodb")
suppressWarnings(library(RMySQL))
## Loading required package: DBI
suppressWarnings(library(rmongodb))
suppressWarnings(library(RJSONIO))
## Note: the specification for S3 class "AsIs" in package 'RJSONIO' seems equivalent to one from package 'jsonlite': not turning on duplicate class definitions for this class.
suppressWarnings(library(rjson))
## 
## Attaching package: 'rjson'
## The following objects are masked from 'package:RJSONIO':
## 
##     fromJSON, toJSON
suppressWarnings(library (plyr))

connect to MySQL server

connect to the flights database

drv <- dbDriver("MySQL")
con <- dbConnect(drv, user="root", password = "tara1975", dbname = "flights", 
                 host = "localhost")

dbListTables(con)
## [1] "airlines" "airports" "flights"  "planes"   "weather"

migrate flights table from MySQL DB to R data frame

query <- "select * from flights;"
flights <- dbGetQuery(con, query)
head(flights, 10)
##    year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1  2013     1   1      517         2      830        11      UA  N14228
## 2  2013     1   1      533         4      850        20      UA  N24211
## 3  2013     1   1      542         2      923        33      AA  N619AA
## 4  2013     1   1      544        -1     1004       -18      B6  N804JB
## 5  2013     1   1      554        -6      812       -25      DL  N668DN
## 6  2013     1   1      554        -4      740        12      UA  N39463
## 7  2013     1   1      555        -5      913        19      B6  N516JB
## 8  2013     1   1      557        -3      709       -14      EV  N829AS
## 9  2013     1   1      557        -3      838        -8      B6  N593JB
## 10 2013     1   1      558        -2      753         8      AA  N3ALAA
##    flight origin dest air_time distance hour minute
## 1    1545    EWR  IAH      227     1400    5     17
## 2    1714    LGA  IAH      227     1416    5     33
## 3    1141    JFK  MIA      160     1089    5     42
## 4     725    JFK  BQN      183     1576    5     44
## 5     461    LGA  ATL      116      762    6     54
## 6    1696    EWR  ORD      150      719    6     54
## 7     507    EWR  FLL      158     1065    6     55
## 8    5708    LGA  IAD       53      229    6     57
## 9      79    JFK  MCO      140      944    6     57
## 10    301    LGA  ORD      138      733    6     58
nrow(flights)
## [1] 336776

migrate airlines table from MySQL DB to R data frame

query <- "select * from airlines;"
airlines <- dbGetQuery(con, query)
head(airlines, 10)
##    carrier                          name
## 1       9E           Endeavor Air Inc.\r
## 2       AA      American Airlines Inc.\r
## 3       AS        Alaska Airlines Inc.\r
## 4       B6             JetBlue Airways\r
## 5       DL        Delta Air Lines Inc.\r
## 6       EV    ExpressJet Airlines Inc.\r
## 7       F9      Frontier Airlines Inc.\r
## 8       FL AirTran Airways Corporation\r
## 9       HA      Hawaiian Airlines Inc.\r
## 10      MQ                   Envoy Air\r
nrow(airlines)
## [1] 16

migrate weather table from MySQL DB to R data frame

query <- "select * from weather;"
weather <- dbGetQuery(con, query)
head(weather, 10)
##    origin year month day hour  temp  dewp humid wind_dir wind_speed
## 1     EWR 2013     1   1    0 37.04 21.92 53.97      230   10.35702
## 2     EWR 2013     1   1    1 37.04 21.92 53.97      230   13.80936
## 3     EWR 2013     1   1    2 37.94 21.92 52.09      230   12.65858
## 4     EWR 2013     1   1    3 37.94 23.00 54.51      230   13.80936
## 5     EWR 2013     1   1    4 37.94 24.08 57.04      240   14.96014
## 6     EWR 2013     1   1    6 39.02 26.06 59.37      270   10.35702
## 7     EWR 2013     1   1    7 39.02 26.96 61.63      250    8.05546
## 8     EWR 2013     1   1    8 39.02 28.04 64.43      240   11.50780
## 9     EWR 2013     1   1    9 39.92 28.04 62.21      250   12.65858
## 10    EWR 2013     1   1   10 39.02 28.04 64.43      260   12.65858
##    wind_gust precip pressure visib
## 1  11.918651      0   1013.9    10
## 2  15.891535      0   1013.0    10
## 3  14.567241      0   1012.6    10
## 4  15.891535      0   1012.7    10
## 5  17.215830      0   1012.8    10
## 6  11.918651      0   1012.0    10
## 7   9.270062      0   1012.3    10
## 8  13.242946      0   1012.5    10
## 9  14.567241      0   1012.2    10
## 10 14.567241      0   1011.9    10
nrow(weather)
## [1] 8719

migrate planes table from MySQL DB to R data frame

query <- "select * from planes;"
planes <- dbGetQuery(con, query)
head(planes, 10)
##    tailnum year                    type     manufacturer     model engines
## 1   N10156 2004 Fixed wing multi engine          EMBRAER EMB-145XR       2
## 2   N102UW 1998 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 3   N103US 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 4   N104UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 5   N10575 2002 Fixed wing multi engine          EMBRAER EMB-145LR       2
## 6   N105UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 7   N107US 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 8   N108UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 9   N109UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 10  N110UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
##    seats speed    engine
## 1     55    NA Turbo-fan
## 2    182    NA Turbo-fan
## 3    182    NA Turbo-fan
## 4    182    NA Turbo-fan
## 5     55    NA Turbo-fan
## 6    182    NA Turbo-fan
## 7    182    NA Turbo-fan
## 8    182    NA Turbo-fan
## 9    182    NA Turbo-fan
## 10   182    NA Turbo-fan
nrow(planes)
## [1] 3322

migrate airports table from MySQL DB to R data frame

query <- "select * from airports;"
airports <- dbGetQuery(con, query)
head(airports, 10)
##    faa                           name      lat        lon  alt tz dst
## 1  04G              Lansdowne Airport 41.13047  -80.61958 1044 -5   A
## 2  06A  Moton Field Municipal Airport 32.46057  -85.68003  264 -5   A
## 3  06C            Schaumburg Regional 41.98934  -88.10124  801 -6   A
## 4  06N                Randall Airport 41.43191  -74.39156  523 -5   A
## 5  09J          Jekyll Island Airport 31.07447  -81.42778   11 -4   A
## 6  0A9 Elizabethton Municipal Airport 36.37122  -82.17342 1593 -4   A
## 7  0G6        Williams County Airport 41.46731  -84.50678  730 -5   A
## 8  0G7  Finger Lakes Regional Airport 42.88356  -76.78123  492 -5   A
## 9  0P2   Shoestring Aviation Airfield 39.79482  -76.64719 1000 -5   U
## 10 0S9          Jefferson County Intl 48.05381 -122.81064  108 -8   A
nrow(airports)
## [1] 1397

disconnect from MySQL

dbDisconnect(con) 
## [1] TRUE
dbUnloadDriver(drv)
## [1] TRUE

Connect and load to Mongo DB

Reference - https://cran.r-project.org/web/packages/rmongodb/vignettes/rmongodb_introduction.html
- https://rpubs.com/vbriot28

mongo <- mongo.create()
mongo.is.connected(mongo)
## [1] TRUE
mongo
## [1] 0
## attr(,"mongo")
## <pointer: 0x0000000013658400>
## attr(,"class")
## [1] "mongo"
## attr(,"host")
## [1] "127.0.0.1"
## attr(,"name")
## [1] ""
## attr(,"username")
## [1] ""
## attr(,"password")
## [1] ""
## attr(,"db")
## [1] "admin"
## attr(,"timeout")
## [1] 0

Convert flights DB R dataframe to JSON then to Mongo DB BSON files

b_airports <- lapply(split(airports, 1:nrow(airports)), function(x) mongo.bson.from.JSON(toJSON(x)))

#just load the first 5,000 rows (the flights df has 336,776 rows)
#as it takes too long to load the entire table
#b_flights <- lapply(split(flights, 1:500), function(x) #mongo.bson.from.JSON(toJSON(x)))

b_weather <- lapply(split(weather, 1:nrow(weather)), function(x) mongo.bson.from.JSON(toJSON(x)))

b_airlines <- lapply(split(airlines, 1:nrow(airlines)), function(x) mongo.bson.from.JSON(toJSON(x)))

b_planes <- lapply(split(planes, 1:nrow(planes)), function(x) mongo.bson.from.JSON(toJSON(x)))

Connect to mongo DB database m_flights

db <- "m_flights"

m_airlines <- "mflights.airlines"
m_airports <- "mflights.airports"
m_weather <- "mflights.weather"
m_planes <- "mflights.planes"
m_flights <-"mflights.flights"

mongo.get.database.collections(mongo, db)
## character(0)

Load records from BSON to airlines MongoDB collection

#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_airlines) != 0){
  mongo.remove(mongo, m_airlines, criteria = mongo.bson.empty()) 
}
## [1] TRUE
mongo.insert.batch(mongo, m_airlines, b_airlines)
## [1] TRUE

Load records from BSON to airports MongoDB collection

#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_airports) != 0){
  mongo.remove(mongo, m_airports, criteria = mongo.bson.empty()) 
}
## [1] TRUE
mongo.insert.batch(mongo, m_airports, b_airports)
## [1] TRUE

Load records from BSON to weather MongoDB collection

#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_weather) != 0){
  mongo.remove(mongo, m_weather, criteria = mongo.bson.empty()) 
}
## [1] TRUE
mongo.insert.batch(mongo, m_weather, b_weather)
## [1] TRUE

Load records from BSON to planes MongoDB collection

#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_planes) != 0){
  mongo.remove(mongo, m_planes, criteria = mongo.bson.empty()) 
}
## [1] TRUE
mongo.insert.batch(mongo, m_planes, b_planes)
## [1] TRUE

Load records from BSON to flights MongoDB collection

#make sure we delete the records from the previous run's collection
#before loading
#if(mongo.count(mongo,m_flights) != 0){
#  mongo.remove(mongo, m_flights, criteria = mongo.bson.empty()) 
#}

#mongo.insert.batch(mongo, m_flights, b_flights)

Compare MongoDB and MySQL flights DB

Check to make sure counts match for both mongoDB collection and dataframe

mongo.count(mongo, m_planes) == nrow(planes)
## [1] TRUE
mongo.count(mongo, m_airports) == nrow(airports)
## [1] TRUE
mongo.count(mongo, m_weather)  == nrow(weather)
## [1] TRUE
mongo.count(mongo, m_airlines)  == nrow(airlines)
## [1] TRUE

Examine first 5 records from airplane collection

l_planes = mongo.find.all(mongo, m_planes, limit=5)
l_planes
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5a8"
## 
## [[1]]$tailnum
## [1] "N10156"
## 
## [[1]]$year
## [1] 2004
## 
## [[1]]$type
## [1] "Fixed wing multi engine"
## 
## [[1]]$manufacturer
## [1] "EMBRAER"
## 
## [[1]]$model
## [1] "EMB-145XR"
## 
## [[1]]$engines
## [1] 2
## 
## [[1]]$seats
## [1] 55
## 
## [[1]]$speed
## [1] "NA"
## 
## [[1]]$engine
## [1] "Turbo-fan"
## 
## 
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5a9"
## 
## [[2]]$tailnum
## [1] "N102UW"
## 
## [[2]]$year
## [1] 1998
## 
## [[2]]$type
## [1] "Fixed wing multi engine"
## 
## [[2]]$manufacturer
## [1] "AIRBUS INDUSTRIE"
## 
## [[2]]$model
## [1] "A320-214"
## 
## [[2]]$engines
## [1] 2
## 
## [[2]]$seats
## [1] 182
## 
## [[2]]$speed
## [1] "NA"
## 
## [[2]]$engine
## [1] "Turbo-fan"
## 
## 
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5aa"
## 
## [[3]]$tailnum
## [1] "N103US"
## 
## [[3]]$year
## [1] 1999
## 
## [[3]]$type
## [1] "Fixed wing multi engine"
## 
## [[3]]$manufacturer
## [1] "AIRBUS INDUSTRIE"
## 
## [[3]]$model
## [1] "A320-214"
## 
## [[3]]$engines
## [1] 2
## 
## [[3]]$seats
## [1] 182
## 
## [[3]]$speed
## [1] "NA"
## 
## [[3]]$engine
## [1] "Turbo-fan"
## 
## 
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5ab"
## 
## [[4]]$tailnum
## [1] "N104UW"
## 
## [[4]]$year
## [1] 1999
## 
## [[4]]$type
## [1] "Fixed wing multi engine"
## 
## [[4]]$manufacturer
## [1] "AIRBUS INDUSTRIE"
## 
## [[4]]$model
## [1] "A320-214"
## 
## [[4]]$engines
## [1] 2
## 
## [[4]]$seats
## [1] 182
## 
## [[4]]$speed
## [1] "NA"
## 
## [[4]]$engine
## [1] "Turbo-fan"
## 
## 
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5ac"
## 
## [[5]]$tailnum
## [1] "N10575"
## 
## [[5]]$year
## [1] 2002
## 
## [[5]]$type
## [1] "Fixed wing multi engine"
## 
## [[5]]$manufacturer
## [1] "EMBRAER"
## 
## [[5]]$model
## [1] "EMB-145LR"
## 
## [[5]]$engines
## [1] 2
## 
## [[5]]$seats
## [1] 55
## 
## [[5]]$speed
## [1] "NA"
## 
## [[5]]$engine
## [1] "Turbo-fan"
#convert list to a dataframe for comparison
df_lplanes <- ldply (l_planes, data.frame)
df_lplanes
##                       X_id tailnum year                    type
## 1 57257a2dd7b9a8eac55ee5a8  N10156 2004 Fixed wing multi engine
## 2 57257a2dd7b9a8eac55ee5a9  N102UW 1998 Fixed wing multi engine
## 3 57257a2dd7b9a8eac55ee5aa  N103US 1999 Fixed wing multi engine
## 4 57257a2dd7b9a8eac55ee5ab  N104UW 1999 Fixed wing multi engine
## 5 57257a2dd7b9a8eac55ee5ac  N10575 2002 Fixed wing multi engine
##       manufacturer     model engines seats speed    engine
## 1          EMBRAER EMB-145XR       2    55    NA Turbo-fan
## 2 AIRBUS INDUSTRIE  A320-214       2   182    NA Turbo-fan
## 3 AIRBUS INDUSTRIE  A320-214       2   182    NA Turbo-fan
## 4 AIRBUS INDUSTRIE  A320-214       2   182    NA Turbo-fan
## 5          EMBRAER EMB-145LR       2    55    NA Turbo-fan
#compare against the first 5 rows in the planes dataframe taken from MySQL
df_lplanes[,2:ncol(df_lplanes)] == head(planes, 5)
##      tailnum year type manufacturer model engines seats speed engine
## [1,]    TRUE TRUE TRUE         TRUE  TRUE    TRUE  TRUE    NA   TRUE
## [2,]    TRUE TRUE TRUE         TRUE  TRUE    TRUE  TRUE    NA   TRUE
## [3,]    TRUE TRUE TRUE         TRUE  TRUE    TRUE  TRUE    NA   TRUE
## [4,]    TRUE TRUE TRUE         TRUE  TRUE    TRUE  TRUE    NA   TRUE
## [5,]    TRUE TRUE TRUE         TRUE  TRUE    TRUE  TRUE    NA   TRUE

Examine first 5 records from weather collection

l_weather = mongo.find.all(mongo, m_weather, limit=5)
l_weather
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ec399"
## 
## [[1]]$origin
## [1] "EWR"
## 
## [[1]]$year
## [1] 2013
## 
## [[1]]$month
## [1] 1
## 
## [[1]]$day
## [1] 1
## 
## [[1]]$hour
## [1] 0
## 
## [[1]]$temp
## [1] 37.04
## 
## [[1]]$dewp
## [1] 21.92
## 
## [[1]]$humid
## [1] 53.97
## 
## [[1]]$wind_dir
## [1] 230
## 
## [[1]]$wind_speed
## [1] 10.35702
## 
## [[1]]$wind_gust
## [1] 11.91865
## 
## [[1]]$precip
## [1] 0
## 
## [[1]]$pressure
## [1] 1013.9
## 
## [[1]]$visib
## [1] 10
## 
## 
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39a"
## 
## [[2]]$origin
## [1] "EWR"
## 
## [[2]]$year
## [1] 2013
## 
## [[2]]$month
## [1] 1
## 
## [[2]]$day
## [1] 1
## 
## [[2]]$hour
## [1] 1
## 
## [[2]]$temp
## [1] 37.04
## 
## [[2]]$dewp
## [1] 21.92
## 
## [[2]]$humid
## [1] 53.97
## 
## [[2]]$wind_dir
## [1] 230
## 
## [[2]]$wind_speed
## [1] 13.80936
## 
## [[2]]$wind_gust
## [1] 15.89154
## 
## [[2]]$precip
## [1] 0
## 
## [[2]]$pressure
## [1] 1013
## 
## [[2]]$visib
## [1] 10
## 
## 
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39b"
## 
## [[3]]$origin
## [1] "EWR"
## 
## [[3]]$year
## [1] 2013
## 
## [[3]]$month
## [1] 1
## 
## [[3]]$day
## [1] 1
## 
## [[3]]$hour
## [1] 2
## 
## [[3]]$temp
## [1] 37.94
## 
## [[3]]$dewp
## [1] 21.92
## 
## [[3]]$humid
## [1] 52.09
## 
## [[3]]$wind_dir
## [1] 230
## 
## [[3]]$wind_speed
## [1] 12.65858
## 
## [[3]]$wind_gust
## [1] 14.56724
## 
## [[3]]$precip
## [1] 0
## 
## [[3]]$pressure
## [1] 1012.6
## 
## [[3]]$visib
## [1] 10
## 
## 
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39c"
## 
## [[4]]$origin
## [1] "EWR"
## 
## [[4]]$year
## [1] 2013
## 
## [[4]]$month
## [1] 1
## 
## [[4]]$day
## [1] 1
## 
## [[4]]$hour
## [1] 3
## 
## [[4]]$temp
## [1] 37.94
## 
## [[4]]$dewp
## [1] 23
## 
## [[4]]$humid
## [1] 54.51
## 
## [[4]]$wind_dir
## [1] 230
## 
## [[4]]$wind_speed
## [1] 13.80936
## 
## [[4]]$wind_gust
## [1] 15.89154
## 
## [[4]]$precip
## [1] 0
## 
## [[4]]$pressure
## [1] 1012.7
## 
## [[4]]$visib
## [1] 10
## 
## 
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39d"
## 
## [[5]]$origin
## [1] "EWR"
## 
## [[5]]$year
## [1] 2013
## 
## [[5]]$month
## [1] 1
## 
## [[5]]$day
## [1] 1
## 
## [[5]]$hour
## [1] 4
## 
## [[5]]$temp
## [1] 37.94
## 
## [[5]]$dewp
## [1] 24.08
## 
## [[5]]$humid
## [1] 57.04
## 
## [[5]]$wind_dir
## [1] 240
## 
## [[5]]$wind_speed
## [1] 14.96014
## 
## [[5]]$wind_gust
## [1] 17.21583
## 
## [[5]]$precip
## [1] 0
## 
## [[5]]$pressure
## [1] 1012.8
## 
## [[5]]$visib
## [1] 10
#convert list to a dataframe for comparison
df_lweather <- ldply (l_weather, data.frame)
df_lweather
##                       X_id origin year month day hour  temp  dewp humid
## 1 57257a2dd7b9a8eac55ec399    EWR 2013     1   1    0 37.04 21.92 53.97
## 2 57257a2dd7b9a8eac55ec39a    EWR 2013     1   1    1 37.04 21.92 53.97
## 3 57257a2dd7b9a8eac55ec39b    EWR 2013     1   1    2 37.94 21.92 52.09
## 4 57257a2dd7b9a8eac55ec39c    EWR 2013     1   1    3 37.94 23.00 54.51
## 5 57257a2dd7b9a8eac55ec39d    EWR 2013     1   1    4 37.94 24.08 57.04
##   wind_dir wind_speed wind_gust precip pressure visib
## 1      230   10.35702  11.91865      0   1013.9    10
## 2      230   13.80936  15.89154      0   1013.0    10
## 3      230   12.65858  14.56724      0   1012.6    10
## 4      230   13.80936  15.89154      0   1012.7    10
## 5      240   14.96014  17.21583      0   1012.8    10
#compare against the first 5 rows in the weather dataframe taken from MySQL
df_lweather[,2:ncol(df_lweather)] == head(weather, 5)
##      origin year month  day hour temp dewp humid wind_dir wind_speed
## [1,]   TRUE TRUE  TRUE TRUE TRUE TRUE TRUE  TRUE     TRUE       TRUE
## [2,]   TRUE TRUE  TRUE TRUE TRUE TRUE TRUE  TRUE     TRUE       TRUE
## [3,]   TRUE TRUE  TRUE TRUE TRUE TRUE TRUE  TRUE     TRUE       TRUE
## [4,]   TRUE TRUE  TRUE TRUE TRUE TRUE TRUE  TRUE     TRUE       TRUE
## [5,]   TRUE TRUE  TRUE TRUE TRUE TRUE TRUE  TRUE     TRUE       TRUE
##      wind_gust precip pressure visib
## [1,]      TRUE   TRUE     TRUE  TRUE
## [2,]      TRUE   TRUE     TRUE  TRUE
## [3,]      TRUE   TRUE     TRUE  TRUE
## [4,]      TRUE   TRUE     TRUE  TRUE
## [5,]      TRUE   TRUE     TRUE  TRUE

Examine first 5 records from airports collection

l_airports = mongo.find.all(mongo, m_airports, limit=5)
l_airports
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe24"
## 
## [[1]]$faa
## [1] "04G"
## 
## [[1]]$name
## [1] "Lansdowne Airport"
## 
## [[1]]$lat
## [1] 41.13047
## 
## [[1]]$lon
## [1] -80.61958
## 
## [[1]]$alt
## [1] 1044
## 
## [[1]]$tz
## [1] -5
## 
## [[1]]$dst
## [1] "A"
## 
## 
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe25"
## 
## [[2]]$faa
## [1] "06A"
## 
## [[2]]$name
## [1] "Moton Field Municipal Airport"
## 
## [[2]]$lat
## [1] 32.46057
## 
## [[2]]$lon
## [1] -85.68003
## 
## [[2]]$alt
## [1] 264
## 
## [[2]]$tz
## [1] -5
## 
## [[2]]$dst
## [1] "A"
## 
## 
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe26"
## 
## [[3]]$faa
## [1] "06C"
## 
## [[3]]$name
## [1] "Schaumburg Regional"
## 
## [[3]]$lat
## [1] 41.98934
## 
## [[3]]$lon
## [1] -88.10124
## 
## [[3]]$alt
## [1] 801
## 
## [[3]]$tz
## [1] -6
## 
## [[3]]$dst
## [1] "A"
## 
## 
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe27"
## 
## [[4]]$faa
## [1] "06N"
## 
## [[4]]$name
## [1] "Randall Airport"
## 
## [[4]]$lat
## [1] 41.43191
## 
## [[4]]$lon
## [1] -74.39156
## 
## [[4]]$alt
## [1] 523
## 
## [[4]]$tz
## [1] -5
## 
## [[4]]$dst
## [1] "A"
## 
## 
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe28"
## 
## [[5]]$faa
## [1] "09J"
## 
## [[5]]$name
## [1] "Jekyll Island Airport"
## 
## [[5]]$lat
## [1] 31.07447
## 
## [[5]]$lon
## [1] -81.42778
## 
## [[5]]$alt
## [1] 11
## 
## [[5]]$tz
## [1] -4
## 
## [[5]]$dst
## [1] "A"
#convert list to a dataframe for comparison
df_lairports <- ldply (l_airports, data.frame)
df_lairports
##                       X_id faa                          name      lat
## 1 57257a2dd7b9a8eac55ebe24 04G             Lansdowne Airport 41.13047
## 2 57257a2dd7b9a8eac55ebe25 06A Moton Field Municipal Airport 32.46057
## 3 57257a2dd7b9a8eac55ebe26 06C           Schaumburg Regional 41.98934
## 4 57257a2dd7b9a8eac55ebe27 06N               Randall Airport 41.43191
## 5 57257a2dd7b9a8eac55ebe28 09J         Jekyll Island Airport 31.07447
##         lon  alt tz dst
## 1 -80.61958 1044 -5   A
## 2 -85.68003  264 -5   A
## 3 -88.10124  801 -6   A
## 4 -74.39156  523 -5   A
## 5 -81.42778   11 -4   A
#compare against the first 5 rows in the airport dataframe taken from MySQL
df_lairports[,2:ncol(df_lairports)] == head(airports, 5)
##       faa name  lat  lon  alt   tz  dst
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [4,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [5,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE

Examine first 5 records from airplane collection

l_airlines = mongo.find.all(mongo, m_airlines, limit=5)
l_airlines
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe14"
## 
## [[1]]$carrier
## [1] "9E"
## 
## [[1]]$name
## [1] "Endeavor Air Inc.\r"
## 
## 
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe15"
## 
## [[2]]$carrier
## [1] "AA"
## 
## [[2]]$name
## [1] "American Airlines Inc.\r"
## 
## 
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe16"
## 
## [[3]]$carrier
## [1] "AS"
## 
## [[3]]$name
## [1] "Alaska Airlines Inc.\r"
## 
## 
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe17"
## 
## [[4]]$carrier
## [1] "B6"
## 
## [[4]]$name
## [1] "JetBlue Airways\r"
## 
## 
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe18"
## 
## [[5]]$carrier
## [1] "DL"
## 
## [[5]]$name
## [1] "Delta Air Lines Inc.\r"
#convert list to a dataframe for comparison
df_lairlines <- ldply (l_airlines, data.frame)
df_lairlines
##                       X_id carrier                     name
## 1 57257a2dd7b9a8eac55ebe14      9E      Endeavor Air Inc.\r
## 2 57257a2dd7b9a8eac55ebe15      AA American Airlines Inc.\r
## 3 57257a2dd7b9a8eac55ebe16      AS   Alaska Airlines Inc.\r
## 4 57257a2dd7b9a8eac55ebe17      B6        JetBlue Airways\r
## 5 57257a2dd7b9a8eac55ebe18      DL   Delta Air Lines Inc.\r
#compare against the first 5 rows in the airlines dataframe taken from MySQL
df_lairlines[,2:ncol(df_lairlines)] == head(airlines, 5)
##      carrier name
## [1,]    TRUE TRUE
## [2,]    TRUE TRUE
## [3,]    TRUE TRUE
## [4,]    TRUE TRUE
## [5,]    TRUE TRUE

Comparison Between SQL and NoSQL

Reference: http://www.thegeekstuff.com/2014/01/sql-vs-nosql-db/

1.SQL databases are relational databases while NoSQL databases are graphical or network datases.
2. NoSQL databases strength lie on data that are naturally heirarchical (such as organization charts) while SQL databases are better in performing aggregate functions such as summation and averaging of column values.
3. SQL databases are vertically scalable while NoSQL databases are horizontally scalable. This means that we can increase the scale of SQL databases by increasing CPU speed while we can increase the scale of NoSQL databases by increasing the number of servers.
4. SQL databases are the preferred databases for applications with complex queries while NoSQL databases are the preferred databases for big data.
5.SQL databases emphasizes on ACID properties ( Atomicity, Consistency, Isolation and Durability) whereas the NoSQL database follows the Brewers CAP theorem ( Consistency, Availability and Partition tolerance ).
6. SQL databases are better for applications with heavy transaction processing because it provides better data integrity than NoSQL databases.