#install DBI
#install.packages("RMySQL")
#instal.pakages("rmongodb")
suppressWarnings(library(RMySQL))
## Loading required package: DBI
suppressWarnings(library(rmongodb))
suppressWarnings(library(RJSONIO))
## Note: the specification for S3 class "AsIs" in package 'RJSONIO' seems equivalent to one from package 'jsonlite': not turning on duplicate class definitions for this class.
suppressWarnings(library(rjson))
##
## Attaching package: 'rjson'
## The following objects are masked from 'package:RJSONIO':
##
## fromJSON, toJSON
suppressWarnings(library (plyr))
drv <- dbDriver("MySQL")
con <- dbConnect(drv, user="root", password = "tara1975", dbname = "flights",
host = "localhost")
dbListTables(con)
## [1] "airlines" "airports" "flights" "planes" "weather"
query <- "select * from flights;"
flights <- dbGetQuery(con, query)
head(flights, 10)
## year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1 2013 1 1 517 2 830 11 UA N14228
## 2 2013 1 1 533 4 850 20 UA N24211
## 3 2013 1 1 542 2 923 33 AA N619AA
## 4 2013 1 1 544 -1 1004 -18 B6 N804JB
## 5 2013 1 1 554 -6 812 -25 DL N668DN
## 6 2013 1 1 554 -4 740 12 UA N39463
## 7 2013 1 1 555 -5 913 19 B6 N516JB
## 8 2013 1 1 557 -3 709 -14 EV N829AS
## 9 2013 1 1 557 -3 838 -8 B6 N593JB
## 10 2013 1 1 558 -2 753 8 AA N3ALAA
## flight origin dest air_time distance hour minute
## 1 1545 EWR IAH 227 1400 5 17
## 2 1714 LGA IAH 227 1416 5 33
## 3 1141 JFK MIA 160 1089 5 42
## 4 725 JFK BQN 183 1576 5 44
## 5 461 LGA ATL 116 762 6 54
## 6 1696 EWR ORD 150 719 6 54
## 7 507 EWR FLL 158 1065 6 55
## 8 5708 LGA IAD 53 229 6 57
## 9 79 JFK MCO 140 944 6 57
## 10 301 LGA ORD 138 733 6 58
nrow(flights)
## [1] 336776
query <- "select * from airlines;"
airlines <- dbGetQuery(con, query)
head(airlines, 10)
## carrier name
## 1 9E Endeavor Air Inc.\r
## 2 AA American Airlines Inc.\r
## 3 AS Alaska Airlines Inc.\r
## 4 B6 JetBlue Airways\r
## 5 DL Delta Air Lines Inc.\r
## 6 EV ExpressJet Airlines Inc.\r
## 7 F9 Frontier Airlines Inc.\r
## 8 FL AirTran Airways Corporation\r
## 9 HA Hawaiian Airlines Inc.\r
## 10 MQ Envoy Air\r
nrow(airlines)
## [1] 16
query <- "select * from weather;"
weather <- dbGetQuery(con, query)
head(weather, 10)
## origin year month day hour temp dewp humid wind_dir wind_speed
## 1 EWR 2013 1 1 0 37.04 21.92 53.97 230 10.35702
## 2 EWR 2013 1 1 1 37.04 21.92 53.97 230 13.80936
## 3 EWR 2013 1 1 2 37.94 21.92 52.09 230 12.65858
## 4 EWR 2013 1 1 3 37.94 23.00 54.51 230 13.80936
## 5 EWR 2013 1 1 4 37.94 24.08 57.04 240 14.96014
## 6 EWR 2013 1 1 6 39.02 26.06 59.37 270 10.35702
## 7 EWR 2013 1 1 7 39.02 26.96 61.63 250 8.05546
## 8 EWR 2013 1 1 8 39.02 28.04 64.43 240 11.50780
## 9 EWR 2013 1 1 9 39.92 28.04 62.21 250 12.65858
## 10 EWR 2013 1 1 10 39.02 28.04 64.43 260 12.65858
## wind_gust precip pressure visib
## 1 11.918651 0 1013.9 10
## 2 15.891535 0 1013.0 10
## 3 14.567241 0 1012.6 10
## 4 15.891535 0 1012.7 10
## 5 17.215830 0 1012.8 10
## 6 11.918651 0 1012.0 10
## 7 9.270062 0 1012.3 10
## 8 13.242946 0 1012.5 10
## 9 14.567241 0 1012.2 10
## 10 14.567241 0 1011.9 10
nrow(weather)
## [1] 8719
query <- "select * from planes;"
planes <- dbGetQuery(con, query)
head(planes, 10)
## tailnum year type manufacturer model engines
## 1 N10156 2004 Fixed wing multi engine EMBRAER EMB-145XR 2
## 2 N102UW 1998 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 3 N103US 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 4 N104UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 5 N10575 2002 Fixed wing multi engine EMBRAER EMB-145LR 2
## 6 N105UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 7 N107US 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 8 N108UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 9 N109UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 10 N110UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## seats speed engine
## 1 55 NA Turbo-fan
## 2 182 NA Turbo-fan
## 3 182 NA Turbo-fan
## 4 182 NA Turbo-fan
## 5 55 NA Turbo-fan
## 6 182 NA Turbo-fan
## 7 182 NA Turbo-fan
## 8 182 NA Turbo-fan
## 9 182 NA Turbo-fan
## 10 182 NA Turbo-fan
nrow(planes)
## [1] 3322
query <- "select * from airports;"
airports <- dbGetQuery(con, query)
head(airports, 10)
## faa name lat lon alt tz dst
## 1 04G Lansdowne Airport 41.13047 -80.61958 1044 -5 A
## 2 06A Moton Field Municipal Airport 32.46057 -85.68003 264 -5 A
## 3 06C Schaumburg Regional 41.98934 -88.10124 801 -6 A
## 4 06N Randall Airport 41.43191 -74.39156 523 -5 A
## 5 09J Jekyll Island Airport 31.07447 -81.42778 11 -4 A
## 6 0A9 Elizabethton Municipal Airport 36.37122 -82.17342 1593 -4 A
## 7 0G6 Williams County Airport 41.46731 -84.50678 730 -5 A
## 8 0G7 Finger Lakes Regional Airport 42.88356 -76.78123 492 -5 A
## 9 0P2 Shoestring Aviation Airfield 39.79482 -76.64719 1000 -5 U
## 10 0S9 Jefferson County Intl 48.05381 -122.81064 108 -8 A
nrow(airports)
## [1] 1397
dbDisconnect(con)
## [1] TRUE
dbUnloadDriver(drv)
## [1] TRUE
Reference - https://cran.r-project.org/web/packages/rmongodb/vignettes/rmongodb_introduction.html
- https://rpubs.com/vbriot28
mongo <- mongo.create()
mongo.is.connected(mongo)
## [1] TRUE
mongo
## [1] 0
## attr(,"mongo")
## <pointer: 0x0000000013658400>
## attr(,"class")
## [1] "mongo"
## attr(,"host")
## [1] "127.0.0.1"
## attr(,"name")
## [1] ""
## attr(,"username")
## [1] ""
## attr(,"password")
## [1] ""
## attr(,"db")
## [1] "admin"
## attr(,"timeout")
## [1] 0
b_airports <- lapply(split(airports, 1:nrow(airports)), function(x) mongo.bson.from.JSON(toJSON(x)))
#just load the first 5,000 rows (the flights df has 336,776 rows)
#as it takes too long to load the entire table
#b_flights <- lapply(split(flights, 1:500), function(x) #mongo.bson.from.JSON(toJSON(x)))
b_weather <- lapply(split(weather, 1:nrow(weather)), function(x) mongo.bson.from.JSON(toJSON(x)))
b_airlines <- lapply(split(airlines, 1:nrow(airlines)), function(x) mongo.bson.from.JSON(toJSON(x)))
b_planes <- lapply(split(planes, 1:nrow(planes)), function(x) mongo.bson.from.JSON(toJSON(x)))
db <- "m_flights"
m_airlines <- "mflights.airlines"
m_airports <- "mflights.airports"
m_weather <- "mflights.weather"
m_planes <- "mflights.planes"
m_flights <-"mflights.flights"
mongo.get.database.collections(mongo, db)
## character(0)
#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_airlines) != 0){
mongo.remove(mongo, m_airlines, criteria = mongo.bson.empty())
}
## [1] TRUE
mongo.insert.batch(mongo, m_airlines, b_airlines)
## [1] TRUE
#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_airports) != 0){
mongo.remove(mongo, m_airports, criteria = mongo.bson.empty())
}
## [1] TRUE
mongo.insert.batch(mongo, m_airports, b_airports)
## [1] TRUE
#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_weather) != 0){
mongo.remove(mongo, m_weather, criteria = mongo.bson.empty())
}
## [1] TRUE
mongo.insert.batch(mongo, m_weather, b_weather)
## [1] TRUE
#make sure we delete the records from the previous run's collection
#before loading
if(mongo.count(mongo,m_planes) != 0){
mongo.remove(mongo, m_planes, criteria = mongo.bson.empty())
}
## [1] TRUE
mongo.insert.batch(mongo, m_planes, b_planes)
## [1] TRUE
#make sure we delete the records from the previous run's collection
#before loading
#if(mongo.count(mongo,m_flights) != 0){
# mongo.remove(mongo, m_flights, criteria = mongo.bson.empty())
#}
#mongo.insert.batch(mongo, m_flights, b_flights)
mongo.count(mongo, m_planes) == nrow(planes)
## [1] TRUE
mongo.count(mongo, m_airports) == nrow(airports)
## [1] TRUE
mongo.count(mongo, m_weather) == nrow(weather)
## [1] TRUE
mongo.count(mongo, m_airlines) == nrow(airlines)
## [1] TRUE
l_planes = mongo.find.all(mongo, m_planes, limit=5)
l_planes
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5a8"
##
## [[1]]$tailnum
## [1] "N10156"
##
## [[1]]$year
## [1] 2004
##
## [[1]]$type
## [1] "Fixed wing multi engine"
##
## [[1]]$manufacturer
## [1] "EMBRAER"
##
## [[1]]$model
## [1] "EMB-145XR"
##
## [[1]]$engines
## [1] 2
##
## [[1]]$seats
## [1] 55
##
## [[1]]$speed
## [1] "NA"
##
## [[1]]$engine
## [1] "Turbo-fan"
##
##
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5a9"
##
## [[2]]$tailnum
## [1] "N102UW"
##
## [[2]]$year
## [1] 1998
##
## [[2]]$type
## [1] "Fixed wing multi engine"
##
## [[2]]$manufacturer
## [1] "AIRBUS INDUSTRIE"
##
## [[2]]$model
## [1] "A320-214"
##
## [[2]]$engines
## [1] 2
##
## [[2]]$seats
## [1] 182
##
## [[2]]$speed
## [1] "NA"
##
## [[2]]$engine
## [1] "Turbo-fan"
##
##
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5aa"
##
## [[3]]$tailnum
## [1] "N103US"
##
## [[3]]$year
## [1] 1999
##
## [[3]]$type
## [1] "Fixed wing multi engine"
##
## [[3]]$manufacturer
## [1] "AIRBUS INDUSTRIE"
##
## [[3]]$model
## [1] "A320-214"
##
## [[3]]$engines
## [1] 2
##
## [[3]]$seats
## [1] 182
##
## [[3]]$speed
## [1] "NA"
##
## [[3]]$engine
## [1] "Turbo-fan"
##
##
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5ab"
##
## [[4]]$tailnum
## [1] "N104UW"
##
## [[4]]$year
## [1] 1999
##
## [[4]]$type
## [1] "Fixed wing multi engine"
##
## [[4]]$manufacturer
## [1] "AIRBUS INDUSTRIE"
##
## [[4]]$model
## [1] "A320-214"
##
## [[4]]$engines
## [1] 2
##
## [[4]]$seats
## [1] 182
##
## [[4]]$speed
## [1] "NA"
##
## [[4]]$engine
## [1] "Turbo-fan"
##
##
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ee5ac"
##
## [[5]]$tailnum
## [1] "N10575"
##
## [[5]]$year
## [1] 2002
##
## [[5]]$type
## [1] "Fixed wing multi engine"
##
## [[5]]$manufacturer
## [1] "EMBRAER"
##
## [[5]]$model
## [1] "EMB-145LR"
##
## [[5]]$engines
## [1] 2
##
## [[5]]$seats
## [1] 55
##
## [[5]]$speed
## [1] "NA"
##
## [[5]]$engine
## [1] "Turbo-fan"
#convert list to a dataframe for comparison
df_lplanes <- ldply (l_planes, data.frame)
df_lplanes
## X_id tailnum year type
## 1 57257a2dd7b9a8eac55ee5a8 N10156 2004 Fixed wing multi engine
## 2 57257a2dd7b9a8eac55ee5a9 N102UW 1998 Fixed wing multi engine
## 3 57257a2dd7b9a8eac55ee5aa N103US 1999 Fixed wing multi engine
## 4 57257a2dd7b9a8eac55ee5ab N104UW 1999 Fixed wing multi engine
## 5 57257a2dd7b9a8eac55ee5ac N10575 2002 Fixed wing multi engine
## manufacturer model engines seats speed engine
## 1 EMBRAER EMB-145XR 2 55 NA Turbo-fan
## 2 AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan
## 3 AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan
## 4 AIRBUS INDUSTRIE A320-214 2 182 NA Turbo-fan
## 5 EMBRAER EMB-145LR 2 55 NA Turbo-fan
#compare against the first 5 rows in the planes dataframe taken from MySQL
df_lplanes[,2:ncol(df_lplanes)] == head(planes, 5)
## tailnum year type manufacturer model engines seats speed engine
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA TRUE
## [4,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA TRUE
## [5,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE NA TRUE
l_weather = mongo.find.all(mongo, m_weather, limit=5)
l_weather
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ec399"
##
## [[1]]$origin
## [1] "EWR"
##
## [[1]]$year
## [1] 2013
##
## [[1]]$month
## [1] 1
##
## [[1]]$day
## [1] 1
##
## [[1]]$hour
## [1] 0
##
## [[1]]$temp
## [1] 37.04
##
## [[1]]$dewp
## [1] 21.92
##
## [[1]]$humid
## [1] 53.97
##
## [[1]]$wind_dir
## [1] 230
##
## [[1]]$wind_speed
## [1] 10.35702
##
## [[1]]$wind_gust
## [1] 11.91865
##
## [[1]]$precip
## [1] 0
##
## [[1]]$pressure
## [1] 1013.9
##
## [[1]]$visib
## [1] 10
##
##
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39a"
##
## [[2]]$origin
## [1] "EWR"
##
## [[2]]$year
## [1] 2013
##
## [[2]]$month
## [1] 1
##
## [[2]]$day
## [1] 1
##
## [[2]]$hour
## [1] 1
##
## [[2]]$temp
## [1] 37.04
##
## [[2]]$dewp
## [1] 21.92
##
## [[2]]$humid
## [1] 53.97
##
## [[2]]$wind_dir
## [1] 230
##
## [[2]]$wind_speed
## [1] 13.80936
##
## [[2]]$wind_gust
## [1] 15.89154
##
## [[2]]$precip
## [1] 0
##
## [[2]]$pressure
## [1] 1013
##
## [[2]]$visib
## [1] 10
##
##
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39b"
##
## [[3]]$origin
## [1] "EWR"
##
## [[3]]$year
## [1] 2013
##
## [[3]]$month
## [1] 1
##
## [[3]]$day
## [1] 1
##
## [[3]]$hour
## [1] 2
##
## [[3]]$temp
## [1] 37.94
##
## [[3]]$dewp
## [1] 21.92
##
## [[3]]$humid
## [1] 52.09
##
## [[3]]$wind_dir
## [1] 230
##
## [[3]]$wind_speed
## [1] 12.65858
##
## [[3]]$wind_gust
## [1] 14.56724
##
## [[3]]$precip
## [1] 0
##
## [[3]]$pressure
## [1] 1012.6
##
## [[3]]$visib
## [1] 10
##
##
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39c"
##
## [[4]]$origin
## [1] "EWR"
##
## [[4]]$year
## [1] 2013
##
## [[4]]$month
## [1] 1
##
## [[4]]$day
## [1] 1
##
## [[4]]$hour
## [1] 3
##
## [[4]]$temp
## [1] 37.94
##
## [[4]]$dewp
## [1] 23
##
## [[4]]$humid
## [1] 54.51
##
## [[4]]$wind_dir
## [1] 230
##
## [[4]]$wind_speed
## [1] 13.80936
##
## [[4]]$wind_gust
## [1] 15.89154
##
## [[4]]$precip
## [1] 0
##
## [[4]]$pressure
## [1] 1012.7
##
## [[4]]$visib
## [1] 10
##
##
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ec39d"
##
## [[5]]$origin
## [1] "EWR"
##
## [[5]]$year
## [1] 2013
##
## [[5]]$month
## [1] 1
##
## [[5]]$day
## [1] 1
##
## [[5]]$hour
## [1] 4
##
## [[5]]$temp
## [1] 37.94
##
## [[5]]$dewp
## [1] 24.08
##
## [[5]]$humid
## [1] 57.04
##
## [[5]]$wind_dir
## [1] 240
##
## [[5]]$wind_speed
## [1] 14.96014
##
## [[5]]$wind_gust
## [1] 17.21583
##
## [[5]]$precip
## [1] 0
##
## [[5]]$pressure
## [1] 1012.8
##
## [[5]]$visib
## [1] 10
#convert list to a dataframe for comparison
df_lweather <- ldply (l_weather, data.frame)
df_lweather
## X_id origin year month day hour temp dewp humid
## 1 57257a2dd7b9a8eac55ec399 EWR 2013 1 1 0 37.04 21.92 53.97
## 2 57257a2dd7b9a8eac55ec39a EWR 2013 1 1 1 37.04 21.92 53.97
## 3 57257a2dd7b9a8eac55ec39b EWR 2013 1 1 2 37.94 21.92 52.09
## 4 57257a2dd7b9a8eac55ec39c EWR 2013 1 1 3 37.94 23.00 54.51
## 5 57257a2dd7b9a8eac55ec39d EWR 2013 1 1 4 37.94 24.08 57.04
## wind_dir wind_speed wind_gust precip pressure visib
## 1 230 10.35702 11.91865 0 1013.9 10
## 2 230 13.80936 15.89154 0 1013.0 10
## 3 230 12.65858 14.56724 0 1012.6 10
## 4 230 13.80936 15.89154 0 1012.7 10
## 5 240 14.96014 17.21583 0 1012.8 10
#compare against the first 5 rows in the weather dataframe taken from MySQL
df_lweather[,2:ncol(df_lweather)] == head(weather, 5)
## origin year month day hour temp dewp humid wind_dir wind_speed
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [4,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [5,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## wind_gust precip pressure visib
## [1,] TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE
## [4,] TRUE TRUE TRUE TRUE
## [5,] TRUE TRUE TRUE TRUE
l_airports = mongo.find.all(mongo, m_airports, limit=5)
l_airports
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe24"
##
## [[1]]$faa
## [1] "04G"
##
## [[1]]$name
## [1] "Lansdowne Airport"
##
## [[1]]$lat
## [1] 41.13047
##
## [[1]]$lon
## [1] -80.61958
##
## [[1]]$alt
## [1] 1044
##
## [[1]]$tz
## [1] -5
##
## [[1]]$dst
## [1] "A"
##
##
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe25"
##
## [[2]]$faa
## [1] "06A"
##
## [[2]]$name
## [1] "Moton Field Municipal Airport"
##
## [[2]]$lat
## [1] 32.46057
##
## [[2]]$lon
## [1] -85.68003
##
## [[2]]$alt
## [1] 264
##
## [[2]]$tz
## [1] -5
##
## [[2]]$dst
## [1] "A"
##
##
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe26"
##
## [[3]]$faa
## [1] "06C"
##
## [[3]]$name
## [1] "Schaumburg Regional"
##
## [[3]]$lat
## [1] 41.98934
##
## [[3]]$lon
## [1] -88.10124
##
## [[3]]$alt
## [1] 801
##
## [[3]]$tz
## [1] -6
##
## [[3]]$dst
## [1] "A"
##
##
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe27"
##
## [[4]]$faa
## [1] "06N"
##
## [[4]]$name
## [1] "Randall Airport"
##
## [[4]]$lat
## [1] 41.43191
##
## [[4]]$lon
## [1] -74.39156
##
## [[4]]$alt
## [1] 523
##
## [[4]]$tz
## [1] -5
##
## [[4]]$dst
## [1] "A"
##
##
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe28"
##
## [[5]]$faa
## [1] "09J"
##
## [[5]]$name
## [1] "Jekyll Island Airport"
##
## [[5]]$lat
## [1] 31.07447
##
## [[5]]$lon
## [1] -81.42778
##
## [[5]]$alt
## [1] 11
##
## [[5]]$tz
## [1] -4
##
## [[5]]$dst
## [1] "A"
#convert list to a dataframe for comparison
df_lairports <- ldply (l_airports, data.frame)
df_lairports
## X_id faa name lat
## 1 57257a2dd7b9a8eac55ebe24 04G Lansdowne Airport 41.13047
## 2 57257a2dd7b9a8eac55ebe25 06A Moton Field Municipal Airport 32.46057
## 3 57257a2dd7b9a8eac55ebe26 06C Schaumburg Regional 41.98934
## 4 57257a2dd7b9a8eac55ebe27 06N Randall Airport 41.43191
## 5 57257a2dd7b9a8eac55ebe28 09J Jekyll Island Airport 31.07447
## lon alt tz dst
## 1 -80.61958 1044 -5 A
## 2 -85.68003 264 -5 A
## 3 -88.10124 801 -6 A
## 4 -74.39156 523 -5 A
## 5 -81.42778 11 -4 A
#compare against the first 5 rows in the airport dataframe taken from MySQL
df_lairports[,2:ncol(df_lairports)] == head(airports, 5)
## faa name lat lon alt tz dst
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [4,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [5,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
l_airlines = mongo.find.all(mongo, m_airlines, limit=5)
l_airlines
## [[1]]
## [[1]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe14"
##
## [[1]]$carrier
## [1] "9E"
##
## [[1]]$name
## [1] "Endeavor Air Inc.\r"
##
##
## [[2]]
## [[2]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe15"
##
## [[2]]$carrier
## [1] "AA"
##
## [[2]]$name
## [1] "American Airlines Inc.\r"
##
##
## [[3]]
## [[3]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe16"
##
## [[3]]$carrier
## [1] "AS"
##
## [[3]]$name
## [1] "Alaska Airlines Inc.\r"
##
##
## [[4]]
## [[4]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe17"
##
## [[4]]$carrier
## [1] "B6"
##
## [[4]]$name
## [1] "JetBlue Airways\r"
##
##
## [[5]]
## [[5]]$`_id`
## [1] "57257a2dd7b9a8eac55ebe18"
##
## [[5]]$carrier
## [1] "DL"
##
## [[5]]$name
## [1] "Delta Air Lines Inc.\r"
#convert list to a dataframe for comparison
df_lairlines <- ldply (l_airlines, data.frame)
df_lairlines
## X_id carrier name
## 1 57257a2dd7b9a8eac55ebe14 9E Endeavor Air Inc.\r
## 2 57257a2dd7b9a8eac55ebe15 AA American Airlines Inc.\r
## 3 57257a2dd7b9a8eac55ebe16 AS Alaska Airlines Inc.\r
## 4 57257a2dd7b9a8eac55ebe17 B6 JetBlue Airways\r
## 5 57257a2dd7b9a8eac55ebe18 DL Delta Air Lines Inc.\r
#compare against the first 5 rows in the airlines dataframe taken from MySQL
df_lairlines[,2:ncol(df_lairlines)] == head(airlines, 5)
## carrier name
## [1,] TRUE TRUE
## [2,] TRUE TRUE
## [3,] TRUE TRUE
## [4,] TRUE TRUE
## [5,] TRUE TRUE
Reference: http://www.thegeekstuff.com/2014/01/sql-vs-nosql-db/
1.SQL databases are relational databases while NoSQL databases are graphical or network datases.
2. NoSQL databases strength lie on data that are naturally heirarchical (such as organization charts) while SQL databases are better in performing aggregate functions such as summation and averaging of column values.
3. SQL databases are vertically scalable while NoSQL databases are horizontally scalable. This means that we can increase the scale of SQL databases by increasing CPU speed while we can increase the scale of NoSQL databases by increasing the number of servers.
4. SQL databases are the preferred databases for applications with complex queries while NoSQL databases are the preferred databases for big data.
5.SQL databases emphasizes on ACID properties ( Atomicity, Consistency, Isolation and Durability) whereas the NoSQL database follows the Brewers CAP theorem ( Consistency, Availability and Partition tolerance ).
6. SQL databases are better for applications with heavy transaction processing because it provides better data integrity than NoSQL databases.