For this project I am using the data feeds from the site “http://ourairports.com/data/”.

In this web I found couple of important and up-to-date data about airports in the world, even small Seaplane Base around the world.

library("RMongo")
library("jsonlite")
library("stringr")
library("knitr")
library("RMySQL")
library("sqldf")
library("DT")
library("reshape2")
library("tidyr")
#load ourairports data from .scv files
#ourairports

#Loading airports.csv
airports_df <- read.csv("https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W13/airports.csv", header=TRUE)

#Loading countries.csv
countries_df <- read.csv("https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W13/countries.csv", header=TRUE)

#Loading regions.csv
regions_df <- read.csv("https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W13/regions.csv", header=TRUE)

#Loading runwayss.csv
runways_df <- read.csv("https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W13/runways.csv", header=TRUE)

head(airports_df)
##       id ident          type                               name
## 1   6523   00A      heliport                  Total Rf Heliport
## 2   6524  00AK small_airport                       Lowell Field
## 3   6525  00AL small_airport                       Epps Airpark
## 4   6526  00AR        closed Newport Hospital & Clinic Heliport
## 5 322127  00AS small_airport                     Fulton Airport
## 6   6527  00AZ small_airport                     Cordes Airport
##   latitude_deg longitude_deg elevation_ft continent iso_country iso_region
## 1      40.0708     -74.93360           11      <NA>          US      US-PA
## 2      59.9492    -151.69600          450      <NA>          US      US-AK
## 3      34.8648     -86.77030          820      <NA>          US      US-AL
## 4      35.6087     -91.25490          237      <NA>          US      US-AR
## 5      34.9428     -97.81802         1100      <NA>          US      US-OK
## 6      34.3056    -112.16500         3810      <NA>          US      US-AZ
##   municipality scheduled_service gps_code iata_code local_code home_link
## 1     Bensalem                no      00A                  00A          
## 2 Anchor Point                no     00AK                 00AK          
## 3      Harvest                no     00AL                 00AL          
## 4      Newport                no                                        
## 5         Alex                no     00AS                 00AS          
## 6       Cordes                no     00AZ                 00AZ          
##   wikipedia_link keywords
## 1                        
## 2                        
## 3                        
## 4                    00AR
## 5                        
## 6
head(countries_df)
##       id code                 name continent
## 1 302672   AD              Andorra        EU
## 2 302618   AE United Arab Emirates        AS
## 3 302619   AF          Afghanistan        AS
## 4 302722   AG  Antigua and Barbuda      <NA>
## 5 302723   AI             Anguilla      <NA>
## 6 302673   AL              Albania        EU
##                                      wikipedia_link
## 1              http://en.wikipedia.org/wiki/Andorra
## 2 http://en.wikipedia.org/wiki/United_Arab_Emirates
## 3          http://en.wikipedia.org/wiki/Afghanistan
## 4  http://en.wikipedia.org/wiki/Antigua_and_Barbuda
## 5             http://en.wikipedia.org/wiki/Anguilla
## 6              http://en.wikipedia.org/wiki/Albania
##                                                               keywords
## 1                                                                     
## 2 UAE,Ù<U+0085>طارات فÙ<U+008A> اÙ<U+0084>Ø¥Ù<U+0085>ارات اÙ<U+0084>عربÙ<U+008A>Ø© اÙ<U+0084>Ù<U+0085>تحدة
## 3                                                                     
## 4                                                                     
## 5                                                                     
## 6
head(regions_df)
##       id  code local_code                  name continent iso_country
## 1 302811 AD-02         02               Canillo        EU          AD
## 2 302812 AD-03         03                Encamp        EU          AD
## 3 302813 AD-04         04            La Massana        EU          AD
## 4 302814 AD-05         05                Ordino        EU          AD
## 5 302815 AD-06         06 Sant Julià de Lòria        EU          AD
## 6 302816 AD-07         07      Andorra la Vella        EU          AD
##                                       wikipedia_link keywords
## 1               http://en.wikipedia.org/wiki/Canillo         
## 2                http://en.wikipedia.org/wiki/Encamp         
## 3            http://en.wikipedia.org/wiki/La_Massana         
## 4                http://en.wikipedia.org/wiki/Ordino         
## 5 http://en.wikipedia.org/wiki/Sant_Julià_de_Lòria         
## 6      http://en.wikipedia.org/wiki/Andorra_la_Vella
head(runways_df)
##       id airport_ref airport_ident length_ft width_ft surface lighted
## 1 269408        6523           00A        80       80  ASPH-G       1
## 2 255155        6524          00AK      2500       70    GRVL       0
## 3 254165        6525          00AL      2300      200    TURF       0
## 4 270932        6526          00AR        40       40   GRASS       0
## 5 322128      322127          00AS      1450       60    Turf       0
## 6 257681        6527          00AZ      1700       60  GRAVEL       0
##   closed le_ident le_latitude_deg le_longitude_deg le_elevation_ft
## 1      0       H1              NA               NA              NA
## 2      0        N              NA               NA              NA
## 3      0       01              NA               NA              NA
## 4      0       H1              NA               NA              NA
## 5      0        1              NA               NA              NA
## 6      0       15              NA               NA              NA
##   le_heading_degT le_displaced_threshold_ft he_ident he_latitude_deg
## 1              NA                        NA                       NA
## 2              NA                        NA        S              NA
## 3              NA                        NA       19              NA
## 4              NA                        NA       H1              NA
## 5              NA                        NA       19              NA
## 6              NA                        NA       33              NA
##   he_longitude_deg he_elevation_ft he_heading_degT
## 1               NA              NA              NA
## 2               NA              NA              NA
## 3               NA              NA              NA
## 4               NA              NA              NA
## 5               NA              NA              NA
## 6               NA              NA              NA
##   he_displaced_threshold_ft  X
## 1                        NA NA
## 2                        NA NA
## 3                        NA NA
## 4                        NA NA
## 5                        NA NA
## 6                        NA NA

Connecting to MySQL database in the localhost.

library('RMySQL')
#mydb = dbConnect(MySQL(), user='root', password='root', host='localhost', dbname="ourairports")
mydb = dbConnect(MySQL(), user='root', password='root', host='localhost')

# DROP the database if exists using RMySQL in R
dbSendQuery(mydb, "DROP DATABASE if exists ourairports;")
## <MySQLResult:959328309,0,0>
# creating the database using RMySQL in R
dbSendQuery(mydb, "CREATE DATABASE ourairports;")
## <MySQLResult:908998708,0,1>
mydb = dbConnect(MySQL(), user='root', password='root', host='localhost', dbname="ourairports")

Insert above record sets into MySQL

# 1 ~~~airports_df~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Insert records from airports_df in MySql Database ("ourairports")
dbSendQuery(mydb, "drop table if exists airports")
## <MySQLResult:842150189,1,0>
dbWriteTable(mydb, value = airports_df, name = "airports", overwrite=TRUE, row.names=FALSE,add_id = TRUE)
## [1] TRUE
# 2 ~~~countries_df~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Insert records from airports_df in MySql Database ("ourairports")

dbSendQuery(mydb, "drop table if exists countries")
## <MySQLResult:842412333,1,4>
dbWriteTable(mydb, value = countries_df, name = "countries", overwrite=TRUE, row.names=FALSE,add_id = TRUE)
## [1] TRUE
# 3 ~~~ regions_df ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Insert records from airports_df in MySql Database ("ourairports")

dbSendQuery(mydb, "drop table if exists regions")
## <MySQLResult:775043373,1,8>
dbWriteTable(mydb, value = regions_df, name = "regions", overwrite=TRUE, row.names=FALSE,add_id = TRUE)
## [1] TRUE
# 4 ~~~ runways_df ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Insert records from airports_df in MySql Database ("ourairports")

dbSendQuery(mydb, "drop table if exists runways")
## <MySQLResult:775108909,1,12>
dbWriteTable(mydb, value = runways_df, name = "runways", overwrite=TRUE, row.names=FALSE,add_id = TRUE)
## [1] TRUE

Then pull Countries<- Regions <- Airports with join tables

airp_df<-dbGetQuery(mydb,"SELECT c.code,c.name, 
c.continent,
r.name region_name,
r.code  region_Code, 
r.local_code  region_local_code,
a.name airport_name,
a.type,
a.elevation_ft, 
a.municipality
FROM ourairports.countries c
inner join regions r
ON c.code = r.iso_country
inner join airports a
on c.code = a.iso_country
and r.code = a.iso_region
where c.code = 'US';")


head(airp_df)
##   code          name continent  region_name region_Code region_local_code
## 1   US United States      <NA> Pennsylvania       US-PA                PA
## 2   US United States      <NA>       Alaska       US-AK                AK
## 3   US United States      <NA>      Alabama       US-AL                AL
## 4   US United States      <NA>     Arkansas       US-AR                AR
## 5   US United States      <NA>     Oklahoma       US-OK                OK
## 6   US United States      <NA>      Arizona       US-AZ                AZ
##                         airport_name          type elevation_ft
## 1                  Total Rf Heliport      heliport           11
## 2                       Lowell Field small_airport          450
## 3                       Epps Airpark small_airport          820
## 4 Newport Hospital & Clinic Heliport        closed          237
## 5                     Fulton Airport small_airport         1100
## 6                     Cordes Airport small_airport         3810
##   municipality
## 1     Bensalem
## 2 Anchor Point
## 3      Harvest
## 4      Newport
## 5         Alex
## 6       Cordes

Disconnect the connection to MySql.

dbDisconnect(mydb)
## [1] TRUE
mydb<-NA

#detach RMySQL to prevent masking of functions below
detach("package:RMySQL", unload=TRUE)

MongoDB Data Processing

Before you run below comand go to your CMD prompt and start mongo services using “mongod” and leave the comand window open

library(RMongo)
library(rmongodb)


#connect to MongoDB db
mongo <- mongoDbConnect("ourairports", "localhost", 27017)
mongo = mongo.create(host = "localhost")
mongo.is.connected(mongo)
## [1] TRUE
mongo.get.databases(mongo)
## [1] "database"    "flights"     "ourairports" "test"

Method 1 to add a documnt - create function to input data

library(RMongo)
library(jsonlite)
library(stringr)

mongo <- mongoDbConnect("ourairports", "localhost", 27017)

insert_colection<-function(connection,db,df){

  #convert df row to json mongodb document
  df_json<-toJSON(df)

  #clean up
  df_json<-sub("[","",df_json, fixed = TRUE)
  df_json<-sub("]","",df_json, fixed = TRUE)
  df_json<-as.character(df_json)

  #insert document
  dbInsertDocument(connection, db, df_json)

}

Call to the above function to insert rows/documents of above data set airp_df

#airports
for(i in 1:nrow(airp_df)) {
  insert_colection(mongo,"airp_df",airp_df[i,])
}

Method 2 to add a documnt - Using json and bson

library(jsonlite)
library(rmongodb)

#toJSON(airp_df)
#x <- toJSON(unname(split(airp_df, 1:nrow(airp_df))))
#cat(x)
# library(mongolite)
# m <- mongo("b")
# m$insert(b)

#Convert a data.frame to a mongo.bson object

mongo <- mongo.create()
mongo.is.connected(mongo)
## [1] TRUE
db <- "ourairports"
coll <- "worldairports"

bs<-mongo.bson.from.df(airp_df)

mongo.insert(mongo, ns = paste0(db ,".", coll), b = bs)
## [1] TRUE
head(bs)
## [[1]]
##  code : 2     US
##  name : 2     United States
##  continent : 10   BSON_NULL
##  region_name : 2      Pennsylvania
##  region_Code : 2      US-PA
##  region_local_code : 2    PA
##  airport_name : 2     Total Rf Heliport
##  type : 2     heliport
##  elevation_ft : 1     11.000000
##  municipality : 2     Bensalem
## 
## [[2]]
##  code : 2     US
##  name : 2     United States
##  continent : 10   BSON_NULL
##  region_name : 2      Alaska
##  region_Code : 2      US-AK
##  region_local_code : 2    AK
##  airport_name : 2     Lowell Field
##  type : 2     small_airport
##  elevation_ft : 1     450.000000
##  municipality : 2     Anchor Point
## 
## [[3]]
##  code : 2     US
##  name : 2     United States
##  continent : 10   BSON_NULL
##  region_name : 2      Alabama
##  region_Code : 2      US-AL
##  region_local_code : 2    AL
##  airport_name : 2     Epps Airpark
##  type : 2     small_airport
##  elevation_ft : 1     820.000000
##  municipality : 2     Harvest
## 
## [[4]]
##  code : 2     US
##  name : 2     United States
##  continent : 10   BSON_NULL
##  region_name : 2      Arkansas
##  region_Code : 2      US-AR
##  region_local_code : 2    AR
##  airport_name : 2     Newport Hospital & Clinic Heliport
##  type : 2     closed
##  elevation_ft : 1     237.000000
##  municipality : 2     Newport
## 
## [[5]]
##  code : 2     US
##  name : 2     United States
##  continent : 10   BSON_NULL
##  region_name : 2      Oklahoma
##  region_Code : 2      US-OK
##  region_local_code : 2    OK
##  airport_name : 2     Fulton Airport
##  type : 2     small_airport
##  elevation_ft : 1     1100.000000
##  municipality : 2     Alex
## 
## [[6]]
##  code : 2     US
##  name : 2     United States
##  continent : 10   BSON_NULL
##  region_name : 2      Arizona
##  region_Code : 2      US-AZ
##  region_local_code : 2    AZ
##  airport_name : 2     Cordes Airport
##  type : 2     small_airport
##  elevation_ft : 1     3810.000000
##  municipality : 2     Cordes

Quering MongoDB

library(RMongo)
library(jsonlite)
library(stringr)
library(knitr)
mongo <- mongoDbConnect("ourairports", "localhost", 27017)


#dbGetQuery(mongo, "airp_df",'{"code": "US"}')
#dbGetQuery(mongo, "worldairports",'{"code": "US"}')
#kable(dbGetQuery(mongo, "worldairports",'{"lat": {$gt:70}}'))

#Note -> Above select queries are not working with Knit but when running in current chunk they are working,
# Iwasn't able to figureout the issue

Comparison Between SQL and NoSQL

  • 1.SQL databases are relational databases while NoSQL databases are graphical or network datases.
  • 2.NoSQL databases strength lie on data that are naturally heirarchical (such as organization charts) while SQL databases are better in performing aggregate functions such as summation and averaging of column values.
  • 3.SQL databases are vertically scalable while NoSQL databases are horizontally scalable. This means that we can increase the scale of SQL databases by increasing CPU speed while we can increase the scale of NoSQL databases by increasing the number of servers.
  • 4.SQL databases are the preferred databases for applications with complex queries while NoSQL databases are the preferred databases for big data.
  • 5.SQL databases emphasizes on ACID properties ( Atomicity, Consistency, Isolation and Durability) whereas the NoSQL database follows the Brewers CAP theorem ( Consistency, Availability and Partition tolerance ).
  • 6.SQL databases are better for applications with heavy transaction processing because it provides better data integrity than NoSQL databases.