No SQL Migration

1. I began by loading the needed packages and calling the data in a .csv format from the github location where I stored it. I also confirmed the dimensions of the file.

require(rmongodb)
require(jsonlite)
require(RCurl)
airportsurl <- getURL("https://raw.githubusercontent.com/jeffnieman11/Data607_Project4/master/airports.csv")
airports <- read.csv(text=airportsurl)
dim(airports)
## [1] 1397    7
head(airports)
##   faa                           name      lat       lon  alt tz dst
## 1 04G              Lansdowne Airport 41.13047 -80.61958 1044 -5   A
## 2 06A  Moton Field Municipal Airport 32.46057 -85.68003  264 -5   A
## 3 06C            Schaumburg Regional 41.98934 -88.10124  801 -6   A
## 4 06N                Randall Airport 41.43191 -74.39156  523 -5   A
## 5 09J          Jekyll Island Airport 31.07447 -81.42778   11 -4   A
## 6 0A9 Elizabethton Municipal Airport 36.37122 -82.17342 1593 -4   A

2. I then created the mongo connection and tested to see if it is working properly. A value of “TRUE” confirmed it.

mongonew <- mongo.create()
mongonew
## [1] 0
## attr(,"mongo")
## <pointer: 0x00000000080909b8>
## attr(,"class")
## [1] "mongo"
## attr(,"host")
## [1] "127.0.0.1"
## attr(,"name")
## [1] ""
## attr(,"username")
## [1] ""
## attr(,"password")
## [1] ""
## attr(,"db")
## [1] "admin"
## attr(,"timeout")
## [1] 0
mongo.is.connected(mongonew)
## [1] TRUE

3. I then created names for a database and a collection where we add the records.

db <- "test.flights"
mnew <- "test.flights.airports"

mongo.get.database.collections(mongonew, db)
## character(0)

4. Following the advice on Andy Catlin’s to Mongo I used lappy to the split function and converted the csv file to JSON and then to bson which is the format needed for rmongodb. A test of 3 rows confirms the approach.

airports.1 <- lapply(split(airports, 1:nrow(airports)), function(x) mongo.bson.from.JSON(toJSON(x)))

airports.1[1:3]
## $`1`
##  1 : 3    
##      faa : 2      04G
##      name : 2     Lansdowne Airport
##      lat : 1      41.130500
##      lon : 1      -80.619600
##      alt : 16     1044
##      tz : 16      -5
##      dst : 2      A
## 
## 
## $`2`
##  1 : 3    
##      faa : 2      06A
##      name : 2     Moton Field Municipal Airport
##      lat : 1      32.460600
##      lon : 1      -85.680000
##      alt : 16     264
##      tz : 16      -5
##      dst : 2      A
## 
## 
## $`3`
##  1 : 3    
##      faa : 2      06C
##      name : 2     Schaumburg Regional
##      lat : 1      41.989300
##      lon : 1      -88.101200
##      alt : 16     801
##      tz : 16      -6
##      dst : 2      A

5. Using the mongo.insert.batch command I inserted the data from step 4 into my mongoDB and confirmed that there indeed were the same number of records as in the csv file.

mongo.insert.batch(mongonew, mnew, airports.1)
## [1] TRUE

6. I concluded by testing several queries. I first checked the count and records of my local airport DTW, finding to no surprise one record. Then I tested the altitude of 645 and again just pulled up the airport of DTW. I then queried the count and a few records at sea level, followed by queries for >=2000 feet and <0.

mongo.count(mongonew, mnew, query ='{"faa":"DTW"}')
## [1] 1
mongo.find.all(mongonew, mnew, query = '{"faa":"DTW"}')
## [[1]]
## [[1]]$`_id`
## [1] "571d2e07f5665d650f02a068"
## 
## [[1]]$faa
## [1] "DTW"
## 
## [[1]]$name
## [1] "Detroit Metro Wayne Co"
## 
## [[1]]$lat
## [1] 42.212
## 
## [[1]]$lon
## [1] -83.353
## 
## [[1]]$alt
## [1] 645
## 
## [[1]]$tz
## [1] -5
## 
## [[1]]$dst
## [1] "A"
mongo.find.all(mongonew, mnew, query = '{"alt":645}')
## [[1]]
## [[1]]$`_id`
## [1] "571d2e07f5665d650f02a068"
## 
## [[1]]$faa
## [1] "DTW"
## 
## [[1]]$name
## [1] "Detroit Metro Wayne Co"
## 
## [[1]]$lat
## [1] 42.212
## 
## [[1]]$lon
## [1] -83.353
## 
## [[1]]$alt
## [1] 645
## 
## [[1]]$tz
## [1] -5
## 
## [[1]]$dst
## [1] "A"
mongo.count(mongonew, mnew, query = '{"alt":0}')
## [1] 49
mongo.find.all(mongonew, mnew, query = '{"alt":0}', limit=5)
## [[1]]
## [[1]]$`_id`
## [1] "571d2e07f5665d650f029f57"
## 
## [[1]]$faa
## [1] "AGN"
## 
## [[1]]$name
## [1] "Angoon Seaplane Base"
## 
## [[1]]$lat
## [1] 57.504
## 
## [[1]]$lon
## [1] -134.59
## 
## [[1]]$alt
## [1] 0
## 
## [[1]]$tz
## [1] -9
## 
## [[1]]$dst
## [1] "A"
## 
## 
## [[2]]
## [[2]]$`_id`
## [1] "571d2e07f5665d650f029f6b"
## 
## [[2]]$faa
## [1] "ALZ"
## 
## [[2]]$name
## [1] "Alitak Seaplane Base"
## 
## [[2]]$lat
## [1] 56.899
## 
## [[2]]$lon
## [1] -154.25
## 
## [[2]]$alt
## [1] 0
## 
## [[2]]$tz
## [1] -9
## 
## [[2]]$dst
## [1] "A"
## 
## 
## [[3]]
## [[3]]$`_id`
## [1] "571d2e07f5665d650f029f77"
## 
## [[3]]$faa
## [1] "AOS"
## 
## [[3]]$name
## [1] "Amook Bay Seaplane Base"
## 
## [[3]]$lat
## [1] 57.471
## 
## [[3]]$lon
## [1] -153.82
## 
## [[3]]$alt
## [1] 0
## 
## [[3]]$tz
## [1] -9
## 
## [[3]]$dst
## [1] "A"
## 
## 
## [[4]]
## [[4]]$`_id`
## [1] "571d2e07f5665d650f029f7d"
## 
## [[4]]$faa
## [1] "AQC"
## 
## [[4]]$name
## [1] "Klawock Seaplane Base"
## 
## [[4]]$lat
## [1] 55.555
## 
## [[4]]$lon
## [1] -133.1
## 
## [[4]]$alt
## [1] 0
## 
## [[4]]$tz
## [1] -9
## 
## [[4]]$dst
## [1] "A"
## 
## 
## [[5]]
## [[5]]$`_id`
## [1] "571d2e07f5665d650f029f88"
## 
## [[5]]$faa
## [1] "ATT"
## 
## [[5]]$name
## [1] "Camp Mabry Austin City"
## 
## [[5]]$lat
## [1] 30.317
## 
## [[5]]$lon
## [1] -97.767
## 
## [[5]]$alt
## [1] 0
## 
## [[5]]$tz
## [1] -6
## 
## [[5]]$dst
## [1] "A"
mongo.count(mongonew, mnew, query = '{"alt":{"$gte":2000}}')
## [1] 187
mongo.find.one(mongonew, mnew, query = '{"alt":{"$gte":2000}}')
##  _id : 7      571d2e07f5665d650f029f15
##  faa : 4      
##      0 : 2    2G9
## 
##  name : 4     
##      0 : 2    Somerset County Airport
## 
##  lat : 4      
##      0 : 1    40.039000
## 
##  lon : 4      
##      0 : 1    -79.015000
## 
##  alt : 4      
##      0 : 16   2275
## 
##  tz : 4   
##      0 : 16   -5
## 
##  dst : 4      
##      0 : 2    A
mongo.count(mongonew, mnew, query = '{"alt":{"$lt":0}}')
## [1] 2
mongo.find.all(mongonew, mnew, query = '{"alt":{"$lt":0}}')
## [[1]]
## [[1]]$`_id`
## [1] "571d2e07f5665d650f02a176"
## 
## [[1]]$faa
## [1] "IPL"
## 
## [[1]]$name
## [1] "Imperial Co"
## 
## [[1]]$lat
## [1] 32.834
## 
## [[1]]$lon
## [1] -115.58
## 
## [[1]]$alt
## [1] -54
## 
## [[1]]$tz
## [1] -8
## 
## [[1]]$dst
## [1] "A"
## 
## 
## [[2]]
## [[2]]$`_id`
## [1] "571d2e07f5665d650f02a291"
## 
## [[2]]$faa
## [1] "NJK"
## 
## [[2]]$name
## [1] "El Centro Naf"
## 
## [[2]]$lat
## [1] 32.829
## 
## [[2]]$lon
## [1] -115.67
## 
## [[2]]$alt
## [1] -42
## 
## [[2]]$tz
## [1] -8
## 
## [[2]]$dst
## [1] "A"

Conclusions: Quite honestly I am more familiar with relational databases than I am with the noSQL database approach. The pros of the relational database approach has to do with the structure - the columns and rows. However the cons result the large space and process time complex and large relational databases take up. The NoSQL approach solves this problem, but involves learning a new way to approach the queries. Through this homework I learned how to put data into MongoDB and after a little research found the queries to be useful as well.