Introduction

The purpose of this assignment is to read a MYSQL database to MongoDB.

Steps 1. Read MYSQL database into R data frame 1. Export a CSV file from R 3. Migrate CSV file to MongoDB 4. Connect to MongoDB 5. Migrate file from MongoDB to R data frame

Load libraries

library(mongolite)
library(RMySQL)
## Loading required package: DBI
library(RJSONIO)
library(rjson)
## 
## Attaching package: 'rjson'
## The following objects are masked from 'package:RJSONIO':
## 
##     fromJSON, toJSON

Connect to MySQL

source("logincredentiasl.R")
dbConnect(MySQL(), user=MySQL_Username, password=MySQL_Password)
## <MySQLConnection:0,0>
sqlDb <- dbDriver("MySQL")
cnctSql <- dbConnect(sqlDb, MySQL_Username, MySQL_Password, dbname = "flights", 
                     host = "localhost")

dbListTables(cnctSql)
## [1] "airlines" "airports" "flights"

Query MySQL database

qryFlights <- "SELECT * FROM flights"
dbFlights <- dbGetQuery(cnctSql, qryFlights)
head(dbFlights)
##   year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1 2013     1   1      517         2      830        11      UA  N14228
## 2 2013     1   1      533         4      850        20      UA  N24211
## 3 2013     1   1      542         2      923        33      AA  N619AA
## 4 2013     1   1      544        -1     1004       -18      B6  N804JB
## 5 2013     1   1      554        -6      812       -25      DL  N668DN
## 6 2013     1   1      554        -4      740        12      UA  N39463
##   flight origin dest air_time distance hour minute
## 1   1545    EWR  IAH      227     1400    5     17
## 2   1714    LGA  IAH      227     1416    5     33
## 3   1141    JFK  MIA      160     1089    5     42
## 4    725    JFK  BQN      183     1576    5     44
## 5    461    LGA  ATL      116      762    6     54
## 6   1696    EWR  ORD      150      719    6     54
qryAirl <- "SELECT * FROM airlines"
dbAirl <- dbGetQuery(cnctSql, qryAirl)
head(dbAirl)
##   carrier                       name
## 1      9E        Endeavor Air Inc.\r
## 2      AA   American Airlines Inc.\r
## 3      AS     Alaska Airlines Inc.\r
## 4      B6          JetBlue Airways\r
## 5      DL     Delta Air Lines Inc.\r
## 6      EV ExpressJet Airlines Inc.\r
qryAirpts <- "SELECT * FROM airports"
dbAirpts <- dbGetQuery(cnctSql, qryAirpts)
head(dbAirpts)
##   faa                           name      lat       lon  alt tz dst
## 1 04G              Lansdowne Airport 41.13047 -80.61958 1044 -5   A
## 2 06A  Moton Field Municipal Airport 32.46057 -85.68003  264 -5   A
## 3 06C            Schaumburg Regional 41.98934 -88.10124  801 -6   A
## 4 06N                Randall Airport 41.43191 -74.39156  523 -5   A
## 5 09J          Jekyll Island Airport 31.07447 -81.42778   11 -4   A
## 6 0A9 Elizabethton Municipal Airport 36.37122 -82.17342 1593 -4   A

Disconnect MySQL

dbDisconnect(cnctSql)
## [1] TRUE

Store data frames as csv files

write.table(dbFlights, file = "/users/sharonmorris/flights.csv", sep = ",", row.names = FALSE)
write.table(dbAirpts, file = "/users/sharonmorris/airports.csv", sep = ",", row.names = FALSE)
write.table(dbAirl, file = "/users/sharonmorris/airlines.csv", sep = ",", row.names = FALSE)

Import database to Mongodb

Import airlines data into MongoDB

Sharons-MacBook-Pro:~ sharonmorris$ mongoimport –db flights –collection airlines –type csv –file “airlines.csv” –headerline 2016-11-14T23:35:41.395-0500 connected to: localhost 2016-11-14T23:35:41.478-0500 imported 16 documents

Import airports data into MongoDB

Sharons-MacBook-Pro:~ sharonmorris$ mongoimport –db flights –collection airports –type csv –file “airports.csv” –headerline 2016-11-14T23:37:08.728-0500 connected to: localhost 2016-11-14T23:37:08.815-0500 imported 1397 documents

Import flights data into MongoDB

Sharons-MacBook-Pro:~ sharonmorris$ mongoimport –db flights –collection flights –type csv –file “flights.csv” –headerline 2016-11-14T23:38:02.788-0500 connected to: localhost 2016-11-14T23:38:09.143-0500 imported 336776 documents

Query Mongodb

library(RMongo)
## Loading required package: rJava
## 
## Attaching package: 'RMongo'
## The following object is masked from 'package:RMySQL':
## 
##     dbDisconnect
## The following objects are masked from 'package:DBI':
## 
##     dbDisconnect, dbGetQuery
library(knitr)
# Connect to MongoDB database
#aL <- mongoDbConnect("airlines", "localhost", port = 27017)
#aP <- mongoDbConnect("airports", "localhost", port = 27017)
f <- mongoDbConnect("flights", "localhost", port = 27017)

queryAirlines <- (dbGetQuery(f, 'airlines', "{'carrier': '9E'}"))
head(queryAirlines)
##                       X_id              name carrier
## 1 582a909ddcf32ffaa2c6a9ba Endeavor Air Inc.      NA
## 2                                         9E      NA
queryAirports <- dbGetQuery(f, 'airports', "{'lat': {'$lt': '100'}}")

queryFlights <- dbGetQuery(f, 'flights', "{'month': {'$lt': 10}, 'minute': {'$gte': 0.1}}")
a <- queryFlights[c('month', 'minute')]
summary(a)
##      month       minute     
##  Min.   :1   Min.   : 1.00  
##  1st Qu.:1   1st Qu.:16.00  
##  Median :1   Median :30.00  
##  Mean   :1   Mean   :31.63  
##  3rd Qu.:1   3rd Qu.:49.25  
##  Max.   :1   Max.   :59.00

Conclusion

Converting databases to JSON to import into MongoDB expanded the size of the file. R was unable to handle the JSON file. It was better to convert to CSV to be imported into MongoDB.

The structure of the data in MongoDB was not contained in rows or columns they are collections.