The purpose of this assignment is to read a MYSQL database to MongoDB.
Steps 1. Read MYSQL database into R data frame 1. Export a CSV file from R 3. Migrate CSV file to MongoDB 4. Connect to MongoDB 5. Migrate file from MongoDB to R data frame
library(mongolite)
library(RMySQL)
## Loading required package: DBI
library(RJSONIO)
library(rjson)
##
## Attaching package: 'rjson'
## The following objects are masked from 'package:RJSONIO':
##
## fromJSON, toJSON
source("logincredentiasl.R")
dbConnect(MySQL(), user=MySQL_Username, password=MySQL_Password)
## <MySQLConnection:0,0>
sqlDb <- dbDriver("MySQL")
cnctSql <- dbConnect(sqlDb, MySQL_Username, MySQL_Password, dbname = "flights",
host = "localhost")
dbListTables(cnctSql)
## [1] "airlines" "airports" "flights"
qryFlights <- "SELECT * FROM flights"
dbFlights <- dbGetQuery(cnctSql, qryFlights)
head(dbFlights)
## year month day dep_time dep_delay arr_time arr_delay carrier tailnum
## 1 2013 1 1 517 2 830 11 UA N14228
## 2 2013 1 1 533 4 850 20 UA N24211
## 3 2013 1 1 542 2 923 33 AA N619AA
## 4 2013 1 1 544 -1 1004 -18 B6 N804JB
## 5 2013 1 1 554 -6 812 -25 DL N668DN
## 6 2013 1 1 554 -4 740 12 UA N39463
## flight origin dest air_time distance hour minute
## 1 1545 EWR IAH 227 1400 5 17
## 2 1714 LGA IAH 227 1416 5 33
## 3 1141 JFK MIA 160 1089 5 42
## 4 725 JFK BQN 183 1576 5 44
## 5 461 LGA ATL 116 762 6 54
## 6 1696 EWR ORD 150 719 6 54
qryAirl <- "SELECT * FROM airlines"
dbAirl <- dbGetQuery(cnctSql, qryAirl)
head(dbAirl)
## carrier name
## 1 9E Endeavor Air Inc.\r
## 2 AA American Airlines Inc.\r
## 3 AS Alaska Airlines Inc.\r
## 4 B6 JetBlue Airways\r
## 5 DL Delta Air Lines Inc.\r
## 6 EV ExpressJet Airlines Inc.\r
qryAirpts <- "SELECT * FROM airports"
dbAirpts <- dbGetQuery(cnctSql, qryAirpts)
head(dbAirpts)
## faa name lat lon alt tz dst
## 1 04G Lansdowne Airport 41.13047 -80.61958 1044 -5 A
## 2 06A Moton Field Municipal Airport 32.46057 -85.68003 264 -5 A
## 3 06C Schaumburg Regional 41.98934 -88.10124 801 -6 A
## 4 06N Randall Airport 41.43191 -74.39156 523 -5 A
## 5 09J Jekyll Island Airport 31.07447 -81.42778 11 -4 A
## 6 0A9 Elizabethton Municipal Airport 36.37122 -82.17342 1593 -4 A
dbDisconnect(cnctSql)
## [1] TRUE
write.table(dbFlights, file = "/users/sharonmorris/flights.csv", sep = ",", row.names = FALSE)
write.table(dbAirpts, file = "/users/sharonmorris/airports.csv", sep = ",", row.names = FALSE)
write.table(dbAirl, file = "/users/sharonmorris/airlines.csv", sep = ",", row.names = FALSE)
Sharons-MacBook-Pro:~ sharonmorris$ mongoimport –db flights –collection airlines –type csv –file “airlines.csv” –headerline 2016-11-14T23:35:41.395-0500 connected to: localhost 2016-11-14T23:35:41.478-0500 imported 16 documents
Sharons-MacBook-Pro:~ sharonmorris$ mongoimport –db flights –collection airports –type csv –file “airports.csv” –headerline 2016-11-14T23:37:08.728-0500 connected to: localhost 2016-11-14T23:37:08.815-0500 imported 1397 documents
Sharons-MacBook-Pro:~ sharonmorris$ mongoimport –db flights –collection flights –type csv –file “flights.csv” –headerline 2016-11-14T23:38:02.788-0500 connected to: localhost 2016-11-14T23:38:09.143-0500 imported 336776 documents
library(RMongo)
## Loading required package: rJava
##
## Attaching package: 'RMongo'
## The following object is masked from 'package:RMySQL':
##
## dbDisconnect
## The following objects are masked from 'package:DBI':
##
## dbDisconnect, dbGetQuery
library(knitr)
# Connect to MongoDB database
#aL <- mongoDbConnect("airlines", "localhost", port = 27017)
#aP <- mongoDbConnect("airports", "localhost", port = 27017)
f <- mongoDbConnect("flights", "localhost", port = 27017)
queryAirlines <- (dbGetQuery(f, 'airlines', "{'carrier': '9E'}"))
head(queryAirlines)
## X_id name carrier
## 1 582a909ddcf32ffaa2c6a9ba Endeavor Air Inc. NA
## 2 9E NA
queryAirports <- dbGetQuery(f, 'airports', "{'lat': {'$lt': '100'}}")
queryFlights <- dbGetQuery(f, 'flights', "{'month': {'$lt': 10}, 'minute': {'$gte': 0.1}}")
a <- queryFlights[c('month', 'minute')]
summary(a)
## month minute
## Min. :1 Min. : 1.00
## 1st Qu.:1 1st Qu.:16.00
## Median :1 Median :30.00
## Mean :1 Mean :31.63
## 3rd Qu.:1 3rd Qu.:49.25
## Max. :1 Max. :59.00
Converting databases to JSON to import into MongoDB expanded the size of the file. R was unable to handle the JSON file. It was better to convert to CSV to be imported into MongoDB.
The structure of the data in MongoDB was not contained in rows or columns they are collections.