Overview

Migrate an SQL database to a No-SQL database. I will use the flights database as the SQL database and migrate to the NOSQL database MongoDB.

#load Mongo DB, JSON and other packages
library(mongolite)
library(tidyr)
library(dplyr)
library(jsonlite)
library(RMySQL)

Query a MongoDB Collection

For illustration I query the restaurants dataset using MongoDB. Data has been previously loaded into MongoDB:

# Query the Mongo database
m<-mongo(collection="restaurants", db="test")
# summary counts by borough
m$aggregate('[{"$group": { "_id": "$borough", "count": {"$sum": 1 } } }]')
## 
 Found 6 records...
 Imported 6 records. Simplifying into dataframe...
##             _id count
## 1       Missing    51
## 2        Queens  5656
## 3         Bronx  2338
## 4     Manhattan 10259
## 5      Brooklyn  6086
## 6 Staten Island   969
# select the data
restman<-m$find('{ "borough": "Manhattan" }')
## 
 Found 1000 records...
 Found 2000 records...
 Found 3000 records...
 Found 4000 records...
 Found 5000 records...
 Found 6000 records...
 Found 7000 records...
 Found 8000 records...
 Found 9000 records...
 Found 10000 records...
 Found 10259 records...
 Imported 10259 records. Simplifying into dataframe...
restman<-flatten(restman, recursive=TRUE)
# data header
head(restman)
##     borough      cuisine
## 1 Manhattan        Irish
## 2 Manhattan    American 
## 3 Manhattan    American 
## 4 Manhattan    American 
## 5 Manhattan      Chicken
## 6 Manhattan Delicatessen
##                                                                                                            grades
## 1                                       1409961600, 1374451200, 1343692800, 1325116800, A, A, A, A, 2, 11, 12, 12
## 2                                          1399420800, 1367539200, 1335744000, 1324944000, A, A, A, A, 3, 4, 6, 0
## 3                    1409616000, 1387411200, 1369699200, 1354838400, 1332979200, A, B, A, A, A, 12, 16, 9, 13, 11
## 4                                       1410480000, 1377648000, 1364169600, 1329177600, B, A, B, A, 26, 9, 20, 12
## 5 1410739200, 1393891200, 1374105600, 1357689600, 1334016000, 1321315200, A, A, A, A, A, A, 10, 13, 13, 11, 10, 7
## 6                                        1390262400, 1357257600, 1339027200, 1326758400, A, A, A, A, 12, 11, 6, 8
##                             name restaurant_id address.building
## 1 Dj Reynolds Pub And Restaurant      30191841              351
## 2     1 East 66Th Street Kitchen      40359480                1
## 3                  Glorious Food      40361521              522
## 4             P & S Deli Grocery      40362264              730
## 5              Harriet'S Kitchen      40362098              502
## 6                   Bully'S Deli      40361708              759
##         address.coord   address.street address.zipcode
## 1 -73.98514, 40.76769 West   57 Street           10019
## 2 -73.96927, 40.76852 East   66 Street           10065
## 3 -73.95171, 40.76746 East   74 Street           10021
## 4 -73.96806, 40.79256  Columbus Avenue           10025
## 5 -73.97611, 40.78671 Amsterdam Avenue           10024
## 6 -73.99253, 40.73093         Broadway           10003
#summary by cuisine in Manhattan
rest_cu<-restman %>% group_by(cuisine) %>% count(borough,sort=TRUE)
# summary of restuarants in the City
head(rest_cu)
## Source: local data frame [6 x 3]
## Groups: cuisine [6]
## 
##           cuisine   borough     n
##             <chr>     <chr> <int>
## 1       American  Manhattan  3205
## 2 Café/Coffee/Tea Manhattan   680
## 3         Italian Manhattan   621
## 4         Chinese Manhattan   510
## 5        Japanese Manhattan   438
## 6           Other Manhattan   371

Query the SQL database

I query the flights database from the local SQL server. I query the planes table in the flights database.

rmysql.settingsfile<-"C:/Program Files/MySQL/MySQL Server 5.0/my.ini"
con <- dbConnect(RMySQL::MySQL(), dbname = "flights", username="root", password="password")
flights.planes<-dbGetQuery(con,"SELECT * from planes")
dbDisconnect(con)
## [1] TRUE
# data header
head(flights.planes)
##   tailnum year                    type     manufacturer     model engines
## 1  N10156 2004 Fixed wing multi engine          EMBRAER EMB-145XR       2
## 2  N102UW 1998 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 3  N103US 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 4  N104UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
## 5  N10575 2002 Fixed wing multi engine          EMBRAER EMB-145LR       2
## 6  N105UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE  A320-214       2
##   seats speed    engine
## 1    55    NA Turbo-fan
## 2   182    NA Turbo-fan
## 3   182    NA Turbo-fan
## 4   182    NA Turbo-fan
## 5    55    NA Turbo-fan
## 6   182    NA Turbo-fan

Import to MongoDB

I add the data SQL database flights.planes as a dataset in MongoDB.

fp_js<-mongo(collection="flights", db="planes",url="mongodb://localhost")
# insert the database into a MongoDB
fp_js$insert(flights.planes)            
## 
Processed 1000 rows...
Processed 2000 rows...
Processed 3000 rows...
Complete! Processed total of 3322 rows.
## $nInserted
## [1] 3322
## 
## $nMatched
## [1] 0
## 
## $nRemoved
## [1] 0
## 
## $nUpserted
## [1] 0
## 
## $writeErrors
## list()
# select the data for manufacturer
planes_embraer<-fp_js$find('{"manufacturer":"EMBRAER"}')
## 
 Found 1000 records...
 Found 1196 records...
 Imported 1196 records. Simplifying into dataframe...
#head of the data
head(planes_embraer)
##   tailnum year                    type manufacturer     model engines
## 1  N10156 2004 Fixed wing multi engine      EMBRAER EMB-145XR       2
## 2  N10575 2002 Fixed wing multi engine      EMBRAER EMB-145LR       2
## 3  N11106 2002 Fixed wing multi engine      EMBRAER EMB-145XR       2
## 4  N11107 2002 Fixed wing multi engine      EMBRAER EMB-145XR       2
## 5  N11109 2002 Fixed wing multi engine      EMBRAER EMB-145XR       2
## 6  N11113 2002 Fixed wing multi engine      EMBRAER EMB-145XR       2
##   seats    engine
## 1    55 Turbo-fan
## 2    55 Turbo-fan
## 3    55 Turbo-fan
## 4    55 Turbo-fan
## 5    55 Turbo-fan
## 6    55 Turbo-fan