Migrate an SQL database to a No-SQL database. I will use the flights database as the SQL database and migrate to the NOSQL database MongoDB.
#load Mongo DB, JSON and other packages
library(mongolite)
library(tidyr)
library(dplyr)
library(jsonlite)
library(RMySQL)
For illustration I query the restaurants dataset using MongoDB. Data has been previously loaded into MongoDB:
# Query the Mongo database
m<-mongo(collection="restaurants", db="test")
# summary counts by borough
m$aggregate('[{"$group": { "_id": "$borough", "count": {"$sum": 1 } } }]')
##
Found 6 records...
Imported 6 records. Simplifying into dataframe...
## _id count
## 1 Missing 51
## 2 Queens 5656
## 3 Bronx 2338
## 4 Manhattan 10259
## 5 Brooklyn 6086
## 6 Staten Island 969
# select the data
restman<-m$find('{ "borough": "Manhattan" }')
##
Found 1000 records...
Found 2000 records...
Found 3000 records...
Found 4000 records...
Found 5000 records...
Found 6000 records...
Found 7000 records...
Found 8000 records...
Found 9000 records...
Found 10000 records...
Found 10259 records...
Imported 10259 records. Simplifying into dataframe...
restman<-flatten(restman, recursive=TRUE)
# data header
head(restman)
## borough cuisine
## 1 Manhattan Irish
## 2 Manhattan American
## 3 Manhattan American
## 4 Manhattan American
## 5 Manhattan Chicken
## 6 Manhattan Delicatessen
## grades
## 1 1409961600, 1374451200, 1343692800, 1325116800, A, A, A, A, 2, 11, 12, 12
## 2 1399420800, 1367539200, 1335744000, 1324944000, A, A, A, A, 3, 4, 6, 0
## 3 1409616000, 1387411200, 1369699200, 1354838400, 1332979200, A, B, A, A, A, 12, 16, 9, 13, 11
## 4 1410480000, 1377648000, 1364169600, 1329177600, B, A, B, A, 26, 9, 20, 12
## 5 1410739200, 1393891200, 1374105600, 1357689600, 1334016000, 1321315200, A, A, A, A, A, A, 10, 13, 13, 11, 10, 7
## 6 1390262400, 1357257600, 1339027200, 1326758400, A, A, A, A, 12, 11, 6, 8
## name restaurant_id address.building
## 1 Dj Reynolds Pub And Restaurant 30191841 351
## 2 1 East 66Th Street Kitchen 40359480 1
## 3 Glorious Food 40361521 522
## 4 P & S Deli Grocery 40362264 730
## 5 Harriet'S Kitchen 40362098 502
## 6 Bully'S Deli 40361708 759
## address.coord address.street address.zipcode
## 1 -73.98514, 40.76769 West 57 Street 10019
## 2 -73.96927, 40.76852 East 66 Street 10065
## 3 -73.95171, 40.76746 East 74 Street 10021
## 4 -73.96806, 40.79256 Columbus Avenue 10025
## 5 -73.97611, 40.78671 Amsterdam Avenue 10024
## 6 -73.99253, 40.73093 Broadway 10003
#summary by cuisine in Manhattan
rest_cu<-restman %>% group_by(cuisine) %>% count(borough,sort=TRUE)
# summary of restuarants in the City
head(rest_cu)
## Source: local data frame [6 x 3]
## Groups: cuisine [6]
##
## cuisine borough n
## <chr> <chr> <int>
## 1 American Manhattan 3205
## 2 Café/Coffee/Tea Manhattan 680
## 3 Italian Manhattan 621
## 4 Chinese Manhattan 510
## 5 Japanese Manhattan 438
## 6 Other Manhattan 371
I query the flights database from the local SQL server. I query the planes table in the flights database.
rmysql.settingsfile<-"C:/Program Files/MySQL/MySQL Server 5.0/my.ini"
con <- dbConnect(RMySQL::MySQL(), dbname = "flights", username="root", password="password")
flights.planes<-dbGetQuery(con,"SELECT * from planes")
dbDisconnect(con)
## [1] TRUE
# data header
head(flights.planes)
## tailnum year type manufacturer model engines
## 1 N10156 2004 Fixed wing multi engine EMBRAER EMB-145XR 2
## 2 N102UW 1998 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 3 N103US 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 4 N104UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## 5 N10575 2002 Fixed wing multi engine EMBRAER EMB-145LR 2
## 6 N105UW 1999 Fixed wing multi engine AIRBUS INDUSTRIE A320-214 2
## seats speed engine
## 1 55 NA Turbo-fan
## 2 182 NA Turbo-fan
## 3 182 NA Turbo-fan
## 4 182 NA Turbo-fan
## 5 55 NA Turbo-fan
## 6 182 NA Turbo-fan
I add the data SQL database flights.planes as a dataset in MongoDB.
fp_js<-mongo(collection="flights", db="planes",url="mongodb://localhost")
# insert the database into a MongoDB
fp_js$insert(flights.planes)
##
Processed 1000 rows...
Processed 2000 rows...
Processed 3000 rows...
Complete! Processed total of 3322 rows.
## $nInserted
## [1] 3322
##
## $nMatched
## [1] 0
##
## $nRemoved
## [1] 0
##
## $nUpserted
## [1] 0
##
## $writeErrors
## list()
# select the data for manufacturer
planes_embraer<-fp_js$find('{"manufacturer":"EMBRAER"}')
##
Found 1000 records...
Found 1196 records...
Imported 1196 records. Simplifying into dataframe...
#head of the data
head(planes_embraer)
## tailnum year type manufacturer model engines
## 1 N10156 2004 Fixed wing multi engine EMBRAER EMB-145XR 2
## 2 N10575 2002 Fixed wing multi engine EMBRAER EMB-145LR 2
## 3 N11106 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 4 N11107 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 5 N11109 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## 6 N11113 2002 Fixed wing multi engine EMBRAER EMB-145XR 2
## seats engine
## 1 55 Turbo-fan
## 2 55 Turbo-fan
## 3 55 Turbo-fan
## 4 55 Turbo-fan
## 5 55 Turbo-fan
## 6 55 Turbo-fan