library(tidyverse)
library(RMySQL)
library(DBI)
library(gridExtra)
library(mongolite)
# install.packages("RMySQL")
con2 <- dbConnect(RMySQL::MySQL(), dbname = "flights", host="localhost", user="root",password= "")
dbListTables(con2)
## [1] "airlines" "airports" "flights" "movies_master"
## [5] "movies_review" "planes" "weather"
# Database Operation
con2 <- dbConnect(RMySQL::MySQL(), dbname = "flights", host="localhost", user="root",password= "")
query <- "SELECT * FROM movies_review"
tab1 <- dbSendQuery(con2,query)
suppressWarnings(rm(movies_review ))
movies_review <- dbFetch(tab1)
query <- "SELECT * FROM movies_master"
tab1 <- dbSendQuery(con2,query)
movies_master <- dbFetch(tab1)
DT::datatable(movies_master)
DT::datatable(movies_review)
We are creating two colelction for this data, one for Movie and Movei_Review
# Using plain-text
URI = sprintf("mongodb+srv://%s:%s@%s/", USER, PASS, HOST)
OpenCon<- mongo(url = URI)
#Deault is Test colelction
OpenCon
## <Mongo collection> 'test'
## $aggregate(pipeline = "{}", options = "{\"allowDiskUse\":true}", handler = NULL, pagesize = 1000, iterate = FALSE)
## $count(query = "{}")
## $disconnect(gc = TRUE)
## $distinct(key, query = "{}")
## $drop()
## $export(con = stdout(), bson = FALSE, query = "{}", fields = "{}", sort = "{\"_id\":1}")
## $find(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0, handler = NULL, pagesize = 1000)
## $import(con, bson = FALSE)
## $index(add = NULL, remove = NULL)
## $info()
## $insert(data, pagesize = 1000, stop_on_error = TRUE, ...)
## $iterate(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0)
## $mapreduce(map, reduce, query = "{}", sort = "{}", limit = 0, out = NULL, scope = NULL)
## $remove(query, just_one = FALSE)
## $rename(name, db = NULL)
## $replace(query, update = "{}", upsert = FALSE)
## $run(command = "{\"ping\": 1}", simplify = TRUE)
## $update(query, update = "{\"$set\":{}}", filters = NULL, upsert = FALSE, multiple = FALSE)
# Lets create new colection MOvie
CON_movie <- mongo("movie", db = "MSDS_Review",url=URI)
#Check what is in collection Movie
# empty the colelction
CON_movie$drop() #Refresh the collection
CON_movie$count() # COunt collection
## [1] 0
CON_movie$insert(movies_master)
## List of 5
## $ nInserted : num 6
## $ nMatched : num 0
## $ nRemoved : num 0
## $ nUpserted : num 0
## $ writeErrors: list()
CON_movie$count() # COunt collection
## [1] 6
CON_movie$disconnect()
CON_movie$count()
## Connection lost. Trying to reconnect with mongo...
## [1] 6
# Lets create new colection MOvieR for Movie Reviews
CON_movieR <- mongo("movie_review", db = "MSDS_Review",url=URI)
CON_movieR$drop() #Refresh the collection
CON_movieR$count() # COunt collection
## [1] 0
CON_movieR$insert(movies_review)
## List of 5
## $ nInserted : num 149
## $ nMatched : num 0
## $ nRemoved : num 0
## $ nUpserted : num 0
## $ writeErrors: list()
CON_movieR$count() # COunt collection
## [1] 149
#View connection
CON_movie
## <Mongo collection> 'movie'
## $aggregate(pipeline = "{}", options = "{\"allowDiskUse\":true}", handler = NULL, pagesize = 1000, iterate = FALSE)
## $count(query = "{}")
## $disconnect(gc = TRUE)
## $distinct(key, query = "{}")
## $drop()
## $export(con = stdout(), bson = FALSE, query = "{}", fields = "{}", sort = "{\"_id\":1}")
## $find(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0, handler = NULL, pagesize = 1000)
## $import(con, bson = FALSE)
## $index(add = NULL, remove = NULL)
## $info()
## $insert(data, pagesize = 1000, stop_on_error = TRUE, ...)
## $iterate(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0)
## $mapreduce(map, reduce, query = "{}", sort = "{}", limit = 0, out = NULL, scope = NULL)
## $remove(query, just_one = FALSE)
## $rename(name, db = NULL)
## $replace(query, update = "{}", upsert = FALSE)
## $run(command = "{\"ping\": 1}", simplify = TRUE)
## $update(query, update = "{\"$set\":{}}", filters = NULL, upsert = FALSE, multiple = FALSE)
CON_movieR
## <Mongo collection> 'movie_review'
## $aggregate(pipeline = "{}", options = "{\"allowDiskUse\":true}", handler = NULL, pagesize = 1000, iterate = FALSE)
## $count(query = "{}")
## $disconnect(gc = TRUE)
## $distinct(key, query = "{}")
## $drop()
## $export(con = stdout(), bson = FALSE, query = "{}", fields = "{}", sort = "{\"_id\":1}")
## $find(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0, handler = NULL, pagesize = 1000)
## $import(con, bson = FALSE)
## $index(add = NULL, remove = NULL)
## $info()
## $insert(data, pagesize = 1000, stop_on_error = TRUE, ...)
## $iterate(query = "{}", fields = "{\"_id\":0}", sort = "{}", skip = 0, limit = 0)
## $mapreduce(map, reduce, query = "{}", sort = "{}", limit = 0, out = NULL, scope = NULL)
## $remove(query, just_one = FALSE)
## $rename(name, db = NULL)
## $replace(query, update = "{}", upsert = FALSE)
## $run(command = "{\"ping\": 1}", simplify = TRUE)
## $update(query, update = "{\"$set\":{}}", filters = NULL, upsert = FALSE, multiple = FALSE)
#Validation if data was moved successfuly to Mongo DB
ifelse(CON_movie$count()==nrow(movies_master),"movies_master All Data in Mongo","Something is not in Mongo")
## [1] "movies_master All Data in Mongo"
ifelse(CON_movieR$count()==nrow(movies_review),"movies_review All Data in Mongo","Something is not in Mongo")
## [1] "movies_review All Data in Mongo"
# Quick Way
CON_movieR$count()==nrow(movies_master)
## [1] FALSE
#The empty query {} means: select all data.
# Get all records
CON_movieR$count('{}')
## [1] 149
## Read all the data back into R
CON_movieR$find()
CON_movieR$find('{}')
## To query all rows where User_Name == "TheTopDawg" AND Rating > 5
CON_movieR$find('{"User_Name" : "TheTopDawg" ,"Rating":{"$gt":5}}')
#Read rating for Movie_ID = 1 and rating less than 5
subsetRevie <- CON_movieR$find('{"Movie_ID" : 1 ,"Rating":{"$lt":5}}')
subsetRevie
#Using In , find all the movie rating for 1 and 2 where rating is less than 5
subsetRevie12 <- CON_movieR$find('{"Movie_ID" :{"$in":[1,2]} ,"Rating":{"$lt":5}}')
subsetRevie12
# using Qury and Fields
MR_Query_Fields <- CON_movieR$find(
query = '{"Movie_ID" : 1 , "Rating" : {"$gt":5 }}',
fields = '{"ID" : true, "Rating" : true,"User_Name" : true,"Movie_ID":true}',
limit = 5
)
print(MR_Query_Fields)
## _id ID Movie_ID User_Name Rating
## 1 5cbd25d1364ca61aa0003858 1 1 TheTopDawg 8
## 2 5cbd25d1364ca61aa0003859 2 1 krice23 8
## 3 5cbd25d1364ca61aa000385a 3 1 celestinoavilajr 8
## 4 5cbd25d1364ca61aa000385b 4 1 dave-mcclain 8
## 5 5cbd25d1364ca61aa000385c 5 1 neener3707 7
#sort by rating IN dESCENDING ORDER for Movie_ID = 1
CON_movieR$find('{"Movie_ID" : 1}' , sort = '{"Rating": -1}')
# Check time , since by default Mongo has _id is the only index, and other field would be costly to the db
system.time(CON_movieR$find('{"Movie_ID" : 1}' , sort = '{"Rating": -1}') )
## user system elapsed
## 0.00 0.00 0.06
system.time(CON_movieR$find('{"Movie_ID" : 1}' , sort = '{"_id": -1}') )
## user system elapsed
## 0.00 0.00 0.05
#lets add Movie_ID as Index
CON_movieR$index(add = '{"Movie_ID" : 1}')
# Lets see how many index we have
CON_movieR$index()
# Check sysetm time taken for excecution now
system.time(CON_movieR$find('{"Movie_ID" : 1}' , sort = '{"Rating": -1}') )
## user system elapsed
## 0.00 0.00 0.05
# Lets use iterate to read data
DT::datatable(CON_movieR$find())
Review_Iter <- CON_movieR$iterate('{"Movie_ID" : 1}', sort = '{"Rating": -1}', limit = 7)
# read records from the iterator
while(!is.null(x <- Review_Iter$one())){
cat(sprintf("Found Movie ID %s for USER : %s\n", x$Movie_ID, x$User_Name))
}
## Found Movie ID 1 for USER : bdwyphil
## Found Movie ID 1 for USER : terry_z
## Found Movie ID 1 for USER : vhinzajhltb
## Found Movie ID 1 for USER : godsgirlfrombluford
## Found Movie ID 1 for USER : Neymar
## Found Movie ID 1 for USER : maruugaa
## Found Movie ID 1 for USER : TheTopDawg
# Lets say I want to remove all the rating of 4 for Movie 1
# 1 list those record
CON_movieR$find('{"Movie_ID" : 1,"Rating":{"$eq":4}}')
# 2 : Remvoe
CON_movieR$remove('{"Movie_ID" : 1,"Rating":{"$eq":4}}')
# 3 : Valdiate
CON_movieR$find('{"Movie_ID" : 1,"Rating":{"$eq":4}}')
CON_movieR$find('{"Movie_ID" : 1,"Rating":{"$ne":4}}') # not equal to 4
CON_movieR$count() # Count got reduced .
## [1] 148
#Let update rating from 8 to 9
CON_movieR$find('{"Movie_ID" : 1,"User_Name":"krice23"}')
CON_movieR$update('{"Movie_ID" : 1,"User_Name":"krice23"}','{"$set":{"Rating":9}}')
## List of 3
## $ modifiedCount: int 1
## $ matchedCount : int 1
## $ upsertedCount: int 0
CON_movieR$find('{"Movie_ID" : 1,"User_Name":"krice23"}')
# we should using multiple = TRUE if we want to udpate more that one record.
CON_movieR$find('{"User_Name":"TheTopDawg"}')
CON_movieR$update('{"User_Name":"TheTopDawg"}','{"$set":{"Rating":9}}')
## List of 3
## $ modifiedCount: int 1
## $ matchedCount : int 1
## $ upsertedCount: int 0
CON_movieR$find('{"User_Name":"TheTopDawg"}')
# multiple = TRUE if we want to udpate more that one record.
CON_movieR$update('{"User_Name":"TheTopDawg"}','{"$set":{"Rating":7}}',multiple = TRUE)
## List of 3
## $ modifiedCount: int 2
## $ matchedCount : int 2
## $ upsertedCount: int 0
CON_movieR$find('{"User_Name":"TheTopDawg"}')
Mongo stores data in BSON , we can get BSON output by passing BSON = true
CON_movie$export()
## { "_id" : { "$oid" : "5cbd25d0364ca61aa0003852" }, "ID" : 1, "Movie_Name" : "Bad Samaritan", "Year" : "2018" }
## { "_id" : { "$oid" : "5cbd25d0364ca61aa0003853" }, "ID" : 2, "Movie_Name" : "Avengers: Infinity War", "Year" : "2018" }
## { "_id" : { "$oid" : "5cbd25d0364ca61aa0003854" }, "ID" : 3, "Movie_Name" : "Rampage", "Year" : "2018" }
## { "_id" : { "$oid" : "5cbd25d0364ca61aa0003855" }, "ID" : 4, "Movie_Name" : "Truth or Dare", "Year" : "2018" }
## { "_id" : { "$oid" : "5cbd25d0364ca61aa0003856" }, "ID" : 5, "Movie_Name" : "Incredibles 2", "Year" : "2018" }
## { "_id" : { "$oid" : "5cbd25d0364ca61aa0003857" }, "ID" : 6, "Movie_Name" : "A Star Is Born", "Year" : "2018" }
CON_movie$export(file("mr.bson"),bson = TRUE)
OpenCon$run('{"listCollections":1}')
## $cursor
## $cursor$id
## [1] 0
##
## $cursor$ns
## [1] "test.$cmd.listCollections"
##
## $cursor$firstBatch
## name type info.readOnly
## 1 week9msds collection FALSE
## 2 book collection FALSE
## 3 users collection FALSE
## 4 fs.files collection FALSE
## 5 fs.chunks collection FALSE
## info.uuid idIndex.v
## 1 c7, c8, 84, 9c, 97, 98, 4c, a2, 92, e9, 7b, 86, ba, 19, 75, 99 2
## 2 b3, e0, 85, bb, 07, 0c, 43, 76, a2, 2e, f6, 33, ae, c0, 85, e5 2
## 3 18, 40, 84, 18, 57, c0, 4b, e3, 81, fe, 8d, bb, dc, 6c, 9e, 4a 2
## 4 7c, 33, 08, 49, fd, 51, 48, 06, b6, 92, e4, c5, c5, 62, 13, ba 2
## 5 7a, 0a, 2a, ac, 3b, cd, 43, 62, a3, 5d, 20, bb, 65, 36, 36, 3d 2
## idIndex._id idIndex.name idIndex.ns
## 1 1 _id_ test.week9msds
## 2 1 _id_ test.book
## 3 1 _id_ test.users
## 4 1 _id_ test.fs.files
## 5 1 _id_ test.fs.chunks
##
##
## $ok
## [1] 1
##
## $operationTime
## $operationTime$t
## [1] 1555899862
##
## $operationTime$i
## [1] 7
##
##
## $`$clusterTime`
## $`$clusterTime`$clusterTime
## $`$clusterTime`$clusterTime$t
## [1] 1555899862
##
## $`$clusterTime`$clusterTime$i
## [1] 7
##
##
## $`$clusterTime`$signature
## $`$clusterTime`$signature$hash
## [1] 24 0e 93 88 d5 fb 1f 67 01 cc 54 fa df 7a 36 12 28 97 23 fc
## attr(,"type")
## [1] 00
##
## $`$clusterTime`$signature$keyId
## [1] 6.659713e+18
CON_admin <- admin <- mongo(db = "admin",url = URI)
CON_admin$run('{"listDatabases":1}')
## $databases
## name sizeOnDisk empty
## 1 606Project 24576 FALSE
## 2 MS 229376 FALSE
## 3 MSDS_Review 32768 FALSE
## 4 MSProject3 3239936 FALSE
## 5 Week9 24576 FALSE
## 6 test 212992 FALSE
## 7 testSUNDAY 98304 FALSE
## 8 week9msds 24576 FALSE
## 9 admin 245760 FALSE
## 10 local 3534430208 FALSE
##
## $totalSize
## [1] 3538563072
##
## $ok
## [1] 1
##
## $operationTime
## $operationTime$t
## [1] 1555899862
##
## $operationTime$i
## [1] 7
##
##
## $`$clusterTime`
## $`$clusterTime`$clusterTime
## $`$clusterTime`$clusterTime$t
## [1] 1555899862
##
## $`$clusterTime`$clusterTime$i
## [1] 7
##
##
## $`$clusterTime`$signature
## $`$clusterTime`$signature$hash
## [1] 24 0e 93 88 d5 fb 1f 67 01 cc 54 fa df 7a 36 12 28 97 23 fc
## attr(,"type")
## [1] 00
##
## $`$clusterTime`$signature$keyId
## [1] 6.659713e+18
NOSQL_CON = mongo(collection = "movie", db = "MSDS_Review", url = URI) # create connection, database and collection
NOSQL_CON$index()
NOSQL_CON$count()
## [1] 6
NOSQL_CON$distinct("Movie_Name")
## [1] "Bad Samaritan" "Avengers: Infinity War"
## [3] "Rampage" "Truth or Dare"
## [5] "Incredibles 2" "A Star Is Born"
DT::datatable(NOSQL_CON$find())
NOSQL_CON$iterate()$one()
## $ID
## [1] 1
##
## $Movie_Name
## [1] "Bad Samaritan"
##
## $Year
## [1] "2018"
# Find Movie with name "Incredibles 2"
# USe count to get the count of matching record .
# use find to get the data
NOSQL_CON$count('{"Movie_Name" : "Incredibles 2" }')
## [1] 1
NOSQL_CON$count('{"Movie_Name" : "Incredibles2" }') # Note space missing between name `Incredibles 2`.
## [1] 0
NOSQL_CON$find('{"Movie_Name" : "Incredibles 2" }')
MOview_Rating <- NOSQL_CON$find('{"ID" : 1}')
print(MOview_Rating)
## ID Movie_Name Year
## 1 1 Bad Samaritan 2018
| SQL Database | NoSQL Database (MongoDB) |
|---|---|
| Relational database | Non-relational database |
| Supports SQL query language | Supports JSON query language |
| Table based | Collection based and key-value pair |
| Row based | Document based |
| Column based | Field based |
| Support foreign key | No support for foreign key |
| Support for triggers | No Support for triggers |
| Contains schema which is predefined | Contains dynamic schema |
| Not fit for hierarchical data storage | Best fit for hierarchical data storage |
| Vertically scalable - increasing RAM | Horizontally scalable - add more servers |
| Emphasizes on ACID properties (Atomicity, Consistency, Isolation and Durability) | Emphasizes on CAP theorem (Consistency, Availability and Partition tolerance) |
MongoDB is an open source document-oriented NoSQL database which stores data in the form of JSON-like objects. Hence a collection in MongoDB is equivalent to Table in relational database And a document in a collection in MongoDB is equivalent to a row in relational database table.
If we were using a relational database, we could perform a join on users and stores, and get all our objects in a single query. But MongoDB does not support joins and so, at times, requires bit of denormalization
NoSQL and relational databses differ in the manner they represent data. While a row stores data in columns, document stores in JSON like structure.
MongoDB documents support dynamic schema, i.e. documents in same collection can have different schema , one can have 5 fileds , while other can have more or less than five. This allows complex hierarchical /tree structured schemas is be efficiently stored in MongoDB.
Relationships in relational databse can be achieved through primary key /foreign key constraints ; No such mapping is present in MongoDB other than linking / emnbedding of documents .