Take information from a relational database created in week two’s assignment and migrate it to a MongoDB NoSQL database using R.
For this step, I installed the MongoDB database (community edition).
library(rmongodb)
# connect to MongoDB
mongo = mongo.create(host = "localhost")
print(mongo.is.connected(mongo))
## [1] TRUE
#Read in the Chicago crimes dataset
crimes=data.table::fread("Chicago Homicides 2018.csv")
names(crimes)
## [1] "ID" "Case Number" "Date"
## [4] "Block" "IUCR" "Primary Type"
## [7] "Description" "Location Description" "Arrest"
## [10] "Domestic" "Beat" "District"
## [13] "Ward" "Community Area" "FBI Code"
## [16] "X Coordinate" "Y Coordinate" "Year"
## [19] "Updated On" "Latitude" "Longitude"
## [22] "Location"
#Clean the data
names(crimes) = gsub(" ","",names(crimes))
names(crimes)
## [1] "ID" "CaseNumber" "Date"
## [4] "Block" "IUCR" "PrimaryType"
## [7] "Description" "LocationDescription" "Arrest"
## [10] "Domestic" "Beat" "District"
## [13] "Ward" "CommunityArea" "FBICode"
## [16] "XCoordinate" "YCoordinate" "Year"
## [19] "UpdatedOn" "Latitude" "Longitude"
## [22] "Location"
#Create a database and collection called crimes
my_collection = mongo(collection = "crimes", db = "Chicago") # create connection, database and collection
my_collection$insert(crimes)
## List of 5
## $ nInserted : num 424
## $ nMatched : num 0
## $ nRemoved : num 0
## $ nUpserted : num 0
## $ writeErrors: list()
#Take a look at the information
my_collection$info()
## List of 4
## $ collection: chr "crimes"
## $ db : NULL
## $ stats :List of 12
## ..$ ns : chr "Chicago.crimes"
## ..$ size : int 404060
## ..$ count : int 848
## ..$ avgObjSize : int 476
## ..$ storageSize : int 98304
## ..$ capped : logi FALSE
## ..$ wiredTiger :List of 14
## ..$ nindexes : int 1
## ..$ indexDetails :List of 1
## ..$ totalIndexSize: int 16384
## ..$ indexSizes :List of 1
## ..$ ok : num 1
## $ server :List of 27
## ..$ host : chr "DESKTOP-FGDMVA4"
## ..$ version : chr "4.0.4"
## ..$ process : chr "C:\\Program Files\\MongoDB\\Server\\4.0\\bin\\mongod.exe"
## ..$ pid : num 4500
## ..$ uptime : num 225473
## ..$ uptimeMillis : num 2.25e+08
## ..$ uptimeEstimate : num 225476
## ..$ localTime : POSIXct[1:1], format: "2018-11-23 15:13:55"
## ..$ asserts :List of 5
## ..$ connections :List of 3
## ..$ extra_info :List of 6
## ..$ freeMonitoring :List of 1
## ..$ globalLock :List of 3
## ..$ locks :List of 3
## ..$ logicalSessionRecordCache:List of 11
## ..$ network :List of 7
## ..$ opLatencies :List of 4
## ..$ opcounters :List of 6
## ..$ opcountersRepl :List of 6
## ..$ storageEngine :List of 5
## ..$ tcmalloc :List of 2
## ..$ transactions :List of 9
## ..$ transportSecurity :List of 5
## ..$ wiredTiger :List of 17
## ..$ mem :List of 6
## ..$ metrics :List of 10
## ..$ ok : num 1
#Take a count of the collection
my_collection$count()
## [1] 848
#Iterate over one table in the database
my_collection$iterate()$one()
## $ID
## [1] 23810
##
## $CaseNumber
## [1] "JB140548"
##
## $Date
## [1] "02/04/2018 01:36:00 AM"
##
## $Block
## [1] "117XX S SANGAMON ST"
##
## $IUCR
## [1] 110
##
## $PrimaryType
## [1] "HOMICIDE"
##
## $Description
## [1] "FIRST DEGREE MURDER"
##
## $LocationDescription
## [1] "STREET"
##
## $Arrest
## [1] FALSE
##
## $Domestic
## [1] FALSE
##
## $Beat
## [1] 524
##
## $District
## [1] 5
##
## $Ward
## [1] 34
##
## $CommunityArea
## [1] 53
##
## $FBICode
## [1] "01A"
##
## $XCoordinate
## [1] 1172100
##
## $YCoordinate
## [1] 1827003
##
## $Year
## [1] 2018
##
## $UpdatedOn
## [1] "02/11/2018 03:57:41 PM"
##
## $Latitude
## [1] 41.68074
##
## $Longitude
## [1] -87.64566
##
## $Location
## [1] "(41.68073915, -87.645661144)"
The results from the tutorial are also confirmed on the MongoDB server shown in the screen shots below:
Mongo Tutorial Image
With the success of the tutorial, close the connection.
#Close the connection to the mongoDB
mongo.destroy(mongo)
## NULL
print(mongo.is.connected(mongo))
## [1] FALSE
In this section, I read in the SQLite Movies database created from the Week 2 Assignment.
#create a connection to the SQLite database
conn<- dbConnect(RMySQL::MySQL(),
dbname="movie_reviews",
host="DESKTOP-FGDMVA4",
user="Student01",
password="CUNY_DATA607"
)
dbListTables(conn)
## [1] "movies" "reviewers" "reviews"
MW Frost code for migrating a R dataframe to MongoDB.
R function migrates code to MongoDB
#The parameters take in a dataframe, the name of the Mongo DB, the name of the collection and the variable names of a dataframe
convert_Movies_df <- function(df, db='MoviesMongoDB', col='moviescollection', fields=names(df)) {
#Create a connection
mongo <- mongo.create(host = "localhost")
if (mongo.is.connected(mongo)) {
#Iterates over each row of the dataframe and converts it to bson object which is then inserted into the Mongo instance.
for(i in 1:nrow(df)) {
b <- mongo.bson.from.list(df[i,fields])
if (!is.null(b)){
mongo.insert(mongo, paste(db, col, sep='.'), b)
#print("loaded")
}
else {
print('NULL BSON object')
print(t(df[i,]))
}
}
}
}
Apply the function to each dataframe
convert_Movies_df(Movies)
convert_Movies_df(Reviews)
convert_Movies_df(Reviewers)
movies_collection = mongo(collection = 'moviescollection', db='MoviesMongoDB')
movies_collection$info()
## List of 4
## $ collection: chr "moviescollection"
## $ db : NULL
## $ stats :List of 12
## ..$ ns : chr "MoviesMongoDB.moviescollection"
## ..$ size : int 16012
## ..$ count : int 138
## ..$ avgObjSize : int 116
## ..$ storageSize : int 16384
## ..$ capped : logi FALSE
## ..$ wiredTiger :List of 14
## ..$ nindexes : int 1
## ..$ indexDetails :List of 1
## ..$ totalIndexSize: int 16384
## ..$ indexSizes :List of 1
## ..$ ok : num 1
## $ server :List of 27
## ..$ host : chr "DESKTOP-FGDMVA4"
## ..$ version : chr "4.0.4"
## ..$ process : chr "C:\\Program Files\\MongoDB\\Server\\4.0\\bin\\mongod.exe"
## ..$ pid : num 4500
## ..$ uptime : num 225479
## ..$ uptimeMillis : num 2.25e+08
## ..$ uptimeEstimate : num 225482
## ..$ localTime : POSIXct[1:1], format: "2018-11-23 15:14:01"
## ..$ asserts :List of 5
## ..$ connections :List of 3
## ..$ extra_info :List of 6
## ..$ freeMonitoring :List of 1
## ..$ globalLock :List of 3
## ..$ locks :List of 3
## ..$ logicalSessionRecordCache:List of 11
## ..$ network :List of 7
## ..$ opLatencies :List of 4
## ..$ opcounters :List of 6
## ..$ opcountersRepl :List of 6
## ..$ storageEngine :List of 5
## ..$ tcmalloc :List of 2
## ..$ transactions :List of 9
## ..$ transportSecurity :List of 5
## ..$ wiredTiger :List of 17
## ..$ mem :List of 6
## ..$ metrics :List of 10
## ..$ ok : num 1
A check on the number of rows in the dataframes from the SQlite database and the Mongodb collection confirms that the counts are accurate
movies_collection$count()
## [1] 138
nrow(Reviews) + nrow(Reviewers) + nrow(Movies)
## [1] 69
movies_collection$iterate()$one()
## $ID
## [1] 1
##
## $Title
## [1] "Avengers Infinity Wars"
##
## $Genre
## [1] "Super Hero Action"
##
## $Box_Office
## [1] 2.046e+09
##
## $Male_Lead
## [1] "Robert Downey Jr"
##
## $Female_Lead
## [1] "Scarlett johansson"
##
## $Length
## [1] "02:29:00"
##
## $Trailer
## [1] "https://www.youtube.com/watch?v=Xe5MeKNFjGQ"
A screen shot from the Mongo instance also shows that the data has been migrated:
Mongo Movies
I found this to be one of the more challenging assignments of the semester. I am very intrigued about Graph databases and NoSQL databases as this will become the standard database in my field of E-Discovery. As data grows larger and sources become more diverse, traditional relational databases may not be adquate.
Finally, I was not able to get the “mongo.get.database.collections” function to work properly, and I cannot figure it out.
mongo_new <- mongo.create(host = "localhost", db='MoviesMongoDB')
mongo.is.connected(mongo_new)
## [1] TRUE
mongo.get.database.collections(mongo_new, db='MoviesMongoDB')
## character(0)