The Task
For this assignment, you should take information from a relational database and migrate it to a NoSQL database of your own choosing.
For the NoSQL database, you may use MongoDB (which we introduced in week 7), Neo4j, or another NoSQL database of your choosing.
Your migration process needs to be reproducible. R code is encouraged, but not required.
You should also briefly describe the advantages and disadvantages of storing the data in a relational database vs. your NoSQL database.
Loaded Packages
prettydoc TRUE
devtools TRUE
rmongodb TRUE
RMySQL TRUE
knitr TRUE
jsonlite TRUE
Create the MySQL Database from CSVs
movieDB <- dbConnect(MySQL(), user = "root", password = "kmgkmg", dbname = "movie_ratings",
host = "localhost", port = 3306)
my.files <- c("friends", "movies", "movies_ratings_friends", "ratings")
root <- "https://raw.githubusercontent.com/kylegilde/D607-Data-Acquistion/master/data-sets/movie_ratings/"
# Load my files as DFs and then create tables
create_tables <- function(file_names, root, filetype, database) {
for (i in 1:length(file_names)) {
my.url <- paste0(root, file_names[i], filetype)
x <- read.csv(my.url)
dbWriteTable(database, file_names[i], x, overwrite = T, row.names = F)
}
}
create_tables(my.files, root, ".csv", movieDB)
# dbGetQuery(movieDB, paste('describe', my.files[1])) dbGetQuery(movieDB,
# paste('describe', my.files[2])) dbGetQuery(movieDB, paste('describe',
# my.files[3])) dbGetQuery(movieDB, paste('describe', my.files[4]))
all_tables <- dbGetQuery(movieDB, "show tables")
kable(all_tables, caption = "Tables")| Tables_in_movie_ratings |
|---|
| friends |
| movies |
| movies_ratings_friends |
| ratings |
Create the MongoDB Database and Collections from the created MySQL Database
my.mongo <- mongo.create(host = "localhost")
create_collections <- function(mysql_database, mongo_obj, mongo_db) {
if (mongo.is.connected(mongo_obj)) {
# get all tables in the DB
table_list <- dbGetQuery(mysql_database, "show tables")[, 1]
select_star <- "select * from"
for (i in 1:length(table_list)) {
# create a DF from the table
df <- dbGetQuery(movieDB, paste(select_star, table_list[i]))
# create bson list
bson_list <- lapply(split(df, 1:nrow(df)), function(x) mongo.bson.from.JSON(toJSON(x)))
# insert bson list into collection
collection <- paste(mongo_db, table_list[i], sep = ".")
mongo.insert.batch(mongo_obj, collection, bson_list)
# print one record
print(mongo.find.one(mongo_obj, collection))
}
# print(mongo.get.database.collections(my.mongo, mongo_db))
} else {
return("No connection")
}
}
create_collections(movieDB, my.mongo, "movieDB")## _id : 7 59063b7f9394d40d27fed7bb
## 1 : 3
## id : 16 1
## name : 2 Dave
##
## _id : 7 59063b7f9394d40d27fed7c1
## 1 : 3
## id : 16 1
## movie : 2 Arrival
##
## _id : 7 59063b7f9394d40d27fed7ca
## 1 : 3
## id : 16 1
## movie_id : 16 1
## friend_id : 16 1
## rating_id : 16 4
##
## _id : 7 59063b7f9394d40d27fed7f3
## 1 : 3
## id : 16 1
## rating : 2 Great!
## rating_score : 16 5
# mongo.get.database.collections(my.mongo, db = 'movieDB')
# mongo.get.databases(my.mongo) mongo.find.all(my.mongo, 'movieBD.friends')The Advantages of Using MongoDB
According to Tutorialspoint.com, MongoDB is advantageous because
- it is schema-less. A collection can hold more than one type of document.
- it is very easy to scale.
dbDisconnect(movieDB)
## [1] TRUE
mongo.drop.database(my.mongo, db = "movieDB")
## [1] TRUE
mongo.destroy(my.mongo)
## NULL