For this assignment I used the DB that I built for the assignment in Week 2. Here is the link to sql file: https://github.com/olga0503/DATA-607/blob/master/HW2.sql
In order to transfer the data from relational DB to noSQL DB I implemented the following steps.
library(RMySQL)
library(DBI)
library(dbConnect)
library(rmongodb)
library(jsonlite)
library(stringr)
library(tidyr)
#db <- dbConnect(MySQL(),user="root",password="xxx",dbname="assignment2",host="localhost")
dbListTables(db)
## [1] "movies" "people" "reviews"
query <- dbSendQuery(db,"select * from movies")
movies <- fetch(query,n=-1)
movies
## movie_id movie_title
## 1 1 La la Land
## 2 2 Arrival
## 3 3 Lion
## 4 4 Star Trsck Beyond
## 5 5 Jakie
## 6 6 Zootopia
query <- dbSendQuery(db,"select movie_title, person_name,grade from movies,people,reviews where reviews.movie_id=movies.movie_id and people.person_id=reviews.person_id order by movies.movie_title")
reviews_data <- fetch(query,n=-1)
head(reviews_data)
## movie_title person_name grade
## 1 Arrival Pamela 5
## 2 Arrival Marina 2
## 3 Arrival Anrew 5
## 4 Arrival Alex 5
## 5 Arrival Albina 1
## 6 Arrival Irene 5
mongo <- mongo.create()
#testing connectoion
mongo.is.connected(mongo)
## [1] TRUE
db <- "cinematography"
coll <- "movies"
The json file should have the following structure:
movies = [
{“movie_id” = “…”, “movie_title” = “…”, “review”=[ {“name” = “…”, “grade” = “…”}, {“name” = “…”, “grade” = “…”}]},
{“movie_id” = “…”, “movie_title” = “…”, “review”=[ {“name” = “…”, “grade” = “…”}, {“name” = “…”, “grade” = “…”}]}, …]
#retriving all movie titles
movies
## movie_id movie_title
## 1 1 La la Land
## 2 2 Arrival
## 3 3 Lion
## 4 4 Star Trsck Beyond
## 5 5 Jakie
## 6 6 Zootopia
json_file_reviews<-c()
#building part of json file for reviews
#iterating throught all movies
for(i in 1:length(movies$movie_title)){
json_data_reviews<-c(sprintf('{"name":"%s", "grade": "%s"}',subset(reviews_data$person_name,reviews_data$movie_title==movies$movie_title[i]),subset(reviews_data$grade,reviews_data$movie_title==movies$movie_title[i])))
#separating each review by comma
json_data_reviews<-paste(json_data_reviews, collapse = ',')
#creating vector to store reviews for different movies
json_file_reviews<-c(json_file_reviews,json_data_reviews)
}
#adding '[' and ']' for review attribute
json_file_reviews<-paste('[',json_file_reviews[i], collapse = ',',']')
#building final json file
json_data_final <- c(sprintf('{"id":"%s", "title": "%s","review": %s}', movies$movie_id, movies$movie_title,json_file_reviews))
#separating movie entries by comma and adding '[' and ']' to the beggining and the and of json file
json_file_final<-paste("[",paste(json_data_final, collapse = ','),"]")
#printing json file
df<-fromJSON(json_file_final)
toJSON(df,pretty=TRUE)
## [
## {
## "id": "1",
## "title": "La la Land",
## "review": [
## {
## "name": "Pamela",
## "grade": "4"
## },
## {
## "name": "Marina",
## "grade": "4"
## },
## {
## "name": "Anrew",
## "grade": "3"
## },
## {
## "name": "Alex",
## "grade": "4"
## },
## {
## "name": "Albina",
## "grade": "5"
## },
## {
## "name": "Irene",
## "grade": "3"
## }
## ]
## },
## {
## "id": "2",
## "title": "Arrival",
## "review": [
## {
## "name": "Pamela",
## "grade": "4"
## },
## {
## "name": "Marina",
## "grade": "4"
## },
## {
## "name": "Anrew",
## "grade": "3"
## },
## {
## "name": "Alex",
## "grade": "4"
## },
## {
## "name": "Albina",
## "grade": "5"
## },
## {
## "name": "Irene",
## "grade": "3"
## }
## ]
## },
## {
## "id": "3",
## "title": "Lion",
## "review": [
## {
## "name": "Pamela",
## "grade": "4"
## },
## {
## "name": "Marina",
## "grade": "4"
## },
## {
## "name": "Anrew",
## "grade": "3"
## },
## {
## "name": "Alex",
## "grade": "4"
## },
## {
## "name": "Albina",
## "grade": "5"
## },
## {
## "name": "Irene",
## "grade": "3"
## }
## ]
## },
## {
## "id": "4",
## "title": "Star Trsck Beyond",
## "review": [
## {
## "name": "Pamela",
## "grade": "4"
## },
## {
## "name": "Marina",
## "grade": "4"
## },
## {
## "name": "Anrew",
## "grade": "3"
## },
## {
## "name": "Alex",
## "grade": "4"
## },
## {
## "name": "Albina",
## "grade": "5"
## },
## {
## "name": "Irene",
## "grade": "3"
## }
## ]
## },
## {
## "id": "5",
## "title": "Jakie",
## "review": [
## {
## "name": "Pamela",
## "grade": "4"
## },
## {
## "name": "Marina",
## "grade": "4"
## },
## {
## "name": "Anrew",
## "grade": "3"
## },
## {
## "name": "Alex",
## "grade": "4"
## },
## {
## "name": "Albina",
## "grade": "5"
## },
## {
## "name": "Irene",
## "grade": "3"
## }
## ]
## },
## {
## "id": "6",
## "title": "Zootopia",
## "review": [
## {
## "name": "Pamela",
## "grade": "4"
## },
## {
## "name": "Marina",
## "grade": "4"
## },
## {
## "name": "Anrew",
## "grade": "3"
## },
## {
## "name": "Alex",
## "grade": "4"
## },
## {
## "name": "Albina",
## "grade": "5"
## },
## {
## "name": "Irene",
## "grade": "3"
## }
## ]
## }
## ]
#converting json file to bson file
bs <- mongo.bson.from.JSON(json_file_final,pretty=TRUE)
#insertinf bson file to MongoDB
mongo.insert(mongo, ns = paste0(db ,".", coll), b = bs)
## [1] TRUE
#pulling data to bson file
bson <- mongo.find.one(mongo,paste0(db ,".", coll))
list <- mongo.bson.to.list(bson)
#pullind bsobn data to data frame
data = as.data.frame(t(unlist(list)), stringsAsFactors = F)
data
## _id 1.id 1.title 1.review.name 1.review.grade 1.review.name
## 1 256 1 La la Land Pamela 4 Marina
## 1.review.grade 1.review.name 1.review.grade 1.review.name 1.review.grade
## 1 4 Anrew 3 Alex 4
## 1.review.name 1.review.grade 1.review.name 1.review.grade 2.id 2.title
## 1 Albina 5 Irene 3 2 Arrival
## 2.review.name 2.review.grade 2.review.name 2.review.grade 2.review.name
## 1 Pamela 4 Marina 4 Anrew
## 2.review.grade 2.review.name 2.review.grade 2.review.name 2.review.grade
## 1 3 Alex 4 Albina 5
## 2.review.name 2.review.grade 3.id 3.title 3.review.name 3.review.grade
## 1 Irene 3 3 Lion Pamela 4
## 3.review.name 3.review.grade 3.review.name 3.review.grade 3.review.name
## 1 Marina 4 Anrew 3 Alex
## 3.review.grade 3.review.name 3.review.grade 3.review.name 3.review.grade
## 1 4 Albina 5 Irene 3
## 4.id 4.title 4.review.name 4.review.grade 4.review.name
## 1 4 Star Trsck Beyond Pamela 4 Marina
## 4.review.grade 4.review.name 4.review.grade 4.review.name 4.review.grade
## 1 4 Anrew 3 Alex 4
## 4.review.name 4.review.grade 4.review.name 4.review.grade 5.id 5.title
## 1 Albina 5 Irene 3 5 Jakie
## 5.review.name 5.review.grade 5.review.name 5.review.grade 5.review.name
## 1 Pamela 4 Marina 4 Anrew
## 5.review.grade 5.review.name 5.review.grade 5.review.name 5.review.grade
## 1 3 Alex 4 Albina 5
## 5.review.name 5.review.grade 6.id 6.title 6.review.name 6.review.grade
## 1 Irene 3 6 Zootopia Pamela 4
## 6.review.name 6.review.grade 6.review.name 6.review.grade 6.review.name
## 1 Marina 4 Anrew 3 Alex
## 6.review.grade 6.review.name 6.review.grade 6.review.name 6.review.grade
## 1 4 Albina 5 Irene 3
#disconnect from SQL
#on.exit(dbDisconnect(db))
#disconnect from Mongo DB
mongo.disconnect(mongo)
## [1] 0
## attr(,"mongo")
## <pointer: 0x7fe64bf4c020>
## attr(,"class")
## [1] "mongo"
## attr(,"host")
## [1] "127.0.0.1"
## attr(,"name")
## [1] ""
## attr(,"username")
## [1] ""
## attr(,"password")
## [1] ""
## attr(,"db")
## [1] "admin"
## attr(,"timeout")
## [1] 0
Advantages and disadvantages of Mongo DB
Advantages:
Disadvantages: