For this assignment I used the DB that I built for the assignment in Week 2. Here is the link to sql file: https://github.com/olga0503/DATA-607/blob/master/HW2.sql

In order to transfer the data from relational DB to noSQL DB I implemented the following steps.

  1. Installed R packages that will allow to connect with MySQL DB.
library(RMySQL)
library(DBI)
library(dbConnect)
library(rmongodb)
library(jsonlite)
library(stringr)
library(tidyr)
  1. Connected to MySQL
#db <- dbConnect(MySQL(),user="root",password="xxx",dbname="assignment2",host="localhost")
  1. Displayed tables
dbListTables(db)
## [1] "movies"  "people"  "reviews"
  1. Showed all movies and reviews.
query <- dbSendQuery(db,"select * from movies")
movies <- fetch(query,n=-1)
movies
##   movie_id       movie_title
## 1        1        La la Land
## 2        2           Arrival
## 3        3              Lion
## 4        4 Star Trsck Beyond
## 5        5             Jakie
## 6        6          Zootopia
query <- dbSendQuery(db,"select movie_title, person_name,grade from movies,people,reviews where reviews.movie_id=movies.movie_id and people.person_id=reviews.person_id order by movies.movie_title")
reviews_data <- fetch(query,n=-1)
head(reviews_data)
##   movie_title person_name grade
## 1     Arrival      Pamela     5
## 2     Arrival      Marina     2
## 3     Arrival       Anrew     5
## 4     Arrival        Alex     5
## 5     Arrival      Albina     1
## 6     Arrival       Irene     5
  1. Connected to MongoDB
mongo <- mongo.create()

#testing connectoion
mongo.is.connected(mongo)
## [1] TRUE
  1. Created new table and a new collection in MongoDB
db <- "cinematography"
coll <- "movies"
  1. Built JSON file

The json file should have the following structure:

movies = [

{“movie_id” = “…”, “movie_title” = “…”, “review”=[ {“name” = “…”, “grade” = “…”}, {“name” = “…”, “grade” = “…”}]},

{“movie_id” = “…”, “movie_title” = “…”, “review”=[ {“name” = “…”, “grade” = “…”}, {“name” = “…”, “grade” = “…”}]}, …]

#retriving all movie titles
movies
##   movie_id       movie_title
## 1        1        La la Land
## 2        2           Arrival
## 3        3              Lion
## 4        4 Star Trsck Beyond
## 5        5             Jakie
## 6        6          Zootopia
json_file_reviews<-c()

#building part of json file for reviews
#iterating throught all movies
for(i in 1:length(movies$movie_title)){
json_data_reviews<-c(sprintf('{"name":"%s", "grade": "%s"}',subset(reviews_data$person_name,reviews_data$movie_title==movies$movie_title[i]),subset(reviews_data$grade,reviews_data$movie_title==movies$movie_title[i])))

#separating each review by comma
json_data_reviews<-paste(json_data_reviews, collapse = ',')
#creating vector to store reviews for different movies
json_file_reviews<-c(json_file_reviews,json_data_reviews)
}


#adding '[' and ']' for review attribute
json_file_reviews<-paste('[',json_file_reviews[i], collapse = ',',']')

#building final json file
json_data_final <- c(sprintf('{"id":"%s", "title": "%s","review": %s}', movies$movie_id, movies$movie_title,json_file_reviews))

#separating movie entries by comma and adding '[' and ']' to the beggining and the and of json file
json_file_final<-paste("[",paste(json_data_final, collapse = ','),"]")

#printing json file
df<-fromJSON(json_file_final)
toJSON(df,pretty=TRUE)
## [
##   {
##     "id": "1",
##     "title": "La la Land",
##     "review": [
##       {
##         "name": "Pamela",
##         "grade": "4"
##       },
##       {
##         "name": "Marina",
##         "grade": "4"
##       },
##       {
##         "name": "Anrew",
##         "grade": "3"
##       },
##       {
##         "name": "Alex",
##         "grade": "4"
##       },
##       {
##         "name": "Albina",
##         "grade": "5"
##       },
##       {
##         "name": "Irene",
##         "grade": "3"
##       }
##     ]
##   },
##   {
##     "id": "2",
##     "title": "Arrival",
##     "review": [
##       {
##         "name": "Pamela",
##         "grade": "4"
##       },
##       {
##         "name": "Marina",
##         "grade": "4"
##       },
##       {
##         "name": "Anrew",
##         "grade": "3"
##       },
##       {
##         "name": "Alex",
##         "grade": "4"
##       },
##       {
##         "name": "Albina",
##         "grade": "5"
##       },
##       {
##         "name": "Irene",
##         "grade": "3"
##       }
##     ]
##   },
##   {
##     "id": "3",
##     "title": "Lion",
##     "review": [
##       {
##         "name": "Pamela",
##         "grade": "4"
##       },
##       {
##         "name": "Marina",
##         "grade": "4"
##       },
##       {
##         "name": "Anrew",
##         "grade": "3"
##       },
##       {
##         "name": "Alex",
##         "grade": "4"
##       },
##       {
##         "name": "Albina",
##         "grade": "5"
##       },
##       {
##         "name": "Irene",
##         "grade": "3"
##       }
##     ]
##   },
##   {
##     "id": "4",
##     "title": "Star Trsck Beyond",
##     "review": [
##       {
##         "name": "Pamela",
##         "grade": "4"
##       },
##       {
##         "name": "Marina",
##         "grade": "4"
##       },
##       {
##         "name": "Anrew",
##         "grade": "3"
##       },
##       {
##         "name": "Alex",
##         "grade": "4"
##       },
##       {
##         "name": "Albina",
##         "grade": "5"
##       },
##       {
##         "name": "Irene",
##         "grade": "3"
##       }
##     ]
##   },
##   {
##     "id": "5",
##     "title": "Jakie",
##     "review": [
##       {
##         "name": "Pamela",
##         "grade": "4"
##       },
##       {
##         "name": "Marina",
##         "grade": "4"
##       },
##       {
##         "name": "Anrew",
##         "grade": "3"
##       },
##       {
##         "name": "Alex",
##         "grade": "4"
##       },
##       {
##         "name": "Albina",
##         "grade": "5"
##       },
##       {
##         "name": "Irene",
##         "grade": "3"
##       }
##     ]
##   },
##   {
##     "id": "6",
##     "title": "Zootopia",
##     "review": [
##       {
##         "name": "Pamela",
##         "grade": "4"
##       },
##       {
##         "name": "Marina",
##         "grade": "4"
##       },
##       {
##         "name": "Anrew",
##         "grade": "3"
##       },
##       {
##         "name": "Alex",
##         "grade": "4"
##       },
##       {
##         "name": "Albina",
##         "grade": "5"
##       },
##       {
##         "name": "Irene",
##         "grade": "3"
##       }
##     ]
##   }
## ]
  1. Pulled Json file to MongoDB.
#converting json file to bson file
bs <- mongo.bson.from.JSON(json_file_final,pretty=TRUE)

#insertinf bson file to MongoDB
mongo.insert(mongo, ns = paste0(db ,".", coll), b = bs)
## [1] TRUE
  1. Tested Mongo DB
#pulling data to bson file
bson <- mongo.find.one(mongo,paste0(db ,".", coll))
list <- mongo.bson.to.list(bson)

#pullind bsobn data to data frame
data = as.data.frame(t(unlist(list)), stringsAsFactors = F)
data
##   _id 1.id    1.title 1.review.name 1.review.grade 1.review.name
## 1 256    1 La la Land        Pamela              4        Marina
##   1.review.grade 1.review.name 1.review.grade 1.review.name 1.review.grade
## 1              4         Anrew              3          Alex              4
##   1.review.name 1.review.grade 1.review.name 1.review.grade 2.id 2.title
## 1        Albina              5         Irene              3    2 Arrival
##   2.review.name 2.review.grade 2.review.name 2.review.grade 2.review.name
## 1        Pamela              4        Marina              4         Anrew
##   2.review.grade 2.review.name 2.review.grade 2.review.name 2.review.grade
## 1              3          Alex              4        Albina              5
##   2.review.name 2.review.grade 3.id 3.title 3.review.name 3.review.grade
## 1         Irene              3    3    Lion        Pamela              4
##   3.review.name 3.review.grade 3.review.name 3.review.grade 3.review.name
## 1        Marina              4         Anrew              3          Alex
##   3.review.grade 3.review.name 3.review.grade 3.review.name 3.review.grade
## 1              4        Albina              5         Irene              3
##   4.id           4.title 4.review.name 4.review.grade 4.review.name
## 1    4 Star Trsck Beyond        Pamela              4        Marina
##   4.review.grade 4.review.name 4.review.grade 4.review.name 4.review.grade
## 1              4         Anrew              3          Alex              4
##   4.review.name 4.review.grade 4.review.name 4.review.grade 5.id 5.title
## 1        Albina              5         Irene              3    5   Jakie
##   5.review.name 5.review.grade 5.review.name 5.review.grade 5.review.name
## 1        Pamela              4        Marina              4         Anrew
##   5.review.grade 5.review.name 5.review.grade 5.review.name 5.review.grade
## 1              3          Alex              4        Albina              5
##   5.review.name 5.review.grade 6.id  6.title 6.review.name 6.review.grade
## 1         Irene              3    6 Zootopia        Pamela              4
##   6.review.name 6.review.grade 6.review.name 6.review.grade 6.review.name
## 1        Marina              4         Anrew              3          Alex
##   6.review.grade 6.review.name 6.review.grade 6.review.name 6.review.grade
## 1              4        Albina              5         Irene              3
  1. Disconnected from SQL andMongo DB.
#disconnect from SQL
#on.exit(dbDisconnect(db))  

#disconnect from Mongo DB
mongo.disconnect(mongo)
## [1] 0
## attr(,"mongo")
## <pointer: 0x7fe64bf4c020>
## attr(,"class")
## [1] "mongo"
## attr(,"host")
## [1] "127.0.0.1"
## attr(,"name")
## [1] ""
## attr(,"username")
## [1] ""
## attr(,"password")
## [1] ""
## attr(,"db")
## [1] "admin"
## attr(,"timeout")
## [1] 0

Advantages and disadvantages of Mongo DB

Advantages:

  1. NoSQL DB allows build application without defining schema while relational DB allows schema.
  2. NoSQL DB can handle unstructured data while relational db can’t handle unstructured data.
  3. It’s much cheaper to scale NoSQL DB than relational DB.
  4. NoSQL DB are open sourse while relational db are topically closed source.

Disadvantages:

  1. Data redundancy (while there is less redundancy in relational databases)
  2. Lack of common declarative query language (while there is query language in relational db)