• CSV into MySQL
  • MySQL into R Dataframes
  • NoSQL Operations
    • Open a NoSQL connection
    • Perform NoSQL operations
    • Neo4j graph
  • Advantages: Relational Database (RDMS) vs Neo4j (NoSQL)
knitr::opts_chunk$set(echo = TRUE)

library(RMySQL)
## Loading required package: DBI
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## 
## Attaching package: 'RSQLite'
## The following object is masked from 'package:RMySQL':
## 
##     isIdCurrent
## sqldf will default to using MySQL
library(neo4r)

CSV into MySQL

dbConnection <- dbConnect(MySQL(), dbname="student_enrollment", user="root", password="root", host="localhost")
students_df <- read.csv("Students.csv",sep = ",")
subjects_df <- read.csv("Subjects.csv",sep = ",")

MySQL into R Dataframes

#Load dataframe into MySQL table
dbWriteTable(dbConnection, name="students_enrollments",value = students_df, overwrite=T)
## [1] TRUE
dbWriteTable(dbConnection, name="subjects",value = subjects_df, overwrite=T)
## [1] TRUE
#Now load MySQL data into dataframe
students_table_data <- dbReadTable(dbConnection, name="students_enrollments")
subjects_table_data <- dbReadTable(dbConnection, name="subjects")

NoSQL Operations

Open a NoSQL connection

con <- neo4j_api$new( url = "http://localhost:7474",  user = "neo4j",  password = "admin" )

Perform NoSQL operations

#Clear any existing data
call_neo4j("MATCH (n) DETACH DELETE n", con)
## No data returned.
## [[1]]
## # A tibble: 0 x 0
## 
## attr(,"class")
## [1] "neo"  "neo"  "list"
cypher_query_student <- " "
#Now load the student data dataframe into Neo4J graph Cypher query
for(i in 1:nrow(students_table_data)) {
  cypher_query_student <- paste(cypher_query_student, "CREATE", vec_to_cypher(students_table_data[i, ], "Student"), " ")
}
cypher_query_student
## [1] "  CREATE (:`Student` {`First.Name`: 'Santosh', `Last.Name`: 'C', `Subject.ID`: '1', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Santosh', `Last.Name`: 'C', `Subject.ID`: '2', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Raj', `Last.Name`: 'K', `Subject.ID`: '1', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Sam', `Last.Name`: 'D', `Subject.ID`: '1', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Priya', `Last.Name`: 'R', `Subject.ID`: '3', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Jim', `Last.Name`: 'D', `Subject.ID`: '4', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Albert', `Last.Name`: 'P', `Subject.ID`: '4', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Kris', `Last.Name`: 'K', `Subject.ID`: '1', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Beth', `Last.Name`: 'R', `Subject.ID`: '3', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Arthur', `Last.Name`: 'M', `Subject.ID`: '4', `Joining.Date`: '2018'})   CREATE (:`Student` {`First.Name`: 'Ram', `Last.Name`: 'R', `Subject.ID`: '4', `Joining.Date`: '2013'})   CREATE (:`Student` {`First.Name`: 'John', `Last.Name`: 'P', `Subject.ID`: '1', `Joining.Date`: '2012'})   CREATE (:`Student` {`First.Name`: 'Robert', `Last.Name`: 'K', `Subject.ID`: '3', `Joining.Date`: '2012'})   CREATE (:`Student` {`First.Name`: 'Seun', `Last.Name`: 'D', `Subject.ID`: '4', `Joining.Date`: '2012'})   CREATE (:`Student` {`First.Name`: 'John', `Last.Name`: 'S', `Subject.ID`: '4', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Vicky', `Last.Name`: 'D', `Subject.ID`: '1', `Joining.Date`: '2015'})   CREATE (:`Student` {`First.Name`: 'Win', `Last.Name`: 'A', `Subject.ID`: '3', `Joining.Date`: '2019'})   CREATE (:`Student` {`First.Name`: 'Taylor', `Last.Name`: 'L', `Subject.ID`: '4', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Washington', `Last.Name`: 'D', `Subject.ID`: '4', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Jim', `Last.Name`: 'D', `Subject.ID`: '1', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Jim', `Last.Name`: 'D', `Subject.ID`: '2', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Jim', `Last.Name`: 'D', `Subject.ID`: '3', `Joining.Date`: '2016'})   CREATE (:`Student` {`First.Name`: 'Ram', `Last.Name`: 'R', `Subject.ID`: '2', `Joining.Date`: '2013'})   CREATE (:`Student` {`First.Name`: 'Ram', `Last.Name`: 'R', `Subject.ID`: '1', `Joining.Date`: '2013'})  "
call_neo4j(paste(cypher_query_student,";"), con)
## No data returned.
## [[1]]
## # A tibble: 0 x 0
## 
## attr(,"class")
## [1] "neo"  "neo"  "list"
cypher_query_subject <- " "
#Now load the subject data dataframe into Neo4J graph Cypher query
i <- 0
for(i in 1:nrow(subjects_table_data)) {
  cypher_query_subject <- paste(cypher_query_subject, "CREATE", vec_to_cypher(subjects_table_data[i, ], "Subject"), " ")
}
cypher_query_subject
## [1] "  CREATE (:`Subject` {`Subject.ID`: '1', `Subject.Name`: 'Data Science', `Professor`: 'Elizabeth Parker'})   CREATE (:`Subject` {`Subject.ID`: '2', `Subject.Name`: 'Math', `Professor`: 'Lawrence Fulton'})   CREATE (:`Subject` {`Subject.ID`: '3', `Subject.Name`: 'R', `Professor`: 'Jason Bryer'})   CREATE (:`Subject` {`Subject.ID`: '4', `Subject.Name`: 'SQL', `Professor`: 'Andrew Catlin'})  "
call_neo4j(paste(cypher_query_subject,";"), con)
## No data returned.
## [[1]]
## # A tibble: 0 x 0
## 
## attr(,"class")
## [1] "neo"  "neo"  "list"
#Create a relationship between Student and Subject data
call_neo4j("MATCH (a:Student), (b:Subject) WHERE (a.SubjectId) = (b.SubjectId) CREATE (a) -[:ENROLLED_INTO]-> (b);", con)
## No data returned.
## [[1]]
## # A tibble: 0 x 0
## 
## attr(,"class")
## [1] "neo"  "neo"  "list"

Neo4j graph



Advantages: Relational Database (RDMS) vs Neo4j (NoSQL)

Relational DB:
1. Advantage: Data stored is very structured and easily related to different domains
2. Advantage: Strict data rules can be implemented
3. Advantage: Data is very reliable according to the preset data type.

  1. Disadvantage: Very difficult to work with unstructured or semi-structured data
  2. Disadvantage: Small change in data type or data format will not easily fit into database
  3. Disadvantage: Very slow performance when the volume of data increases

NoSQL DB:
1. Advantage: Very easy to work with semi-structured data as well
2. Advantage: Dynamic schema i.e., Modifications into data types or data model is easily accomodated into NoSQL
3. Advantage: Very fast to deal with high volume data

  1. Disadvantage: Difficult to maintain highly relational data
  2. Disadvantage: Not mature enough to handle highly transactional data