1. Loading and parsing XML file

  1. Loading required packages
library(XML)
library(RCurl)
## Loading required package: bitops
  2. The XML package cannot fetch https URLs itself, so we first download the file from the secure site with RCurl's getURL() and then parse the downloaded text with xmlTreeParse().
# Download the XML document as a single character string. RCurl is used for
# the transfer because the XML package cannot fetch https URLs itself.
xml_file <- getURL("https://raw.githubusercontent.com/isrini/SI_IS607/master/books.xml")

# Parse the XML content. asText = TRUE states explicitly that xml_file holds
# the document text itself rather than the name of a file on disk.
xml_doc <- xmlTreeParse(xml_file, asText = TRUE)

# Read the contents: take the root node and collect the text value of each of
# its child nodes. The parsed tree is kept in xml_doc so that xml_file2 only
# ever holds the final result.
root <- xmlRoot(xml_doc)
xml_file2 <- xmlSApply(root, xmlValue)
head(xml_file2)
##                                                                                   book 
##                             "01Practical Data Science with RfirstNina ZumelJohn Mount" 
##                                                                                   book 
## "02The Art of R Programming: A Tour of Statistical Software DesignfirstNorman Matloff" 
##                                                                                   book 
##                  "03Advanced R - Chapman and Hall/CRC The R SeriesfirstHadley Wickham"

2. Loading and parsing HTML file

  1. Loading required packages
library(XML)
library(RCurl)
  2. The XML package cannot fetch https URLs itself, so we first download the file from the secure site with RCurl's getURL() and then read its table with readHTMLTable().
# Location of the HTML page that holds the books table
html_url <- "https://raw.githubusercontent.com/isrini/SI_IS607/master/books.html"

# Fetch the page as text with RCurl
html_file <- getURL(html_url)

# Extract the table(s) from the page, treating the first row as the header,
# then flatten the returned list of tables into one data frame
html_tables <- readHTMLTable(html_file, header = TRUE)
html_file2 <- as.data.frame(html_tables)
head(html_file2)
##   NULL.id                                                      NULL.Title
## 1      01                                   Practical Data Science with R
## 2                                                                        
## 3      02 The Art of R Programming: A Tour of Statistical Software Design
## 4      03                  Advanced R - Chapman and Hall/CRC The R Series
##   NULL.Edition    NULL.Author
## 1        first     Nina Zumel
## 2                  John Mount
## 3        first Norman Matloff
## 4        first Hadley Wickham

3. Loading and parsing JSON file

  1. Loading required packages
library(RJSONIO)
  2. Download the file from the secure site with RCurl's getURL() (RCurl was already loaded above), and then parse it with fromJSON().
# Location of the JSON file
json_url <- "https://raw.githubusercontent.com/isrini/SI_IS607/master/books.json"

# Fetch the file as text with RCurl
json_file <- getURL(json_url)

# Parse the JSON text; the RJSONIO:: prefix pins which package's fromJSON()
# is called
json_file2 <- RJSONIO::fromJSON(json_file)

# Show the parsed contents
head(json_file2)
## $amazon
## $amazon$book
## $amazon$book[[1]]
## $amazon$book[[1]]$id
## [1] "01"
## 
## $amazon$book[[1]]$title
## [1] "Practical Data Science with R"
## 
## $amazon$book[[1]]$edition
## [1] "first"
## 
## $amazon$book[[1]]$author
## [1] "Nina Zumel" "John Mount"
## 
## 
## $amazon$book[[2]]
##                                                                id 
##                                                              "02" 
##                                                             title 
## "The Art of R Programming: A Tour of Statistical Software Design" 
##                                                           edition 
##                                                           "first" 
##                                                            author 
##                                                  "Norman Matloff" 
## 
## $amazon$book[[3]]
##                                               id 
##                                             "03" 
##                                            title 
## "Advanced R - Chapman and Hall/CRC The R Series" 
##                                          edition 
##                                          "first" 
##                                           author 
##                                 "Hadley Wickham"

Conclusion:

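Based on the outputs shown above, the three resulting R objects are not identical. The XML result is a named character vector in which each book's fields are concatenated into one string, the HTML result is a data frame in which the second author of the first book occupies a separate row, and the JSON result is a nested list in which the first book's authors are stored together as a two-element vector.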