knitr::opts_chunk$set(echo = TRUE)
library(httr)
library(XML)
library(jsonlite)
library(rjson)
## 
## Attaching package: 'rjson'
## The following objects are masked from 'package:jsonlite':
## 
##     fromJSON, toJSON

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Load HTML into R

# Download the HTML version of the book list and read its table into a
# data frame with one row per book.
html_url <- "https://raw.githubusercontent.com/jayatveluri/DataAcquisition/main/books.html"
html_resp <- GET(html_url)
# Fail loudly on a 4xx/5xx response instead of parsing an error page.
stop_for_status(html_resp)
html_doc <- htmlParse(rawToChar(html_resp$content))
# readHTMLTable() returns a list with one data frame per <table> in the page.
htmldf <- readHTMLTable(html_doc)
# Wrapping that list in data.frame() prefixes every column name with the
# list element's name (here "NULL."), so take the table itself instead.
HTML <- htmldf[[1]]
HTML
##    Type                  Title                   Author   Genre  Published
## 1 Novel   Sisters of the Snake Sasha Nanua,Sarena Nanua Fiction 06/21/2021
## 2 Novel   While Justice Sleeps            Stacey Abrams Fiction 05/11/2021
## 3 Novel To Kill a Mocking Bird               Harper Lee Fiction 07/11/1960
## 4 Novel       The Great Gatsby       F.Scott Fitzgerald Fiction 04/10/1925

Load XML into R

# Fetch the XML version of the book list, parse the response body, and
# convert the parsed document into a nested R list.
xml <- xmlToList(
  xmlParse(
    rawToChar(
      GET("https://raw.githubusercontent.com/jayatveluri/DataAcquisition/main/books.xml")$content
    )
  )
)
# NOTE(review): data.frame() on the nested list spreads every field of every
# book into its own column, so the result is a single wide row rather than
# one row per book.
XML <- data.frame(xml)
XML
##    area.type      area.book.title area.book.author.text area.book.author..attrs
## id     Novel Sisters of the Snake           Sasha Nanua                       1
##    area.book.author.text.1 area.book.author..attrs.1 area.book.genre
## id            Sarena Nanua                         2         Fiction
##    area.book.published area.book..attrs    area.book.title.1 area.book.author
## id          06/21/2021                1 While Justice Sleeps    Stacey Abrams
##    area.book.genre.1 area.book.published.1 area.book..attrs.1
## id           Fiction            05/11/2021                  2
##         area.book.title.2 area.book.author.1 area.book.genre.2
## id To Kill a Mocking Bird         Harper Lee           Fiction
##    area.book.published.2 area.book..attrs.2 area.book.title.3
## id            07/11/1960                  3  The Great Gatsby
##    area.book.author.2 area.book.genre.3 area.book.published.3
## id F.Scott Fitzgerald           Fiction            04/10/1925
##    area.book..attrs.3 area..attrs
## id                  4           1

Load JSON into R

# Read the JSON version of the book list straight from its URL. The call is
# namespaced because rjson and jsonlite both export fromJSON().
json_url <- "https://raw.githubusercontent.com/jayatveluri/DataAcquisition/main/books.json"
jsonData <- rjson::fromJSON(file = json_url)
# Show the structure of the parsed result (a nested list, not a data frame).
str(jsonData)
## List of 1
##  $ Novel:List of 1
##   ..$ book:List of 4
##   .. ..$ :List of 4
##   .. .. ..$ title    : chr "Sisters of the Snake"
##   .. .. ..$ author   : chr [1:2] "Sasha Nanua" "Sarena Nanua"
##   .. .. ..$ genre    : chr "Fiction"
##   .. .. ..$ published: chr "06/21/2021"
##   .. ..$ :List of 4
##   .. .. ..$ title    : chr "While Justice Sleeps"
##   .. .. ..$ author   : chr "Stacey Abrams"
##   .. .. ..$ genre    : chr "Fiction"
##   .. .. ..$ published: chr "05/11/2021"
##   .. ..$ :List of 4
##   .. .. ..$ title    : chr "To Kill a Mocking Bird"
##   .. .. ..$ author   : chr "Wiley"
##   .. .. ..$ genre    : chr "Fiction"
##   .. .. ..$ published: chr "07/11/1960"
##   .. ..$ :List of 4
##   .. .. ..$ title    : chr "The Great Gatsby"
##   .. .. ..$ author   : chr "F.Scott Fitzgerald"
##   .. .. ..$ genre    : chr "Fiction"
##   .. .. ..$ published: chr "04/10/1925"

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Are the three data frames identical?

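No, they are not identical. The HTML table loads as a data frame with four rows (one per book) and five columns, the XML loads as a single-row data frame in which every field of every book is flattened into its own column, and the JSON loads as a nested list rather than a data frame. The content differs as well: the JSON lists "Wiley" as the author of To Kill a Mocking Bird, whereas the HTML and XML list Harper Lee.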