knitr::opts_chunk$set(echo = TRUE)
library(httr)
library(XML)
library(jsonlite)
library(rjson)
##
## Attaching package: 'rjson'
## The following objects are masked from 'package:jsonlite':
##
## fromJSON, toJSON
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Download the HTML copy of the book list and load its table into `HTML`.
html_url <- "https://raw.githubusercontent.com/jayatveluri/DataAcquisition/main/books.html"
html_resp <- GET(html_url)
# Stop on an HTTP error status instead of parsing an error page as if it
# were the book table.
stop_for_status(html_resp)
# Decode the body as UTF-8 text explicitly; rawToChar() leaves the encoding
# unmarked, so non-ASCII characters depend on the session locale.
html_text <- content(html_resp, as = "text", encoding = "UTF-8")
# asText = TRUE: always treat the string as markup, never as a path or URL.
html_doc <- htmlParse(html_text, asText = TRUE)
# `htmldf` keeps the list returned by readHTMLTable().
htmldf <- readHTMLTable(html_doc)
# NOTE(review): data.frame() on that list prefixes every column with the
# list element's name, which is why the printed columns read `NULL.Type`,
# `NULL.Title`, ... Presumably `htmldf[[1]]` would give unprefixed names
# (TODO confirm); kept as is so existing references to these columns still
# work.
HTML <- data.frame(htmldf)
HTML
## NULL.Type NULL.Title NULL.Author NULL.Genre
## 1 Novel Sisters of the Snake Sasha Nanua,Sarena Nanua Fiction
## 2 Novel While Justice Sleeps Stacey Abrams Fiction
## 3 Novel To Kill a Mocking Bird Harper Lee Fiction
## 4 Novel The Great Gatsby F.Scott Fitzgerald Fiction
## NULL.Published
## 1 06/21/2021
## 2 05/11/2021
## 3 07/11/1960
## 4 04/10/1925
# Download the XML copy of the book list and flatten it into `XML`.
xml_url <- "https://raw.githubusercontent.com/jayatveluri/DataAcquisition/main/books.xml"
xml_resp <- GET(xml_url)
# Stop on an HTTP error status instead of handing an error body to the
# XML parser.
stop_for_status(xml_resp)
# Decode the body as UTF-8 text explicitly; rawToChar() leaves the encoding
# unmarked, so non-ASCII characters depend on the session locale.
xml_text <- content(xml_resp, as = "text", encoding = "UTF-8")
# asText = TRUE: always treat the string as XML content, never as a path.
xml_doc <- xmlParse(xml_text, asText = TRUE)
# `xml` ends up holding the nested list produced by xmlToList().
xml <- xmlToList(xml_doc)
# NOTE(review): data.frame() spreads that nested list across columns, so
# the printed result is a single row with one column per leaf value
# (`area.book.title`, `area.book.title.1`, ...) rather than one row per
# book.
XML <- data.frame(xml)
XML
## area.type area.book.title area.book.author.text area.book.author..attrs
## id Novel Sisters of the Snake Sasha Nanua 1
## area.book.author.text.1 area.book.author..attrs.1 area.book.genre
## id Sarena Nanua 2 Fiction
## area.book.published area.book..attrs area.book.title.1 area.book.author
## id 06/21/2021 1 While Justice Sleeps Stacey Abrams
## area.book.genre.1 area.book.published.1 area.book..attrs.1
## id Fiction 05/11/2021 2
## area.book.title.2 area.book.author.1 area.book.genre.2
## id To Kill a Mocking Bird Harper Lee Fiction
## area.book.published.2 area.book..attrs.2 area.book.title.3
## id 07/11/1960 3 The Great Gatsby
## area.book.author.2 area.book.genre.3 area.book.published.3
## id F.Scott Fitzgerald Fiction 04/10/1925
## area.book..attrs.3 area..attrs
## id 4 1
# Read the JSON copy of the book list straight from its URL. The result is
# kept as the nested list that rjson returns; it is not converted to a
# data frame here.
jsonData <- rjson::fromJSON(
  file = "https://raw.githubusercontent.com/jayatveluri/DataAcquisition/main/books.json"
)
# Show the structure of the parsed list.
utils::str(jsonData)
## List of 1
## $ Novel:List of 1
## ..$ book:List of 4
## .. ..$ :List of 4
## .. .. ..$ title : chr "Sisters of the Snake"
## .. .. ..$ author : chr [1:2] "Sasha Nanua" "Sarena Nanua"
## .. .. ..$ genre : chr "Fiction"
## .. .. ..$ published: chr "06/21/2021"
## .. ..$ :List of 4
## .. .. ..$ title : chr "While Justice Sleeps"
## .. .. ..$ author : chr "Stacey Abrams"
## .. .. ..$ genre : chr "Fiction"
## .. .. ..$ published: chr "05/11/2021"
## .. ..$ :List of 4
## .. .. ..$ title : chr "To Kill a Mocking Bird"
## .. .. ..$ author : chr "Wiley"
## .. .. ..$ genre : chr "Fiction"
## .. .. ..$ published: chr "07/11/1960"
## .. ..$ :List of 4
## .. .. ..$ title : chr "The Great Gatsby"
## .. .. ..$ author : chr "F.Scott Fitzgerald"
## .. .. ..$ genre : chr "Fiction"
## .. .. ..$ published: chr "04/10/1925"
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

## Are the three data frames identical?
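No, they are not identical as loaded above. The HTML import is a data frame with 4 rows and 5 columns (named `NULL.Type`, `NULL.Title`, and so on). The XML import is flattened into a single-row data frame with one column per leaf value. The JSON import is left as a nested list rather than a data frame. The content also differs: the JSON file gives the author of "To Kill a Mocking Bird" as "Wiley", whereas the HTML and XML files give "Harper Lee".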