Introduction

In this file, three data frames (tables) are created. Each one shows the same data, but each is loaded from a different type of source file: the first from a JSON file, the second from an HTML file, and the third from an XML file. All three tables are displayed at the end of this document.

library(rvest)
library(rjson)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON
# Load the JSON version of the book list.
# Both rjson and jsonlite are attached and both export fromJSON(); which one
# runs depends on the order of the library() calls. The call is qualified so
# it always uses jsonlite. The rest of the script reads the result as
# jsonData$books$book.
jsonData <- jsonlite::fromJSON(
  "https://raw.githubusercontent.com/juliaDataScience-22/cuny-fall-23/Assignment-7/books.json"
)

library(RCurl)
library(XML)
# Load the HTML version of the book list.
# The raw file is requested (same host as the JSON and XML sources) instead of
# the GitHub "blob" viewer page, so the parsed document is the book file
# itself. read_html() downloads over HTTPS on its own, so TLS certificate
# verification is no longer switched off through RCurl.
url <- "https://raw.githubusercontent.com/juliaDataScience-22/cuny-fall-23/Assignment-7/books.html"
htmlData1 <- read_html(url)
tables <- html_nodes(htmlData1, "table")
# Fail loudly when the document has no <table> rather than continuing with an
# empty result.
if (length(tables) == 0) {
  stop("No <table> element found in ", url, call. = FALSE)
}
# Convert a single table only; calling as.data.frame() on the whole list would
# column-bind every table found on the page.
# NOTE(review): assumes the book list is the first table in the file - confirm.
htmlData <- as.data.frame(html_table(tables[[1]]))


library(xml2)
# Load the XML version of the book list.
# xml2::read_xml() performs the HTTPS download. The document is then
# serialised to text explicitly so that XML::xmlParse() receives XML text
# (asText = TRUE) rather than an xml2 object, which is not one of its
# documented input types. xmlToDataFrame() then builds the data frame.
newXml <- read_xml(
  "https://raw.githubusercontent.com/juliaDataScience-22/cuny-fall-23/edefcf444d4ef72ead919f974d8836d0f62e58ab/books.xml"
)
parsed <- xmlParse(as.character(newXml), asText = TRUE)
xmlData <- xmlToDataFrame(parsed)
library(stringr)
# Split a combined "Author" column into Author_One and Author_Two.
#
# books: a data frame with a character column named "Author" that holds one
#        or two comma-separated names.
# Returns `books` with Author_One and Author_Two appended and the original
# Author column removed. Author_Two is "" for rows with a single author.
split_authors <- function(books) {
  stopifnot(is.data.frame(books), "Author" %in% names(books))
  # The pattern swallows whitespace around the comma so the second name does
  # not keep a leading space; n = 2 splits at the first comma only.
  author_parts <- str_split_fixed(books[["Author"]], "\\s*,\\s*", 2)
  books[c("Author_One", "Author_Two")] <- author_parts
  # Drop by name rather than by position so the result does not depend on
  # where the Author column happens to sit in each source.
  books[, setdiff(names(books), "Author"), drop = FALSE]
}

# Apply the same transformation to the data from all three sources.
jsonData$books$book <- split_authors(jsonData$books$book)
htmlData <- split_authors(htmlData)
xmlData <- split_authors(xmlData)
library(gt)
# Render the JSON-derived book list as a gt table with a title and subtitle.
jsonData$books$book |>
  gt() |>
  tab_header(title = "Table 1", subtitle = "JSON Data")
Table 1
JSON Data
Title Pages Publisher Year Author_One Author_Two
Illuminae 608 Knopf Books for Young Readers 2015 Amie Kaufman Jay Kristoff
The Martian 387 Broadway Books 2014 Andy Weir
Stringers 428 Angry Robot 2022 Chris Panatier
# Render the HTML-derived book list as a gt table with a title and subtitle.
htmlData |>
  gt() |>
  tab_header(title = "Table 2", subtitle = "HTML Data")
Table 2
HTML Data
Title Pages Publisher Year Author_One Author_Two
Illuminae 608 Knopf Books for Young Readers 2015 Amie Kaufman Jay Kristoff
The Martian 387 Broadway Books 2014 Andy Weir
Stringers 428 Angry Robot 2022 Chris Panatier
# Render the XML-derived book list as a gt table with a title and subtitle.
xmlData |>
  gt() |>
  tab_header(title = "Table 3", subtitle = "XML Data")
Table 3
XML Data
Title Pages Publisher Year Author_One Author_Two
Illuminae 608 Knopf Books for Young Readers 2015 Amie Kaufman Jay Kristoff
The Martian 387 Broadway Books 2014 Andy Weir
Stringers 428 Angry Robot 2022 Chris Panatier