#install.packages('rvest')
#install.packages('XML')
#install.packages('jsonlite')
library(rvest)
library(XML)
library(jsonlite)
library(httr)
# Read the books table from HTML. The raw HTML file can also be opened directly in a browser using the URL below.
# URL of the HTML version of the books table.
gitHtml <- "https://raw.githubusercontent.com/Doumgit/Working-With-XML-and-JSON-in-R/main/FBooks.html"
# Parse the page, extract its tables, and keep the first element of the result.
FBooksHtml <- html_table(read_html(gitHtml), fill = TRUE)[[1]]
head(FBooksHtml)
## # A tibble: 3 × 7
## Title Author_1 Author_2 Author_3 Attribute_1 Attribute_2 Attribute_3
## <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 Good Omens: Th… Neil Ga… Terry P… "" Fantasy Humor NA
## 2 The Federalist… Alexand… James M… "John J… Founding d… Persuasive NA
## 3 Principia Math… Alfred … Bertran… "" Landmark w… Influential NA
# Read the books table from JSON. The raw JSON file can also be opened directly in a browser using the URL below.
# URL of the JSON version of the books table.
gitJson <- "https://raw.githubusercontent.com/Doumgit/Working-With-XML-and-JSON-in-R/main/FBooks.json"
# fromJSON() is handed the URL itself; flatten = TRUE is requested as before.
FBooksJson <- jsonlite::fromJSON(txt = gitJson, flatten = TRUE)
head(FBooksJson)
## title
## 1 Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch
## 2 The Federalist Papers
## 3 Principia Mathematica
## author_1 author_2
## 1 Neil Gaiman Terry Pratchett
## 2 Alexander Hamilton James Madison
## 3 Alfred North Whitehead Bertrand Russell
## attribute_1 attribute_2 author_3
## 1 Fantasy Humor <NA>
## 2 Founding document of the United States Persuasive John Jay
## 3 Landmark work in mathematical logic Influential <NA>
# Fetch the books table as XML. The raw XML file can also be opened directly in a browser using the URL below.
# Download the XML version of the books table.
gitXML <- GET("https://raw.githubusercontent.com/Doumgit/Working-With-XML-and-JSON-in-R/main/FBooks.xml")
# Abort unless the server answered with a success status.
if (http_status(gitXML)$category != "Success") {
  stop("Failed to fetch the XML from GitHub.")
}
# The response body is XML text, not a file name. asText = TRUE states that
# explicitly instead of leaving xmlParse() to guess from the string's content.
FBooksXml1 <- xmlParse(
  content(gitXML, as = "text", encoding = "UTF-8"),
  asText = TRUE
)
# Collect every <book> element found anywhere in the document.
FBooksXml2 <- getNodeSet(FBooksXml1, "//book")
#' Convert one book node into a one-row data frame.
#'
#' @param book A node on which `xpathSApply()` can evaluate the relative
#'   XPath expressions `./title`, `./author_1`, ... (in this script, one
#'   element of the `//book` node set).
#' @return A one-row data.frame with the columns title, author_1, author_2,
#'   author_3, attribute_1, attribute_2 and attribute_3. A child element that
#'   is absent yields NA; if a child occurs more than once, only the first
#'   value is kept.
parseBook <- function(book) {
  fields <- c(
    "title",
    "author_1", "author_2", "author_3",
    "attribute_1", "attribute_2", "attribute_3"
  )

  # Value of the first direct child named `field`, or NA when there is none.
  # The condition is a scalar, so a plain if/else replaces ifelse().
  first_value <- function(field) {
    found <- xpathSApply(book, paste0("./", field), xmlValue)
    if (length(found) > 0) found[[1]] else NA
  }

  values <- lapply(fields, first_value)
  names(values) <- fields

  # Build the row from the named list; each field becomes one column.
  do.call(data.frame, c(values, list(stringsAsFactors = FALSE)))
}
# Apply parseBook() to every book node, giving a list of one-row data frames ...
FBooksXml3 <- lapply(X = FBooksXml2, FUN = parseBook)
# ... then stack those rows into a single data frame.
FBooksXml4 <- do.call(what = rbind, args = FBooksXml3)
head(FBooksXml4)
## title
## 1 Good Omens: The Nice and Accurate Prophecies of Agnes Nutter, Witch
## 2 The Federalist Papers
## 3 Principia Mathematica
## author_1 author_2 author_3
## 1 Neil Gaiman Terry Pratchett <NA>
## 2 Alexander Hamilton James Madison John Jay
## 3 Alfred North Whitehead Bertrand Russell <NA>
## attribute_1 attribute_2 attribute_3
## 1 Fantasy Humor NA
## 2 Founding document of the United States Persuasive NA
## 3 Landmark work in mathematical logic Influential NA
# Pairwise exact-equality checks between the three data frames.
identical(x = FBooksHtml, y = FBooksXml4)
## [1] FALSE
identical(x = FBooksHtml, y = FBooksJson)
## [1] FALSE
identical(x = FBooksXml4, y = FBooksJson)
## [1] FALSE
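# The three data frames are not identical to one another. As the printed
# output above shows, the HTML result is a tibble with capitalized column
# names, the JSON result has no attribute_3 column and places author_3 last,
# and the XML result is a plain data frame with all seven lower-case columns.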