After creating the three files (HTML, XML, and JSON) by hand in Notepad, I
uploaded them to my GitHub repository and load them below:
library(RCurl)
library(XML)
library(rjson)
library(rvest)
library(kableExtra)
# HTML source ----
# Fetch the raw HTML page as text, read its first table with all columns kept
# as character, and wrap the result in a data frame for display.
html_url <- "https://raw.githubusercontent.com/unsecuredAMRAP/607/main/html%20page3.html"
html_content <- getURL(html_url)
html_data <- readHTMLTable(
  html_content,
  which = 1,
  stringsAsFactors = FALSE
)
df_html <- data.frame(html_data)
# Render the data frame as a centered HTML table at 85% width.
kable(
  df_html,
  caption = "Books Data from HTML",
  format = "html",
  table.attr = "style='width:85%;'",
  align = "c"
)
Books Data from HTML

| title | author | firstReleaseYear | numberOfPages | averageAmazonRating |
|:---:|:---:|:---:|:---:|:---:|
| Thinking, Fast and Slow | Daniel Kahneman | 2011 | 499 | 4.6 |
| The Age of Surveillance Capitalism | Shoshana Zuboff | 2019 | 704 | 4.5 |
| Deep Medicine | Eric Topol | 2019 | 400 | 4.7 |
# XML source ----
# Download the XML file as a single string, parse it, and build a data frame
# from the nodes matched by the XPath expression "//book".
xml_content <- getURL("https://raw.githubusercontent.com/unsecuredAMRAP/607/main/xml%20page.xml")
# asText = TRUE: xml_content holds the document text itself, not a file name
# or URL, so say so explicitly instead of relying on the parser's guess.
xml_data <- xmlParse(xml_content, asText = TRUE)
# Keep text columns as character, matching the explicit
# stringsAsFactors = FALSE used by the HTML and JSON loaders in this file.
df_xml <- xmlToDataFrame(
  nodes = getNodeSet(xml_data, "//book"),
  stringsAsFactors = FALSE
)
# Render the data frame as a centered HTML table at 85% width.
kable(
  df_xml,
  caption = "Books Data from XML",
  format = "html",
  table.attr = "style='width:85%;'",
  align = "c"
)
Books Data from XML

| title | author | firstReleaseYear | numberOfPages | averageAmazonRating |
|:---:|:---:|:---:|:---:|:---:|
| Thinking, Fast and Slow | Daniel Kahneman | 2011 | 499 | 4.6 |
| The Age of Surveillance Capitalism | Shoshana Zuboff | 2019 | 704 | 4.5 |
| Deep Medicine | Eric Topol | 2019 | 400 | 4.7 |
# JSON source ----
# Read the JSON file through an explicit connection and build a data frame
# with one row per element of its "books" list.
json_file <- file("https://raw.githubusercontent.com/unsecuredAMRAP/607/main/json%20file.json", "r")
# finally = close(...) runs whether or not parsing succeeds, so a malformed
# file cannot leave the connection open.
json_data <- tryCatch(
  fromJSON(file = json_file),
  finally = close(json_file)
)
# With no entries, do.call(rbind, list()) would silently return NULL; stop
# here with a clear message instead of handing NULL to kable().
if (length(json_data$books) == 0) {
  stop("No entries found under 'books' in the JSON file.", call. = FALSE)
}
# Convert each book (a named list) to a one-row data frame, then stack them.
df_json <- do.call(
  rbind,
  lapply(json_data$books, as.data.frame, stringsAsFactors = FALSE)
)
# Render the data frame as a centered HTML table at 85% width.
kable(
  df_json,
  caption = "Books Data from JSON",
  format = "html",
  table.attr = "style='width:85%;'",
  align = "c"
)
Books Data from JSON

| title | author | firstReleaseYear | numberOfPages | averageAmazonRating |
|:---:|:---:|:---:|:---:|:---:|
| Thinking, Fast and Slow | Daniel Kahneman | 2011 | 499 | 4.6 |
| The Age of Surveillance Capitalism | Shoshana Zuboff | 2019 | 704 | 4.5 |
| Deep Medicine | Eric Topol | 2019 | 400 | 4.7 |