library(rvest) # for working with HTML
library(xml2) # for working with XML
library(jsonlite) # for working with JSON
library(httr) # for working with HTTP
library(tidyverse)
library(XML)
Working with XML and JSON in R
Load required packages
Read HTML Table
# Read every <table> element from the rendered HTML page using rvest.
# `html_elements()` replaces the superseded `html_nodes()`, and the deprecated
# `fill` argument is dropped: since rvest 1.0.0 `html_table()` always pads
# missing cells with NA, so the result is unchanged.
# NOTE: `df_html` is a list holding one tibble per table found on the page,
# not a single data frame (see the `[[1]]` in the output below).
df_html <- rvest::read_html("https://raw.githubusercontent.com/RDLong718/DATA607-Spring24/main/DATA607-Spring2024/Assignments/Working%20with%20XML%20and%20JSON%20on%20R/Working%20with%20XML%20and%20JSON%20in%20R.html") %>%
  rvest::html_elements("table") %>%
  rvest::html_table()
print(df_html)
#> [[1]]
#> # A tibble: 3 × 3
#> Title Authors Attributes
#> <chr> <chr> <chr>
#> 1 No Filter: The Inside Story of Instagram "Sarah Frier" "Profiles…
#> 2 Rich Dad Poor Dad "Robert T. Kiyosaki" "Financia…
#> 3 Getting to Yes "Roger Fisher\n … "Tips on …
Read XML
# Download and parse the XML version of the book list with xml2.
# NOTE: despite its name, `df_xml` is an `xml_document` (see the output
# below), not a data frame; it still needs to be converted before it can be
# compared with the HTML and JSON results.
df_xml <- xml2::read_xml("https://raw.githubusercontent.com/RDLong718/DATA607-Spring24/main/DATA607-Spring2024/Assignments/Working%20with%20XML%20and%20JSON%20on%20R/Working%20with%20XML%20and%20JSON%20in%20R.xml")
print(df_xml)
#> {xml_document}
#> <books>
#> [1] <book>\n <title>No Filter: The Inside Story of Instagram</title>\n <aut ...
#> [2] <book>\n <title>Rich Dad Poor Dad</title>\n <author>Robert T. Kiyosaki< ...
#> [3] <book>\n <title>Getting to Yes</title>\n <author>Roger Fisher</author>\ ...
Read JSON
# Read the JSON version of the book list using the `jsonlite` package.
# Step 1: download and parse the JSON document.
json_data <- jsonlite::fromJSON("https://raw.githubusercontent.com/RDLong718/DATA607-Spring24/main/DATA607-Spring2024/Assignments/Working%20with%20XML%20and%20JSON%20on%20R/Working%20with%20XML%20and%20JSON%20in%20R.json")
# Step 2: coerce the parsed JSON to a data frame.
df_json <- as.data.frame(json_data)
# Step 3: print the resulting data frame.
print(df_json)
#> title author
#> 1 No Filter: The Inside Story of Instagram Sarah Frier
#> 2 Rich Dad Poor Dad Robert T. Kiyosaki
#> 3 Getting to Yes Roger Fisher, William Ury
#> attributes
#> 1 Profiles Mark Zukerberg., Discusses the rapid growth of Instagram.
#> 2 Financial literacy means a broad understanding of accounting and investing and knowing the markets and the law., Each chapter ends with a “Study Session,” which reviews the material and poses questions.
#> 3 Tips on developing a cordial relationship with the other side., Avoiding the trap of being “nice” and getting walked all over.
Are they all identical?
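The three results are not identical: the HTML import yields a list containing a tibble, the XML import yields an `xml_document`, and the JSON import yields a data frame. They all require some extra parsing to make them identical.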