library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
library(openintro)
## Warning: package 'openintro' was built under R version 4.4.3

We will choose three books; the deliverable is the three source files.

Package

# Install xml2, jsonlite, and rvest, skipping any that are already available
invisible(lapply(c("xml2", "jsonlite", "rvest"), function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}))

LIBRARY

# Load necessary libraries
library(xml2)
## Warning: package 'xml2' was built under R version 4.4.3
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 4.4.3
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding

Load data for HTML

# Load the HTML file.
# The page is parsed explicitly as UTF-8. Without the `encoding` argument the
# non-ASCII author name was mis-decoded (it printed as
# "NiÅ\u009fancıoÄ\u009flu"), while the XML and JSON copies of the same data
# print it as "Nişancıoğlu".
# NOTE(review): assumes books.html is stored as UTF-8 -- confirm.
html_data <- read_html(
  "https://raw.githubusercontent.com/tanzil64/DATA-607-Assignment-07/refs/heads/main/books.html",
  encoding = "UTF-8"
)
# Use the first <table> element on the page.
table_data <- html_table(html_elements(html_data, "table")[[1]])
# Convert the parsed table to a base data frame; the third column prints as
# Interesting.Attributes below.
df_html <- data.frame(table_data)
print(df_html)
##                                                                                             Title
## 1 The Politics of Innovation: Why Some Countries Are Better Than Others at Science and Technology
## 2                               How the West Came to Rule: The Geopolitical Origins of Capitalism
## 3                                                                The Fourth Industrial Revolution
##                                Authors
## 1                  Mark Zachary Taylor
## 2 Alexander Anievas, Kerem Nişancıoğlu
## 3                         Klaus Schwab
##                                                           Interesting.Attributes
## 1     Explores how politics, rather than institutions, drive S&T competitiveness
## 2 Challenges Eurocentric views by arguing capitalism's rise was a global process
## 3                        Discusses technological advancements shaping the future

Load data for XML

# Load the XML file using xml2 and build one row per <book> element.
xml_data <- read_xml("https://raw.githubusercontent.com/tanzil64/DATA-607-Assignment-07/refs/heads/main/books.xml")
xml_books <- xml_find_all(xml_data, "//book")
# Each book becomes a one-row data frame. A book can have several <author>
# (or <attribute>) children, so they are collapsed into a single string.
# Passing a list of per-book lists straight to data.frame() instead spread the
# books across columns (title, title.1, title.2, ...) and recycled the shorter
# fields, giving two duplicated rows rather than one row per book.
books_list <- lapply(xml_books, function(book) {
  data.frame(
    title = xml_text(xml_find_first(book, "title")),
    authors = paste(
      xml_text(xml_find_all(book, "authors/author")),
      collapse = ", "
    ),
    attributes = paste(
      xml_text(xml_find_all(book, "attributes/attribute")),
      collapse = "; "
    ),
    stringsAsFactors = FALSE
  )
})

# Stack the per-book rows into a single data frame.
df_xml <- do.call(rbind, books_list)
print(df_xml)
##                                                                                             title
## 1 The Politics of Innovation: Why Some Countries Are Better Than Others at Science and Technology
## 2                               How the West Came to Rule: The Geopolitical Origins of Capitalism
## 3                                                                The Fourth Industrial Revolution
##                                authors
## 1                  Mark Zachary Taylor
## 2 Alexander Anievas, Kerem Nişancıoğlu
## 3                         Klaus Schwab
##                                                                       attributes
## 1     Explores how politics, rather than institutions, drive S&T competitiveness
## 2 Challenges Eurocentric views by arguing capitalism's rise was a global process
## 3                        Discusses technological advancements shaping the future
##print("XML Data:")
#print(books_list)

Load data for JSON

# Fetch and parse the JSON document, then coerce the parsed result to a
# data frame and display it
json_data <- jsonlite::fromJSON(
  txt = "https://raw.githubusercontent.com/tanzil64/DATA-607-Assignment-07/refs/heads/main/books.json"
)
df_json <- data.frame(json_data)
print(df_json)
##                                                                                       books.title
## 1 The Politics of Innovation: Why Some Countries Are Better Than Others at Science and Technology
## 2                               How the West Came to Rule: The Geopolitical Origins of Capitalism
## 3                                                                The Fourth Industrial Revolution
##                          books.authors
## 1                  Mark Zachary Taylor
## 2 Alexander Anievas, Kerem Nişancıoğlu
## 3                         Klaus Schwab
##                                                                 books.attributes
## 1     Explores how politics, rather than institutions, drive S&T competitiveness
## 2 Challenges Eurocentric views by arguing capitalism's rise was a global process
## 3                        Discusses technological advancements shaping the future
#print("JSON Data:")
#print(json_data)

Check Identical

# Compare the three data frames pairwise with identical(), which tests for
# exact equality. The frames printed above use different column names
# (Title, title, books.title), so FALSE is expected even where the underlying
# book data agree.
# Check if df_html and df_xml are identical
identical(df_html, df_xml)
## [1] FALSE
# Check if df_html and df_json are identical
identical(df_html, df_json)
## [1] FALSE
# Check if df_xml and df_json are identical
identical(df_xml, df_json)
## [1] FALSE

Conclusion: The three files are in different formats, and the data frames read from them are not identical.

LS0tDQp0aXRsZTogIkRBVEEgNjA3IEFzc2lnbm1lbnQgMDciDQphdXRob3I6ICJNZC4gVGFuemlsIEVoc2FuIg0KZGF0ZTogImByIFN5cy5EYXRlKClgIg0Kb3V0cHV0OiBvcGVuaW50cm86OmxhYl9yZXBvcnQNCi0tLQ0KDQpgYGB7ciBsb2FkLXBhY2thZ2VzLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KG9wZW5pbnRybykNCmBgYA0KDQojIyMgV2Ugd2lsbCBjaG9vc2UgdGhyZWUgYm9va3MgYW5kIGRlbGl2ZXJhYmxlIHdvdWxkIGJlICB0aGUgdGhyZWUgc291cmNlIGZpbGVzLg0KDQoNCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQ0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0KYGBgDQoNCg0KDQoNCiMjIFBhY2thZ2UNCmBgYHtyfQ0KIyBJbnN0YWxsIHpvbyBpZiBub3QgYWxyZWFkeSBpbnN0YWxsZWQNCmlmICghcmVxdWlyZU5hbWVzcGFjZSgieG1sMiIsIHF1aWV0bHkgPSBUUlVFKSkge2luc3RhbGwucGFja2FnZXMoInhtbDIiKX0NCmlmICghcmVxdWlyZU5hbWVzcGFjZSgianNvbmxpdGUiLCBxdWlldGx5ID0gVFJVRSkpIHtpbnN0YWxsLnBhY2thZ2VzKCJqc29ubGl0ZSIpfQ0KaWYgKCFyZXF1aXJlTmFtZXNwYWNlKCJydmVzdCIsIHF1aWV0bHkgPSBUUlVFKSkgaW5zdGFsbC5wYWNrYWdlcygicnZlc3QiKQ0KYGBgDQoNCiMjIExJQlJBUlkNCmBgYHtyfQ0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMNCmxpYnJhcnkoeG1sMikNCmxpYnJhcnkoanNvbmxpdGUpDQpsaWJyYXJ5KHJ2ZXN0KQ0KYGBgDQoNCiMjIExvYWQgZGF0YSBhZm9yIEhUTUwgDQpgYGB7cn0NCiMgTG9hZCBIVE1MIGZpbGUNCmh0bWxfZGF0YSA8LSByZWFkX2h0bWwoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS90YW56aWw2NC9EQVRBLTYwNy1Bc3NpZ25tZW50LTA3L3JlZnMvaGVhZHMvbWFpbi9ib29rcy5odG1sIikNCnRhYmxlX2RhdGEgPC0gaHRtbF90YWJsZShodG1sX25vZGVzKGh0bWxfZGF0YSwgInRhYmxlIilbWzFdXSkNCiNwcmludCgiSFRNTCBEYXRhOiIpDQojcHJpbnQodGFibGVfZGF0YSkNCmRmX2h0bWwgPC1kYXRhLmZyYW1lKHRhYmxlX2RhdGEpDQpwcmludChkZl9odG1sKQ0KYGBgDQoNCiMjIExvYWQgZGF0YSBhZm9yIFhNTA0KYGBge3J9DQojIExvYWQgWE1MIGZpbGUgdXNpbmcgeG1sMg0KeG1sX2RhdGEgPC0gcmVhZF94bWwoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS90YW56aWw2NC9EQVRBLTYwNy1Bc3NpZ25tZW50LTA3L3JlZnMvaGVhZHMvbWFpbi9ib29rcy54bWwiKQ0KeG1sX2Jvb2tzIDwtIHhtbF9maW5kX2FsbCh4bWxfZGF0YSwgIi8vYm9vayIpDQpib29rc19saXN0IDwtIGxhcHBseSh4bWxfYm9va3MsIGZ1bmN0aW9uKGJvb2spIHsNCiAgbGlzdCgNCiAgICB0aXRsZSA9IHhtbF90ZXh0KHhtbF9maW5kX2ZpcnN0KGJvb2ssICJ0aXRsZSIpKSwNCiAgICBhdXRob3JzID0geG1sX3RleHQoeG1sX2ZpbmRfYWxsKGJvb2ssICJhdXRob3Jz
L2F1dGhvciIpKSwNCiAgICBhdHRyaWJ1dGVzID0geG1sX3RleHQoeG1sX2ZpbmRfYWxsKGJvb2ssICJhdHRyaWJ1dGVzL2F0dHJpYnV0ZSIpKQ0KICApDQp9KQ0KDQoNCmRmX3htbCA8LWRhdGEuZnJhbWUoYm9va3NfbGlzdCkNCnByaW50KGRmX3htbCkNCiMjcHJpbnQoIlhNTCBEYXRhOiIpDQojcHJpbnQoYm9va3NfbGlzdCkNCmBgYA0KDQojIyBMb2FkIGRhdGEgYWZvciBKc29uDQpgYGB7cn0NCiMgTG9hZCBKU09OIGZpbGUNCmpzb25fZGF0YSA8LSBmcm9tSlNPTigiaHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL3RhbnppbDY0L0RBVEEtNjA3LUFzc2lnbm1lbnQtMDcvcmVmcy9oZWFkcy9tYWluL2Jvb2tzLmpzb24iKQ0KZGZfanNvbiA8LWRhdGEuZnJhbWUoanNvbl9kYXRhKQ0KcHJpbnQoZGZfanNvbikNCg0KI3ByaW50KCJKU09OIERhdGE6IikNCiNwcmludChqc29uX2RhdGEpDQoNCmBgYA0KDQoNCg0KIyMgQ2hlY2sgSWRlbnRpY2FsDQpgYGB7cn0NCiMgQ2hlY2sgaWYgZGZfaHRtbCBhbmQgZGZfeG1sIGFyZSBpZGVudGljYWwNCmlkZW50aWNhbChkZl9odG1sLCBkZl94bWwpDQoNCiMgQ2hlY2sgaWYgZGZfaHRtbCBhbmQgZGZfanNvbiBhcmUgaWRlbnRpY2FsDQppZGVudGljYWwoZGZfaHRtbCwgZGZfanNvbikNCg0KIyBDaGVjayBpZiBkZl94bWwgYW5kIGRmX2pzb24gYXJlIGlkZW50aWNhbA0KaWRlbnRpY2FsKGRmX3htbCwgZGZfanNvbikNCg0KYGBgDQpDb25jbHVzaW9uOiBJbiBjb25jbHVzaW9uIHdlIGNhbiBzYXkgIHRoYXQgdGhlIGZpbGVzIGFyZSBpbiBkaWZmZXJlbnQgZm9ybWF0IGFuZCBub3QgaWRlbnRpY2FsLg0K