# Load the necessary libraries
# Packages this script depends on:
#   xml2     - downloads and parses the XML file
#   XML      - converts the parsed XML into a data frame
#   jsonlite - reads the JSON file
#   rvest    - reads the HTML table
required_packages <- c("xml2", "XML", "jsonlite", "rvest")
# Install only the packages that are not available yet, so re-running the
# script does not download and reinstall everything each time. Packages are
# fetched over HTTPS instead of plain HTTP.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "https://cloud.r-project.org")
}
# Attach in the original order so that name masking between packages is
# unchanged.
library(xml2)
library(XML)
library(jsonlite)
library(rvest)
# Read the HTML version of the book data.
# `html_file` is the URL of the raw HTML page (not a data frame);
# `html_data` is the parsed document returned by read_html().
html_file <- "https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/Sheet1.html"
html_data <- read_html(html_file)
# Convert every <table> element on the page into a data frame.
# html_elements() replaces the superseded html_nodes(), and the deprecated
# `fill` argument is omitted because html_table() fills ragged rows by default.
# Note: this variable shares its name with rvest's html_table() function; the
# call below still resolves to the function because R skips non-function
# bindings when looking up a name in call position.
html_table <- html_data |>
  html_elements("table") |>
  html_table()
# Fail with a clear message rather than "subscript out of bounds" when the
# page contains no table at all.
if (length(html_table) == 0) {
  stop("No <table> element found in ", html_file, call. = FALSE)
}
# The book data is the first table on the page.
# NOTE(review): the sheet's own header row (Title, Author, ...) is returned as
# the first data row and the columns are named A-E, with an extra unnamed
# row-number column in front.
books_df_html <- html_table[[1]]
# Final result
books_df_html
## # A tibble: 4 × 6
## `` A B C D E
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 1 Title Author Genre Pages Rati…
## 2 2 Basic Physics Karl F. K… Scie… 352 4.6
## 3 3 History of the World Map by Map DK Hist… 448 4.8
## 4 4 Essential Calculus Skills Practice Workbook Chris McM… Math 361 4.6
# Read the XML version of the book data.
# `xml_URL` is the address of the raw XML file (not a data frame);
# read_xml() fetches it and returns an xml2 document.
xml_URL <- "https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/book_two.xml"
xmlContent <- read_xml(xml_URL)
# xmlParse() comes from the XML package and expects a file name or XML text,
# not an xml2 document. Serialise the document to a string and parse it
# explicitly as text instead of relying on implicit coercion.
books_xml_doc <- xmlParse(as.character(xmlContent), asText = TRUE)
# xmlToDataFrame() already returns a data frame, so no additional
# as.data.frame() conversion is required.
booksXML <- xmlToDataFrame(books_xml_doc)
# Final result
booksXML
## Title Author
## 1 Basic Physics Karl F. Kuhn, Frank Noschese
## 2 History of the World Map by Map DK
## 3 Essential Calculus Skills Practice Workbook Chris McMullen
## Genre Pages Rating
## 1 Science 352 4.6
## 2 History 448 4.8
## 3 Math 361 4.6
# Read the JSON version of the book data.
# `json_file` is the URL of the raw JSON file.
json_file <- "https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/Book_Data_Three%20-%20Sheet1.json"
# Fetch and parse the JSON, then coerce the parsed result to a data frame.
json_data <- jsonlite::fromJSON(txt = json_file)
books_df_json <- json_data |>
  as.data.frame()
# Final result
books_df_json
## Title Author
## 1 Basic Physics Karl F. Kuhn, Frank Noschese
## 2 History of the World Map by Map DK
## 3 Essential Calculus Skills Practice Workbook Chris McMullen
## Genre Pages Rating
## 1 Science 352 4.6
## 2 History 448 4.8
## 3 Math 361 4.6
The three data frames are not entirely identical. The HTML import did not use the variable names as the column headers: its columns are labelled A–E and the actual names appear as the first row of data. Additionally, the XML import stores the numeric values as character strings, while the JSON import recognizes them as integers and decimals. The JSON format therefore provides the most accurate and most easily interpretable data frame of the three formats.