HTML

library(rvest)
## Loading required package: xml2
library(xml2)
library(kableExtra)

# Fetch and parse the HTML page that holds the books table.
bookshtml <- read_html(
  "https://raw.githubusercontent.com/dhairavc/DATA607/master/books.html"
)
# Extract the tables from the parsed page; the result is a list of data frames.
dfhtml <- html_table(bookshtml)
# Inspect the structure of the extracted result.
str(dfhtml)
## List of 1
##  $ :'data.frame':    3 obs. of  5 variables:
##   ..$ Title           : chr [1:3] "Cosmos" "The Hobbit and The Lord of the Rings" "A Game of Thrones"
##   ..$ Author(s)       : chr [1:3] "Carl Sagan, Neil deGrasse Tyson, Ann Druyan" "J. R. R. Tolkien" "George R. R. Martin"
##   ..$ Publisher       : chr [1:3] "Random House Publishing Group" "Houghton Mifflin Harcourt" "Random House Publishing Group"
##   ..$ Publication Date: chr [1:3] "12/10/2013" "09/18/2012" "08/04/1997"
##   ..$ Pages           : int [1:3] 432 1504 864
# Render the HTML-derived result as a styled table.
dfhtml %>%
  kable() %>%
  kable_styling()
Title Author(s) Publisher Publication Date Pages
Cosmos Carl Sagan, Neil deGrasse Tyson, Ann Druyan Random House Publishing Group 12/10/2013 432
The Hobbit and The Lord of the Rings J. R. R. Tolkien Houghton Mifflin Harcourt 09/18/2012 1504
A Game of Thrones George R. R. Martin Random House Publishing Group 08/04/1997 864

XML

library(XML)
## 
## Attaching package: 'XML'
## The following object is masked from 'package:rvest':
## 
##     xml
# Read the XML document (read_xml() comes from xml2).
booksxml <- read_xml(
  "https://raw.githubusercontent.com/dhairavc/DATA607/master/books.xml"
)
# Parse it with the XML package and flatten the records into a data frame.
dfxml <- booksxml %>%
  xmlParse() %>%
  xmlToDataFrame()
# Inspect the structure of the resulting data frame.
str(dfxml)
## 'data.frame':    3 obs. of  5 variables:
##  $ title          : Factor w/ 3 levels " A Game of Thrones ",..: 2 3 1
##  $ author         : Factor w/ 3 levels " Carl Sagan, Neil deGrasse Tyson, Ann Druyan ",..: 1 3 2
##  $ Publisher      : Factor w/ 2 levels " Houghton Mifflin Harcourt ",..: 2 1 2
##  $ PublicationDate: Factor w/ 3 levels " 08/04/1997 ",..: 3 2 1
##  $ Pages          : Factor w/ 3 levels " 1504 "," 432 ",..: 2 1 3
# Render the XML-derived data frame as a styled table.
dfxml %>%
  kable() %>%
  kable_styling()
title author Publisher PublicationDate Pages
Cosmos Carl Sagan, Neil deGrasse Tyson, Ann Druyan Random House Publishing Group 12/10/2013 432
The Hobbit and The Lord of the Rings J. R. R. Tolkien Houghton Mifflin Harcourt 09/18/2012 1504
A Game of Thrones George R. R. Martin Random House Publishing Group 08/04/1997 864

JSON

library(jsonlite)

# Download the JSON file and let jsonlite simplify it into R structures.
booksjson <- fromJSON(
  txt = "https://raw.githubusercontent.com/dhairavc/DATA607/master/books.json"
)
# Inspect the structure of the parsed result.
str(booksjson)
## List of 1
##  $ favoritebooks:'data.frame':   3 obs. of  5 variables:
##   ..$ title          : chr [1:3] "Cosmos" "The Hobbit and The Lord of the Rings" "A Game of Thrones"
##   ..$ author         :List of 3
##   .. ..$ : chr [1:3] "Carl Sagan" "Neil deGrasse Tyson" "Ann Druyan"
##   .. ..$ : chr "J. R. R. Tolkien"
##   .. ..$ : chr "George R. R. Martin"
##   ..$ publisher      : chr [1:3] "Random House Publishing Group" "Houghton Mifflin Harcourt" "Random House Publishing Group"
##   ..$ publicationdate: chr [1:3] "12/10/2013" "09/18/2012" "08/04/1997"
##   ..$ pages          : int [1:3] 432 1504 864
# Render the JSON-derived result (a list wrapping the favoritebooks data
# frame) as a styled table.
booksjson %>%
  kable() %>%
  kable_styling()
title author publisher publicationdate pages
Cosmos c(“Carl Sagan”, “Neil deGrasse Tyson”, “Ann Druyan”) Random House Publishing Group 12/10/2013 432
The Hobbit and The Lord of the Rings J. R. R. Tolkien Houghton Mifflin Harcourt 09/18/2012 1504
A Game of Thrones George R. R. Martin Random House Publishing Group 08/04/1997 864

Conclusion

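When scraping data from HTML, XML, and JSON, the resulting data frames have different formats. For example: the HTML table comes back as a list containing one data frame, with character columns and an integer Pages column; the XML data frame stores every column, including Pages, as a factor whose values carry leading and trailing spaces; and the JSON result is a list holding a data frame whose author column is itself a list of character vectors.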

Because each source format produces a differently structured result, each of the three needs its own handling to normalize the data into a common form.