library(XML)
library(RCurl)
## Loading required package: bitops
library(jsonlite)
# --- HTML source ----
# Download the HTML version of the book list. Despite its name, htmlFileURL
# receives whatever getURL() returns for this address, not the address itself.
htmlFileURL <- getURL(
  "https://raw.githubusercontent.com/asajjad1234/DATA-607/master/books.html"
)

# Parse the table(s) in the page (first row used as the header) and coerce the
# result to a single data frame. Every column name printed below carries a
# "NULL." prefix -- presumably the label given to the table by
# readHTMLTable(); TODO confirm against the XML package docs.
htmlFile_df <- data.frame(
  readHTMLTable(htmlFileURL, header = TRUE)
)

# Show the resulting class and contents (echoed output follows each call).
class(htmlFile_df)
## [1] "data.frame"
print(htmlFile_df)
## NULL.ID NULL.Title NULL.Author1 NULL.Author2 NULL.Author3 NULL.Author4 NULL.ISBN NULL.Publisher NULL.Publication.Date
## 1 1 R in a Nutshell, 2nd Edition Joseph Adler 978-1-4493-1208-4 O'Reilly Media, Inc 10/09/2012
## 2 2 Data Science for Business Foster Provost Tom Fawcett 978-1-4493-6132-7 O'Reilly Media, Inc 08/09/2013
## 3 3 Automated Data Collection with R Simon Munzert Christian Rubba Peter Meissner Dominic Nyhuis 978-1-449-31208-4 John Wiley & Sons 01/20/2015
# --- XML source ----
# Download the XML version of the book list. As with the HTML step, the
# "URL" variable ends up holding the value returned by getURL().
xmlFileURL <- getURL(
  "https://raw.githubusercontent.com/asajjad1234/DATA-607/master/books.xml"
)

# Parse the downloaded text into an XML document object.
xmlFile <- xmlParse(xmlFileURL)

# Convert the document to a nested list, then coerce that list to a data
# frame. As the printed output below shows, this yields a single wide row:
# the fields of each book sit side by side, with numeric suffixes added to
# the repeated names (book.id, book.id.1, book.id.2, ...).
xmlFile_df <- data.frame(
  xmlToList(xmlFile)
)

# Show the resulting class and contents (echoed output follows each call).
class(xmlFile_df)
## [1] "data.frame"
print(xmlFile_df)
## book.id book.title book.author book.isbn book.publisher book.pubdate book.id.1 book.title.1 book.author.1 book.author.2 book.isbn.1 book.publisher.1 book.pubdate.1 book.id.2 book.title.2 book.author.3 book.author.4 book.author.5 book.author.6 book.isbn.2 book.publisher.2 book.pubdate.2
## 1 1 R in a Nutshell, 2nd Edition Joseph Adler 978-1-4493-1208-4 O'Reilly Media, Inc 10/09/2012 2 Data Science for Business Foster Provost Tom Fawcett 978-1-4493-6132-7 O'Reilly Media, Inc 08/09/2013 3 Automated Data Collection with R Simon Munzert Christian Rubba Peter Meissner Dominic Nyhuis 978-1-449-31208-4 John Wiley & Sons 01/20/2015
# --- JSON source ----
# Download the JSON version of the book list. The "URL" variable again holds
# the value returned by getURL().
jsonFileURL <- getURL(
  "https://raw.githubusercontent.com/asajjad1234/DATA-607/master/books.json"
)

# Parse the JSON without flattening nested structures and coerce the result
# to a data frame. In the printed output below there is one row per book, the
# column names are prefixed with "catalog.book.", and a book's authors all
# appear together in the single catalog.book.author column.
jsonData_df <- data.frame(
  fromJSON(jsonFileURL, flatten = FALSE)
)

# Show the resulting class and contents (echoed output follows each call).
class(jsonData_df)
## [1] "data.frame"
print(jsonData_df)
## catalog.book.id catalog.book.title catalog.book.author catalog.book.isbn catalog.book.publisher catalog.book.pubdate
## 1 1 R in a Nutshell, 2nd Edition Joseph Adler 978-1-4493-1208-4 O'Reilly Media, Inc 10/09/2012
## 2 2 Data Science for Business Foster Provost, Tom Fawcett 978-1-4493-6132-7 O'Reilly Media, Inc 08/09/2013
## 3 3 Automated Data Collection with R Simon Munzert, Christian Rubba, Peter Meissner, Dominic Nyhuis 978-1-449-31208-4 John Wiley & Sons 01/20/2015
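The three files contain the same book data in three different formats (HTML, XML, and JSON), but the resulting R data frames look different once loaded: the HTML table yields one row per book with separate Author1–Author4 columns (and "NULL."-prefixed column names), the XML file collapses into a single wide row holding every book's fields side by side, and the JSON file yields one row per book with all of a book's authors combined in a single column.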