## Loading required package: XML
## Loading required package: RJSONIO
## Loading required package: RCurl
## Loading required package: bitops
# Download the raw HTML document from GitHub (returned as text)
html_text <- getURL("https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Assignments/Week%207/books.html")
# Parse every <table> in the document; the result is a list of tables
html_tables <- readHTMLTable(html_text)
# Stack the list of tables into a single data frame
html_data_set <- do.call(rbind, html_tables)
# Drop the row names so the rows print with plain sequential numbers
rownames(html_data_set) <- NULL
html_data_set
## Title Authors Year Published
## 1 Doing Bayesian Data Analysis John Kruschke 2015
## 2 Further Pure 1 for OCR Douglas Quadling, Hugh Neill 2004
## 3 Into Thin Air Jon Krakauer, Randy Rackliff 1999
## Publisher ISBN-10
## 1 Academic Press 0124058884
## 2 Cambridge University Press 0521548985
## 3 Anchor 0385494785
# Download the raw XML document from GitHub (returned as text)
xml_text <- getURL("https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Assignments/Week%207/books.xml")
# Parse the text into an XML document tree
xml_doc <- xmlParse(xml_text)
# Take the top-level node and turn its child records into data frame rows
root <- xmlRoot(xml_doc)
xml_data_set <- xmlToDataFrame(root)
xml_data_set
## Title Authors Year_Published
## 1 Doing Bayesian Data Analysis John Kruschke 2015
## 2 Further Pure 1 for OCR Douglas Quadling, Hugh Neill 2004
## 3 Into Thin Air Jon Krakauer, Randy Rackliff 1999
## Publisher ISBN-10
## 1 Academic Press 0124058884
## 2 Cambridge University Press 0521548985
## 3 Anchor 0385494785
# load json file from github
# The download previously pointed at books.xml and its result was never used
# (a local "books.json" was read instead). Fetch the JSON document itself.
# NOTE(review): assumes books.json sits next to books.html / books.xml in the
# repository -- confirm the path.
url <- getURL("https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Assignments/Week%207/books.json")
# parse the downloaded text; asText = TRUE makes sure the string is treated as
# JSON content rather than as a file name
json_data_set <- fromJSON(url, asText = TRUE)
# flatten each record of the outer list into a plain vector; lapply() always
# returns a list, which is what do.call() needs below (sapply() may simplify
# to a matrix depending on the input)
json_data_set <- lapply(json_data_set[[1]], unlist)
# stack the per-record vectors row-wise (yields a matrix, one row per record)
json_data_set <- do.call("rbind", json_data_set)
json_data_set
## Title Authors
## [1,] "Doing Bayesian Data Analysis" "John Kruschke"
## [2,] "Further Pure 1 for OCR" "Douglas Quadling, Hugh Neill"
## [3,] "Into Thin Air" "Jon Krakauer, Randy Rackliff"
## Year_Published Publisher ISBN-10
## [1,] "2015" "Academic Press" "0124058884"
## [2,] "2004" "Cambridge University Press" "0521548985"
## [3,] "1999" "Anchor" "0385494785"
Even though each file type required different processing, the tables produced from all three contain the same data (note that the JSON result is a character matrix rather than a data frame).