library(XML)
library(RCurl)
# Location of the HTML version of the book table.
html_url <- "https://raw.githubusercontent.com/v-sinha/data607/week_07/books.html"
# Download the page source and parse its tables. The result is a list, not a
# bare data frame: the str() output below shows a single data-frame element
# listed under `$ NULL`.
htmldf <- local({
  page_source <- getURL(html_url)
  readHTMLTable(page_source)
})
# Inspect the structure of the parsed result.
str(htmldf)
## List of 1
## $ NULL:'data.frame': 3 obs. of 5 variables:
## ..$ Title : Factor w/ 3 levels "C Programming Language, 2nd Edition",..: 1 3 2
## ..$ Authors : Factor w/ 3 levels "Brian W. Kernighan; Dennis M. Ritchie",..: 1 3 2
## ..$ Publisher : Factor w/ 2 levels "Addison-Wesley Professional",..: 2 2 1
## ..$ Year Published : Factor w/ 3 levels "1986","1988",..: 2 1 3
## ..$ Amazon Best Sellers Rank: Factor w/ 3 levels "559,261","73,148",..: 3 2 1
# Preview the parsed table.
head(htmldf)
## $`NULL`
## Title
## 1 C Programming Language, 2nd Edition
## 2 The Design of the UNIX Operating System 1st Edition
## 3 The Design and Implementation of the FreeBSD Operating System 1st Edition
## Authors
## 1 Brian W. Kernighan; Dennis M. Ritchie
## 2 Maurice J. Bach
## 3 Marshall Kirk McKusick; George V. Neville-Neil
## Publisher Year Published Amazon Best Sellers Rank
## 1 Prentice Hall 1988 9,605
## 2 Prentice Hall 1986 73,148
## 3 Addison-Wesley Professional 2004 559,261
# Location of the XML version of the book table.
xml_url <- "https://raw.githubusercontent.com/v-sinha/data607/week_07/books.xml"
# Fetch the raw XML text.
xmldata <- getURL(xml_url)
# Parse the text into an R-level tree structure (useInternalNodes = FALSE).
xmltree <- xmlTreeParse(xmldata, useInternalNodes = FALSE)
# For every child of the root node, collect the text value of each of its own
# children. The recorded output below is consistent with a matrix holding one
# column per book and one row per field.
topxml <- xmlSApply(
  xmlRoot(xmltree),
  function(book_node) xmlSApply(book_node, xmlValue)
)
# Transpose so each book becomes a row, and discard the row names.
xmldf <- data.frame(t(topxml), row.names = NULL)
# Inspect the structure of the resulting data frame.
str(xmldf)
## 'data.frame': 3 obs. of 5 variables:
## $ Title : Factor w/ 3 levels "C Programming Language, 2nd Edition",..: 1 3 2
## ..- attr(*, "names")= chr "book" "book" "book"
## $ Authors : Factor w/ 3 levels "Brian W. Kernighan; Dennis M. Ritchie",..: 1 3 2
## ..- attr(*, "names")= chr "book" "book" "book"
## $ Publisher : Factor w/ 2 levels "Addison-Wesley Professional",..: 2 2 1
## ..- attr(*, "names")= chr "book" "book" "book"
## $ Year_Published : Factor w/ 3 levels "1986","1988",..: 2 1 3
## ..- attr(*, "names")= chr "book" "book" "book"
## $ Amazon_Best_Sellers_Rank: Factor w/ 3 levels "559,261","73,148",..: 3 2 1
## ..- attr(*, "names")= chr "book" "book" "book"
# Preview the rows.
head(xmldf)
## Title
## 1 C Programming Language, 2nd Edition
## 2 The Design of the UNIX Operating System 1st Edition
## 3 The Design and Implementation of the FreeBSD Operating System 1st Edition
## Authors
## 1 Brian W. Kernighan; Dennis M. Ritchie
## 2 Maurice J. Bach
## 3 Marshall Kirk McKusick; George V. Neville-Neil
## Publisher Year_Published Amazon_Best_Sellers_Rank
## 1 Prentice Hall 1988 9,605
## 2 Prentice Hall 1986 73,148
## 3 Addison-Wesley Professional 2004 559,261
library(rjson)
# Location of the JSON version of the book table.
json_url <- "https://raw.githubusercontent.com/v-sinha/data607/week_07/books.json"
# Read the JSON file. The parsed result is a nested list; its first top-level
# element is treated below as the collection of book records.
jsondata <- fromJSON(file = json_url)
# Convert the list into a dataframe: build a one-row data frame per book and
# bind them with a single rbind() call. This avoids growing the result inside
# a loop and the `1:length(x)` pitfall, which iterates over c(1, 0) when the
# collection is empty.
jsondf <- local({
  book_rows <- lapply(jsondata[[1]], data.frame)
  if (length(book_rows) > 0) {
    do.call(rbind, book_rows)
  } else {
    # do.call(rbind, list()) returns NULL, so fall back to an empty data frame.
    data.frame()
  }
})
# Inspect the structure of the resulting data frame.
str(jsondf)
## 'data.frame': 3 obs. of 5 variables:
## $ Title : Factor w/ 3 levels "C Programming Language, 2nd Edition",..: 1 2 3
## $ Authors : Factor w/ 3 levels "Brian W. Kernighan; Dennis M. Ritchie",..: 1 2 3
## $ Publisher : Factor w/ 2 levels "Prentice Hall",..: 1 1 2
## $ Year_Published : Factor w/ 3 levels "1988","1986",..: 1 2 3
## $ Amazon_Best_Sellers_Rank: Factor w/ 3 levels "9,605","73,148",..: 1 2 3
# Preview the rows.
head(jsondf)
## Title
## 1 C Programming Language, 2nd Edition
## 2 The Design of the UNIX Operating System 1st Edition
## 3 The Design and Implementation of the FreeBSD Operating System 1st Edition
## Authors
## 1 Brian W. Kernighan; Dennis M. Ritchie
## 2 Maurice J. Bach
## 3 Marshall Kirk McKusick; George V. Neville-Neil
## Publisher Year_Published Amazon_Best_Sellers_Rank
## 1 Prentice Hall 1988 9,605
## 2 Prentice Hall 1986 73,148
## 3 Addison-Wesley Professional 2004 559,261
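The three data frames have identical content; note that the HTML result is a one-element list wrapping its data frame, and its column names contain spaces where the XML and JSON versions use underscores.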