Read table data in HTML, XML, and JSON formats.

1. Read HTML data.

library(XML)
library(RCurl)


html_url <- "https://raw.githubusercontent.com/v-sinha/data607/week_07/books.html"

# Download the page source as text first, then hand that text to the HTML
# table parser. readHTMLTable() yields a list of tables rather than a bare
# data frame, so htmldf is a list (str() reports its structure).
html_text <- getURL(html_url)
htmldf <- readHTMLTable(html_text)
str(htmldf)
## List of 1
##  $ NULL:'data.frame':    3 obs. of  5 variables:
##   ..$ Title                   : Factor w/ 3 levels "C Programming Language, 2nd Edition",..: 1 3 2
##   ..$ Authors                 : Factor w/ 3 levels "Brian W. Kernighan; Dennis M. Ritchie",..: 1 3 2
##   ..$ Publisher               : Factor w/ 2 levels "Addison-Wesley Professional",..: 2 2 1
##   ..$ Year Published          : Factor w/ 3 levels "1986","1988",..: 2 1 3
##   ..$ Amazon Best Sellers Rank: Factor w/ 3 levels "559,261","73,148",..: 3 2 1
# htmldf is a list of parsed tables, so head() shows its leading elements.
head(htmldf)
## $`NULL`
##                                                                       Title
## 1                                       C Programming Language, 2nd Edition
## 2                       The Design of the UNIX Operating System 1st Edition
## 3 The Design and Implementation of the FreeBSD Operating System 1st Edition
##                                          Authors
## 1          Brian W. Kernighan; Dennis M. Ritchie
## 2                                Maurice J. Bach
## 3 Marshall Kirk McKusick; George V. Neville-Neil
##                     Publisher Year Published Amazon Best Sellers Rank
## 1               Prentice Hall           1988                    9,605
## 2               Prentice Hall           1986                   73,148
## 3 Addison-Wesley Professional           2004                  559,261

2. Read and parse XML data.

xml_url <- "https://raw.githubusercontent.com/v-sinha/data607/week_07/books.xml"

# Fetch the XML document as text.
xmldata <- getURL(xml_url)

# Parse the text into an R-level tree (useInternalNodes = FALSE) and take
# the root node, whose children are the individual records.
xmltree <- xmlTreeParse(xmldata, useInternalNodes = FALSE)
book_nodes <- xmlRoot(xmltree)

# For each child of the root, collect the text value of every field beneath
# it. The combined result has one column per record, so it is transposed to
# one row per record before being turned into a data frame.
extract_fields <- function(node) xmlSApply(node, xmlValue)
topxml <- xmlSApply(book_nodes, extract_fields)
xmldf <- data.frame(t(topxml), row.names = NULL)
str(xmldf)
## 'data.frame':    3 obs. of  5 variables:
##  $ Title                   : Factor w/ 3 levels "C Programming Language, 2nd Edition",..: 1 3 2
##   ..- attr(*, "names")= chr  "book" "book" "book"
##  $ Authors                 : Factor w/ 3 levels "Brian W. Kernighan; Dennis M. Ritchie",..: 1 3 2
##   ..- attr(*, "names")= chr  "book" "book" "book"
##  $ Publisher               : Factor w/ 2 levels "Addison-Wesley Professional",..: 2 2 1
##   ..- attr(*, "names")= chr  "book" "book" "book"
##  $ Year_Published          : Factor w/ 3 levels "1986","1988",..: 2 1 3
##   ..- attr(*, "names")= chr  "book" "book" "book"
##  $ Amazon_Best_Sellers_Rank: Factor w/ 3 levels "559,261","73,148",..: 3 2 1
##   ..- attr(*, "names")= chr  "book" "book" "book"
# Preview the first rows of the data frame built from the XML document.
head(xmldf)
##                                                                       Title
## 1                                       C Programming Language, 2nd Edition
## 2                       The Design of the UNIX Operating System 1st Edition
## 3 The Design and Implementation of the FreeBSD Operating System 1st Edition
##                                          Authors
## 1          Brian W. Kernighan; Dennis M. Ritchie
## 2                                Maurice J. Bach
## 3 Marshall Kirk McKusick; George V. Neville-Neil
##                     Publisher Year_Published Amazon_Best_Sellers_Rank
## 1               Prentice Hall           1988                    9,605
## 2               Prentice Hall           1986                   73,148
## 3 Addison-Wesley Professional           2004                  559,261

3. Read and parse JSON data.

library(rjson)

json_url <- "https://raw.githubusercontent.com/v-sinha/data607/week_07/books.json"

# Read the JSON file. The first top-level element of the parsed result is
# treated below as the collection of book records.
jsondata <- fromJSON(file = json_url)

# Convert the list into a dataframe.
# Each record becomes a one-row data frame, and all rows are combined with a
# single rbind() call. seq_along() is used instead of 1:length(), which would
# iterate over c(1, 0) for an empty collection, and binding once avoids
# re-copying the growing data frame on every iteration.
books <- jsondata[[1]]
book_rows <- lapply(seq_along(books), function(i) data.frame(books[i]))
if (length(book_rows) > 0) {
  jsondf <- do.call(rbind, book_rows)
} else {
  # No records: keep jsondf an (empty) data frame rather than NULL.
  jsondf <- data.frame()
}
str(jsondf)
## 'data.frame':    3 obs. of  5 variables:
##  $ Title                   : Factor w/ 3 levels "C Programming Language, 2nd Edition",..: 1 2 3
##  $ Authors                 : Factor w/ 3 levels "Brian W. Kernighan; Dennis M. Ritchie",..: 1 2 3
##  $ Publisher               : Factor w/ 2 levels "Prentice Hall",..: 1 1 2
##  $ Year_Published          : Factor w/ 3 levels "1988","1986",..: 1 2 3
##  $ Amazon_Best_Sellers_Rank: Factor w/ 3 levels "9,605","73,148",..: 1 2 3
# Preview the first rows of the data frame built from the JSON document.
head(jsondf)
##                                                                       Title
## 1                                       C Programming Language, 2nd Edition
## 2                       The Design of the UNIX Operating System 1st Edition
## 3 The Design and Implementation of the FreeBSD Operating System 1st Edition
##                                          Authors
## 1          Brian W. Kernighan; Dennis M. Ritchie
## 2                                Maurice J. Bach
## 3 Marshall Kirk McKusick; George V. Neville-Neil
##                     Publisher Year_Published Amazon_Best_Sellers_Rank
## 1               Prentice Hall           1988                    9,605
## 2               Prentice Hall           1986                   73,148
## 3 Addison-Wesley Professional           2004                  559,261

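The three results contain the same book data. Note that `htmldf` is a one-element list wrapping the data frame rather than a data frame itself, and its column names contain spaces (e.g. `Year Published`) where the XML and JSON versions use underscores (`Year_Published`).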