library(XML)
## Warning: package 'XML' was built under R version 3.5.2
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.5.2
## Loading required package: bitops
library(jsonlite)
Parsing HTML
# Download the books table published as HTML. getURL() returns the page body
# as a single character string, so despite its name `books_html_url` holds the
# document text, not a URL.
books_html_url <- getURL(
  "https://raw.githubusercontent.com/Chris-Ayre/DATA607-ASSIGN7/master/books.html"
)
# readHTMLTable() parses the tables found in the document and returns them as
# a list of data frames. stringsAsFactors = FALSE keeps text columns such as
# ISBN and TITLE as character vectors on every R version (before R 4.0 the
# default silently converted them to factors), so the result is directly
# comparable with the data frames built from the XML and JSON sources.
books_html <- readHTMLTable(
  books_html_url,
  header = TRUE,
  stringsAsFactors = FALSE
)
books_html
## $`NULL`
## ISBN TITLE Author 1 Author 2
## 1 0062748203 Barracoon Zora Neale Hurston
## 2 1501135945 Indianapolis Lynn Vincent Sara Vladic
## 3 1101871849 Our Towns James Fallows Deborah Fallows
## PUBLISHER
## 1 Amistad
## 2 Simon & Schuster
## 3 Amistad
Parsing XML
# Download the books catalogue published as XML. getURL() returns the document
# body as a single character string, so despite its name `books_xml_url` holds
# the XML text, not a URL.
books_xml_url <- getURL(
  "https://raw.githubusercontent.com/Chris-Ayre/DATA607-ASSIGN7/master/books.xml"
)
# asText = TRUE states explicitly that the argument is XML content rather than
# a file name or URL, instead of leaving xmlParse() to guess from the string.
books_xml1 <- xmlParse(books_xml_url, ignoreBlanks = TRUE, asText = TRUE)
# Flatten the child nodes of the document root into one data-frame row each.
# stringsAsFactors = FALSE keeps text columns such as ISBN and title as
# character vectors on every R / XML package version rather than factors.
books_xml <- xmlToDataFrame(books_xml1, stringsAsFactors = FALSE)
books_xml
## title author publisher ISBN
## 1 Barracoon Zora Neale Hurston Amistad 0062748203
## 2 Indianopolis Lynn Vincent Simon Schuster 1501135945
## 3 Our Towns James Fallows Amistad 1101871849
## author2
## 1 <NA>
## 2 Sara Vladic
## 3 Deborah Fallows
Parsing JSON
# Fetch the books catalogue published as JSON. getURL() hands back the document
# body as one character string, so `books_json_URL` holds JSON text, not a URL.
books_json_URL <- getURL(
  "https://raw.githubusercontent.com/Chris-Ayre/DATA607-ASSIGN7/master/books.json"
)
# Convert the JSON text into R objects; the records end up as a data frame
# stored under the `books` element of the returned list.
books_json <- fromJSON(books_json_URL)
print(books_json)
## $books
## ISBN Title Author Publisher
## 1 0062748203 Barracoon Zora Neale Hurston Amistad
## 2 1501135945 Indianopolis Lynn Vincent, Sara Vladic Simon & Schuster
## 3 1101871849 Our Towns James Fallows, Deborah Fallows Amistad
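The data frames are not identical: each file format is read by a different parser with different defaults, so the results differ in structure as well as in column order. The HTML and JSON results are lists that wrap the data frame (`$NULL` and `$books`), while the XML result is a plain data frame. The column names and their order differ between the three, and the authors appear as two columns in the HTML and XML versions but as a single combined field in the JSON version.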