The goal of this assignment is to create HTML, XML, and JSON files and import each of them into R.

Load Packages

library(XML) #for readHTMLTable, xmlTreeParse, and xmlSApply
library(RCurl) #for getURL function
library(rjson) #for fromJSON function
library(jsonlite) #for fromJSON function
library(DT) #for datatables
library(tidyr) #for data wrangling
library(dplyr) #for data wrangling
library(magrittr) #for data wrangling

HTML

Import

# Fetch the raw HTML page as text, read its table(s) with the first row taken
# as the header, and flatten the result into a single data frame.
# NOTE(review): the tidy step below refers to `books.author` / `books.title`,
# so the flattening is presumably what adds the `books.` prefix -- confirm.
books_html <-
  "https://raw.githubusercontent.com/mkivenson/Data-Acquisition-and-Management/master/Assignment%205/books.html" %>%
  getURL() %>%
  readHTMLTable(header = TRUE) %>%
  as.data.frame()

# Show the imported data as an interactive table.
datatable(books_html)

Tidy

# Reshape the HTML import to one row per (book, author) pair.
#  1. Split the comma-separated author string into up to four columns. The
#     separator also consumes any whitespace after the comma so that second
#     and later authors do not keep a leading space; `fill = "right"` pads
#     books with fewer than four authors with NA.
#  2. Stack the author columns into `id` (which author slot) and `author`
#     (the name). The columns are selected by name instead of by position,
#     and only rows whose author slot is NA are dropped, so a missing value
#     in some other column does not remove the book.
#  3. Sort the result by title.
books_html <- books_html %>%
  separate(
    books.author,
    into = paste0("author", 1:4),
    sep = ",\\s*",
    fill = "right"
  ) %>%
  pivot_longer(
    author1:author4,
    names_to = "id",
    values_to = "author",
    values_drop_na = TRUE
  ) %>%
  arrange(books.title)
datatable(books_html)

XML

Import

# Fetch the raw XML as text and parse it into a tree. The inner xmlSApply
# collects the text value of every child of a node; the outer xmlSApply runs
# that over the parsed document. The result is transposed before being
# converted to a data frame.
books_xml <-
  "https://raw.githubusercontent.com/mkivenson/Data-Acquisition-and-Management/master/Assignment%205/books.xml" %>%
  getURL() %>%
  xmlTreeParse() %>%
  xmlSApply(function(node) xmlSApply(node, xmlValue)) %>%
  t() %>%
  as.data.frame(row.names = NULL)

# Show the imported data as an interactive table.
datatable(books_xml)

Tidy

# Reshape the XML import to one row per (book, author) pair.
#  1. Split the comma-separated author string into up to four columns. The
#     separator also consumes any whitespace after the comma so that second
#     and later authors do not keep a leading space; `fill = "right"` pads
#     books with fewer than four authors with NA.
#  2. Stack the author columns into `id` (which author slot) and `author`
#     (the name). The columns are selected by name instead of by position,
#     and only rows whose author slot is NA are dropped, so a missing value
#     in some other column does not remove the book.
#  3. Sort the result by title.
books_xml <- books_xml %>%
  separate(
    author,
    into = paste0("author", 1:4),
    sep = ",\\s*",
    fill = "right"
  ) %>%
  pivot_longer(
    author1:author4,
    names_to = "id",
    values_to = "author",
    values_drop_na = TRUE
  ) %>%
  arrange(title)
datatable(books_xml)

JSON

Import

# Fetch the raw JSON as text, parse it, and coerce the result to a data frame.
# Both rjson and jsonlite are attached in this file and both export
# `fromJSON`, so the bare name resolves to whichever package was attached
# last. The call is qualified with jsonlite (the one attached last here) so
# the result does not change if the library() calls are ever reordered.
books_json <- as.data.frame(
  jsonlite::fromJSON(
    getURL("https://raw.githubusercontent.com/mkivenson/Data-Acquisition-and-Management/master/Assignment%205/books.json")
  )
)

# Show the imported data as an interactive table.
datatable(books_json)