XML and Parsing XML

library(XML)
library(RCurl)
## Loading required package: bitops
library(plyr)
## Warning: package 'plyr' was built under R version 3.2.5
require(knitr)
## Loading required package: knitr
## Warning: package 'knitr' was built under R version 3.2.5
# Location of the XML version of the book list.
Xmlurl <- "https://raw.githubusercontent.com/xkong100/IS607/master/HW-XML-HML-Json/book.xml"
# Download the raw XML text as a character string.
books <- getURL(url = Xmlurl)
# Parse the text into an XML document. `FALSE` is spelled out because `F` is
# an ordinary variable that can be reassigned.
xmlbooks <- xmlParse(books, validate = FALSE)
# Convert the already-parsed document to a nested list and bind each
# top-level element into one data-frame row. Passing `xmlbooks` (rather than
# the raw `books` string) reuses the parse above instead of leaving it unused.
books1 <- ldply(xmlToList(xmlbooks), data.frame)
# Show the structure of the resulting data frame.
str(books1)
## 'data.frame':    3 obs. of  7 variables:
##  $ .id      : chr  "book" "book" "book"
##  $ Title    : Factor w/ 3 levels "Linear Algebra And Its Application",..: 1 2 3
##  $ Authors  : Factor w/ 3 levels "David C. Jay",..: 1 2 3
##  $ Publisher: Factor w/ 3 levels "Addision Westley",..: 1 2 3
##  $ ISBN     : Factor w/ 3 levels "0-201-77014-8",..: 1 2 3
##  $ Edition  : Factor w/ 3 levels "3rd","9th","5th": 1 2 3
##  $ .attrs   : Factor w/ 3 levels "1","2","3": 1 2 3
# Render the XML-derived data frame as a table with knitr's kable().
kable(books1)
.id Title Authors Publisher ISBN Edition .attrs
book Linear Algebra And Its Application David C. Jay Addision Westley 0-201-77014-8 3rd 1
book Introduction to Operation Research Frederick Hillier, Gerald Lieberman Mcgraw Hill 978-0-07-337629-5 9th 2
book Calculus James Stewart Thomson 0-534-39339-x 5th 3

HTML and Parsing HTML

# Location of the HTML version of the book list.
htmlurl <- "https://raw.githubusercontent.com/xkong100/IS607/master/HW-XML-HML-Json/book.html"
# Download the raw HTML text and parse it as an in-memory document.
books2 <- getURL(url = htmlurl)
htmltext <- htmlParse(books2, asText = TRUE)
# Collect every <tr> node found under a table body.
htmltable <- xpathApply(htmltext, "//table//tbody//tr")
# Positions of the row children to keep (every other child, 1 through 9).
# NOTE(review): presumably the skipped positions are whitespace text nodes
# between cells -- confirm against the page markup.
cell_positions <- c(1, 3, 5, 7, 9)
# Extract the text of the selected children of each row. vapply() enforces
# that every row yields exactly length(cell_positions) character values, so a
# malformed row raises an error instead of silently changing the result shape.
row_values <- vapply(
  htmltable,
  function(row) unname(xmlSApply(row, xmlValue))[cell_positions],
  character(length(cell_positions))
)
# vapply() returns one column per row of the table; transpose so that each
# book becomes a data-frame row.
htmlbooks <- as.data.frame(t(row_values))
colnames(htmlbooks) <- c("Title", "Authors", "Publisher", "ISBN", "Edition")
# Render the HTML-derived data frame as a table.
kable(htmlbooks)
Title Authors Publisher ISBN Edition
Linear Algebra And Its Application David C. Jay Addision Westley 0-201-77014-8 3rd
Introduction to Operation Research Frederick Hillier, Gerald Lieberman Mcgraw Hill 978-0-07-337629-5 9th
Calculus James Stewart Thomson 0-534-39339-x 5th

JSON and Parsing JSON

library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.2.5
# Location of the JSON version of the book list.
jsonurl <- "https://raw.githubusercontent.com/xkong100/IS607/master/HW-XML-HML-Json/books.json"
# Download the raw JSON text.
books3 <- getURL(jsonurl)
# Decode the JSON text into an R object.
jsonbooks <- fromJSON(txt = books3)
# Show the structure of the decoded object.
str(object = jsonbooks)
## 'data.frame':    3 obs. of  5 variables:
##  $ Title    : chr  "Linear Algebra And Its Application" "Introduction to Operation Research" "Calculus"
##  $ Authors  : chr  "David C. Jay" "Frederick Hillier, Gerald Lieberman" "James Stewart"
##  $ Publisher: chr  "Addision Westley" "Mcgraw Hill" "Thomson"
##  $ ISBN     : chr  "0-201-77014-8" "978-0-07-337629-5" "0-534-39339-x"
##  $ Edition  : chr  "3rd" "9th" "5th"
# Render the JSON-derived data frame as a table with knitr's kable().
kable(jsonbooks)
Title Authors Publisher ISBN Edition
Linear Algebra And Its Application David C. Jay Addision Westley 0-201-77014-8 3rd
Introduction to Operation Research Frederick Hillier, Gerald Lieberman Mcgraw Hill 978-0-07-337629-5 9th
Calculus James Stewart Thomson 0-534-39339-x 5th