XML and Parsing XML
library(XML)
library(RCurl)
## Loading required package: bitops
library(plyr)
## Warning: package 'plyr' was built under R version 3.2.5
require(knitr)
## Loading required package: knitr
## Warning: package 'knitr' was built under R version 3.2.5
# Download the XML document as a single string, parse it once, and turn each
# top-level record into one data-frame row.
Xmlurl <- "https://raw.githubusercontent.com/xkong100/IS607/master/HW-XML-HML-Json/book.xml"
books <- getURL(url = Xmlurl)
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
xmlbooks <- xmlParse(books, validate = FALSE)
# Reuse the already-parsed document; passing the raw string `books` here would
# make xmlToList() parse the same text a second time and leave `xmlbooks` unused.
books1 <- ldply(xmlToList(xmlbooks), data.frame)
str(books1)
## 'data.frame': 3 obs. of 7 variables:
## $ .id : chr "book" "book" "book"
## $ Title : Factor w/ 3 levels "Linear Algebra And Its Application",..: 1 2 3
## $ Authors : Factor w/ 3 levels "David C. Jay",..: 1 2 3
## $ Publisher: Factor w/ 3 levels "Addision Westley",..: 1 2 3
## $ ISBN : Factor w/ 3 levels "0-201-77014-8",..: 1 2 3
## $ Edition : Factor w/ 3 levels "3rd","9th","5th": 1 2 3
## $ .attrs : Factor w/ 3 levels "1","2","3": 1 2 3
# Render the XML-derived data frame as a table.
knitr::kable(books1)
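| .id  | Title                              | Authors                             | Publisher        | ISBN              | Edition | .attrs |
|:-----|:-----------------------------------|:------------------------------------|:-----------------|:------------------|:--------|:-------|
| book | Linear Algebra And Its Application | David C. Jay                        | Addision Westley | 0-201-77014-8     | 3rd     | 1      |
| book | Introduction to Operation Research | Frederick Hillier, Gerald Lieberman | Mcgraw Hill      | 978-0-07-337629-5 | 9th     | 2      |
| book | Calculus                           | James Stewart                       | Thomson          | 0-534-39339-x     | 5th     | 3      |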
HTML and Parsing HTML
# Download the HTML page and parse the text into a document tree.
htmlurl <- "https://raw.githubusercontent.com/xkong100/IS607/master/HW-XML-HML-Json/book.html"
books2 <- getURL(url = htmlurl)
htmltext <- htmlParse(books2, asText = TRUE)
# One node per <tr> found under a table body.
htmltable <- xpathApply(htmltext, "//table//tbody//tr")
# For each row, take the text of its children and keep positions 1, 3, 5, 7, 9
# as the five data cells.
# NOTE(review): presumably the even positions are whitespace text nodes between
# cells -- confirm against the page markup.
# vapply() pins every row to exactly five character values, so the result is
# always a 5 x n character matrix (including n = 0) rather than whatever shape
# sapply() happens to simplify to.
htmlbooks <- as.data.frame(t(vapply(
  htmltable,
  function(x) unname(xmlSApply(x, xmlValue))[c(1, 3, 5, 7, 9)],
  character(5)
)))
colnames(htmlbooks) <- c("Title", "Authors", "Publisher", "ISBN", "Edition")
kable(htmlbooks)
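| Title                              | Authors                             | Publisher        | ISBN              | Edition |
|:-----------------------------------|:------------------------------------|:-----------------|:------------------|:--------|
| Linear Algebra And Its Application | David C. Jay                        | Addision Westley | 0-201-77014-8     | 3rd     |
| Introduction to Operation Research | Frederick Hillier, Gerald Lieberman | Mcgraw Hill      | 978-0-07-337629-5 | 9th     |
| Calculus                           | James Stewart                       | Thomson          | 0-534-39339-x     | 5th     |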
JSON and Parsing JSON
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.2.5
# Fetch the JSON text from the repository, convert it to an R object with
# jsonlite, and show the structure of the result.
jsonurl <- "https://raw.githubusercontent.com/xkong100/IS607/master/HW-XML-HML-Json/books.json"
books3 <- getURL(url = jsonurl)
jsonbooks <- fromJSON(txt = books3)
str(object = jsonbooks)
## 'data.frame': 3 obs. of 5 variables:
## $ Title : chr "Linear Algebra And Its Application" "Introduction to Operation Research" "Calculus"
## $ Authors : chr "David C. Jay" "Frederick Hillier, Gerald Lieberman" "James Stewart"
## $ Publisher: chr "Addision Westley" "Mcgraw Hill" "Thomson"
## $ ISBN : chr "0-201-77014-8" "978-0-07-337629-5" "0-534-39339-x"
## $ Edition : chr "3rd" "9th" "5th"
# Render the JSON-derived data frame as a table.
knitr::kable(jsonbooks)
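| Title                              | Authors                             | Publisher        | ISBN              | Edition |
|:-----------------------------------|:------------------------------------|:-----------------|:------------------|:--------|
| Linear Algebra And Its Application | David C. Jay                        | Addision Westley | 0-201-77014-8     | 3rd     |
| Introduction to Operation Research | Frederick Hillier, Gerald Lieberman | Mcgraw Hill      | 978-0-07-337629-5 | 9th     |
| Calculus                           | James Stewart                       | Thomson          | 0-534-39339-x     | 5th     |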