library(XML2R)
## Warning: package 'XML2R' was built under R version 3.1.3
## Loading required package: XML
## Warning: package 'XML' was built under R version 3.1.3
library(htmltools)
## Warning: package 'htmltools' was built under R version 3.1.3
library(htmlTable)
## Warning: package 'htmlTable' was built under R version 3.1.3
library(RJSONIO)
## Warning: package 'RJSONIO' was built under R version 3.1.3
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.1.3
##
## Attaching package: 'jsonlite'
##
## The following objects are masked from 'package:RJSONIO':
##
## fromJSON, toJSON
##
## The following object is masked from 'package:utils':
##
## View
# Download the Project Gutenberg HTML edition of the book as a character
# vector (one element per line) and preview the first 200 lines.
# The URL has to be delimited by plain ASCII double quotes: typographic
# ("curly") quotes are not string delimiters in R and make the line fail to
# parse.
html <- readLines("http://www.gutenberg.org/files/17185/17185-h/17185-h.htm")
head(html, n = 200)
# html file
# Read the book table from the copy of book.html hosted on GitHub.
#   * Use the "raw.githubusercontent.com" address, which serves the file
#     itself; the "github.com/.../blob/..." address serves GitHub's viewer
#     page, which does not contain the book table.
#   * Fetch the text with readLines() and hand it to htmlParse() as text,
#     instead of passing the https URL straight to readHTMLTable()
#     (NOTE(review): the XML package's built-in downloader is not expected to
#     handle https -- confirm on the target R version).
url1 <- "https://raw.githubusercontent.com/jameyletherton/week9_assignment/f12a908cd9ca8abd7dda725192c776f46f4d4c5c/book.html"
page1 <- htmlParse(
  paste(readLines(url1, warn = FALSE), collapse = "\n"),
  asText = TRUE
)
tables1 <- readHTMLTable(page1, header = TRUE)
head(tables1)
str(tables1)
# NOTE: with the original "blob" URL both calls above printed an empty
# "named list()", i.e. no table was found on the page.
# Read every <table> from the local copy of book.html into a list of data
# frames, treating the first row of each table as the column headers.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
url <- "file:///C:/Users/jamey/Documents/IS%20607/book.html"
tables <- readHTMLTable(url, header = TRUE)
head(tables)
## $`NULL`
## Title
## 1 "Stories about the Instinct of Animals, Their Characters, and Habits"
## Author Illustrator eBook URL
## 1 Thomas Bingley T. Landseer www.gutenberg.net
str(tables)
## List of 1
## $ NULL:'data.frame': 1 obs. of 4 variables:
## ..$ Title : Factor w/ 1 level "\"Stories about the Instinct of Animals, Their Characters, and Habits\"": 1
## ..$ Author : Factor w/ 1 level "Thomas Bingley": 1
## ..$ Illustrator: Factor w/ 1 level "T. Landseer": 1
## ..$ eBook URL : Factor w/ 1 level "www.gutenberg.net": 1
#xml file
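# Failed attempt: a GitHub "blob" URL serves the HTML viewer page rather than
# the raw XML file, so it cannot be parsed as the book document.
#doc1 <- xmlTreeParse("https://github.com/jameyletherton/week9_assignment/blob/master/book.xml") ; failed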
#class(doc1)
# Parse the local book.xml into an R-level XML document tree.
doc <- xmlTreeParse("C://Users/jamey/Documents/IS 607/book.xml")
class(doc)
## [1] "XMLDocument" "XMLAbstractDocument"
# The root node is the <book> element; its children are the individual
# fields (title, the authors, year, price).
root <- xmlRoot(doc)
child <- xmlChildren(root)
root
## <book category="WEB">
## <title lang="en">XQuery Kick Start</title>
## <author>James McGovern</author>
## <author>Per Bothner</author>
## <author>Kurt Cagle</author>
## <author>James Linn</author>
## <author>Vaidyanathan Nagarajan</author>
## <year>2003</year>
## <price>49.99</price>
## </book>
child
## $title
## <title lang="en">XQuery Kick Start</title>
##
## $author
## <author>James McGovern</author>
##
## $author
## <author>Per Bothner</author>
##
## $author
## <author>Kurt Cagle</author>
##
## $author
## <author>James Linn</author>
##
## $author
## <author>Vaidyanathan Nagarajan</author>
##
## $year
## <year>2003</year>
##
## $price
## <price>49.99</price>
##
## attr(,"class")
## [1] "XMLNodeList"
# Extract the text content of every child node as a named character vector.
# vapply() pins the result to exactly one string per node, so the return type
# cannot silently change (as it can with sapply()) when the node list is
# empty or irregular.
vapply(child, xmlValue, character(1))
## title author author
## "XQuery Kick Start" "James McGovern" "Per Bothner"
## author author author
## "Kurt Cagle" "James Linn" "Vaidyanathan Nagarajan"
## year price
## "2003" "49.99"
# Read the local book.json line by line, then parse it into an R list.
book2 <- "C://Users/jamey/Documents/IS 607/book.json"
bookjason <- readLines(book2)
str(bookjason)
## chr [1:3] "" ...
# Both RJSONIO and jsonlite are attached and each exports fromJSON(), so a
# bare call depends on the order of the library() calls. Name the jsonlite
# version explicitly, and collapse the lines into a single JSON string so the
# parser receives one document rather than a multi-element vector.
readbook2 <- jsonlite::fromJSON(paste(bookjason, collapse = "\n"))
readbook2
## $Title
## [1] " Everyday Italian"
##
## $Author
## [1] "Giada De Laurentiis"
##
## $Year
## [1] "2005"
##
## $Price
## [1] "$30.00"