#bookChilds <- xmlChildren(root)
library(XML)
## Warning: package 'XML' was built under R version 3.3.1
library(jsonlite)
library(stringr)
#' Read a flat, record-oriented XML file into a data frame.
#'
#' Every child of the document root is treated as one record (row). The child
#' elements of a record supply the column values, and the element names of the
#' first record supply the column names.
#'
#' @param filename Input passed straight to `xmlParse()` (typically a path).
#' @return A data.frame of character columns with one row per record and row
#'   names 1..n; an empty data.frame when the root has no children.
getDfFromXML <- function(filename) {
  xml <- xmlParse(filename)
  root <- xmlRoot(xml)
  nRecords <- xmlSize(root)
  if (nRecords == 0) {
    return(data.frame())
  }
  # one named character vector of field values per record; seq_len() (rather
  # than 2:n) keeps a single-record document from indexing a missing child
  records <- lapply(
    seq_len(nRecords),
    function(i) xmlSApply(root[[i]], xmlValue)
  )
  # rbind() silently recycles short vectors, so reject ragged records up front
  if (length(unique(lengths(records))) > 1) {
    stop("XML records do not all have the same number of fields",
         call. = FALSE)
  }
  # stack the records as rows directly: no column-wise cbind() followed by a
  # transpose, which also produced a "row.names duplicated" warning because
  # every appended column carried the same auto-generated name
  dfXML <- data.frame(do.call(rbind, records), stringsAsFactors = FALSE)
  # plain numeric row names instead of auto-generated ones
  rownames(dfXML) <- seq_len(nrow(dfXML))
  dfXML
}
#' Read an HTML table into a data frame.
#'
#' The `<th>` cells supply the column names; the i-th `<td>` of every row
#' supplies the values of column i.
#'
#' NOTE(review): the XPath expressions start with `//table`, so they match
#' every table in the document. The result is only meaningful when the page
#' holds a single table -- confirm for other inputs.
#'
#' @param filename Input passed straight to `htmlParse()` (typically a path).
#' @return A data.frame of character columns named after the header cells; an
#'   empty data.frame when no header cells are found.
getDfFromHTML <- function(filename) {
  h <- htmlParse(filename)
  root <- xmlRoot(h)
  # header row, used both as column names and as the column count
  header <- xpathSApply(root, "//table/tr/th", xmlValue)
  if (length(header) == 0) {
    return(data.frame())
  }
  # pull one column per header cell; the column count comes from the header,
  # not from the number of <tr> rows, which only matches by coincidence
  columns <- lapply(seq_along(header), function(i) {
    xpathSApply(root, str_c("//table/tr/td[", i, "]"), xmlValue)
  })
  if (length(unique(lengths(columns))) > 1) {
    stop("HTML table rows do not all have the same number of cells",
         call. = FALSE)
  }
  # build the frame in one step so every column is character, not factor
  dfH <- data.frame(columns, stringsAsFactors = FALSE)
  colnames(dfH) <- header
  dfH
}
#' Read a JSON file into a data frame.
#'
#' The parsed JSON is flattened with `data.frame()`, and the
#' "Uncoventional_Literature." prefix is then stripped from the column names.
#'
#' @param filename Input passed straight to `fromJSON()` (typically a path).
#' @return A data.frame built from the parsed JSON, with the prefix removed
#'   from its column names.
getDfFromJSON <- function(filename) {
  # read the file the caller asked for (previously hard-coded to "books.json")
  js <- fromJSON(filename)
  dfJS <- data.frame(js, stringsAsFactors = FALSE)
  # fixed() matches the prefix literally; as a regex the trailing "." would
  # match any character
  colnames(dfJS) <- str_replace(
    colnames(dfJS),
    fixed("Uncoventional_Literature."),
    ""
  )
  dfJS
}
# load the book data from each of the three source formats
dfBooksXML <- getDfFromXML("books.xml")
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 3,4,5 --> row.names NOT used
dfBooksHTML <- getDfFromHTML("books.html")
dfBooksJSON <- getDfFromJSON("books.json")
# preview the XML-derived data frame (recorded output follows)
head(dfBooksXML)
## Author
## 1 Neil Gaiman, Terry Pratchett
## 2 Douglas Adams
## 3 Philip K. Dick
## 4 John Kennedy Toole
## 5 Karen Russell
## title copyright
## 1 Good Omens, The Nice and Accurate Prophecies of Agnes Nutter 1990
## 2 The Long Dark Tea Time of the Soul 1988
## 3 Do Androids Dream of Electric Sheep 1968
## 4 A Confederacy of Dunces 1980
## 5 Swamplandia 2011
## odd_character
## 1 Anathema Device
## 2 Dirk Gently
## 3 Buster Friendly
## 4 Betty Bumper
## 5 The Bird Man
## quote
## 1 "It's Tchaikovsky's 'Another One Bites the Dust'," said Crowley, closing his eyes as they went through Slough. To while away the time as they crossed the sleeping Chilterns, they also listened to William Byrd's "We Are the Champions" and Beethoven's "I Want To Break Free." Neither were as good as Vaughan Williams's "Fat-Bottomed Girls."
## 2 I may not have gone where I intended to go, but I think I have ended up where I needed to be.
## 3 You will be required to do wrong no matter where you go. It is the basic condition of life, to be required to violate your own identity. At some time, every creature which lives must do so. It is the ultimate shadow, the defeat of creation; this is the curse at work, the curse that feeds on all life. Everywhere in the universe.
## 4 You can always tell employees of the government by the total vacancy which occupies the space where most other people have faces.
## 5 I came to hate the complainers, with their dry and crumbly lipsticks and their wrinkled rage and their stupid, flaccid, old-people sun hats with brims the breadth of Saturn's rings.
## award
## 1 Locus and World Fantasy Nominee for Best Novel
## 2 Alas, none
## 3 Nebula Award Nominee
## 4 Pulitzer Prize for Fiction
## 5 One of Three Nominees for Pulitzer Prize for Fiction 2012
# preview the HTML-derived data frame; its column names come from the table's
# header cells (recorded output follows)
head(dfBooksHTML)
## Author(s)
## 1 Neil Gaiman, Terry Pratchett
## 2 Douglas Adams
## 3 Philip K. Dick
## 4 John Kennedy Toole
## 5 Karen Russell
## Title
## 1 Good Omens, The Nice and Accurate Prophecies of Agnes Nutter
## 2 The Long Dark Tea Time of the Soul
## 3 Do Androids Dream of Electric Sheep
## 4 A Confederacy of Dunces
## 5 Swamplandia
## Copyright Date Oddest Character Name
## 1 1990 Anathema Device
## 2 1988 Dirk Gently
## 3 1968 Buster Friendly
## 4 1980 Betty Bumper
## 5 2011 The Bird Man
## Oddest Quote
## 1 "It's Tchaikovsky's 'Another One Bites the Dust'," said Crowley, closing his eyes as they went through Slough. To while away the time as they crossed the sleeping Chilterns, they also listened to William Byrd's "We Are the Champions" and Beethoven's "I Want To Break Free." Neither were as good as Vaughan Williams's "Fat-Bottomed Girls."
## 2 I may not have gone where I intended to go, but I think I have ended up where I needed to be.
## 3 You will be required to do wrong no matter where you go. It is the basic condition of life, to be required to violate your own identity. At some time, every creature which lives must do so. It is the ultimate shadow, the defeat of creation; this is the curse at work, the curse that feeds on all life. Everywhere in the universe.
## 4 You can always tell employees of the government by the total vacancy which occupies the space where most other people have faces.
## 5 I came to hate the complainers, with their dry and crumbly lipsticks and their wrinkled rage and their stupid, flaccid, old-people sun hats with brims the breadth of Saturn's rings.
## Notable Award
## 1 Locus and World Fantasy Nominee for Best Novel
## 2 Alas, none
## 3 Nebula Award Nominee
## 4 Pulitzer Prize for Fiction
## 5 One of Three Nominees for Pulitzer Prize for Fiction 2012
# preview the JSON-derived data frame (recorded output follows)
# NOTE(review): in the recorded output, row 1's Author and quote differ from
# the XML/HTML versions -- presumably a difference in books.json itself; verify
head(dfBooksJSON)
## Author
## 1 Terry Pratchett
## 2 Douglas Adams
## 3 Philip K. Dick
## 4 John Kennedy Toole
## 5 Karen Russell
## title copyright
## 1 Good Omens, The Nice and Accurate Prophecies of Agnes Nutter 1990
## 2 The Long Dark Tea Time of the Soul 1988
## 3 Do Androids Dream of Electric Sheep 1968
## 4 A Confederacy of Dunces 1980
## 5 Swamplandia 2011
## odd_character
## 1 Anathema Device
## 2 Dirk Gently
## 3 Buster Friendly
## 4 Betty Bumper
## 5 The Bird Man
## quote
## 1 It's Tchaikovsky's 'Another One Bites the Dust'," said Crowley, closing his eyes as they went through Slough. To while away the time as they crossed the sleeping Chilterns, they also listened to William Byrd's "We Are the Champions" and Beethoven's "I Want To Break Free." Neither were as good as Vaughan Williams's "Fat-Bottomed Girls.
## 2 I may not have gone where I intended to go, but I think I have ended up where I needed to be.
## 3 You will be required to do wrong no matter where you go. It is the basic condition of life, to be required to violate your own identity. At some time, every creature which lives must do so. It is the ultimate shadow, the defeat of creation; this is the curse at work, the curse that feeds on all life. Everywhere in the universe.
## 4 You can always tell employees of the government by the total vacancy which occupies the space where most other people have faces.
## 5 I came to hate the complainers, with their dry and crumbly lipsticks and their wrinkled rage and their stupid, flaccid, old-people sun hats with brims the breadth of Saturn's rings.
## award
## 1 Locus and World Fantasy Nominee for Best Novel
## 2 Alas, none
## 3 Nebula Award Nominee
## 4 Pulitzer Prize for Fiction
## 5 One of Three Nominees for Pulitzer Prize for Fiction 2012