Install the XML package
# Install the XML package only when it is not already available.
# The Omegahat repository (http://www.omegahat.net/RSXML/) is not used any
# more: the warnings recorded below show that its package index could not be
# reached, so the install is pointed at CRAN instead.
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages(
    "XML",
    repos = "https://cloud.r-project.org",
    dependencies = TRUE
  )
}
## Installing package into 'C:/Users/bkl2001/Documents/R/win-library/3.4'
## (as 'lib' is unspecified)
## Warning: unable to access index for repository http://www.omegahat.net/RSXML/src/contrib:
## cannot open URL 'http://www.omegahat.net/RSXML/src/contrib/PACKAGES'
## Warning: package 'XML' is not available (for R version 3.4.1)
## Warning: unable to access index for repository http://www.omegahat.net/RSXML/bin/windows/contrib/3.4:
## cannot open URL 'http://www.omegahat.net/RSXML/bin/windows/contrib/3.4/PACKAGES'
#load xml
library(XML)
# Read books.xml from disk into a parsed XML document object.
books_xml <- xmlParse(
  file = "C:\\Users\\bkl2001\\Documents\\Personal\\CUNY\\Classes\\DATA 607 Data Acquisition and Management\\Week 6\\books.xml"
)
# Show the class vector of the parsed document.
class(books_xml)
## [1] "XMLInternalDocument" "XMLAbstractDocument"
# Fetch the top-level element of the parsed document.
root <- xmlRoot(books_xml)
# Show the class vector of that root node.
class(root)
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"
# Auto-print the root node, which shows the <books> element together with
# every <book> entry nested inside it.
root
## <books>
## <book>
## <title>Lies My Teacher Told Me</title>
## <author>James w. Loewen</author>
## <author2/>
## <publisher>Touchstone</publisher>
## <year>1996</year>
## <pages>384</pages>
## <isbn>0684818868</isbn>
## </book>
## <book>
## <title>From Slavery to Freedom: A History of African Americans</title>
## <author>John Hope Franklin</author>
## <author2>Evelyn Brooks Higginbotham</author2>
## <publisher>McGraw-Hill</publisher>
## <year>2010</year>
## <pages>736</pages>
## <isbn>0072963786</isbn>
## </book>
## <book>
## <title>From the Browder File: 22 Essays on the African American Experience</title>
## <author>Anthony T. Browder</author>
## <author2/>
## <publisher>Inst of Karmic Guidance</publisher>
## <year>1989</year>
## <pages>157</pages>
## <isbn>0924944005</isbn>
## </book>
## </books>
# Collect the direct child nodes of the root element into a node list.
books_xml_child <- xmlChildren(root)
# Print the node list (one entry per <book>).
books_xml_child
## $book
## <book>
## <title>Lies My Teacher Told Me</title>
## <author>James w. Loewen</author>
## <author2/>
## <publisher>Touchstone</publisher>
## <year>1996</year>
## <pages>384</pages>
## <isbn>0684818868</isbn>
## </book>
##
## $book
## <book>
## <title>From Slavery to Freedom: A History of African Americans</title>
## <author>John Hope Franklin</author>
## <author2>Evelyn Brooks Higginbotham</author2>
## <publisher>McGraw-Hill</publisher>
## <year>2010</year>
## <pages>736</pages>
## <isbn>0072963786</isbn>
## </book>
##
## $book
## <book>
## <title>From the Browder File: 22 Essays on the African American Experience</title>
## <author>Anthony T. Browder</author>
## <author2/>
## <publisher>Inst of Karmic Guidance</publisher>
## <year>1989</year>
## <pages>157</pages>
## <isbn>0924944005</isbn>
## </book>
##
## attr(,"class")
## [1] "XMLInternalNodeList" "XMLNodeList"
# Extract the first child node (the first <book> entry).
lies <- books_xml_child[[1L]]
# Print that node.
lies
## <book>
## <title>Lies My Teacher Told Me</title>
## <author>James w. Loewen</author>
## <author2/>
## <publisher>Touchstone</publisher>
## <year>1996</year>
## <pages>384</pages>
## <isbn>0684818868</isbn>
## </book>
# Extract the second child node (the second <book> entry).
slavery <- books_xml_child[[2L]]
# Print that node.
slavery
## <book>
## <title>From Slavery to Freedom: A History of African Americans</title>
## <author>John Hope Franklin</author>
## <author2>Evelyn Brooks Higginbotham</author2>
## <publisher>McGraw-Hill</publisher>
## <year>2010</year>
## <pages>736</pages>
## <isbn>0072963786</isbn>
## </book>
# Extract the third child node (the third <book> entry).
browder <- books_xml_child[[3L]]
# Print that node.
browder
## <book>
## <title>From the Browder File: 22 Essays on the African American Experience</title>
## <author>Anthony T. Browder</author>
## <author2/>
## <publisher>Inst of Karmic Guidance</publisher>
## <year>1989</year>
## <pages>157</pages>
## <isbn>0924944005</isbn>
## </book>
# Show the text content of each book node. xmlValue() returns the text of all
# of a node's descendant elements run together as one string; it does not
# return XML attributes. vapply() pins the result to exactly one string per
# node, so an unexpected node shape fails loudly instead of silently changing
# the return type the way sapply() would.
vapply(books_xml_child, xmlValue, FUN.VALUE = character(1))
## book
## "Lies My Teacher Told MeJames w. LoewenTouchstone19963840684818868"
## book
## "From Slavery to Freedom: A History of African AmericansJohn Hope FranklinEvelyn Brooks HigginbothamMcGraw-Hill20107360072963786"
## book
## "From the Browder File: 22 Essays on the African American ExperienceAnthony T. BrowderInst of Karmic Guidance19891570924944005"
# Convert the parsed XML document into a nested R list and print it.
# In the printed result, each book is a list of its child elements, and an
# empty element such as <author2/> shows up as NULL.
xml_data <- xmlToList(books_xml)
xml_data
## $book
## $book$title
## [1] "Lies My Teacher Told Me"
##
## $book$author
## [1] "James w. Loewen"
##
## $book$author2
## NULL
##
## $book$publisher
## [1] "Touchstone"
##
## $book$year
## [1] "1996"
##
## $book$pages
## [1] "384"
##
## $book$isbn
## [1] "0684818868"
##
##
## $book
## $book$title
## [1] "From Slavery to Freedom: A History of African Americans"
##
## $book$author
## [1] "John Hope Franklin"
##
## $book$author2
## [1] "Evelyn Brooks Higginbotham"
##
## $book$publisher
## [1] "McGraw-Hill"
##
## $book$year
## [1] "2010"
##
## $book$pages
## [1] "736"
##
## $book$isbn
## [1] "0072963786"
##
##
## $book
## $book$title
## [1] "From the Browder File: 22 Essays on the African American Experience"
##
## $book$author
## [1] "Anthony T. Browder"
##
## $book$author2
## NULL
##
## $book$publisher
## [1] "Inst of Karmic Guidance"
##
## $book$year
## [1] "1989"
##
## $book$pages
## [1] "157"
##
## $book$isbn
## [1] "0924944005"
# Flatten the parsed document into a data frame: one row per book and one
# column per child element.
xmldata <- xmlToDataFrame(books_xml)
# Print the data frame.
xmldata
## title
## 1 Lies My Teacher Told Me
## 2 From Slavery to Freedom: A History of African Americans
## 3 From the Browder File: 22 Essays on the African American Experience
## author author2 publisher
## 1 James w. Loewen Touchstone
## 2 John Hope Franklin Evelyn Brooks Higginbotham McGraw-Hill
## 3 Anthony T. Browder Inst of Karmic Guidance
## year pages isbn
## 1 1996 384 0684818868
## 2 2010 736 0072963786
## 3 1989 157 0924944005
# Read the table markup in books.html; the result is a list of data frames.
books_html <- readHTMLTable(
  "C:\\Users\\bkl2001\\Documents\\Personal\\CUNY\\Classes\\DATA 607 Data Acquisition and Management\\Week 6\\books.html"
)
# Print the parsed table list.
books_html
## $`NULL`
## Title
## 1 Lies My Teacher Told Me
## 2 From Slavery to Freedom: A History of African Americans
## 3 From the Browder File: 22 Essays on the African American Experience
## Author Author 2 Publisher
## 1 James W. Loewen Touchstone
## 2 John Hope Franklin Evelyn Brooks Higginbotham McGraw-Hill
## 3 Anthony T. Browder Inst of Karmic Guidance
## Year Pages ISBN
## 1 1996 384 0684818868
## 2 2010 736 0072963786
## 3 1989 157 0924944005
# Attach both JSON packages. RJSONIO and jsonlite each export fromJSON() and
# toJSON(); because jsonlite is attached last, its versions mask RJSONIO's.
# NOTE(review): call RJSONIO::fromJSON() / RJSONIO::toJSON() explicitly if the
# RJSONIO implementations are wanted, or drop whichever package is unused.
library(RJSONIO)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:RJSONIO':
##
## fromJSON, toJSON