Working with XML and JSON in R

Install the XML package

# Install XML only when it is not already available, so re-running the
# script does not reinstall the package every time.
# NOTE: the Omegahat repository (http://www.omegahat.net/RSXML/) used here
# previously could not be reached and the install failed with
# "package 'XML' is not available", so the package is fetched from CRAN.
if (!requireNamespace("XML", quietly = TRUE)) {
  install.packages(
    "XML",
    repos = "https://cloud.r-project.org",
    dependencies = TRUE
  )
}
# load xml
library(XML)

XML Content

# Parse books.xml from disk into an in-memory XML document
books_xml <- xmlParse(
  "C:\\Users\\bkl2001\\Documents\\Personal\\CUNY\\Classes\\DATA 607 Data Acquisition and Management\\Week 6\\books.xml"
)
# Inspect the class of the parsed document
class(books_xml)
## [1] "XMLInternalDocument" "XMLAbstractDocument"
# Grab the root node, which gives access to its child elements
root <- xmlRoot(books_xml)
# Inspect the class of the root node
class(root)
## [1] "XMLInternalElementNode" "XMLInternalNode"       
## [3] "XMLAbstractNode"
# Print the root node
root
## <books>
##   <book>
##     <title>Lies My Teacher Told Me</title>
##     <author>James w. Loewen</author>
##     <author2/>
##     <publisher>Touchstone</publisher>
##     <year>1996</year>
##     <pages>384</pages>
##     <isbn>0684818868</isbn>
##   </book>
##   <book>
##     <title>From Slavery to Freedom: A History of African Americans</title>
##     <author>John Hope Franklin</author>
##     <author2>Evelyn Brooks Higginbotham</author2>
##     <publisher>McGraw-Hill</publisher>
##     <year>2010</year>
##     <pages>736</pages>
##     <isbn>0072963786</isbn>
##   </book>
##   <book>
##     <title>From the Browder File: 22 Essays on the African American Experience</title>
##     <author>Anthony T. Browder</author>
##     <author2/>
##     <publisher>Inst of Karmic Guidance</publisher>
##     <year>1989</year>
##     <pages>157</pages>
##     <isbn>0924944005</isbn>
##   </book>
## </books>
# Collect the child nodes of the root (one node per <book>)
books_xml_child <- xmlChildren(root)
# Print the list of child nodes
books_xml_child
## $book
## <book>
##   <title>Lies My Teacher Told Me</title>
##   <author>James w. Loewen</author>
##   <author2/>
##   <publisher>Touchstone</publisher>
##   <year>1996</year>
##   <pages>384</pages>
##   <isbn>0684818868</isbn>
## </book> 
## 
## $book
## <book>
##   <title>From Slavery to Freedom: A History of African Americans</title>
##   <author>John Hope Franklin</author>
##   <author2>Evelyn Brooks Higginbotham</author2>
##   <publisher>McGraw-Hill</publisher>
##   <year>2010</year>
##   <pages>736</pages>
##   <isbn>0072963786</isbn>
## </book> 
## 
## $book
## <book>
##   <title>From the Browder File: 22 Essays on the African American Experience</title>
##   <author>Anthony T. Browder</author>
##   <author2/>
##   <publisher>Inst of Karmic Guidance</publisher>
##   <year>1989</year>
##   <pages>157</pages>
##   <isbn>0924944005</isbn>
## </book> 
## 
## attr(,"class")
## [1] "XMLInternalNodeList" "XMLNodeList"
# Extract the first <book> node from the child list
lies <- books_xml_child[[1]]
# Print the first book
lies
## <book>
##   <title>Lies My Teacher Told Me</title>
##   <author>James w. Loewen</author>
##   <author2/>
##   <publisher>Touchstone</publisher>
##   <year>1996</year>
##   <pages>384</pages>
##   <isbn>0684818868</isbn>
## </book>
# Extract the second <book> node from the child list
slavery <- books_xml_child[[2]]
# Print the second book
slavery
## <book>
##   <title>From Slavery to Freedom: A History of African Americans</title>
##   <author>John Hope Franklin</author>
##   <author2>Evelyn Brooks Higginbotham</author2>
##   <publisher>McGraw-Hill</publisher>
##   <year>2010</year>
##   <pages>736</pages>
##   <isbn>0072963786</isbn>
## </book>
# Extract the third <book> node from the child list
browder <- books_xml_child[[3]]
# Print the third book
browder
## <book>
##   <title>From the Browder File: 22 Essays on the African American Experience</title>
##   <author>Anthony T. Browder</author>
##   <author2/>
##   <publisher>Inst of Karmic Guidance</publisher>
##   <year>1989</year>
##   <pages>157</pages>
##   <isbn>0924944005</isbn>
## </book>
# Display the text content of each <book> node. xmlValue() returns the text
# of a node and all of its descendants pasted together (element values, not
# XML attributes), so each book comes back as one concatenated string.
# vapply() pins the result to exactly one character string per node.
vapply(books_xml_child, xmlValue, character(1))
##                                                                                                                              book 
##                                                               "Lies My Teacher Told MeJames w. LoewenTouchstone19963840684818868" 
##                                                                                                                              book 
## "From Slavery to Freedom: A History of African AmericansJohn Hope FranklinEvelyn Brooks HigginbothamMcGraw-Hill20107360072963786" 
##                                                                                                                              book 
##   "From the Browder File: 22 Essays on the African American ExperienceAnthony T. BrowderInst of Karmic Guidance19891570924944005"
# Convert the parsed document into a nested R list (one element per <book>);
# in the output below the empty <author2/> elements come back as NULL
xml_data <- xmlToList(books_xml)
# Print the nested list
xml_data
## $book
## $book$title
## [1] "Lies My Teacher Told Me"
## 
## $book$author
## [1] "James w. Loewen"
## 
## $book$author2
## NULL
## 
## $book$publisher
## [1] "Touchstone"
## 
## $book$year
## [1] "1996"
## 
## $book$pages
## [1] "384"
## 
## $book$isbn
## [1] "0684818868"
## 
## 
## $book
## $book$title
## [1] "From Slavery to Freedom: A History of African Americans"
## 
## $book$author
## [1] "John Hope Franklin"
## 
## $book$author2
## [1] "Evelyn Brooks Higginbotham"
## 
## $book$publisher
## [1] "McGraw-Hill"
## 
## $book$year
## [1] "2010"
## 
## $book$pages
## [1] "736"
## 
## $book$isbn
## [1] "0072963786"
## 
## 
## $book
## $book$title
## [1] "From the Browder File: 22 Essays on the African American Experience"
## 
## $book$author
## [1] "Anthony T. Browder"
## 
## $book$author2
## NULL
## 
## $book$publisher
## [1] "Inst of Karmic Guidance"
## 
## $book$year
## [1] "1989"
## 
## $book$pages
## [1] "157"
## 
## $book$isbn
## [1] "0924944005"
# Flatten the parsed document into a data frame with one row per <book>
xmldata <- xmlToDataFrame(books_xml)
# Print the data frame
xmldata
##                                                                 title
## 1                                             Lies My Teacher Told Me
## 2             From Slavery to Freedom: A History of African Americans
## 3 From the Browder File: 22 Essays on the African American Experience
##               author                    author2               publisher
## 1    James w. Loewen                                         Touchstone
## 2 John Hope Franklin Evelyn Brooks Higginbotham             McGraw-Hill
## 3 Anthony T. Browder                            Inst of Karmic Guidance
##   year pages       isbn
## 1 1996   384 0684818868
## 2 2010   736 0072963786
## 3 1989   157 0924944005

HTML Content

# Read the table(s) in books.html; the result is a list, and the single
# unnamed table in this file is shown under $`NULL` in the output
books_html <- readHTMLTable(
  "C:\\Users\\bkl2001\\Documents\\Personal\\CUNY\\Classes\\DATA 607 Data Acquisition and Management\\Week 6\\books.html"
)
# Print the parsed HTML table
books_html
## $`NULL`
##                                                                 Title
## 1                                             Lies My Teacher Told Me
## 2             From Slavery to Freedom: A History of African Americans
## 3 From the Browder File: 22 Essays on the African American Experience
##               Author                   Author 2               Publisher
## 1    James W. Loewen                                         Touchstone
## 2 John Hope Franklin Evelyn Brooks Higginbotham             McGraw-Hill
## 3 Anthony T. Browder                            Inst of Karmic Guidance
##   Year Pages       ISBN
## 1 1996   384 0684818868
## 2 2010   736 0072963786
## 3 1989   157 0924944005

JSON

# Attach RJSONIO and jsonlite. Both export fromJSON() and toJSON(); because
# jsonlite is attached last it masks the RJSONIO versions (see the message
# below), so unqualified calls use jsonlite. Write RJSONIO::fromJSON() /
# RJSONIO::toJSON() explicitly when the RJSONIO implementation is wanted.
library(RJSONIO)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:RJSONIO':
## 
##     fromJSON, toJSON