Load in Libraries

library(knitr)
## Warning: package 'knitr' was built under R version 3.4.4
library(rvest)
library(RCurl)
library(XML)
library(htmltab)
## Warning: package 'htmltab' was built under R version 3.4.4
library(kableExtra)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.4
## Warning: package 'ggplot2' was built under R version 3.4.4
library(dplyr)

Load in HTML File

# Address of the hosted HTML page that contains the book table.
url <- "https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/tidy_data_week4/Politicalbooks.html"
# Extract the table located at /html/body/table into a data frame.
my_books <- htmltab(which = "/html/body/table", doc = url)
## Neither <thead> nor <th> information found. Taking first table row for the header. If incorrect, specifiy header argument.
# Render the scraped table.
my_books %>%
    kable()
Book Author Publication Date Publisher
2 Understanding Power Noam Chomsky 2002 The New Press
3 Understanding Power Author B 2002 The New Press
4 Blackwater Jeremy Scahill 2008 Nation Books
5 Shock Doctrine Naomi Klein 2008 Picador

Load in XML File

# Download the hosted XML document and flatten it into one row per book.
fileURL <- "https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/tidy_data_week4/Politicalbooks2.xml"
# Fetch with base R so the server's TLS certificate is still verified; the
# earlier getURL(..., ssl.verifypeer = FALSE) call switched that check off.
# The lines are collapsed so books_xml stays a single character string.
books_xml <- paste(readLines(fileURL, warn = FALSE), collapse = "\n")
books_xml %>% 
    # asText = TRUE: the input is the document text itself, not a path or URL.
    xmlParse(., asText = TRUE, useInternalNodes = TRUE) %>% 
    xmlToList(.) %>%  
    # One data frame per top-level list element, row-bound together.
    plyr::ldply(., data.frame) %>% 
    # Drop the .id bookkeeping column before display.
    select(-.id) %>%
    # Copy Author.1 into Coauthor and drop the original, which leaves
    # Coauthor as the last column of the rendered table.
    mutate(Coauthor = Author.1) %>%
    select(-Author.1) %>% 
    kable(.)
Title Author Publication_Date Publisher Coauthor
Understanding Power Noam Chomsky 2002 The New Press Author B
Blackwater Jeremy Scahill 2008 Nation Books NA
The Shock Doctrine Naomi Klein 2008 Picador NA
#books_xml <- xmlParse(books_xml ,useInternal = TRUE)
#xL <- xmlToList(books_xml)
#kable(ldply(xL, data.frame))

Alternate example of loading in XML

# Parse a copy of the XML file stored on the local disk (file:// URL).
xml.url <- "file:///C:/Users/JN/Documents/No%20interenet/Politicalbooks.XML"
xmlfile <- xmlTreeParse(file = xml.url)
# Inspect what kind of object the parser returned.
class(xmlfile)
## [1] "XMLDocument"         "XMLAbstractDocument"
# From here on xmlfile refers to the document's root node.
xmlfile <- xmlRoot(xmlfile)
# NOTE(review): `[1:2]` subsets the value returned by print(), not the node
# itself, so the whole tree is printed and the expression then yields NULL.
# `print(xmlfile[1:2])` was probably intended -- confirm before changing.
print(xmlfile)[1:2]
## <Political_Books>
##  <Book>
##   <Title>Understanding Power</Title>
##   <Author>Noam Chomsky</Author>
##   <Publication_Date>2002</Publication_Date>
##   <Publisher>The New Press</Publisher>
##  </Book>
##  <Book>
##   <Title>Blackwater</Title>
##   <Author>Jeremy Scahill</Author>
##   <Publication_Date>2008</Publication_Date>
##   <Publisher>Nation Books</Publisher>
##  </Book>
##  <Book>
##   <Title>The Shock Doctrine</Title>
##   <Author>Naomi Klein</Author>
##   <Publication_Date>2008</Publication_Date>
##   <Publisher>Picador</Publisher>
##  </Book>
## </Political_Books>
## NULL
# Collect the text value of every child element of every node under the root.
# Assumes each <Book> has the same children, so the nested xmlSApply()
# simplifies to a character matrix with one column per book -- TODO confirm
# if the input file changes.
my_xml <- xmlSApply(xmlfile, function(x) xmlSApply(x, xmlValue))
# Transpose the matrix directly so each book becomes a row. The earlier
# round trip through the deprecated as_data_frame() was only used to
# transpose, and it stumbles on the repeated "Book" column names with
# current tibble releases.
my_xml_df <- t(my_xml)
# Keep the Book, Book1, Book2 row labels shown in the rendered table.
rownames(my_xml_df) <- make.unique(rownames(my_xml_df), sep = "")
colnames(my_xml_df) <- c("Book", "Author", "Publication Date", "Publisher")
kable(my_xml_df)
Book Author Publication Date Publisher
Book Understanding Power Noam Chomsky 2002 The New Press
Book1 Blackwater Jeremy Scahill 2008 Nation Books
Book2 The Shock Doctrine Naomi Klein 2008 Picador

Load in JSON File

library(curl)
## 
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
## 
##     parse_date
library(rjson)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON
## The following object is masked from 'package:purrr':
## 
##     flatten
# Address of the hosted JSON file describing the books.
json_file <- "https://raw.githubusercontent.com/justinherman42/Justin-Data-607/master/tidy_data_week4/Politicalbooks.json"
# Read the raw JSON text through a curl connection.
text <- readLines(curl(json_file))
# Parse the JSON text (flattening nested data frames) and render the result.
kable(jsonlite::fromJSON(text, flatten = TRUE))
Title Author Author_2 Publication_Date Publisher
Understanding Power Noam Chomsky Author B 2002 The New Press
Blackwater Jeremy Scahill NA 2008 Nation Books
The Shock Doctrine Naomi Klein NA 2008 Picador
# Print any warnings R has stored from the preceding calls.
warnings()
## NULL
#text
#dd  <-  as.data.frame(t(matrix(unlist(json_data), nrow=4)))
#colnames(dd) <- c("Book", "Author", "Publication Date","Publisher")
#kable(dd)