Set up environment and load libraries
# Attach the packages used by the chunks below.
# The workspace is deliberately NOT cleared with rm(list = ls()): that call
# deletes the caller's objects but does not reset attached packages or
# options, so it gives no real isolation. Run the report in a fresh R session.
library(kableExtra)  # kable_styling()
library(dplyr)
library(class)       # NOTE(review): not used in the visible portion of the file
library(knitr)       # kable()
library(RCurl)       # getURL()
library(XML)         # htmlParse(), readHTMLTable(), xmlParse(), xmlToDataFrame()
library(jsonlite)    # fromJSON()
HTML
# Download the HTML page. Despite its name, `urlHTML` holds the downloaded
# markup as a character string, not the URL.
urlHTML <- getURL("https://raw.githubusercontent.com/stipton/CUNY-SPS/master/DATA%20607/Week%207/books.html")
# asText = TRUE states explicitly that `file` is markup to parse rather than a
# path or URL, instead of relying on the parser guessing from the content.
htmlParse(file = urlHTML, asText = TRUE)
## <!DOCTYPE html>
## <html>
## <head><title>Books!</title></head>
## <body>
## <h1>My Favorite Books</h1>
## <table style="width:100%">
## <tr>
## <th>Title</th>
## <th>Author1</th>
## <th>Author2</th>
## <th>Publisher</th>
## <th>Publication Date</th>
## <th>Number of Pages</th>
## </tr>
## <tr>
## <td>War and Peace</td>
## <td>Leo Tolstoy</td>
## <td></td>
## <td>Penguin Books</td>
## <td>2005</td>
## <td>1408</td>
## </tr>
## <tr>
## <td>The Talented Mr. Ripley</td>
## <td>Patricia Highsmith</td>
## <td></td>
## <td>1955</td>
## <td>Everyman's Library</td>
## <td>290</td>
## </tr>
## <tr>
## <td>The Whole World Was Watching</td>
## <td>Patrick Hinds</td>
## <td>Romaine Patterson</td>
## <td>Advocate Books</td>
## <td>2005</td>
## <td>286</td>
## </tr>
## </table>
## </body>
## </html>
##
# Extract every <table> in the downloaded markup; the result is a list with
# one element per table found.
tables <- readHTMLTable(doc = urlHTML)
# Show the extracted table(s).
print(tables)
## $`NULL`
## Title Author1 Author2
## 1 War and Peace Leo Tolstoy
## 2 The Talented Mr. Ripley Patricia Highsmith
## 3 The Whole World Was Watching Patrick Hinds Romaine Patterson
## Publisher Publication Date Number of Pages
## 1 Penguin Books 2005 1408
## 2 1955 Everyman's Library 290
## 3 Advocate Books 2005 286
JSON
# Read and parse the JSON document straight from its URL. Despite its name,
# `urlJSON` holds the parsed result, not the URL.
urlJSON <- fromJSON(
  txt = "https://raw.githubusercontent.com/stipton/CUNY-SPS/master/DATA%20607/Week%207/books.json"
)
# Render the first top-level element as a striped HTML table.
# `authors` can hold several names for one book, so such a cell is rendered
# as a c(...) vector.
kable_styling(
  kable(urlJSON[[1]], format = "html", caption = "JSON File"),
  bootstrap_options = "striped"
)
JSON File

| title | authors | publisher | publication_date | number_pages |
|---|---|---|---|---|
| War and Peace | Leo Tolstoy | Penguin Books | 2005 | 1408 |
| The Talented Mr. Ripley | Patricia Highsmith | Everyman's Library | 1955 | 290 |
| The Whole World Was Watching | c("Patrick Hinds", "Romaine Patterson") | Advocate Books | 2005 | 286 |
XML
# Download the XML document. Despite its name, `urlXML` holds the downloaded
# markup as a character string, not the URL.
urlXML <- getURL("https://raw.githubusercontent.com/stipton/CUNY-SPS/master/DATA%20607/Week%207/books.xml")
# asText = TRUE states explicitly that `file` is markup to parse rather than a
# path or URL, instead of relying on the parser guessing from the content.
x.parsed <- xmlParse(file = urlXML, asText = TRUE)
# Take the document's root node and show its element name.
x.root <- xmlRoot(x.parsed)
xmlName(x.root)
## [1] "books"
## [1] 3
## <title>War and Peace</title>
# Convert the child nodes of the root into a data frame.
x.df <- xmlToDataFrame(doc = x.root)
# Render the data frame as a striped HTML table.
kable_styling(
  kable(x.df, format = "html", caption = "XML File"),
  bootstrap_options = "striped"
)
XML File

| title | author | publisher | publication_date | number_pages |
|---|---|---|---|---|
| War and Peace | Leo Tolstoy | Penguin Books | 2005 | 1408 |
| The Talented Mr. Ripley | Patricia Highsmith | Everyman's Library | 1955 | 290 |
| The Whole World Was Watching | Patrick Hinds, Romaine Patterson | Advocate Books | 2005 | 286 |