library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(XML)
library(rvest)
##
## Attaching package: 'rvest'
##
## The following object is masked from 'package:readr':
##
## guess_encoding
library(RCurl)
##
## Attaching package: 'RCurl'
##
## The following object is masked from 'package:tidyr':
##
## complete
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
library(RJSONIO)
##
## Attaching package: 'RJSONIO'
##
## The following objects are masked from 'package:jsonlite':
##
## fromJSON, toJSON
library(rjson)
##
## Attaching package: 'rjson'
##
## The following objects are masked from 'package:RJSONIO':
##
## fromJSON, toJSON
##
## The following objects are masked from 'package:jsonlite':
##
## fromJSON, toJSON
Title | Author | Publisher | Year | Edition | ISBN |
---|---|---|---|---|---|
R Graphics Cookbook | Winston Chang | O’Reilly Media Inc | 2019 | 2nd | 978-1-4919-7860-3 |
R for Everyone | Jared P. Lander | Addison-Wesley Professional | 2014 | 2nd | 978-0-1345-4692-6 |
Data Science for Business | Foster Provost, Tom Fawcett | O’Reilly Media Inc. | 2013 | 1st | 978-1-4493-6132-7 |
<Book ID = "1">
<Title>R Graphics Cookbook</Title>
<Author>Winston Chang</Author>
<Publisher>O'Reilly Media Inc</Publisher>
<Year>2019</Year>
<Edition>2nd</Edition>
<ISBN>078-1-4919-7860-3</ISBN>
</Book>
<Book ID = "2">
<Title>R for Everyonek</Title>
<Author>Jared P. Lander</Author>
<Publisher>Addison-Wesley Professional</Publisher>
<Year>2014</Year>
<Edition>2nd</Edition>
<ISBN>978-0-1345-4692-6</ISBN>
</Book>
<Book ID = "3">
<Title>Data Science for Business</Title>
<Author>Foster Provost, Tom Fawcett</Author>
<Publisher>O'Reilly Media Inc.</Publisher>
<Year>2013</Year>
<Edition>1st</Edition>
<ISBN>978-1-4493-6132-7</ISBN>
</Book>
{"My_Books" :[
{ "Title" : "R Graphics Cookbook", "Author" : "Winston Chang", "Publisher" : "O'Reilly Media Inc", "Year" : "2019", "Edition" : "2nd", "ISBN" : "978-1-4919-7860-3" },
{ "Title" : "R for Everyone", "Author" : "Jared P. Lander", "Publisher" : "Addison-Wesley Professional", "Year" : "2014", "Edition" : "2nd", "ISBN" : "978-0-1345-4692-6" },
{ "Title" : "Data Science for Business", "Authors" : ["Foster Provost", "Tom Fawcett"], "Publisher" : "O'Reilly Media Inc.", "Year" : "2013", "Edition" : "1st", "ISBN" : "978-1-4493-6132-7" }
] }
# Download the HTML version of the book list as text. Despite its name,
# `url` holds the page contents returned by getURL(), not the address.
url <- getURL(
  "https://raw.githubusercontent.com/enidroman/data_607_data_acquisition_and_management/main/books.html"
)

# Parse the downloaded markup, convert its tables, and keep the first one.
# header = NA leaves header detection to html_table(); trim = TRUE strips
# surrounding whitespace from the cells.
df_HTML <- html_table(
  read_html(url, encoding = "UTF-8"),
  header = NA,
  trim = TRUE
)[[1]]
df_HTML
## # A tibble: 3 × 6
## Title Author Publi…¹ Year Edition ISBN
## <chr> <chr> <chr> <int> <chr> <chr>
## 1 R Graphics Cookbook Winston Chang O'Reil… 2019 2nd 978-…
## 2 R for Everyone Jared P. Lander Addiso… 2014 2nd 978-…
## 3 Data Science for Business Foster Provost, Tom Faw… O'Reil… 2013 1st 978-…
## # … with abbreviated variable name ¹Publisher
# Download the XML version of the book list as text; as in the HTML step,
# `url` ends up holding the document contents rather than the address.
url <- getURL(
  "https://raw.githubusercontent.com/enidroman/data_607_data_acquisition_and_management/main/books.xml"
)

# Parse the text, take the document's root node, and flatten its child
# records into a data frame, keeping strings as character (not factor).
df_XML <- xmlToDataFrame(
  xmlRoot(xmlParse(url)),
  stringsAsFactors = FALSE
)
df_XML
## Title Author
## 1 R Graphics Cookbook Winston Chang
## 2 R for Everyonek Jared P. Lander
## 3 Data Science for Business Foster Provost, Tom Fawcett
## Publisher Year Edition ISBN
## 1 O'Reilly Media Inc 2019 2nd 078-1-4919-7860-3
## 2 Addison-Wesley Professional 2014 2nd 978-0-1345-4692-6
## 3 O'Reilly Media Inc. 2013 1st 978-1-4493-6132-7
# List every difference between the HTML-derived and XML-derived tables.
all.equal(target = df_HTML, current = df_XML)
## [1] "Attributes: < Component \"class\": Lengths (3, 1) differ (string compare on first 1) >"
## [2] "Attributes: < Component \"class\": 1 string mismatch >"
## [3] "Component \"Title\": 1 string mismatch"
## [4] "Component \"Year\": Modes: numeric, character"
## [5] "Component \"Year\": target is numeric, current is character"
## [6] "Component \"ISBN\": 1 string mismatch"
# Compare only the Year column, converting the XML side (read as character)
# to integer so both sides have the same type.
all.equal(
  target = df_HTML[["Year"]],
  current = as.integer(df_XML[["Year"]])
)
## [1] TRUE