Working with HTML and JSON

Load Libraries

library(rvest)
library(jsonlite)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Load HTML Data

# Parse the local HTML file into a document object.
html_data <- read_html("books.html")

# Extract every <table> in the document; html_table() returns a list with one
# element per table. The `fill` argument is deprecated since rvest 1.0.0
# (short rows are always padded with NA), so it is no longer passed.
books_html <- html_data |>
  html_table()

# Keep only the first table found in the page.
books_html <- books_html[[1]]

books_html
# A tibble: 3 × 5
  Title                                          Authors  Year Publisher    ISBN
  <chr>                                          <chr>   <int> <chr>       <dbl>
1 Python for Data Analysis                       Wes Mc…  2022 O'Reilly  9.78e12
2 Hands-On Machine Learning with Scikit-Learn, … Aureli…  2022 O'Reilly  9.78e12
3 An Introduction to Statistical Learning        Gareth…  2021 Springer  9.78e12

Load JSON Data

# Read the JSON file; the result prints below as a plain data frame with one
# row per book.
books_json <- jsonlite::fromJSON(txt = "books.json")

books_json
                                                            title
1                                        Python for Data Analysis
2 Hands-On Machine Learning with Scikit-Learn, Keras & TensorFlow
3                         An Introduction to Statistical Learning
                                                         authors year publisher
1                                                   Wes McKinney 2022  O'Reilly
2                                                 Aurelien Geron 2022  O'Reilly
3 Gareth James, Daniela Witten, Trevor Hastie, Robert Tibshirani 2021  Springer
           isbn
1 9781098104030
2 9781098125974
3 9781071614174

Compare the Data Frames

# Strict comparison: identical() requires the same class, column names and
# column types. The HTML table is a tibble with capitalised names (Title, ...)
# while the JSON result is a plain data frame with lower-case names (title, ...).
identical(x = books_html, y = books_json)
[1] FALSE