Week7

Author

Radzhana Rabdanova

Introduction

In this assignment, we load the same book data from two different formats (HTML and JSON) and compare the resulting data frames in R.

# LOAD LIBRARIES
library(rvest)
library(jsonlite)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# READ HTML
# Parse the page once; html_table() returns a list of tables, of which only
# the first is kept.
html_raw <- read_html("books.html")

# Store `year` as an integer so both sources use the same column type.
df_html <- html_table(html_raw)[[1]] %>%
  mutate(year = as.integer(year))

knitr::kable(df_html, caption = "Data Frame from HTML")
Data Frame from HTML
title authors year
The Notebook Nicholas Sparks 1996
Pride and Prejudice Jane Austen 1813
Wild Roses Nora Roberts 2004
# READ JSON
# Parse the JSON file into a data frame.
df_json <- fromJSON("books.json")

# If `authors` was parsed as a list column (presumably one array of names per
# book -- confirm against books.json), flatten each entry into a single
# comma-separated string. vapply() with FUN.VALUE = character(1) guarantees a
# character vector with one string per row, whereas sapply() can silently
# return a list (e.g. for empty input).
if (is.list(df_json[["authors"]])) {
  df_json[["authors"]] <- vapply(
    df_json[["authors"]],
    paste,
    FUN.VALUE = character(1),
    collapse = ", "
  )
}

# Store `year` as an integer so both sources use the same column type.
df_json <- df_json %>%
  mutate(year = as.integer(year))

knitr::kable(df_json, caption = "Data Frame from JSON")
Data Frame from JSON
title authors year
The Notebook Nicholas Sparks 1996
Pride and Prejudice Jane Austen 1813
Wild Roses Nora Roberts 2004
# Comparison tables
# all.equal() returns TRUE when the two objects match; otherwise it returns a
# character vector describing each difference (never FALSE), which is why the
# result is tested with isTRUE(). It compares attributes such as class as
# well as the cell values.
identical_check <- all.equal(df_html, df_json)

print(
  paste(
    "Are the data frames identical?",
    if (isTRUE(identical_check)) {
      "Yes!"
    } else {
      "No, there are small differences."
    }
  )
)
[1] "Are the data frames identical? No, there are small differences."
# Show the raw all.equal() result: TRUE, or the list of reported differences.
identical_check
[1] "Attributes: < Component \"class\": Lengths (3, 1) differ (string compare on first 1) >"
[2] "Attributes: < Component \"class\": 1 string mismatch >"