library("XML")
library("methods")
library("rjson")
library("httr")
library("htmltab")
library("RCurl")

Introduction

The purpose of this assignment is to become familiar with the structure of JSON, Extensible Markup Language (XML), and HTML, and with how each format stores objects and their attributes. We created a small dataset of books with attributes such as title, author, and publication year, and wrote a separate file in each format to represent these books and their attributes. These files were then read into R using packages designed for each format and transformed into tidy data frames for further use.

# Fetch the JSON version of the book list (hosted on GitHub) and parse it.
books_json <- rjson::fromJSON(
  file = "https://raw.githubusercontent.com/cassie-boylan/DATA-607/main/books_json_version.json"
)

# Coerce the parsed JSON object to a data frame and display the result.
books_json_data_frame <- as.data.frame(books_json)
print(books_json_data_frame)
##                           Title                            Author Rating
## 1                The Trespasser                       Tana French   4.41
## 2                  Frankenstein                      Mary Shelley   4.53
## 3                      Uprooted                       Nadia Novik   4.07
## 4                       Vicious                       V.E. Schwab   4.20
## 5             Missing, Presumed                     Susie Steiner   3.49
## 6 Stay Sexy & Dont Get Murdered Karen Kilgariff,Georgia Hardstark   4.09
##      Genre Year       Publisher
## 1  Mystery 2016         Penguin
## 2 Classics 1818 Signet Classics
## 3  Fantasy 2015         Del Ray
## 4  Fantasy 2013       Tor Books
## 5  Mystery 2016    Random House
## 6   Memoir 2019     Forge Books
# Parse the XML version of the book list directly into a data frame and
# display the result.
# NOTE(review): "booksv2.xml" is a relative path, so this step depends on the
# current working directory, whereas the JSON and HTML versions are read from
# GitHub URLs -- confirm the file is available locally before running.
books_xml_dataframe <- XML::xmlToDataFrame("booksv2.xml")
print(books_xml_dataframe)
##                               title                             author rating
## 1                   The Trespasser                         Tana French   4.41
## 2                     Frankenstein                       Mary Shelley    4.53
## 3                         Uprooted                         Naomi Novik   4.07
## 4                          Vicious                         V.E. Schwab   4.20
## 5                 Missing,Presumed                       Susie Steiner   3.49
## 6  Stay Sexy and Dont Get Murdered  Karen Kilgariff, Georgia Hardstark   4.09
##      genre year       publisher
## 1  Mystery 2016         Penguin
## 2 Classics 1818 Signet Classics
## 3  Fantasy 2015         Del Ray
## 4  Fantasy 2013         Del Ray
## 5  Mystery 2016    Random House
## 6   Memoir 2019     Forge Books
# Read the HTML version of the book list (hosted on GitHub) into a data frame;
# `which = 1` picks the table to extract from the page.
books_html <- htmltab::htmltab(
  doc = "https://raw.githubusercontent.com/cassie-boylan/DATA-607/main/books.html",
  which = 1
)

# Display the extracted table.
print(books_html)
##                            title                                  author
## 2                 The Trespasser                             Tana French
## 3                   Frankenstein                            Mary Shelley
## 4                       Uprooted                             Naomi Novik
## 5                        Vicious                             V.E. Schwab
## 6              Missing, Presumed                           Susie Steiner
## 7 Stay Sexy & Don't Get Murdered Karen Kilgariff       Georgia Hardstark
##      genre rating year       publisher
## 2  Mystery   4.41 2016         Penguin
## 3 Classics   4.53 1818 Signet Classics
## 4  Fantasy   4.07 2015         Del Ray
## 5  Fantasy   4.20 2013       Tor Books
## 6  Mystery   3.49 2016    Random House
## 7   Memoir   4.09 2019     Forge Books