#Install Packages
#install.packages("XML")
#install.packages("jsonlite")
#Import Libraries
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RCurl)
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
library(XML)
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(methods)
library(dplyr)
# Read HTML table ----
# Download the raw HTML of the books page as text.
HTML_File <- getURL("https://raw.githubusercontent.com/letisalba/Data607_Assignment_Week7/main/Books.html")
# Extract the table(s) from the HTML and coerce the result to a data frame.
# The option is spelled `stringsAsFactors`; the previous spelling
# (`stringAsFactors`) is not a recognised argument, so the request to keep
# text columns as character was never actually applied.
books_html <- as.data.frame(readHTMLTable(HTML_File, stringsAsFactors = FALSE))
# NOTE(review): the resulting column names carry a "NULL." prefix (e.g.
# NULL.Title). Presumably readHTMLTable() returns a list holding one unnamed
# table; selecting that table explicitly would give clean names, but it would
# rename the columns, so confirm before changing.
books_html
## NULL.Title NULL.Authors NULL.Type
## 1 The Talisman Stephen King, Peter Straub Paperback
## 2 And Then There Were None Agatha Christie Mass Market Paperback
## 3 Night Shift Stephen King Mass Market Paperback
## NULL.Publisher NULL.ISBN.13 NULL.Price
## 1 Gallery Books 978-1501192272 13.50
## 2 William Morrow 978-0062073488 7.99
## 3 Anchor 978-0307743640 8.99
# Fetch the XML source as text ----
XML_File <- getURL(
  url = "https://raw.githubusercontent.com/letisalba/Data607_Assignment_Week7/main/Books2.xml"
)
# Build the XML document from the downloaded text
books_xml <- xmlParse(file = XML_File)
# Convert the parsed document into a data frame and display it
books_xml2 <- xmlToDataFrame(doc = books_xml)
books_xml2
## Title Authors Type
## 1 The Talisman Stephen King, Peter Straub Paperback
## 2 And Then There Were None Agatha Christie Mass Market Paperback
## 3 Night Shift Stephen King Mass Market Paperback
## Publisher ISBN-13 Price
## 1 Gallery Books 978-1501192272 13.50
## 2 William Morrow 978-0062073488 7.99
## 3 Anchor 978-0307743640 8.99
# Download and parse the JSON document ----
JSON_File <- fromJSON(
  txt = "https://raw.githubusercontent.com/letisalba/Data607_Assignment_Week7/main/Books2.json"
)
# Coerce the parsed JSON to a data frame and display it
books_json <- as.data.frame(x = JSON_File)
books_json
## Books.Title Books.Author Books.Type
## 1 The Talisman Stephen King, Peter Straub Paperback
## 2 And Then There Were None Agatha Christie Mass Market Paperback
## 3 Night Shift Stephen King Mass Market Paperback
## Books.Publisher Books.ISBN.13 Books.Price
## 1 Gallery Books 978-1501192272 13.50
## 2 William Morrow 978-0062073488 7.99
## 3 Anchor 978-0307743640 8.99
# Compact column-by-column overview of the HTML-derived data frame
dplyr::glimpse(books_html)
## Rows: 3
## Columns: 6
## $ NULL.Title <chr> "The Talisman", "And Then There Were None", "Night Shif…
## $ NULL.Authors <chr> "Stephen King, Peter Straub", "Agatha Christie", "Steph…
## $ NULL.Type <chr> "Paperback", "Mass Market Paperback", "Mass Market Pape…
## $ NULL.Publisher <chr> "Gallery Books", "William Morrow", "Anchor"
## $ NULL.ISBN.13 <chr> "978-1501192272", "978-0062073488", "978-0307743640"
## $ NULL.Price <chr> "13.50", "7.99", "8.99"
# Compact column-by-column overview of the XML-derived data frame
dplyr::glimpse(books_xml2)
## Rows: 3
## Columns: 6
## $ Title <chr> "The Talisman", "And Then There Were None", "Night Shift"
## $ Authors <chr> "Stephen King, Peter Straub", "Agatha Christie", "Stephen Ki…
## $ Type <chr> "Paperback", "Mass Market Paperback", "Mass Market Paperback"
## $ Publisher <chr> "Gallery Books", "William Morrow", "Anchor"
## $ `ISBN-13` <chr> "978-1501192272", "978-0062073488", "978-0307743640"
## $ Price <chr> "13.50", "7.99", "8.99"
# Compact column-by-column overview of the JSON-derived data frame
dplyr::glimpse(books_json)
## Rows: 3
## Columns: 6
## $ Books.Title <chr> "The Talisman", "And Then There Were None", "Night Shi…
## $ Books.Author <chr> "Stephen King, Peter Straub", "Agatha Christie", "Step…
## $ Books.Type <chr> "Paperback", "Mass Market Paperback", "Mass Market Pap…
## $ Books.Publisher <chr> "Gallery Books", "William Morrow", "Anchor"
## $ Books.ISBN.13 <chr> "978-1501192272", "978-0062073488", "978-0307743640"
## $ Books.Price <dbl> 13.50, 7.99, 8.99