# Required packages (install first if missing): XML and rjson
# 1. HTML file: load the HTML table into R
library(XML)
# Read the first <table> of Books.html into a data frame.
# `which = 1` selects the first table in the document.
# `stringsAsFactors = FALSE` keeps text columns as character vectors, matching
# how the XML version of this table is loaded, regardless of the R/XML
# package default.
html_path <- "/Users/joycealdrich/Documents/SPS Data Science/Data 607/Assignment_5/Books.html"
books_html_table <- readHTMLTable(
  html_path,
  which = 1,
  stringsAsFactors = FALSE
)
head(books_html_table)
## Books Author_1 Author_2
## 1 The Silent Patient Alex Michaelides
## 2 Where the Crawdads Sing Delia Ownes
## 3 Mad Honey Jodi Picoult Jennifer Finney Boylan
## Publisher Pages ISBN
## 1 Celadon Books 336 9781250301703
## 2 Penguin Publishing Group 384 9780735219090
## 3 Random House Publishing Group 464 9780593597675
# 2. XML file: load the XML file into R and turn it into a data frame
library(XML)
# Parse Books.xml into an XML document, then flatten its records into a
# data frame, keeping text columns as character vectors rather than factors.
xml_path <- "/Users/joycealdrich/Documents/SPS Data Science/Data 607/Assignment_5/Books.xml"
xml_parsed <- xmlParse(file = xml_path)
books_xml_table <- xmlToDataFrame(
  xml_parsed,
  stringsAsFactors = FALSE
)
head(books_xml_table)
## Title Author_1 Author_2
## 1 The Silent Patient Alex Michaelides
## 2 Where the Crawdads Sing Delia Ownes
## 3 Mad Honey Jodi Picoult Jennifer Finney Boylan
## Publisher Pages ISBN
## 1 Celadon Books 336 9781250301703
## 2 Penguin Publishing Group 384 9780735219090
## 3 Random House Publishing Group 464 9780593597675
# 3. JSON file: load the JSON file into R
library(rjson)
# Parse Books.json with rjson and print the parsed structure: a named list
# holding one character vector per field (Books, Author_1, ..., ISBN).
json_path <- "/Users/joycealdrich/Documents/SPS Data Science/Data 607/Assignment_5/Books.json"
json_parsed <- fromJSON(file = json_path)
print(json_parsed)
## $Books
## [1] "The Silent Patient" "Where the Crawdads Sing"
## [3] "Mad Honey"
##
## $Author_1
## [1] "Alex Michaelides" "Delia Ownes" "Delia Ownes"
##
## $Author_2
## [1] "" "" "Jennifer Finney Boylan"
##
## $Publisher
## [1] "Celadon Books" "Penguin Publishing Group"
## [3] "Random House Publishing Group"
##
## $Pages
## [1] "336" "384" "464"
##
## $ISBN
## [1] "9781250301703" "9780735219090" "9780593597675"
# Turn the parsed JSON list (one equal-length vector per field) into a data
# frame with one row per book. `stringsAsFactors = FALSE` keeps the text
# columns as character vectors on every R version, consistent with how the
# XML table is built.
# NOTE(review): in the JSON data, Author_1 for "Mad Honey" is "Delia Ownes",
# whereas the HTML and XML tables show "Jodi Picoult" -- likely an error in
# Books.json; verify the source file. The first column is also named "Books"
# here but "Title" in the XML table.
books_json_table <- as.data.frame(json_parsed, stringsAsFactors = FALSE)
head(books_json_table)
## Books Author_1 Author_2
## 1 The Silent Patient Alex Michaelides
## 2 Where the Crawdads Sing Delia Ownes
## 3 Mad Honey Delia Ownes Jennifer Finney Boylan
## Publisher Pages ISBN
## 1 Celadon Books 336 9781250301703
## 2 Penguin Publishing Group 384 9780735219090
## 3 Random House Publishing Group 464 9780593597675