# Address of the HTML copy of the book table.
html <- "https://raw.githubusercontent.com/miasiracusa/Data607/master/assignment7/books.html"
# Read the page with read_html() and pass the result directly to htmlParse(),
# producing the parsed document that readHTMLTable() is applied to below.
html_books <- htmlParse(read_html(html))
# Convert what readHTMLTable() extracts from the document to a data frame.
html_books_df <- as.data.frame(readHTMLTable(html_books))
# Assign readable column names.
names(html_books_df) <- c(
  "title", "author", "year published", "goodreads rating"
)
# Show the result.
html_books_df
## title author year published
## 1 Bad Feminist Roxane Gay 2014
## 2 Art and Feminisim Helena Reckitt, Peggy Phelan 2001
## 3 Women, Race & Class Angela Davis 1981
## goodreads rating
## 1 3.9
## 2 4.4
## 3 4.4
# Address of the XML copy of the book list.
xml <- "https://raw.githubusercontent.com/miasiracusa/Data607/master/assignment7/books.xml"
# Fetch the document.
xmldata <- read_xml(xml)
# Parse the fetched document and turn it into a data frame in a single step,
# so that xml_books only ever holds the final data frame.
xml_books <- xmlToDataFrame(xmlParse(file = xmldata))
# Show the result.
xml_books
## title author year goodreadsrating
## 1 Bad Feminist Roxane Gay 2014 3.9
## 2 Art and Feminism Helena Reckitt, Peggy Phelan 2001 4.4
## 3 Women, Race and Class Angela Davis 1981 4.4
# Address of the JSON copy of the book list.
json <- "https://raw.githubusercontent.com/miasiracusa/Data607/master/assignment7/books.json"
# Check that the document is valid JSON.
isValidJSON(json)
## [1] TRUE
# Load the JSON document into an R object.
json_books <- fromJSON(json)
# Flatten the parsed object into a single wide data frame.
json_books <- as.data.frame(json_books)
# Drop the second row so that only the first row is reshaped below.
# NOTE: the combined author string is deliberately NOT written into
# json_books here. When the author column is a factor, assigning a value that
# is not an existing level produces NA plus an "invalid factor level" warning.
# The author is set once, after the factor-to-character conversion further
# down.
json_books <- json_books[-2, ]
# Columns 1-4, 5-8 and 9-12 are treated as one book each: slice every group
# out, give the slices identical column names and stack them into rows.
book_names <- c("title", "author", "year published", "goodreads rating")
book_columns <- list(1:4, 5:8, 9:12)
book_rows <- lapply(book_columns, function(cols) {
  book <- json_books[, cols]
  colnames(book) <- book_names
  book
})
json_books_df <- do.call(rbind, book_rows)
# Convert any factor columns to character so that values can be edited freely.
is_factor <- vapply(json_books_df, is.factor, logical(1))
json_books_df[is_factor] <- lapply(json_books_df[is_factor], as.character)
# Record both authors of the second book as a single string.
json_books_df$author[2] <- "Helena Reckitt and Peggy Phelan"
# Show the result.
json_books_df
## title author year published
## 1 Bad Feminist Roxane Gay 2014
## 2 Art and Feminism Helena Reckitt and Peggy Phelan 2001
## 3 Women, Race & Class Angela Davis 1981
## goodreads rating
## 1 3.9
## 2 4.4
## 3 4.4
The three data frames are very similar, but I found the JSON version to be the most different from the HTML and XML versions. I had to edit that data frame the most to make it look like the other two.