# Week 7 Assignment
# Dependencies ----
# Install only the packages that are not already available, instead of
# re-downloading them on every run, and fetch them over HTTPS. jsonlite is
# listed as well because it is attached below but was never installed by
# this script.
required_pkgs <- c("RCurl", "XML", "jsonlite")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1), quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs, repos = "https://cran.us.r-project.org")
}
# Console output recorded from the original run, which installed RCurl and
# XML with two separate install.packages() calls. The install step is skipped
# entirely when every package is already present.
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'RCurl' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\jenny_000\AppData\Local\Temp\RtmpyWykqi\downloaded_packages
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'XML' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\jenny_000\AppData\Local\Temp\RtmpyWykqi\downloaded_packages

# Attach the packages used below: RCurl for getURL(), XML for the HTML/XML
# parsers, jsonlite for fromJSON().
library(RCurl)
## Loading required package: bitops
library(XML)
library(jsonlite)
# HTML source ----
# Download the HTML version of the book list. Despite its name, html.URL
# receives the value returned by getURL(), not the address itself.
html.URL <- getURL(
  url = "https://raw.githubusercontent.com/Jennier2015/DATA-607/master/Week%207%20Assignment/books.html"
)
# Parse the table(s) in the document, using the first row as column names.
# As the recorded output shows, the result is a list with a single table
# under the name `NULL`.
books.html <- readHTMLTable(doc = html.URL, header = TRUE)
books.html
## $`NULL`
## Title Year Author Goodreadrating
## 1 The Road Less Traveled 1978 Scott Peck, John Driscoll 4.02
## 2 Mindset 2006 Carol Dweck 4.03
## 3 Switch 2014 Dan Harris 3.93
## Publisher
## 1 Touchstone
## 2 Random House
## 3 It Books
# XML source ----
# Download the XML version of the book list; xml.URL receives the value
# returned by getURL(), not the address itself.
xml.URL <- getURL(
  url = "https://raw.githubusercontent.com/Jennier2015/DATA-607/master/Week%207%20Assignment/books.xml"
)
# Two steps: parse the text into an XML document, then flatten that document
# into a data frame.
books.xml <- xmlParse(file = xml.URL)
books_xml <- xmlToDataFrame(doc = books.xml)
books_xml
## title author year Goodreadrating
## 1 The Road Less Traveled Scott Peck and John Driscoll 1978 30.00
## 2 Mindset Carol Dweck 2006 4.03
## 3 Switch Dan Harris 2006 3.93
## Publisher
## 1 Touchstone
## 2 Random House
## 3 It Books
# JSON source ----
# Download the JSON version of the book list; json.URL receives the value
# returned by getURL(), not the address itself.
json.URL <- getURL(
  url = "https://raw.githubusercontent.com/Jennier2015/DATA-607/master/Week%207%20Assignment/books.json"
)
# Convert the JSON text to an R object. In the recorded output each book
# appears as one comma-separated string in a single `book1` column.
books.json <- fromJSON(txt = json.URL)
books.json
## book1
## 1 The Road Less Traveled, Scott Peck and John Driscoll, 1978, 4.02, Touchstone
## 2 Mindset, Carol Dweck, 1967, 4.03, Random House
## 3 Switch, Dan Harris, 2014, 3.93, It Books
# Without additional processing, the three data frames are not identical.