# Week 7 Assignment

install.packages("RCurl",repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'RCurl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpyWykqi\downloaded_packages
install.packages("XML", repos = "http://cran.us.r-project.org")
## Installing package into 'C:/Users/jenny_000/Documents/R/win-library/3.3'
## (as 'lib' is unspecified)
## package 'XML' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpyWykqi\downloaded_packages
library(RCurl)
## Loading required package: bitops
library(XML)
library(jsonlite)

# Loading HTML
# getURL() downloads the page body as a single character string.
html.URL <-
  getURL("https://raw.githubusercontent.com/Jennier2015/DATA-607/master/Week%207%20Assignment/books.html")
# Parse the downloaded string explicitly as markup (asText = TRUE) instead of
# letting the XML package guess whether it was given content, a file name, or
# a URL. readHTMLTable() returns a list with one entry per <table> found.
# stringsAsFactors = FALSE keeps text columns as character vectors on every
# R version, so the result can be compared with the XML and JSON versions.
books.html <- readHTMLTable(
  htmlParse(html.URL, asText = TRUE),
  header = TRUE,
  stringsAsFactors = FALSE
)
books.html
## $`NULL`
##                    Title Year                    Author Goodreadrating
## 1 The Road Less Traveled 1978 Scott Peck, John Driscoll           4.02
## 2                Mindset 2006               Carol Dweck           4.03
## 3                 Switch 2014                Dan Harris           3.93
##      Publisher
## 1   Touchstone
## 2 Random House
## 3     It Books
# Loading XML
# getURL() downloads the document as a single character string.
xml.URL <-
  getURL("https://raw.githubusercontent.com/Jennier2015/DATA-607/master/Week%207%20Assignment/books.xml")
# The argument is XML content, not a path, so say so with asText = TRUE rather
# than relying on xmlParse() to infer it from the string.
books.xml <- xmlParse(xml.URL, asText = TRUE)
# Flatten the parsed document into a data frame. stringsAsFactors = FALSE keeps
# the columns as character vectors regardless of the R version's default, so
# the result can be compared with the HTML and JSON versions.
books_xml <- xmlToDataFrame(books.xml, stringsAsFactors = FALSE)
books_xml
##                    title                       author year Goodreadrating
## 1 The Road Less Traveled Scott Peck and John Driscoll 1978          30.00
## 2                Mindset                  Carol Dweck 2006           4.03
## 3                 Switch                   Dan Harris 2006           3.93
##      Publisher
## 1   Touchstone
## 2 Random House
## 3     It Books
# Loading JSON
# Step 1: download the raw JSON text from the course repository.
json.URL <- getURL(
  url = "https://raw.githubusercontent.com/Jennier2015/DATA-607/master/Week%207%20Assignment/books.json"
)
# Step 2: decode the JSON text into an R object and print it for inspection.
books.json <- fromJSON(txt = json.URL)
books.json
##                                                                          book1
## 1 The Road Less Traveled, Scott Peck and John Driscoll, 1978, 4.02, Touchstone
## 2                               Mindset, Carol Dweck, 1967, 4.03, Random House
## 3                                     Switch, Dan Harris, 2014, 3.93, It Books
# Without additional processing, the three data frames are not identical.