Pick three of your favorite books on one of your favorite subjects. At least one of the books should have more than one author. For each book, include the title, authors, and two or three other attributes that you find interesting.
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.1.3
## Loading required package: bitops
library(XML)
## Warning: package 'XML' was built under R version 3.1.3
library(rjson)
## Warning: package 'rjson' was built under R version 3.1.3
## get the files from web
# Remote copies of the same book table in HTML, XML and JSON form.
html.url <- "https://raw.githubusercontent.com/srajeev1/MSDA_Assignment/master/week9/HTMLPage.htm"
xml.url <- "https://raw.githubusercontent.com/srajeev1/MSDA_Assignment/master/week9/week9AssignmentXML.xml"
json.url <- "https://raw.githubusercontent.com/srajeev1/MSDA_Assignment/master/week9/week9AssignmentJSON.json"
## save file to local drive
# Each file is stored under the exact path that the read steps open. The XML
# copy is saved as "xmlfileweek9" (no extension) because that is the path
# given to xmlToList(); the earlier commented-out command wrote it to
# "xmlfileweek9.xml", which the reader would never find.
# A file is fetched only when no local copy exists, so a fresh checkout works
# and repeated runs neither hit the network nor overwrite existing copies.
remote.urls <- c(html.url, xml.url, json.url)
local.files <- c("htmlfileweek9", "xmlfileweek9", "jsonfileweek9")
for (i in seq_along(local.files)) {
  if (!file.exists(local.files[i])) {
    # mode = "wb" stores the bytes unchanged (no line-ending translation).
    download.file(remote.urls[i], local.files[i], mode = "wb")
  }
}
# read the file from local drive
# Without `which`, readHTMLTable() returns a list holding every table found in
# the page (an earlier run printed the result under a `NULL` list name), so
# `html.data` was a list rather than a data frame. `which = 1` returns the
# first table itself; `stringsAsFactors = FALSE` keeps the text columns as
# character vectors instead of factors.
html.data <- readHTMLTable(
  "htmlfileweek9",
  which = 1,
  stringsAsFactors = FALSE
)
head(html.data)
# Rows printed by an earlier run:
## Title
## 1 Probability, With Applications and R
## 2 Machine Learning with R
## 3 XML and Web Technologies for Data Sciences with R
## Author ISBN Price
## 1 Robert P. Dobrow 978-1-118-24125-7 140.00
## 2 Brett Lantz 978-1782162148 49.99
## 3 Deborah Nolan; Duncan Lang Temple 978-1782162148 49.99
# Parse the XML file into a list with one element per book record.
xml.list <- xmlToList("xmlfileweek9")
#convert list to dataframe
# Each record is turned into a one-row data frame and the rows are stacked.
# The previous sapply()/t() conversion produced a list-matrix, which gave
# list-valued columns and a "row.names duplicated" warning because every
# record carries the same element name.
xml.rows <- lapply(unname(xml.list), function(book) {
  # An empty XML element comes back as NULL; store it as NA so the record
  # still forms a complete row.
  book[vapply(book, is.null, logical(1))] <- NA_character_
  as.data.frame(book, stringsAsFactors = FALSE)
})
xml.data <- do.call(rbind, xml.rows)
head(xml.data)
# Rows printed by an earlier run:
## Title
## 1 Probability, With Applications and R
## 2 Machine Learning with R
## 3 XML and Web Technologies for Data Sciences with R
## Author ISBN Price
## 1 Robert P. Dobrow 978-1-118-24125-7 140.00
## 2 Brett Lantz 978-1782162148 49.99
## 3 Deborah Nolan; Duncan Lang Temple 978-1782162148 49.99
# Parse the JSON file into an R list.
json.list <- fromJSON(file = "jsonfileweek9")
#convert list to dataframe
# An earlier run of the previous t(sapply(...)) conversion printed the field
# names (Title, Author, ISBN, Price) as row labels and X1..X3 as column
# headers, i.e. the table came out transposed and every value was coerced to
# text. That output implies the parsed list is keyed by field name, so the
# data frame is built column-wise and each field keeps its own type.
if (is.null(names(json.list))) {
  # Fallback for an unnamed list, presumably an array of per-book records
  # (not the layout seen in the recorded run): stack one row per record.
  json.rows <- lapply(json.list, function(rec) {
    as.data.frame(as.list(rec), stringsAsFactors = FALSE)
  })
  json.data <- do.call(rbind, json.rows)
} else {
  # unlist() flattens any field that was parsed as a list of scalars.
  json.data <- as.data.frame(
    lapply(json.list, unlist),
    stringsAsFactors = FALSE
  )
}
head(json.data)
# Should now print one row per book under the columns Title, Author, ISBN
# and Price, the same orientation as the HTML and XML tables.
#Compare Dataframes
## all.equal() walks two objects and reports every difference it finds;
## identical() is the strict test and returns a single TRUE/FALSE.
#
# all.equal() can only compare cell against cell when both arguments are data
# frames with plain columns. An earlier run on the raw objects reported only
# name, length and "not a factor" mismatches, and identical() returned FALSE
# for both pairs, because the three loaders had produced different containers
# and column types. The content check below therefore runs on normalised
# copies; the strict identical() checks still use the raw objects.

# Return `x` as a data frame with plain, type-converted columns.
#   x: a data frame, or a list whose first element is one (the shape
#      readHTMLTable() returns when no table index is given).
# Factor and list columns are flattened to text, then text that is entirely
# numeric (e.g. "140.00") becomes numeric so it matches a parsed number.
normalize_frame <- function(x) {
  if (!is.data.frame(x)) {
    x <- x[[1]]
  }
  x[] <- lapply(x, function(column) {
    type.convert(as.character(unlist(column)), as.is = TRUE)
  })
  row.names(x) <- NULL
  x
}

html.cmp <- normalize_frame(html.data)
xml.cmp <- normalize_frame(xml.data)
json.cmp <- normalize_frame(json.data)

# Content comparison: TRUE, or a character vector describing the differences.
all.equal(html.cmp, xml.cmp)
all.equal(json.cmp, xml.cmp)

# Strict comparison of the objects exactly as loaded.
identical(html.data, xml.data)
identical(json.data, xml.data)