Loading packages
#install.packages("XML")
#install.packages("jsonlite")
#install.packages("RCurl")
library(XML)
## Warning: package 'XML' was built under R version 3.2.5
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.2.5
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.2.5
## Loading required package: bitops
## Warning: package 'bitops' was built under R version 3.2.5
Loading The Data
# Raw GitHub locations of the same book list in three formats:
# HTML (url1), JSON (url2) and XML (url3).
url1 <- "https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W7/Data/books.html"
url2 <- "https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W7/Data/books.json"
url3 <- "https://raw.githubusercontent.com/mathsanu/CUNY_MSDA/master/DATA607/W7/Data/books.xml"

# Fetch each document with RCurl; the results are handed to the
# format-specific parsers below.
Lhtml <- getURL(url = url1)
Ljson <- getURL(url = url2)
Lxml <- getURL(url = url3)
HTML Process
# Parse the downloaded HTML text and collect its table(s) into a data frame.
# The intermediate is named html_tables (not `table`) so it does not mask
# base::table().
html_tables <- readHTMLTable(Lhtml, header = TRUE)
htmlDF <- data.frame(html_tables)
# The third column holds ISBN values (see the printed rows below), so it is
# labelled "ISBN" rather than "Publisher". The names are spelled exactly like
# the columns of the JSON and XML results (Title, Authors, ISBN,
# Yearpublished) so the three frames can be compared column by column.
colnames(htmlDF) <- c("Title", "Authors", "ISBN", "Yearpublished")
htmlDF
## Title
## 1 Automated Data Collection with R
## 2 Probability, With Applications and R
## 3 Machine Learning with R
## Authors
## 1 Simon Munzert; DChristian Rubba;Peter Meibner;Dominic Nyhuis
## 2 Robert P. Dobrow
## 3 Brett Lantz
## ISBN Yearpublished
## 1 978-1782162148 2015
## 2 978-1-118-24125-7 2016
## 3 978-1782162148 2015
JSON Process
# Decode the downloaded JSON text with jsonlite and show the result.
# As the printed output shows, the book table sits under the `fbooks` element.
jsonL <- fromJSON(txt = Ljson)
jsonL
## $fbooks
## Title
## 1 Automated Data Collection with R
## 2 Probability, With Applications and R
## 3 Machine Learning with R
## Authors
## 1 Simon Munzert, Christian Rubba, Peter Meibner, Dominic Nyhuis
## 2 Robert P. Dobrow
## 3 Brett Lantz
## ISBN Yearpublished
## 1 978-1782162148 2015
## 2 978-1-118-24125-7 2016
## 3 978-1782162148 2015
XML Process
# Parse the downloaded XML text into a document object (kept as xmlh),
# then convert that document into a data frame and show it.
xmlh <- xmlParse(Lxml)
xmlDF <- xmlToDataFrame(doc = xmlh)
xmlDF
## Title
## 1 Automated Data Collection with R
## 2 Probability, With Applications and R
## 3 Machine Learning with R
## Authors
## 1 Simon Munzert,Christian Rubba,Peter Meibner,Dominic Nyhuis
## 2 Robert P. Dobrow
## 3 Brett Lantz
## ISBN Yearpublished
## 1 978-1782162148 2015
## 2 978-1-118-24125-7 2016
## 3 978-1782162148 2015
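Are the three data frames identical? Yes, they have the same structure (the same three books in four columns), but they are not strictly identical: the author separators differ between the sources (";" in HTML, ", " in JSON, "," in XML), and fromJSON returns a list whose fbooks element holds the data frame.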