library(XML)
library(RCurl)
## Loading required package: bitops
library(rjson)
library(RJSONIO)
##
## Attaching package: 'RJSONIO'
##
## The following objects are masked from 'package:rjson':
##
## fromJSON, toJSON
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following objects are masked from 'package:RJSONIO':
##
## fromJSON, toJSON
##
## The following objects are masked from 'package:rjson':
##
## fromJSON, toJSON
##
## The following object is masked from 'package:utils':
##
## View
# ---- HTML source ----
# Raw GitHub address of the HTML version of the book list.
url4html <- "https://raw.githubusercontent.com/fangseup88/Assignment_Week7/master/books.html"
# Fetch the document body as text.
file4html <- RCurl::getURL(url4html)
# Parse the HTML tables found in the text and combine the result into a
# single data frame.
df4html <- data.frame(
  XML::readHTMLTable(file4html)
)
# ---- XML source ----
# Raw GitHub address of the XML version of the book list.
url4xml <- "https://raw.githubusercontent.com/fangseup88/Assignment_Week7/master/books.xml"
# Fetch the document body as text.
file4xml <- RCurl::getURL(url4xml)
# Parse the text into an XML tree, then flatten the tree to a list
# (attributes dropped) and convert that list to a data frame.
intermxml <- XML::xmlTreeParse(file4xml)
df4xml <- data.frame(
  XML::xmlToList(intermxml, addAttributes = FALSE)
)
# ---- JSON source ----
# Raw GitHub address of the JSON version of the book list.
url4json <- "https://raw.githubusercontent.com/fangseup88/Assignment_Week7/master/books.json"
# Fetch the document body as text.
file4json <- RCurl::getURL(url4json)
# Three attached packages export fromJSON(); jsonlite is attached last, so it
# is the one the bare name resolves to. Spell the namespace out to make that
# explicit.
df4json <- data.frame(
  jsonlite::fromJSON(file4json)
)
# Display the data frame built from the HTML table. In the output that
# follows, each row is one book and the column names carry a "NULL." prefix.
print(df4html)
## NULL.Title
## 1 Fundamentals of Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, and Case Studies
## 2 Machine Learning: Hands-On for Developers and Technical Professionals
## 3 Machine Learning with R
## NULL.Author.s. NULL.Publication.Date
## 1 John D. Kelleher, Brian Mac Namee, Aoife D'Arcy 2015-07-24
## 2 Jason Bell N2014-11-03
## 3 Brett Lantz 2013-10-25
## NULL.Publisher
## 1 Massachusetts Institute of Technology
## 2 John Wiley and Sons Inc
## 3 Packt Publishing
# Display the data frame built from the XML document. In the output that
# follows, each column is one book (book, book.1, book.2) and each row is one
# field (title, author, publicationDate, publisher).
print(df4xml)
## book
## title Fundamentals of Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, and Case Studies
## author John D. Kelleher, Brian Mac Namee, Aoife D'Arcy
## publicationDate 2015-07-04
## publisher Massachusetts Institute of Technology
## book.1
## title Machine Learning: Hands-On for Developers and Technical Professionals
## author Jason Bell
## publicationDate 2014-11-03
## publisher John Wiley and Sons Inc
## book.2
## title Machine Learning with R
## author Brett Lantz
## publicationDate 2013-11-25
## publisher Packt Publishing
# Display the data frame built from the JSON document. In the output that
# follows, each row is one book and the column names carry a "book." prefix.
print(df4json)
## book.title
## 1 Fundamentals of Machine Learning for Predictive Data Analytics: Algorithms, Worked Examples, and Case Studies
## 2 Machine Learning: Hands-On for Developers and Technical Professionals
## 3 Machine Learning with R
## book.author book.date
## 1 John D. Kelleher, Brian Mac Namee, Aoife D'Arcy 2015-07-24T00:00:00Z
## 2 Jason Bell 2014-11-03T00:00:00Z
## 3 Brett Lantz 2013-10-25T00:00:00Z
## book.publisher
## 1 Massachusetts Institute of Technology
## 2 John Wiley and Sons Inc
## 3 Packt Publishing
All the data frames contain the information displayed in the initial files. The data frames for HTML and JSON have the same structure: each row is one book and each column is one attribute, so the information about the different books is mixed together within each column. The data frame for XML is different: it follows the structure of the input file, with one column per book and one row per attribute.