Load Libraries

library(knitr)
library(XML)
library(httr)
library(rjson)
library(RCurl)
## Loading required package: bitops
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following objects are masked from 'package:rjson':
## 
##     fromJSON, toJSON
library(bitops)

Loading the HTML data from GitHub

# Download the HTML version of the book list from GitHub and read its tables
html_url <- readHTMLTable(
  getURL("https://raw.githubusercontent.com/crarnouts/CUNY-MSDS/master/books.html")
)
# Work with the first table only, flattening each column into a plain vector
html_url <- lapply(html_url[[1]], unlist)
# Assemble the columns into a data frame and render it as a table
df.html <- as.data.frame(html_url)
kable(df.html)
title authors price publisher publishedDate type isbn
Calculus Eigth Edition Ron Larson, Robert Hosteltler, Bruce Edwards $49.35 Houghton Mifflin Company 2006 Hard Cover 0618-50298-X
Mathletics John D. Barrow $9.99 W.W.Norton & Company 2012 Paperback 978-0-393-34550-6
Quantitative Investment Analysis Richard DeFusco, Dennis Mcleavey, Jerald Pinto, David Runkle $75.00 CFA Institute 2015 Hard Cover 978-1-119-10422-3
# Confirm that the HTML import produced a data frame
is.data.frame(df.html)
## [1] TRUE

Loading the JSON data from GitHub

# Load the JSON version of the book list from GitHub.
# Both rjson and jsonlite export fromJSON(); qualify the call so the parser
# used does not depend on the order in which the packages were attached.
json_url <- jsonlite::fromJSON(
  getURL("https://raw.githubusercontent.com/crarnouts/CUNY-MSDS/master/books.JSON")
)
# Flatten every field of the first top-level element into a plain vector
json_url <- lapply(json_url[[1]], unlist)
# Use the fields as columns so each book is one row. Row-binding the fields
# instead yields a transposed frame (fields as rows, books as V1, V2, V3).
df.json <- as.data.frame(json_url)
kable(df.json)
V1 V2 V3
title Calculus Eigth Edition Mathletics Quantitative Investment Analysis
authors Ron Larson, Robert Hosteltler, Bruce Edwards John D. Barrow Richard DeFusco, Dennis Mcleavey, Jerald Pinto, David Runkle
price $49.35 $9.99 $75.00
publisher Houghton Mifflin Company W.W.Norton & Company CFA Institute
publishedDate 2006 2012 2015
type Hard Cover Paperback Hard Cover
isbn 0618-50298-X 978-0-393-34550-6 978-1-119-10422-3
# Confirm that the JSON import produced a data frame
is.data.frame(df.json)
## [1] TRUE

Loading the XML data from GitHub

# Download the XML version of the book list from GitHub and parse it
xml_url <- xmlInternalTreeParse(
  getURL("https://raw.githubusercontent.com/crarnouts/CUNY-MSDS/master/books3.xml")
)
# For each child of the root node, collect the text value of its own children
xml_apply <- xmlSApply(
  xmlRoot(xml_url),
  function(record) xmlSApply(record, xmlValue)
)
# Transpose the collected values before building the data frame, and drop the
# row names that the transpose would otherwise carry over
df.xml <- data.frame(t(xml_apply), row.names = NULL)
kable(df.xml)
title authors price publisher publishedDate type isbn
Calculus Eigth Edition Ron Larson, Robert Hosteltler, Bruce Edwards $49.35 Houghton Mifflin Company 2006 Hard Cover 0618-50298-X
Mathletics John D. Barrow $9.99 W.W.Norton Company 2012 Paperback 978-0-393-34550-6
Quantitative Investment Analysis Richard DeFusco, Dennis Mcleavey, Jerald Pinto, David Runkle $75.00 CFA Institute 2015 Hard Cover 978-1-119-10422-3
# Confirm that the XML import produced a data frame
is.data.frame(df.xml)
## [1] TRUE