library(XML)
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.2.3
library(curl)
## Warning: package 'curl' was built under R version 3.2.3
library(stringr)
library(httr)
## Warning: package 'httr' was built under R version 3.2.3
##
## Attaching package: 'httr'
## The following object is masked from 'package:curl':
##
## handle_reset
# Raw GitHub locations of the same book list stored in three formats.
json_url <- "https://raw.githubusercontent.com/cyadusha/somebooks/master/somebooks.json"
html_url <- "https://raw.githubusercontent.com/cyadusha/somebooks/master/somebooks.html"
xml_url <- "https://raw.githubusercontent.com/cyadusha/somebooks/master/somebooks.xml"
# Download the HTML version of the book list and parse its table into the
# data frame `h`.
html_file <- curl(html_url)
html_table <- readLines(html_file)
# readLines() opens and closes the stream for the duration of the call, but
# the connection object stays registered until it is closed explicitly.
close(html_file)
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
h <- as.data.frame(
  readHTMLTable(html_table, header = TRUE, stringsAsFactors = FALSE)
)
# Drop the first 5 characters of every column name (presumably a "NULL."
# prefix introduced when the list of tables is converted -- TODO confirm).
colnames(h) <- str_sub(colnames(h), start = 6)
# Replace the underscores in the column names with spaces.
colnames(h) <- str_replace_all(colnames(h), "_", " ")
h
## Author Author 2 Author 3 Author 4
## 1 Rizzo, Maria L. NA NA NA
## 2 Banks, Jerry Carlson, John S. Nelson, Barry L. Nicol, David M.
## 3 Joines, Jeffrey A. Roberts, Stephen D. NA NA
## Title ISBN Year
## 1 Statistical Computing With R 1-58488-545-9 2008
## 2 Discrete-Event Systems Simulation 078-0-13-606212-7 2010
## 3 Simulation Modeling with SIMIO: A Workbook 978-1-938207-76-1 2012
# Download the XML version of the book list and convert it into the data
# frame `x`.
xml_file <- curl(xml_url)
xml_table <- readLines(xml_file)
## Warning in readLines(xml_file): incomplete final line found on 'https://
## raw.githubusercontent.com/cyadusha/somebooks/master/somebooks.xml'
# readLines() opens and closes the stream for the duration of the call, but
# the connection object stays registered until it is closed explicitly.
close(xml_file)
# FALSE is spelled out because F is an ordinary, reassignable name.
x <- xmlToDataFrame(xml_table, stringsAsFactors = FALSE)
# Replace the underscores in the column names with spaces.
colnames(x) <- str_replace_all(colnames(x), "_", " ")
x
## Author Author 2 Author 3 Author 4
## 1 Rizzo, Maria L. NA NA NA
## 2 Banks, Jerry Carlson, John S. Nelson, Barry L. Nicol, David M.
## 3 Joines, Jeffrey A. Roberts, Stephen D. NA NA
## Title ISBN Year
## 1 Statistical Computing With R 1-58488-545-9 2008
## 2 Discrete-Event Systems Simulation 078-0-13-606212-7 2010
## 3 Simulation Modeling with SIMIO: A Workbook 978-1-938207-76-1 2012
# Download the JSON version of the book list and convert it into the data
# frame `j`.
json_file <- GET(json_url)
# Fail immediately on an HTTP error instead of trying to parse an error page.
stop_for_status(json_file)
# Ask for the body as text explicitly: with the default, content() picks a
# parser from the response's Content-Type, so a JSON-typed response would come
# back already parsed and fromJSON() below would reject it.
json_table <- content(json_file, as = "text", encoding = "UTF-8")
j <- fromJSON(json_table)
# Replace the underscores in the column names with spaces.
colnames(j) <- str_replace_all(colnames(j), "_", " ")
j
## Author Author 2 Author 3 Author 4
## 1 Rizzo, Maria L. NA NA NA
## 2 Banks, Jerry Carlson, John S. Nelson, Barry L. Nicol, David M.
## 3 Joines, Jeffrey A. Roberts, Stephen D. NA NA
## Title ISBN Year
## 1 Statistical Computing With R 1-58488-545-9 2008
## 2 Discrete-Event Systems Simulation 078-0-13-606212-7 2010
## 3 Simulation Modeling with SIMIO: A Workbook 978-1-938207-76-1 2012
# Element-wise comparison of the HTML-derived (h) and XML-derived (x) data
# frames; prints a logical matrix with one cell per value.
h == x
## Author Author 2 Author 3 Author 4 Title ISBN Year
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Element-wise comparison of the HTML-derived (h) and JSON-derived (j) data
# frames; prints a logical matrix with one cell per value.
h == j
## Author Author 2 Author 3 Author 4 Title ISBN Year
## [1,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [2,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [3,] TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Element-wise comparison of the JSON-derived (j) and XML-derived (x) data
# frames; prints a logical matrix with one cell per value.
j == x
## Author Author 2 Author 3 Author 4 Title ISBN Year
## 1 TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## 2 TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## 3 TRUE TRUE TRUE TRUE TRUE TRUE TRUE
All three data frames (built from the HTML, XML, and JSON sources) hold identical values: every element-wise comparison above is TRUE.