library(RCurl)
## Loading required package: bitops
library(XML)
library(knitr)
library(jsonlite)
library(plyr)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
HTML table to data frame in R
# Load the book list from an HTML table.
# Remote copy of the HTML file; used as a fallback when the local copy below
# is not present on this machine.
html2.books <- "https://raw.githubusercontent.com/AlainKuiete/DATA607/master/mypreferedbooks.html"
html.path <- "C:\\DATA607\\Assignments\\mypreferedbooks.html"
# Prefer the local file so results on the original machine are unchanged;
# otherwise download the page text with RCurl::getURL() and parse it as text.
html.input <- if (file.exists(html.path)) {
  html.path
} else {
  htmlParse(getURL(html2.books), asText = TRUE)
}
# readHTMLTable() returns a list of tables; keep the first one.
html.books <- readHTMLTable(html.input, header = TRUE)[[1]]
# NOTE(review): tibble() stores the whole data frame as a single packed column
# (the printed header reads "3 x 1"); as_tibble(html.books) would give one
# column per field. Left as-is so the recorded output stays valid.
tibble(html.books)
## # A tibble: 3 x 1
## html.books$`ISB~ $Title $Authors $Publisher $`Edition-Numbe~
## <fct> <fct> <fct> <fct> <fct>
## 1 9780199390489 INTER~ Ronald ~ OXOFORD U~ 13
## 2 9780596009205 Head ~ Kathy S~ OREILLY 2
## 3 9780205972241 Psych~ Saudra ~ PEARSON 4
## # ... with 1 more variable: $`Copyright@` <fct>
# Evaluate the data frame at top level so it is printed in full
# (every column, untruncated).
html.books
## ISBN-13 Title
## 1 9780199390489 INTERPLAY The Process of Interpersonal Communication
## 2 9780596009205 Head First Java 5.
## 3 9780205972241 Psychology
## Authors
## 1 Ronald B. Alder, Lawrence B. Rosenfeld, Rusell F, Proctor II
## 2 Kathy Sierra, Bert Bates
## 3 Saudra K. Ciccarelli, J. Noland White
## Publisher Edition-Number Copyright@
## 1 OXOFORD University Press 13 2015
## 2 OREILLY 2 2005
## 3 PEARSON 4 2015
XML to data frame
# Load the book list from an XML document.
# Remote copy of the XML file; used as a fallback when the local copy below
# is not present on this machine.
xml2.url <- "https://raw.githubusercontent.com/AlainKuiete/DATA607/master/mypreferedbooks.xml"
xml.url <- "C:\\DATA607\\Assignments\\mypreferedbooks.xml"
# Prefer the local file so results on the original machine are unchanged;
# otherwise download the document text with RCurl::getURL() and parse it
# as text.
xml.books <- if (file.exists(xml.url)) {
  xmlParse(xml.url)
} else {
  xmlParse(getURL(xml2.url), asText = TRUE)
}
# Each child of the <books> node under the root becomes one row.
xml.mydf <- xmlToDataFrame(
  nodes = xmlChildren(xmlRoot(xml.books)[["books"]]),
  stringsAsFactors = TRUE
)
# Column names are assigned by position. Note that this frame uses
# "Edition_Number" whereas the HTML table's header is "Edition-Number".
colnames(xml.mydf) <- c(
  "ISBN-13", "Title", "Authors", "Publisher", "Edition_Number", "Copyright@"
)
# NOTE(review): tibble() stores the whole data frame as a single packed column
# (the printed header reads "3 x 1"); as_tibble(xml.mydf) would give one
# column per field. Left as-is so the recorded output stays valid.
tibble(xml.mydf)
## # A tibble: 3 x 1
## xml.mydf$`ISBN-~ $Title $Authors $Publisher $Edition_Number $`Copyright@`
## <fct> <fct> <fct> <fct> <fct> <fct>
## 1 9780199390489 "INTE~ Ronald ~ "OXOFORD ~ 13 2015
## 2 9780596009205 Head ~ Kathy S~ OXOFORD U~ 2 2005
## 3 9780205972241 Psych~ Saudra ~ PEARSON 4 2015
# Evaluate the data frame at top level so it is printed in full
# (every column, untruncated).
xml.mydf
## ISBN-13 Title
## 1 9780199390489 INTERPLAY The Process of Interpersonal Communication
## 2 9780596009205 Head First Java 5.
## 3 9780205972241 Psychology
## Authors
## 1 Ronald B. Alder, Lawrence B. Rosenfeld, Rusell F., Proctor II
## 2 Kathy Sierra,Bert Bates
## 3 Saudra K. Ciccarelli, J. Noland White
## Publisher Edition_Number Copyright@
## 1 OXOFORD University Press 13 2015
## 2 OXOFORD University Press 2 2005
## 3 PEARSON 4 2015
JSON to data frame
# Load the book list from a JSON document.
# Remote copy of the JSON file; used as a fallback when the local copy below
# is not present on this machine.
json.url <- "https://raw.githubusercontent.com/AlainKuiete/DATA607/master/mypreferedbooks.json"
json.books <- "C:\\DATA607\\Assignments\\mypreferedbooks.json"
# Prefer the local file so results on the original machine are unchanged;
# fromJSON() accepts either a file path or a URL.
json.input <- if (file.exists(json.books)) json.books else json.url
json.document <- fromJSON(json.input, simplifyVector = TRUE)
json.mydf <- as.data.frame(json.document)
# Column names are assigned by position, matching the names used for the
# XML data frame.
colnames(json.mydf) <- c(
  "ISBN-13", "Title", "Authors", "Publisher", "Edition_Number", "Copyright@"
)
# NOTE(review): tibble() stores the whole data frame as a single packed column
# (the printed header reads "3 x 1"); as_tibble(json.mydf) would give one
# column per field. Left as-is so the recorded output stays valid.
tibble(json.mydf)
## # A tibble: 3 x 1
## json.mydf$`ISBN~ $Title $Authors $Publisher $Edition_Number $`Copyright@`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 9780199390489 INTER~ Ronald ~ OXOFORD U~ 13 2015
## 2 9780596009205 Head ~ Kathy S~ OREILLY 2 2005
## 3 99780205972241 Psych~ Saudra ~ PEARSON 4 2015
# Evaluate the data frame at top level so it is printed in full
# (every column, untruncated).
json.mydf
## ISBN-13 Title
## 1 9780199390489 INTERPLAY The Process of Interpersonal Communication
## 2 9780596009205 Head First Java 5.
## 3 99780205972241 Psychology
## Authors
## 1 Ronald B. Alder, Lawrence B. Rosenfeld, Rusell F, Proctor II
## 2 Kathy Sierra, Bert Bates
## 3 Saudra K. Ciccarelli, J. Noland White
## Publisher Edition_Number Copyright@
## 1 OXOFORD University Press 13 2015
## 2 OREILLY 2 2005
## 3 PEARSON 4 2015
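The three data frames have the same structure (three rows and six columns), but each one is loaded differently: the HTML table with readHTMLTable(), the XML file with xmlParse() and xmlToDataFrame(), and the JSON file with fromJSON(). Their contents are not identical, however: the HTML and XML columns are factors while the JSON columns are character, the HTML table keeps the header "Edition-Number" while the other two use "Edition_Number", and the source files differ in a few values (for example, the publisher of the second book in the XML file and the ISBN of the third book in the JSON file).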