XML Import into R
if("XML" %in% rownames(installed.packages()) == FALSE) {install.packages("XML")}
if("RCurl" %in% rownames(installed.packages()) == FALSE) {install.packages("RCurl")}
require(XML)
## Loading required package: XML
## Warning: package 'XML' was built under R version 3.3.2
require(RCurl)
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.3.2
## Loading required package: bitops
## Warning: package 'bitops' was built under R version 3.3.2
# Download the book catalogue as XML and flatten it into a data frame with one
# row per book.
fileURL <- "https://raw.githubusercontent.com/nobieyi00/CUNY_MSDA_R/master/books.xml"
xData <- getURL(fileURL)
doc3 <- xmlTreeParse(xData)
root <- xmlRoot(doc3, useInternalNodes = FALSE)

# The converted tree is indexed below as a two-dimensional structure:
# one row per field, one column per book.
list_root <- xmlToList(root)

# Row 2 holds the authors and is handled separately (a book may have several),
# so drop it here; transposing then gives one row per book.
book_xml_matrix <- t(list_root[-2, ])
n <- xmlSize(root)

# Get the author names: all authors of one book are joined into a single
# comma-separated string. seq_len() keeps this safe when there are no books,
# and vapply() guarantees exactly one string per book.
author_vector <- vapply(
  seq_len(n),
  function(i) paste(unlist(list_root[2, ][[i]][1, ]), collapse = ", "),
  character(1)
)

# Column positions in book_xml_matrix (after the author row was removed):
# 1 = title, 2 = genre, 3 = price, 4 = page count, 5 = language, 6 = book id.
book_xml_df <- data.frame(
  Book_ID = unlist(book_xml_matrix[, 6]),
  Title = unlist(book_xml_matrix[, 1]),
  Author = author_vector,
  Genre = unlist(book_xml_matrix[, 2]),
  Price = unlist(book_xml_matrix[, 3]),
  Page_Count = unlist(book_xml_matrix[, 4]),
  Language = unlist(book_xml_matrix[, 5])
)
book_xml_df
## Book_ID Title Author
## 1 1 Think and Grow Rich Napoleon Hill
## 2 2 Uberleben fur Marsianer Alfred Bekker, Claude Faine
## 3 3 Nineteen Eighty-Four George Orwell
## 4 4 Principles of mathematical analysis Walter Rudin
## Genre Price Page_Count Language
## 1 Personal Development 44.95 279 English
## 2 Science Fiction 1.06 208 German
## 3 Science Fiction 10 336 English
## 4 Mathematics 40 306 English
JSON Import into R
if("RJSONIO" %in% rownames(installed.packages()) == FALSE) {install.packages("RJSONIO")}
if("RCurl" %in% rownames(installed.packages()) == FALSE) {install.packages("RCurl")}
require(RCurl)
require(RJSONIO)
## Loading required package: RJSONIO
## Warning: package 'RJSONIO' was built under R version 3.3.2
# Download the book catalogue as JSON and flatten it into a data frame with one
# row per book.
fileURL2 <- "https://raw.githubusercontent.com/nobieyi00/CUNY_MSDA_R/master/json_xml.json"
Json_Data4 <- getURL(fileURL2)
j_book <- fromJSON(Json_Data4)
n <- length(j_book)

# Extract the field at position `pos` from every book record as a character
# vector. Each book must supply exactly one value for the field; vapply()
# raises an error otherwise instead of silently truncating.
json_book_field <- function(pos) {
  vapply(
    seq_along(j_book),
    function(i) as.character(j_book[[i]][[pos]]),
    character(1)
  )
}

# Field positions within a book record:
# 1 = title, 2 = author(s), 3 = genre, 4 = price, 5 = page count, 6 = language.
title_vector <- json_book_field(1)

# A book may list several authors; join them into one comma-separated string.
author_vector <- vapply(
  seq_along(j_book),
  function(i) paste(j_book[[i]][[2]], collapse = ", "),
  character(1)
)

genre_vector <- json_book_field(3)
price_vector <- json_book_field(4)
page_count_vector <- json_book_field(5)
language_vector <- json_book_field(6)

book_json_df <- data.frame(
  Title = title_vector,
  Author = author_vector,
  Genre = genre_vector,
  Price = price_vector,
  Page_Count = page_count_vector,
  Language = language_vector
)
book_json_df
## Title Author
## 1 Think and Grow Rich Napoleon Hill
## 2 Uberleben fur Marsianer Alfred Bekker, Claude Faine
## 3 Nineteen Eighty-Four George Orwell
## 4 Principles of mathematical analysis Walter Rudin
## Genre Price Page_Count Language
## 1 Personal Development 44.95 279 English
## 2 Science Fiction 1.06 208 German
## 3 Science Fiction 10 336 English
## 4 Mathematics 40 306 English
HTML table
if("XML" %in% rownames(installed.packages()) == FALSE) {install.packages("XML")}
if("RCurl" %in% rownames(installed.packages()) == FALSE) {install.packages("RCurl")}
require(XML)
require(RCurl)
# Fetch the HTML page as text and parse every table found in it.
fileURL <- "https://raw.githubusercontent.com/nobieyi00/CUNY_MSDA_R/master/html_books.html"
htmlData <- getURL(fileURL)
book_html <- readHTMLTable(htmlData)

# Only the first parsed table is used as the book data frame.
book_html_df <- data.frame(book_html[[1L]])
book_html_df
## title author
## 1 Think and Grow Rich Napoleon Hill
## 2 Uberleben fur Marsianer Alfred Bekker, Claude Faine
## 3 Nineteen Eighty-Four George Orwell
## 4 Principles of mathematical analysis Walter Rudin
## Genre Price page_count language
## 1 Personal Development 44.95 279 English
## 2 Science Fiction 1.06 208 German
## 3 Science Fiction 10 336 English
## 4 Mathematics 40 306 English