XML Import into R

if("XML" %in% rownames(installed.packages()) == FALSE) {install.packages("XML")}
if("RCurl" %in% rownames(installed.packages()) == FALSE) {install.packages("RCurl")}
require(XML)
## Loading required package: XML
## Warning: package 'XML' was built under R version 3.3.2
require(RCurl)
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.3.2
## Loading required package: bitops
## Warning: package 'bitops' was built under R version 3.3.2
# Download the books XML document and parse it into an R-level tree.
fileURL <- "https://raw.githubusercontent.com/nobieyi00/CUNY_MSDA_R/master/books.xml"
xData <- getURL(fileURL)
doc3 <- xmlTreeParse(xData)
root <- xmlRoot(doc3, useInternalNodes = FALSE)

# Number of child nodes directly under the root.
n <- xmlSize(root)

# Convert the tree into nested R structures. The indexing below treats the
# result as a matrix whose second row holds the author entries.
list_root <- xmlToList(root)

# Everything except the author row, transposed so that each remaining field
# becomes a column.
book_xml_matrix <- t(list_root[-2, ])

# Collapse the author names of each entry into one comma-separated string.
author_vector <- vapply(
  seq_len(n),
  function(idx) paste(unlist(list_root[2, ][[idx]][1, ]), collapse = ", "),
  character(1)
)

# Assemble the final table. Columns are picked by position in
# `book_xml_matrix`, so this assumes the field order of the source document
# (column 6 is used as the book id) -- TODO confirm against books.xml.
book_xml_df <- cbind.data.frame(
  Book_ID = unlist(book_xml_matrix[, 6]),
  Title = unlist(book_xml_matrix[, 1]),
  Author = author_vector,
  Genre = unlist(book_xml_matrix[, 2]),
  Price = unlist(book_xml_matrix[, 3]),
  Page_Count = unlist(book_xml_matrix[, 4]),
  Language = unlist(book_xml_matrix[, 5])
)
book_xml_df
##   Book_ID                               Title                      Author
## 1       1                 Think and Grow Rich               Napoleon Hill
## 2       2             Uberleben fur Marsianer Alfred Bekker, Claude Faine
## 3       3                Nineteen Eighty-Four               George Orwell
## 4       4 Principles of mathematical analysis                Walter Rudin
##                  Genre Price Page_Count Language
## 1 Personal Development 44.95        279  English
## 2      Science Fiction  1.06        208   German
## 3      Science Fiction    10        336  English
## 4          Mathematics    40        306  English

JSON implementation

if("RJSONIO" %in% rownames(installed.packages()) == FALSE) {install.packages("RJSONIO")}
if("RCurl" %in% rownames(installed.packages()) == FALSE) {install.packages("RCurl")}


require(RCurl)
require(RJSONIO)
## Loading required package: RJSONIO
## Warning: package 'RJSONIO' was built under R version 3.3.2
# Download the books JSON document and parse it into nested R structures.
fileURL2 <- "https://raw.githubusercontent.com/nobieyi00/CUNY_MSDA_R/master/json_xml.json"
Json_Data4 <- getURL(fileURL2)
j_book <- fromJSON(Json_Data4)

# Number of top-level entries in the parsed document.
n <- length(j_book)

# Extract the field stored at `position` from every entry of `records`.
#
# records:  list of parsed entries (here: the books).
# position: index of the field inside each entry.
#
# Returns an unnamed character vector with one element per entry. A field
# holding several items (for example multiple authors) is joined with ", ".
# An empty `records` yields character(0); a `for (i in 1:n)` loop would
# instead iterate over c(1, 0) and fail on the missing first entry.
extract_json_field <- function(records, position) {
  vapply(
    records,
    function(record) paste(record[[position]], collapse = ", "),
    character(1),
    USE.NAMES = FALSE
  )
}

# Fields are read by position, so this assumes every entry lists them in the
# order title, author, genre, price, page count, language.
title_vector <- extract_json_field(j_book, 1)
author_vector <- extract_json_field(j_book, 2)
genre_vector <- extract_json_field(j_book, 3)
price_vector <- extract_json_field(j_book, 4)
page_count_vector <- extract_json_field(j_book, 5)
language_vector <- extract_json_field(j_book, 6)

# Combine the per-field vectors into the final table and display it.
book_json_df <- cbind.data.frame(
  Title = title_vector,
  Author = author_vector,
  Genre = genre_vector,
  Price = price_vector,
  Page_Count = page_count_vector,
  Language = language_vector
)
book_json_df
##                                 Title                      Author
## 1                 Think and Grow Rich               Napoleon Hill
## 2             Uberleben fur Marsianer Alfred Bekker, Claude Faine
## 3                Nineteen Eighty-Four               George Orwell
## 4 Principles of mathematical analysis                Walter Rudin
##                  Genre Price Page_Count Language
## 1 Personal Development 44.95        279  English
## 2      Science Fiction  1.06        208   German
## 3      Science Fiction    10        336  English
## 4          Mathematics    40        306  English

HTML table

if("XML" %in% rownames(installed.packages()) == FALSE) {install.packages("XML")}
if("RCurl" %in% rownames(installed.packages()) == FALSE) {install.packages("RCurl")}
require(XML)
require(RCurl)

# Fetch the HTML page as text, then let readHTMLTable() extract its tables.
fileURL <- "https://raw.githubusercontent.com/nobieyi00/CUNY_MSDA_R/master/html_books.html"
htmlData <- getURL(fileURL)
book_html <- readHTMLTable(htmlData)

# Keep only the first extracted table, as a data frame, and display it.
book_html_df <- data.frame(book_html[[1L]])
book_html_df
##                                 title                      author
## 1                 Think and Grow Rich               Napoleon Hill
## 2             Uberleben fur Marsianer Alfred Bekker, Claude Faine
## 3                Nineteen Eighty-Four               George Orwell
## 4 Principles of mathematical analysis                Walter Rudin
##                  Genre Price page_count language
## 1 Personal Development 44.95        279  English
## 2      Science Fiction  1.06        208   German
## 3      Science Fiction    10        336  English
## 4          Mathematics    40        306  English