**** Working with HTML file

  1. Load the R packages
library(XML)
## Warning: package 'XML' was built under R version 3.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RJSONIO)
## Warning: package 'RJSONIO' was built under R version 3.2.3
library(stringr)
  1. Load the HTML file and then create an R data frame without the title row
# Parse the HTML document and collect the text of every table cell (<td>),
# in document order.
book_html <- htmlParse(file = "book.html")
book_details <- xpathSApply(book_html, "/html/body/table/tr/td", xmlValue)
str(book_details)
##  chr [1:15] " Blue Ocean Strategy" " W. Chan Kim ,Renee Mauborgne" ...
# The 15 cell values are 5 fields for each of 3 books. Filling a 5 x 3 matrix
# column-wise puts one book per column; transposing gives one book per row.
# Note that t() returns a character matrix (row names V1..V3), not a data frame.
df_book_details <- t(as.data.frame(matrix(book_details, nrow = 5, ncol = 3)))
colnames(df_book_details) <- c(
  "Title", "Authors", "Release Year", "Genre", "Rating"
)

df_book_details
##    Title                                         
## V1 " Blue Ocean Strategy"                        
## V2 " The seven habits of highly effective people"
## V3 " Who Says Elephants Can't Dance?"            
##    Authors                         Release Year Genre           Rating
## V1 " W. Chan Kim ,Renee Mauborgne" " 2005"      " Business"     " 4.2"
## V2 " Stephen Covey"                " 1989"      " Non Friction" " 4.5"
## V3 " Louis V., Jr. Gerstner"       " 2003"      " Business"     " 4"
  1. Load the HTML file and then create an R data frame, taking the column titles from the table's header row - Option 2
# Option 2: read whole table rows (<tr>) so the header row is included, then
# use that header row as the column names.
book_html1 <- htmlParse(file = "book.html")
book_details1 <- xpathSApply(book_html1, "/html/body/table/tr", xmlValue)
# Each row's text holds its cells separated by line breaks. Accept both CRLF
# and LF so the split does not depend on how the file was saved; with an
# LF-only file the pattern "\r\n" would match nothing and matrix() would
# silently recycle the unsplit rows.
row_cells1 <- strsplit(book_details1, "\r?\n")
# Every row must yield the same number of cells, otherwise the values would be
# misaligned when laid out as a matrix.
stopifnot(length(unique(vapply(row_cells1, length, integer(1)))) == 1)
# One matrix row per table row; the dimensions follow the table instead of
# being hard-coded.
df_book_details1 <- matrix(
  unlist(row_cells1),
  nrow = length(row_cells1),
  byrow = TRUE
)
colnames(df_book_details1) <- df_book_details1[1, ]
# drop = FALSE keeps a matrix even when the table has a single data row.
df_book_details1 <- df_book_details1[-1, , drop = FALSE]
df_book_details1
##       Title                                        
## [1,] " Blue Ocean Strategy"                        
## [2,] " The seven habits of highly effective people"
## [3,] " Who Says Elephants Can't Dance?"            
##       Author                          Release   Genre           Rating
## [1,] " W. Chan Kim ,Renee Mauborgne" " 2005"   " Business"     " 4.2" 
## [2,] " Stephen Covey"                " 1989"   " Non Friction" " 4.5" 
## [3,] " Louis V., Jr. Gerstner"       " 2003"   " Business"     " 4"
# NOTE(review): these statements repeat the Option 1 steps on `book_details`
# and rebuild `df_book_details` with the same values -- confirm whether
# inspecting the Option 2 objects was intended here.
str(book_details)
##  chr [1:15] " Blue Ocean Strategy" " W. Chan Kim ,Renee Mauborgne" ...
# Lay the 15 cell values out as 5 fields x 3 books, then transpose so each
# book becomes a row of the resulting character matrix.
df_book_details <- t(
  as.data.frame(
    matrix(book_details, nrow = 5, ncol = 3)
  )
)
colnames(df_book_details) <- c("Title", "Authors", "Release Year", "Genre", "Rating")
df_book_details
##    Title                                         
## V1 " Blue Ocean Strategy"                        
## V2 " The seven habits of highly effective people"
## V3 " Who Says Elephants Can't Dance?"            
##    Authors                         Release Year Genre           Rating
## V1 " W. Chan Kim ,Renee Mauborgne" " 2005"      " Business"     " 4.2"
## V2 " Stephen Covey"                " 1989"      " Non Friction" " 4.5"
## V3 " Louis V., Jr. Gerstner"       " 2003"      " Business"     " 4"

**** Working with XML file

  1. Loading XML file into a data frame
# Parse the XML document and build a data frame from the <book> elements.
# "//book" is the standard XPath form; the previous "///book" is not valid
# XPath 1.0 and only worked because libxml2 tolerates the extra slash.
book_xml <- xmlParse(file = "book.xml")
df_book_xml <- xmlToDataFrame(getNodeSet(book_xml, "//book"))
# The ID values are stored as XML attributes, so they are collected separately
# and appended as a column. xmlGetAttr() picks the ID attribute by name, so any
# other attribute on the same element cannot leak into the column.
# NOTE(review): this selects every element carrying an ID attribute and relies
# on them matching the <book> rows one-to-one, in order -- confirm against
# book.xml.
timeStamps <- unname(xpathSApply(book_xml, "//*[@ID]", xmlGetAttr, "ID"))
df_book_xml$ID <- timeStamps
df_book_xml
##                                         Title                      Author
## 1                       Blue Ocean Strategy   W. Chan Kim,Renee Mauborgne
## 2 The seven habits of highly effective people               Stephen Covey
## 3             Who Says Elephants Can't Dance?      Louis V., Jr. Gerstner
##   Release        Genre Rating ID
## 1    2005     Business    4.2  1
## 2    1989 Non-friction    4.5  2
## 3    2003     Business      4  3

**** Working with JSON file

  1. Loading JSON file into a data frame
# Check that the file is well-formed JSON before loading it.
isValidJSON("book.json")
## [1] TRUE
# Read the JSON as nested lists, then turn each top-level entry into a
# one-row data frame and stack the results.
fav_book <- fromJSON(content = "book.json", simplify = FALSE)
fav_book_tb <- do.call(
  "rbind",
  lapply(fav_book, data.frame, stringsAsFactors = FALSE)
)
# The wide table holds three books side by side, five columns each. Slice out
# each book, give the later slices the first slice's column names, and stack
# them so that each book becomes one row.
t1 <- select(fav_book_tb, 1:5)
t2 <- setNames(select(fav_book_tb, 6:10), colnames(t1))
t3 <- setNames(select(fav_book_tb, 11:15), colnames(t1))
fav_book_tb_t <- rbind(t1, t2, t3)
fav_book_tb_t
##                                                       Title
##  Favorite Book                          Blue Ocean Strategy
##  Favorite Book1 The seven habits of highly effective people
##  Favorite Book2             Who Says Elephants Can't Dance?
##                                       Author Release        Genre Rating
##  Favorite Book  W. Chan Kim ,Renée Mauborgne    2005     Business    4.2
##  Favorite Book1                Stephen Covey    1989 Non-friction    4.5
##  Favorite Book2       Louis V., Jr. Gerstner    2003     Business    4.0