Parse XML file

# Fetch the XML document contents into a string, then parse that string
# into a document object that the XPath helpers further down can query.
xmlFileUrl <- "https://raw.githubusercontent.com/binishkurian/DATA-607/master/week-05/movie.xml"
xData <- getURL(xmlFileUrl)
# asText = TRUE: xData holds the document text itself, not a file name or
# URL, so say so explicitly rather than letting the parser guess from the
# string's contents. The enclosing parentheses display the parsed document.
(doc <- xmlParse(xData, asText = TRUE, useInternalNodes = TRUE))
## <?xml version="1.0"?>
## <books>
##   <book>
##     <title>R for Data Science: Import, Tidy, Transform, Visualize, and Model Data</title>
##     <year>2017</year>
##     <price>18.17</price>
##     <isbn>1491910399</isbn>
##     <authors>
##       <author>Hadley Wickham</author>
##       <author>Garrett Grolemund</author>
##     </authors>
##   </book>
##   <book>
##     <title>An Introduction to Statistical Learning: with Applications in R</title>
##     <year>2013</year>
##     <price>50.74</price>
##     <isbn>1461471370</isbn>
##     <authors>
##       <author>Gareth James</author>
##       <author>Daniel Witten</author>
##       <author>Trevor Hastie</author>
##       <author>Robert Tibshirani</author>
##     </authors>
##   </book>
##   <book>
##     <title>Advanced R</title>
##     <year>2014</year>
##     <price>43.78</price>
##     <isbn>1466586966</isbn>
##     <authors>
##       <author>Hadley Wickham</author>
##     </authors>
##   </book>
## </books>
## 
# Flatten the parsed XML into one data-frame row per (book, author) pair.
# The callback runs once for every <book> node and returns a small data
# frame with one row per author; the per-book frames are then stacked
# with rbind().
do.call(rbind, xpathApply(doc, "/books/book", function(node) {
  title <- xmlValue(node[["title"]])
  year <- xmlValue(node[["year"]])
  price <- as.numeric(xmlValue(node[["price"]]))
  # An ISBN is an identifier, not a quantity: keep it as text so leading
  # zeros and a trailing "X" check digit are not lost to numeric coercion.
  isbn <- xmlValue(node[["isbn"]])

  author <- xpathSApply(node, "./authors/author", xmlValue)
  # A book without <author> children yields a zero-length result that
  # cannot be lined up with the length-one fields above; keep the book as
  # a single row with a missing author instead of failing.
  if (length(author) == 0L) {
    author <- NA_character_
  }

  # The scalar fields are recycled to the number of authors.
  data.frame(title, author, year, price, isbn, stringsAsFactors = FALSE)
}))
##                                                                    title
## 1 R for Data Science: Import, Tidy, Transform, Visualize, and Model Data
## 2 R for Data Science: Import, Tidy, Transform, Visualize, and Model Data
## 3        An Introduction to Statistical Learning: with Applications in R
## 4        An Introduction to Statistical Learning: with Applications in R
## 5        An Introduction to Statistical Learning: with Applications in R
## 6        An Introduction to Statistical Learning: with Applications in R
## 7                                                             Advanced R
##              author year price       isbn
## 1    Hadley Wickham 2017 18.17 1491910399
## 2 Garrett Grolemund 2017 18.17 1491910399
## 3      Gareth James 2013 50.74 1461471370
## 4     Daniel Witten 2013 50.74 1461471370
## 5     Trevor Hastie 2013 50.74 1461471370
## 6 Robert Tibshirani 2013 50.74 1461471370
## 7    Hadley Wickham 2014 43.78 1466586966

Parse HTML file

# Fetch the HTML page contents into a string, then parse that string into
# a document object that the XPath helpers further down can query.
htmlFileUrl <- "https://raw.githubusercontent.com/binishkurian/DATA-607/master/week-05/movie.html"
xData <- getURL(htmlFileUrl)

# asText = TRUE: xData holds the markup itself, not a file name or URL, so
# say so explicitly rather than letting the parser guess from the string's
# contents. The enclosing parentheses display the parsed document.
(html_doc <- htmlParse(xData, asText = TRUE, useInternalNodes = TRUE))
## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
## <html><body>
##      <div>
##          <h1>R for Data Science: Import, Tidy, Transform, Visualize, and Model Data</h1>
##         <b>2017</b>  
##         <strong>18.17</strong>
##         <i>1491910399</i>
##         <p>
##        <b>Hadley Wickham</b>
##        <b>Garrett Grolemund</b>
##         </p>         
##      </div>  
##      <div>
##          <h1>An Introduction to Statistical Learning: with Applications in R</h1>
##         <b>2013</b>  
##         <strong>50.74</strong>
##         <i>1461471370</i>
##         <p>
##        <b>Gareth James</b>
##        <b>Daniel Witten</b>
##        <b>Trevor Hastie</b>
##        <b>Robert Tibshirani</b>
##         </p>         
##      </div>  
##      <div>
##          <h1>Advanced R</h1>
##         <b>2014</b>  
##         <strong>43.78</strong>
##         <i>1466586966</i>
##         <p>
##        <b>Hadley Wickham</b>
##         </p>         
##      </div>  
##  </body></html>
## 
# Flatten the parsed HTML into one data-frame row per (book, author) pair.
# The callback runs once for every <div> under <body> and returns a small
# data frame with one row per author; the per-book frames are then
# stacked with rbind().
do.call(rbind, xpathApply(html_doc, "/html/body/div", function(node) {
  title <- xmlValue(node[["h1"]])
  # node[["b"]] selects the <b> that is a direct child of the <div> (the
  # year); the author <b> elements are nested inside <p>.
  year <- xmlValue(node[["b"]])
  price <- as.numeric(xmlValue(node[["strong"]]))
  # An ISBN is an identifier, not a quantity: keep it as text so leading
  # zeros and a trailing "X" check digit are not lost to numeric coercion.
  isbn <- xmlValue(node[["i"]])

  author <- xpathSApply(node, "./p/b", xmlValue)
  # A <div> without any <p>/<b> author entries yields a zero-length result
  # that cannot be lined up with the length-one fields above; keep the
  # book as a single row with a missing author instead of failing.
  if (length(author) == 0L) {
    author <- NA_character_
  }

  # The scalar fields are recycled to the number of authors.
  data.frame(title, year, price, isbn, author, stringsAsFactors = FALSE)
}))
##                                                                    title
## 1 R for Data Science: Import, Tidy, Transform, Visualize, and Model Data
## 2 R for Data Science: Import, Tidy, Transform, Visualize, and Model Data
## 3        An Introduction to Statistical Learning: with Applications in R
## 4        An Introduction to Statistical Learning: with Applications in R
## 5        An Introduction to Statistical Learning: with Applications in R
## 6        An Introduction to Statistical Learning: with Applications in R
## 7                                                             Advanced R
##   year price       isbn            author
## 1 2017 18.17 1491910399    Hadley Wickham
## 2 2017 18.17 1491910399 Garrett Grolemund
## 3 2013 50.74 1461471370      Gareth James
## 4 2013 50.74 1461471370     Daniel Witten
## 5 2013 50.74 1461471370     Trevor Hastie
## 6 2013 50.74 1461471370 Robert Tibshirani
## 7 2014 43.78 1466586966    Hadley Wickham

Parse JSON file

# Retrieve the JSON payload as a string and decode it.
jsonFileUrl <- "https://raw.githubusercontent.com/binishkurian/DATA-607/master/week-05/movie.json"
xData <- getURL(jsonFileUrl)
# Note: this rebinds `doc` to the decoded JSON result.
doc <- fromJSON(xData)
# Display the decoded result.
doc
##                                                                    title
## 1 R for Data Science: Import, Tidy, Transform, Visualize, and Model Data
## 2        An Introduction to Statistical Learning: with Applications in R
## 3                                                             Advanced R
##   year price       isbn
## 1 2017 18.17 1491910399
## 2 2013 50.74 1461471370
## 3 2014 43.78 1466586966
##                                                         authors
## 1                             Hadley Wickham, Garrett Grolemund
## 2 Gareth James, Daniel Witten, Trevor Hastie, Robert Tibshirani
## 3                                                Hadley Wickham