library(RCurl)
library(kableExtra)
library(stringr)
library(plyr)
library(XML)
library(jsonlite)
library(rvest)
# Read the HTML code.
# The raw file is addressed directly: the previous URL had a doubled
# "https://" scheme and went through htmlpreview.github.io, which only serves
# its own preview form to a non-browser client, so the books page was never
# actually parsed.
html <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/books.html"

# Parse the code using htmlParse.
# The markup is downloaded with RCurl::getURL() first and handed over as text
# (asText = TRUE), because the XML package's parser is not relied on to open
# https:// locations itself.
# The result keeps the name `htmlParse`; R still resolves `htmlParse(...)`
# calls to the XML function because it skips non-function bindings there.
htmlParse <- XML::htmlParse(getURL(html), asText = TRUE)
htmlParse
## <!DOCTYPE html>
## <html>
## <head>
## <meta charset="utf-8">
## <title>GitHub &amp; BitBucket HTML Preview</title>
## <style>
##  body {
##      font: 12px 'Helvetica Neue', Helvetica, Arial, freesans, clean, sans-serif;
##      color: #333;
##  }
##  h1 {
##      font-size: 20px;
##  }
##  a {
##      color: #666;
##  }
##  #previewform {
##      display: none;
##      padding: 20px; 
##      text-align: center;
##  }
##  strong {
##      color: #333;
##      background-color: #FAFFA6;
##      padding: 0.1em;
##  }
##  #footer {
##      margin: 20px 0;
##      font-size: 10px;
##      color: #666;
##  }
##  </style>
## </head>
## <body>
##  <form id="previewform" onsubmit="location.href='/?'+this.file.value;return false">
##      <h1>GitHub &amp; BitBucket HTML Preview</h1>
##      <p>
##          Enter URL of the HTML file to preview:
##          <input type="url" id="file" value="" placeholder="e.g. https://github.com/user/repo/blob/master/index.html" size="60" autofocus><input type="submit" value="Preview"></p>
##      <p>or prepend to the URL: <code><strong>http://htmlpreview.github.io/?</strong>https://github.com/twbs/bootstrap/blob/gh-pages/2.3.2/index.html</code></p>
##      <p id="footer">Developed by <a href="https://github.com/niutech">niu tech</a> | Contribute on <a href="https://github.com/htmlpreview/htmlpreview.github.com">GitHub</a></p>
##  </form>
##  <script src="/htmlpreview.js"></script>
## </body>
## </html>
## 
# Read the XML code.
# NOTE(review): ssl.verifyhost / ssl.verifypeer = FALSE switch off TLS
# certificate checks for this download. Kept unchanged so the fetch behaves as
# before, but confirm whether it is still required on the target machines.
xmlURL <- getURL(
  "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/Books.xml",
  ssl.verifyhost = FALSE,
  ssl.verifypeer = FALSE
)

# Parse the code using xmlParse.
# xmlURL holds the document text itself, not a path, so this is stated
# explicitly with asText = TRUE instead of leaving xmlParse() to guess from
# the content of its `file` argument.
# The parsed document keeps the name `xmlParse` because later code reads it.
xmlParse <- XML::xmlParse(xmlURL[1], asText = TRUE)
xmlParse
## <?xml version="1.0" encoding="UTF-8"?>
## <Books_Table>
##   <Book>
##     <Book_Name>Python Data Science Handbook</Book_Name>
##     <Publication_Year>2018</Publication_Year>
##     <Book_Authors>Jake VanderPlas</Book_Authors>
##     <Book_Comment>The book introduces the core libraries essential for working with data in Python</Book_Comment>
##     <Book_Price>35</Book_Price>
##   </Book>
##   <Book>
##     <Book_Name>Natural Language Processing with Python</Book_Name>
##     <Publication_Year>2017</Publication_Year>
##     <Book_Authors>Steven Bird, Ewan Klein, and Edward Loper</Book_Authors>
##     <Book_Comment>This is a book about Natural Language Processing. By "natural language" we mean a language that is used for everyday communication by humans
## </Book_Comment>
##     <Book_Price>41</Book_Price>
##   </Book>
##   <Book>
##     <Book_Name>R for Data Science</Book_Name>
##     <Publication_Year>2017</Publication_Year>
##     <Book_Authors>Hadley Wickham, Garrett Grolemund</Book_Authors>
##     <Book_Comment>This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it</Book_Comment>
##     <Book_Price>49</Book_Price>
##   </Book>
## </Books_Table>
## 
# Location of the JSON version of the books table.
json <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/Books.json"

# Download and parse the document with jsonlite. The parsed result is stored
# under the name `fromJSON` and then displayed.
fromJSON <- jsonlite::fromJSON(txt = json)
fromJSON
## $Books_Table
##                                 Book Name Publication Year
## 1            Python Data Science Handbook             2018
## 2 Natural Language Processing with Python             2017
## 3                      R for Data Science             2017
##                                Book Authors
## 1                           Jake VanderPlas
## 2 Steven Bird, Ewan Klein, and Edward Loper
## 3                                  O'Reilly
##                                                                                                                                                                      Book Comments
## 1                                                                                                 The book introduces the core libraries essential for working with data in Python
## 2                                       This is a book about Natural Language Processing. By natural language we mean a language that is used for everyday communication by humans
## 3 This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it
##   Book Price
## 1         35
## 2         41
## 3         49
# Raw location of the HTML version of the books table.
htmlURL <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/books.html"

# Read the page with rvest::read_html(), pick out the <table> nodes, and
# convert them into a data frame.
readHtml <- read_html(htmlURL)
sample <- readHtml %>% html_nodes("table")
sample1 <- sample %>% html_table(fill = TRUE)
BooksHtml <- as.data.frame(sample1)

# Render the first rows as a styled, scrollable table.
head(BooksHtml) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "400px")
Book.Name Publication.Year Book.Authors Book.Comments Book.Price
Python Data Science Handbook 2018 Jake VanderPlas The book introduces the core libraries essential for working with data in Python 35
Natural Language Processing with Python 2017 Steven Bird, Ewan Klein, and Edward Loper This is a book about Natural Language Processing. By “natural language” we mean a language that is used for everyday communication by humans 41
R for Data Science 2017 O’Reilly This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it 49
# Take the top-level node of the previously parsed XML document.
root <- xmlRoot(x = xmlParse)

# Convert the child records of the root node into a data frame.
BooksXML <- xmlToDataFrame(doc = root)

# Render the first rows as a styled, scrollable table.
head(BooksXML) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "300px")
Book_Name Publication_Year Book_Authors Book_Comment Book_Price
Python Data Science Handbook 2018 Jake VanderPlas The book introduces the core libraries essential for working with data in Python 35
Natural Language Processing with Python 2017 Steven Bird, Ewan Klein, and Edward Loper This is a book about Natural Language Processing. By “natural language” we mean a language that is used for everyday communication by humans 41
R for Data Science 2017 Hadley Wickham, Garrett Grolemund This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it 49
# Raw location of the JSON version of the books table.
jsonURL <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/Books.json"

# Read the JSON document. simplifyVector = TRUE makes jsonlite turn the array
# of book records into a data frame rather than a nested list.
readjson <- read_json(jsonURL, simplifyVector = TRUE)

# Build the data frame from the parsed JSON itself. The records sit under the
# top-level "Books_Table" element. Previously this step re-used the HTML table
# nodes, so the "JSON" data frame was really a copy of the HTML one.
Booksjson <- as.data.frame(readjson[["Books_Table"]])

# Render the first rows as a styled, scrollable table.
head(Booksjson) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "400px")
Book.Name Publication.Year Book.Authors Book.Comments Book.Price
Python Data Science Handbook 2018 Jake VanderPlas The book introduces the core libraries essential for working with data in Python 35
Natural Language Processing with Python 2017 Steven Bird, Ewan Klein, and Edward Loper This is a book about Natural Language Processing. By “natural language” we mean a language that is used for everyday communication by humans 41
R for Data Science 2017 O’Reilly This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it 49

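The three data frames generated from the .html, .json and .xml files contain the same three books and are nearly identical; they differ only in how the column names are formatted and in the authors listed for "R for Data Science" in the XML file.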