library(RCurl)
library(kableExtra)
library(stringr)
library(plyr)
library(XML)
library(jsonlite)
library(rvest)
# Read the HTML file through the htmlpreview.github.io proxy.
# The scheme of the target address must appear only once; a doubled
# "https://https://" prefix does not name the GitHub blob to preview.
# NOTE(review): the output captured for this chunk is the preview form plus
# a <script src="/htmlpreview.js"> tag, which suggests the proxy renders the
# target client-side and this parse never sees the books table -- confirm.
html <- "http://htmlpreview.github.io/?https://github.com/ErindaB/Data607_Assignment/blob/master/books.html"
# Parse the page into a document tree with XML::htmlParse() and print it
htmlParse <- htmlParse(file = html)
htmlParse
## <!DOCTYPE html>
## <html>
## <head>
## <meta charset="utf-8">
## <title>GitHub & BitBucket HTML Preview</title>
## <style>
## body {
## font: 12px 'Helvetica Neue', Helvetica, Arial, freesans, clean, sans-serif;
## color: #333;
## }
## h1 {
## font-size: 20px;
## }
## a {
## color: #666;
## }
## #previewform {
## display: none;
## padding: 20px;
## text-align: center;
## }
## strong {
## color: #333;
## background-color: #FAFFA6;
## padding: 0.1em;
## }
## #footer {
## margin: 20px 0;
## font-size: 10px;
## color: #666;
## }
## </style>
## </head>
## <body>
## <form id="previewform" onsubmit="location.href='/?'+this.file.value;return false">
## <h1>GitHub & BitBucket HTML Preview</h1>
## <p>
## Enter URL of the HTML file to preview:
## <input type="url" id="file" value="" placeholder="e.g. https://github.com/user/repo/blob/master/index.html" size="60" autofocus><input type="submit" value="Preview"></p>
## <p>or prepend to the URL: <code><strong>http://htmlpreview.github.io/?</strong>https://github.com/twbs/bootstrap/blob/gh-pages/2.3.2/index.html</code></p>
## <p id="footer">Developed by <a href="https://github.com/niutech">niu tech</a> | Contribute on <a href="https://github.com/htmlpreview/htmlpreview.github.com">GitHub</a></p>
## </form>
## <script src="/htmlpreview.js"></script>
## </body>
## </html>
##
# Download the XML source as a character string.
# TLS peer/host verification is left at the libcurl defaults instead of being
# switched off, so the download cannot be silently intercepted.
# NOTE(review): on setups without a CA bundle RCurl may need `cainfo =` set
# explicitly -- confirm on the target machines.
xmlURL <- getURL("https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/Books.xml")
# Parse the downloaded text; asText = TRUE states outright that the argument
# holds XML content rather than a file name or URL.
xmlParse <- xmlParse(file = xmlURL[1], asText = TRUE)
xmlParse
## <?xml version="1.0" encoding="UTF-8"?>
## <Books_Table>
## <Book>
## <Book_Name>Python Data Science Handbook</Book_Name>
## <Publication_Year>2018</Publication_Year>
## <Book_Authors>Jake VanderPlas</Book_Authors>
## <Book_Comment>The book introduces the core libraries essential for working with data in Python</Book_Comment>
## <Book_Price>35</Book_Price>
## </Book>
## <Book>
## <Book_Name>Natural Language Processing with Python</Book_Name>
## <Publication_Year>2017</Publication_Year>
## <Book_Authors>Steven Bird, Ewan Klein, and Edward Loper</Book_Authors>
## <Book_Comment>This is a book about Natural Language Processing. By "natural language" we mean a language that is used for everyday communication by humans
## </Book_Comment>
## <Book_Price>41</Book_Price>
## </Book>
## <Book>
## <Book_Name>R for Data Science</Book_Name>
## <Publication_Year>2017</Publication_Year>
## <Book_Authors>Hadley Wickham, Garrett Grolemund</Book_Authors>
## <Book_Comment>This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it</Book_Comment>
## <Book_Price>49</Book_Price>
## </Book>
## </Books_Table>
##
# Location of the JSON version of the books table
json <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/Books.json"
# Fetch and parse the document with jsonlite, then print the parsed result
fromJSON <- jsonlite::fromJSON(txt = json)
fromJSON
## $Books_Table
## Book Name Publication Year
## 1 Python Data Science Handbook 2018
## 2 Natural Language Processing with Python 2017
## 3 R for Data Science 2017
## Book Authors
## 1 Jake VanderPlas
## 2 Steven Bird, Ewan Klein, and Edward Loper
## 3 O'Reilly
## Book Comments
## 1 The book introduces the core libraries essential for working with data in Python
## 2 This is a book about Natural Language Processing. By natural language we mean a language that is used for everyday communication by humans
## 3 This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it
## Book Price
## 1 35
## 2 41
## 3 49
# Raw GitHub address of the HTML version of the books table
htmlURL <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/books.html"
# Fetch and parse the page with rvest::read_html()
readHtml <- read_html(htmlURL)
# Select the <table> nodes and convert them with html_table()
sample <- readHtml %>% html_nodes("table")
sample1 <- sample %>% html_table(fill = TRUE)
# Coerce the html_table() result to a single data frame
BooksHtml <- as.data.frame(sample1)
# Show the first rows as a styled, scrollable kable table
BooksHtml %>%
  head() %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "400px")
| Book.Name | Publication.Year | Book.Authors | Book.Comments | Book.Price |
|---|---|---|---|---|
| Python Data Science Handbook | 2018 | Jake VanderPlas | The book introduces the core libraries essential for working with data in Python | 35 |
| Natural Language Processing with Python | 2017 | Steven Bird, Ewan Klein, and Edward Loper | This is a book about Natural Language Processing. By “natural language” we mean a language that is used for everyday communication by humans | 41 |
| R for Data Science | 2017 | O’Reilly | This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it | 49 |
# Take the top-level node of the parsed XML document
root <- xmlParse %>% xmlRoot()
# Turn the child records of the root node into a data frame
BooksXML <- root %>% xmlToDataFrame()
# Show the first rows as a styled, scrollable kable table
BooksXML %>%
  head() %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "300px")
| Book_Name | Publication_Year | Book_Authors | Book_Comment | Book_Price |
|---|---|---|---|---|
| Python Data Science Handbook | 2018 | Jake VanderPlas | The book introduces the core libraries essential for working with data in Python | 35 |
| Natural Language Processing with Python | 2017 | Steven Bird, Ewan Klein, and Edward Loper | This is a book about Natural Language Processing. By “natural language” we mean a language that is used for everyday communication by humans | 41 |
| R for Data Science | 2017 | Hadley Wickham, Garrett Grolemund | This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it | 49 |
# Raw GitHub address of the JSON version of the books table
jsonURL <- "https://raw.githubusercontent.com/ErindaB/Data607_Assignment/master/Books.json"
# Parse the JSON; simplifyVector = TRUE asks jsonlite to collapse the array of
# book records into a data frame instead of a nested list.
readjson <- read_json(jsonURL, simplifyVector = TRUE)
# Build the data frame from the parsed JSON itself. Re-running html_table() on
# the HTML node set here would only copy the HTML table under a JSON name.
Booksjson <- as.data.frame(readjson[["Books_Table"]])
# Show the first rows as a styled, scrollable kable table
head(Booksjson) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(width = "100%", height = "400px")
| Book.Name | Publication.Year | Book.Authors | Book.Comments | Book.Price |
|---|---|---|---|---|
| Python Data Science Handbook | 2018 | Jake VanderPlas | The book introduces the core libraries essential for working with data in Python | 35 |
| Natural Language Processing with Python | 2017 | Steven Bird, Ewan Klein, and Edward Loper | This is a book about Natural Language Processing. By “natural language” we mean a language that is used for everyday communication by humans | 41 |
| R for Data Science | 2017 | O’Reilly | This book will teach you how to do data science with R: You’ll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it | 49 |
The three data frames generated from the .html, .json, and .xml files are the same.