#Location
These are the files used in this assignment.
#Loading libraries
if (!require('XML')) install.packages('XML')
## Loading required package: XML
if (!require('jsonlite')) install.packages('jsonlite')
## Loading required package: jsonlite
library(XML)
library(jsonlite)
#1:HTML
#Reading HTML file
# Download the HTML version of the bookshelf from GitHub as raw text lines.
con <- "https://raw.githubusercontent.com/bsvmelo/CUNY/master/Sci-Fi_bookshelf1.html"
bookshelf_raw <- readLines(con = con, warn = FALSE)
# Preview the first few downloaded lines.
head(x = bookshelf_raw)
## [1] "<h1>Sci-fi Bookshelf</h1>"
## [2] "<p>This is a sample of my sci-fi book collection.</p>"
## [3] "<p> </p>"
## [4] "<table>"
## [5] "<thead>"
## [6] "<tr>"
#Parsing HTML
# Turn the downloaded lines into a parsed HTML document, then show it.
parsed_bookshelf <- htmlParse(bookshelf_raw)
print(x = parsed_bookshelf)
## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
## <html><body>
## <h1>Sci-fi Bookshelf</h1>
## <p>This is a sample of my sci-fi book collection.</p>
## <p> </p>
## <table>
## <thead><tr>
## <td>Title</td>
## <td>Author</td>
## <td>Publisher</td>
## <td>Publication Date</td>
## <td>Pages</td>
## <td>ISBN</td>
## </tr></thead>
## <tbody>
## <tr>
## <td>Aurora</td>
## <td>Kim Stanley Robinson</td>
## <td>Orbit</td>
## <td>July 2015</td>
## <td>512</td>
## <td>0316526991</td>
## </tr>
## <tr>
## <td>Use of Weapons</td>
## <td>Iain M. Banks</td>
## <td>MacDonald & Co</td>
## <td>1990</td>
## <td>510</td>
## <td>0316030570</td>
## </tr>
## <tr>
## <td>Good Omens</td>
## <td>Terry Pratchett & Neil Gaiman</td>
## <td>William Morrow Paperbacks</td>
## <td>March 2019</td>
## <td>400</td>
## <td>0062697250</td>
## </tr>
## </tbody>
## </table>
## </body></html>
##
#Converting to R data.frame
# readHTMLTable() hands back a list; its first element is the bookshelf table.
# stringsAsFactors = FALSE keeps the text columns as character vectors.
df_bookshelf <- readHTMLTable(
  parsed_bookshelf,
  stringsAsFactors = FALSE
)
df_bookshelf[[1]]
## Title Author Publisher
## 1 Aurora Kim Stanley Robinson Orbit
## 2 Use of Weapons Iain M. Banks MacDonald & Co
## 3 Good Omens Terry Pratchett & Neil Gaiman William Morrow Paperbacks
## Publication Date Pages ISBN
## 1 July 2015 512 0316526991
## 2 1990 510 0316030570
## 3 March 2019 400 0062697250
#Displaying
# Column names of the table extracted from the HTML document.
colnames(df_bookshelf[[1]])
## [1] "Title" "Author" "Publisher" "Publication Date"
## [5] "Pages" "ISBN"
# Book titles only.
df_bookshelf[[1]][["Title"]]
## [1] "Aurora" "Use of Weapons" "Good Omens"
#2:XML
#Reading XML file
# Reading XML file from github
# Download the XML version of the bookshelf from GitHub as raw text lines.
con <- "https://raw.githubusercontent.com/bsvmelo/CUNY/master/Sci-Fi_bookshelf1.xml"
bookshelf_raw_xml <- readLines(con = con, warn = FALSE)
# Preview the first few downloaded lines.
head(x = bookshelf_raw_xml)
## [1] "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
## [2] "<Sci-Fi_bookshelf>"
## [3] " <book>"
## [4] " <Title>Aurora</Title>"
## [5] " <Author>Kim Stanley Robinson</Author>"
## [6] " <Publisher>Orbit</Publisher>"
#Parsing XML
# Turn the downloaded lines into a parsed XML document, then show it.
parsed_bookshelf_xml <- xmlParse(bookshelf_raw_xml)
print(x = parsed_bookshelf_xml)
## <?xml version="1.0" encoding="UTF-8"?>
## <Sci-Fi_bookshelf>
## <book>
## <Title>Aurora</Title>
## <Author>Kim Stanley Robinson</Author>
## <Publisher>Orbit</Publisher>
## <Publication-Date>July 2015</Publication-Date>
## <Pages>512</Pages>
## <ISBN>0316526991</ISBN>
## </book>
## <book>
## <Title>Use of Weapons</Title>
## <Author>Iain M. Banks</Author>
## <Publisher>MacDonald & Co</Publisher>
## <Publication-Date>1990</Publication-Date>
## <Pages>510</Pages>
## <ISBN>0316030570</ISBN>
## </book>
## <book>
## <Title>Good Omens</Title>
## <Author>Terry Pratchett & Neil Gaiman</Author>
## <Publisher>William Morrow Paperbacks</Publisher>
## <Publication-Date>March 2019</Publication-Date>
## <Pages>400</Pages>
## <ISBN>0062697250</ISBN>
## </book>
## </Sci-Fi_bookshelf>
##
#Converting to R data.frame
# Convert the parsed XML document into a data frame with one row per <book>.
# stringsAsFactors = FALSE keeps the text columns as character vectors, the
# same as the HTML and JSON data frames, instead of turning them into factors.
df_bookshelf_xml <- xmlToDataFrame(
  parsed_bookshelf_xml,
  stringsAsFactors = FALSE
)
df_bookshelf_xml
## Title Author Publisher
## 1 Aurora Kim Stanley Robinson Orbit
## 2 Use of Weapons Iain M. Banks MacDonald & Co
## 3 Good Omens Terry Pratchett & Neil Gaiman William Morrow Paperbacks
## Publication-Date Pages ISBN
## 1 July 2015 512 0316526991
## 2 1990 510 0316030570
## 3 March 2019 400 0062697250
#Displaying
df_bookshelf_xml$Title
## [1] "Aurora" "Use of Weapons" "Good Omens"
# The hyphen makes this column name non-syntactic, so it needs backticks.
df_bookshelf_xml$`Publication-Date`
## [1] "July 2015" "1990" "March 2019"
df_bookshelf_xml$Author
## [1] "Kim Stanley Robinson" "Iain M. Banks"
## [3] "Terry Pratchett & Neil Gaiman"
#3:JSON
#Reading JSON
# Reading JSON file from github
# Download the JSON version of the bookshelf from GitHub as raw text lines.
con <- "https://raw.githubusercontent.com/bsvmelo/CUNY/master/sci-fi_bookshelf2.json"
bookshelf_raw_js <- readLines(con = con, warn = FALSE)
# Preview the first few downloaded lines.
head(x = bookshelf_raw_js)
## [1] "{"
## [2] " \"Sci-Fi_bookshelf\":"
## [3] " ["
## [4] " {"
## [5] " \"Title\": \"Aurora\","
## [6] " \"Author\": \"Kim Stanley Robinson\","
#Converting to R data.frame
# Parse the JSON text; the result is a list whose `Sci-Fi_bookshelf` element
# holds the table of books.
bookshelf_js <- fromJSON(bookshelf_raw_js)
bookshelf_js
## $`Sci-Fi_bookshelf`
## Title Author Publisher
## 1 Aurora Kim Stanley Robinson Orbit
## 2 Use of Weapons Iain M. Banks MacDonald & Co
## 3 Good Omens Terry Pratchett & Neil Gaiman William Morrow Paperbacks
## Publication-Date Pages ISBN
## 1 July 2015 512 0316526991
## 2 1990 510 0316030570
## 3 March 2019 400 0062697250
#Displaying
# Book titles from the JSON-derived table.
bookshelf_js[["Sci-Fi_bookshelf"]][["Title"]]
## [1] "Aurora" "Use of Weapons" "Good Omens"
# Publishers from the JSON-derived table.
bookshelf_js[["Sci-Fi_bookshelf"]][["Publisher"]]
## [1] "Orbit" "MacDonald & Co"
## [3] "William Morrow Paperbacks"
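All three formats produce data frames containing the same three book records, although the date column is named `Publication Date` in the HTML version and `Publication-Date` in the XML and JSON versions.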