library(XML)
library(RCurl)
## Loading required package: bitops
library(httr)
library(RJSONIO)
library(plyr)
library(stringr)
# Download the XML version of the book list and parse it into an internal
# XML document.
url <- "https://raw.githubusercontent.com/DanielBrooks39/IS607/master/Week%208/books.xml"
xml.response <- GET(url)
# Fail fast with a clear HTTP error instead of handing an error page to the
# XML parser.
stop_for_status(xml.response)
# The response body is raw bytes; convert it to a single string and tell the
# parser explicitly that it is XML text rather than a file name or URL.
doc.xml <- xmlParse(rawToChar(xml.response$content), asText = TRUE)
doc.xml
## <?xml version="1.0" encoding="ISO-8859-1"?>
## <Textbooks>
##   <book>
##     <id>1</id>
##     <title>Differential Equations with Boundary Value Problems</title>
##     <author_one>John Porking</author_one>
##     <author_two>Al Boggess</author_two>
##     <author_three>David Arnold</author_three>
##     <ISBN>0131862367</ISBN>
##     <publisher>Prentice Hall</publisher>
##     <year>2006</year>
##     <book_type>Hardback</book_type>
##     <used_price>$150.60</used_price>
##     <new_price>$158.40</new_price>
##     <website><![CDATA[Textbooks.com]]></website>
##   </book>
##   <book>
##     <id>2</id>
##     <title>Fourier Series</title>
##     <author_one>Rajendra Bhatia</author_one>
##     <ISBN>0883857405</ISBN>
##     <publisher>Mathematical Association of America</publisher>
##     <year>2004</year>
##     <book_type>Hardback</book_type>
##     <used_price>$12.21</used_price>
##     <new_price>$35.00</new_price>
##     <website><![CDATA[Amazon.com]]></website>
##   </book>
##   <book>
##     <id>3</id>
##     <title>Linear Algebra</title>
##     <author_one>Stephen H. Freidberg</author_one>
##     <author_two>Arnold J. Insel</author_two>
##     <author_three>Lawrence E. Spence</author_three>
##     <ISBN>0130084514</ISBN>
##     <publisher>Prentice Hall</publisher>
##     <year>2003</year>
##     <book_type>Hardback</book_type>
##     <used_price>$115.21</used_price>
##     <new_price>$163.48</new_price>
##     <website><![CDATA[Textbooks.com]]></website>
##   </book>
## </Textbooks>
## 
# Convert the parsed XML document into a data frame with one row per <book>
# element; a book that lacks an author element gets NA in that column.
root <- xmlRoot(doc.xml)
xml.df <- xmlToDataFrame(root)
# Replace the XML tag names with readable headers. The first column is
# labelled "Rank" (not "Ranks") so the header matches the data frames built
# from the JSON and HTML versions of the same data.
names(xml.df) <- c("Rank", "Title", "Author One", "Author Two", "Author Three", "ISBN", "Publisher", "Year", "Book Type", "Used Price", "New Price", "Website")
xml.df
##   Rank                                               Title
## 1    1 Differential Equations with Boundary Value Problems
## 2    2                                      Fourier Series
## 3    3                                      Linear Algebra
##             Author One      Author Two       Author Three       ISBN
## 1         John Porking      Al Boggess       David Arnold 0131862367
## 2      Rajendra Bhatia            <NA>               <NA> 0883857405
## 3 Stephen H. Freidberg Arnold J. Insel Lawrence E. Spence 0130084514
##                             Publisher Year Book Type Used Price New Price
## 1                       Prentice Hall 2006  Hardback    $150.60   $158.40
## 2 Mathematical Association of America 2004  Hardback     $12.21    $35.00
## 3                       Prentice Hall 2003  Hardback    $115.21   $163.48
##         Website
## 1 Textbooks.com
## 2    Amazon.com
## 3 Textbooks.com
# Point `url` at the JSON version of the book list, let RJSONIO read and parse
# it straight from that location, then echo the resulting nested list.
url <- "https://raw.githubusercontent.com/DanielBrooks39/IS607/master/Week%208/books.json"
doc.json <- RJSONIO::fromJSON(content = url)
doc.json
## $textbooks
## $textbooks[[1]]
## $textbooks[[1]]$rank
## [1] 1
## 
## $textbooks[[1]]$title
## [1] "Differential Equations with Boundary Value Problems"
## 
## $textbooks[[1]]$authors
##            one            two          three 
## "John Porking"   "Al Boggess" "David Arnold" 
## 
## $textbooks[[1]]$ISBN
## [1] "0131862367"
## 
## $textbooks[[1]]$publisher
## [1] "Prentice Hall"
## 
## $textbooks[[1]]$Year
## [1] 2006
## 
## $textbooks[[1]]$Book_Type
## [1] "Harback"
## 
## $textbooks[[1]]$Used_Price
## [1] 105.6
## 
## $textbooks[[1]]$New_Price
## [1] 158.4
## 
## $textbooks[[1]]$website
## [1] "Textbooks.com"
## 
## 
## $textbooks[[2]]
## $textbooks[[2]]$rank
## [1] 2
## 
## $textbooks[[2]]$title
## [1] "Fourier Series"
## 
## $textbooks[[2]]$authors
##               one 
## "Rajendra Bhatia" 
## 
## $textbooks[[2]]$ISBN
## [1] "0883857405"
## 
## $textbooks[[2]]$publisher
## [1] "Mathematical Association of America"
## 
## $textbooks[[2]]$Year
## [1] 2004
## 
## $textbooks[[2]]$Book_Type
## [1] "Harback"
## 
## $textbooks[[2]]$Used_Price
## [1] 12.21
## 
## $textbooks[[2]]$New_Price
## [1] 35
## 
## $textbooks[[2]]$website
## [1] "Amazon.com"
## 
## 
## $textbooks[[3]]
## $textbooks[[3]]$rank
## [1] 3
## 
## $textbooks[[3]]$title
## [1] "Linear Algebra"
## 
## $textbooks[[3]]$authors
##                    one                    two                  three 
## "Stephen H. Freidberg"      "Arnold J. Insel"   "Lawrence E. Spence" 
## 
## $textbooks[[3]]$ISBN
## [1] "0130084514"
## 
## $textbooks[[3]]$publisher
## [1] "Prentice Hall"
## 
## $textbooks[[3]]$Year
## [1] 2003
## 
## $textbooks[[3]]$Book_Type
## [1] "Harback"
## 
## $textbooks[[3]]$Used_Price
## [1] 115.21
## 
## $textbooks[[3]]$New_Price
## [1] 163.48
## 
## $textbooks[[3]]$website
## [1] "Textbooks.com"
# Flatten every book record into a named character vector. lapply() is used
# rather than sapply() so the result is always a list: sapply() would collapse
# the records into a matrix whenever all books have the same number of fields,
# and the per-record conversion below would then iterate over single cells.
unlist.doc <- lapply(doc.json[[1]], unlist)
# Turn each record into a one-row data frame (t() makes the named vector a
# 1 x k matrix, so the field names become column names). The argument must be
# spelled `stringsAsFactors`; a misspelling is treated as data and adds a
# spurious constant column to every row.
book.rows <- lapply(unlist.doc, function(book) {
  data.frame(t(book), stringsAsFactors = FALSE)
})
# rbind.fill() stacks the rows and pads fields a book does not have (for
# example a second or third author) with NA.
json.df <- rbind.fill(book.rows)
names(json.df) <- c("Rank", "Title", "Author One", "Author Two", "Author Three", "ISBN", "Publisher", "Year", "Book Type", "Used Price", "New Price", "Website")
json.df
##   Rank                                               Title
## 1    1 Differential Equations with Boundary Value Problems
## 2    2                                      Fourier Series
## 3    3                                      Linear Algebra
##             Author One      Author Two       Author Three       ISBN
## 1         John Porking      Al Boggess       David Arnold 0131862367
## 2      Rajendra Bhatia            <NA>               <NA> 0883857405
## 3 Stephen H. Freidberg Arnold J. Insel Lawrence E. Spence 0130084514
##                             Publisher Year Book Type Used Price New Price
## 1                       Prentice Hall 2006   Harback      105.6     158.4
## 2 Mathematical Association of America 2004   Harback      12.21        35
## 3                       Prentice Hall 2003   Harback     115.21    163.48
##         Website
## 1 Textbooks.com
## 2    Amazon.com
## 3 Textbooks.com
# Download the HTML version of the book list and parse it into an internal
# HTML document.
url <- "https://raw.githubusercontent.com/DanielBrooks39/IS607/master/Week%208/books.html"
html.response <- GET(url)
# The HTML parser is lenient enough to accept an error page without
# complaint, so stop here on any HTTP failure instead of building a table
# from the wrong document.
stop_for_status(html.response)
# The response body is raw bytes; convert it to a single string and tell the
# parser explicitly that it is markup text rather than a file name or URL.
doc.html <- htmlParse(rawToChar(html.response$content), asText = TRUE)
doc.html
## <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
## <html><body>
##       <table>
## <tr>
## <th>Rank</th> <th> Title </th> <th> Author 1 </th> <th> Author 2 </th> <th> Author 3 </th> <th> ISBN </th> <th> Publisher </th> <th> Year </th> <th> Book Type </th> <th> Used Price </th> <th> New Price </th> <th> Website </th> </tr>
## <tr>
## <td>1</td> <td> Differential Equations With Boundry Value Problems </td> <td> John Porking </td> <td> Al Boggess </td> <td> David Arnold </td> <td> 0131862367 </td> <td> Prentince Hall </td> <td> 2006 </td> <td> Hardback </td> <td> $ 105.60 </td> <td> $ 158.40 </td> <td> Textbooks.com </td> </tr>
## <tr>
## <td>2</td> <td> Fourier Series </td> <td> Rajendra Bhatia </td> <td>  </td> <td>  </td> <td> 0883857405 </td> <td> Mathematical Association of America </td> <td> 2004 </td> <td> Hardback </td> <td> $ 12.21 </td> <td> $ 35.00 </td> <td> Amazon.com </td> </tr>
## <tr>
## <td>3</td> <td> Linear Algebra </td> <td> Stephen H. Freidberg </td> <td> Arnold J. Insel </td> <td> Lawrence E. Spence </td> <td> 0130084514 </td> <td> Prentince Hall </td> <td> 2003 </td> <td> Hardback </td> <td> $ 115.21 </td> <td> $ 163.48 </td> <td> Textbooks.com </td> </tr>
## </table>
## </body></html>
## 
# Read the book table out of the parsed HTML document. Without `which`,
# readHTMLTable() returns a list with one data frame per <table>, and coercing
# that whole list with as.data.frame() only works when the page holds exactly
# one table. Selecting the first table explicitly returns the data frame
# itself.
html.df <- readHTMLTable(doc.html, which = 1)
# Replace the headers taken from the <th> cells with the same readable names
# used for the other book data frames.
names(html.df) <- c("Rank", "Title", "Author One", "Author Two", "Author Three", "ISBN", "Publisher", "Year", "Book Type", "Used Price", "New Price", "Website")
html.df
##   Rank                                              Title
## 1    1 Differential Equations With Boundry Value Problems
## 2    2                                     Fourier Series
## 3    3                                     Linear Algebra
##             Author One      Author Two       Author Three       ISBN
## 1         John Porking      Al Boggess       David Arnold 0131862367
## 2      Rajendra Bhatia                                    0883857405
## 3 Stephen H. Freidberg Arnold J. Insel Lawrence E. Spence 0130084514
##                             Publisher Year Book Type Used Price New Price
## 1                      Prentince Hall 2006  Hardback   $ 105.60  $ 158.40
## 2 Mathematical Association of America 2004  Hardback    $ 12.21   $ 35.00
## 3                      Prentince Hall 2003  Hardback   $ 115.21  $ 163.48
##         Website
## 1 Textbooks.com
## 2    Amazon.com
## 3 Textbooks.com