library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.3.3
library(xml2)
## Warning: package 'xml2' was built under R version 3.3.3
library(rvest)
## Warning: package 'rvest' was built under R version 3.3.3
library(XML)
## Warning: package 'XML' was built under R version 3.3.3
## 
## Attaching package: 'XML'
## The following object is masked from 'package:rvest':
## 
##     xml
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.3.3
## Loading required package: bitops
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.3
## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.4
## v tidyr   0.8.0     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.3.3
## Warning: package 'tidyr' was built under R version 3.3.3
## Warning: package 'readr' was built under R version 3.3.3
## Warning: package 'purrr' was built under R version 3.3.3
## Warning: package 'dplyr' was built under R version 3.3.3
## Warning: package 'forcats' was built under R version 3.3.3
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x tidyr::complete()       masks RCurl::complete()
## x dplyr::filter()         masks stats::filter()
## x purrr::flatten()        masks jsonlite::flatten()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()
## x XML::xml()              masks rvest::xml()

Read the JSON file

# Parse the books JSON directly from its raw GitHub URL, then inspect the
# class of the parsed object and print it.
json_books <- "https://raw.githubusercontent.com/ghh2001/607Assignment7-/master/booksGH3.json" %>%
  fromJSON()
class(json_books)
## [1] "list"
json_books
## $books
##                                                                                           Title
## 1                             Start with Why: How Great Leaders Inspire Everyone to Take Action
## 2                               The Four: The Hidden DNA of Amazon, Apple, Facebook, and Google
## 3                                    Change or Die The Three Keys to Change at Work and in Life
## 4                                                    Crushing it! How Great Entrepreneurs Build
## 5 Data Science for Business: What You Need to Know about Data Mining and Data-Analytic Thinking
##                                     Authors              Type Length
## 1                               Simon Sinek Audible Audiobook    288
## 2                            Scott Galloway         Paperpack    320
## 3                           Alan Deutschman         Paperpack    256
## 4                           Gary Vaynerchuk Audible Audiobook    420
## 5 Foster Provost, Karen Dillon, Tom Fawcett         Paperpack    414
##   AmazonRating
## 1          4.5
## 2          4.5
## 3          4.5
## 4          4.5
## 5          4.5
# Note: Authors was read as a list column (one character vector per book) rather than a plain character column; every other column was read as expected.

Alternative method of reading the JSON file, using the getURL function

# Alternative approach: download the raw JSON text with RCurl, then parse the
# text with fromJSON().
# Do not pass `ssl.verifypeer = FALSE` here: it switches off TLS certificate
# verification, so a spoofed server would be accepted silently.
raw_JSON <- getURL("https://raw.githubusercontent.com/ghh2001/607Assignment7-/master/booksGH3.json")

booksGH_JSON <- fromJSON(txt = raw_JSON)

# Inspect the structure: a list with one element, `books`, which is a data
# frame whose Authors column is itself a list.
str(booksGH_JSON)
## List of 1
##  $ books:'data.frame':   5 obs. of  5 variables:
##   ..$ Title       : chr [1:5] "Start with Why: How Great Leaders Inspire Everyone to Take Action" "The Four: The Hidden DNA of Amazon, Apple, Facebook, and Google" "Change or Die The Three Keys to Change at Work and in Life" "Crushing it! How Great Entrepreneurs Build" ...
##   ..$ Authors     :List of 5
##   .. ..$ : chr "Simon Sinek"
##   .. ..$ : chr "Scott Galloway"
##   .. ..$ : chr "Alan Deutschman"
##   .. ..$ : chr "Gary Vaynerchuk"
##   .. ..$ : chr [1:3] "Foster Provost" "Karen Dillon" "Tom Fawcett"
##   ..$ Type        : chr [1:5] "Audible Audiobook" "Paperpack" "Paperpack" "Audible Audiobook" ...
##   ..$ Length      : chr [1:5] "288" "320" "256" "420" ...
##   ..$ AmazonRating: chr [1:5] "4.5" "4.5" "4.5" "4.5" ...
class(booksGH_JSON)
## [1] "list"

Create a data frame from the JSON file just read

# Flatten the parsed list (a single `books` element) into one data frame and
# confirm the resulting class.
booksGH_JSON_df <- booksGH_JSON %>% as.data.frame()
class(booksGH_JSON_df)
## [1] "data.frame"
#kable (booksGH_JSON_df)
# However, the authors are shown in c("author1", "author2") format, because Authors is a list column.

Try a third way to read the JSON file

json.url <- "https://raw.githubusercontent.com/ghh2001/607Assignment7-/master/booksGH3.json"

# Fetch the raw content, then parse its first element as JSON and flatten the
# parsed result into a data frame.
json.file <- getURLContent(json.url)
json.df <- json.file[[1]] %>%
  fromJSON() %>%
  as.data.frame()

# Tidy the column names: strip the "books." prefix, then replace the first
# remaining dot (if any) in each name with a space.
colnames(json.df) <- colnames(json.df) %>%
  str_replace("books\\.", "") %>%
  str_replace("\\.", " ")
dim(json.df)
## [1] 5 5
#kable(json.df)
# it works, but it appears 3 times, which does not make sense

Read in the XML file

# Download the XML document as text and parse it into an XML document object.
data2 <- "https://raw.githubusercontent.com/ghh2001/607Assignment7-/master/booksGH3.xml" %>%
  getURL()
xbooks <- data2 %>% xmlParse()
class(xbooks)
## [1] "XMLInternalDocument" "XMLAbstractDocument"
# Grab the document's root node, which is what gets converted below.
root <- xbooks %>% xmlRoot()
class(root)
## [1] "XMLInternalElementNode" "XMLInternalNode"       
## [3] "XMLAbstractNode"
# Convert the records under the root node into a data frame and print it.
books_xml <- root %>% xmlToDataFrame()
class(books_xml)
## [1] "data.frame"
books_xml
##                                                                                           Title
## 1                             Start with Why: How Great Leaders Inspire Everyone to Take Action
## 2                               The Four: The Hidden DNA of Amazon, Apple, Facebook, and Google
## 3                                    Change or Die The Three Keys to Change at Work and in Life
## 4                                                    Crushing it! How Great Entrepreneurs Build
## 5 Data Science for Business: What You Need to Know about Data Mining and Data-Analytic Thinking
##                                      Author              Type Length
## 1                               Simon Sinek Audible Audiobook    288
## 2                            Scott Galloway            Kindle    320
## 3                           Alan Deutschman         Paperpack    256
## 4                           Gary Vaynerchuk Audible Audiobook    420
## 5 Foster Provost; Karen Dillon; Tom Fawcett         Paperpack    414
##   AmazonRating
## 1          4.5
## 2          4.5
## 3          4.5
## 4          4.5
## 5          4.5

Read in the HTML file

# Read the HTML page, extract every <table> node and parse the tables, then
# combine the parsed result into a single data frame and print it.
# `fill = TRUE` is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
books_html <- "https://raw.githubusercontent.com/ghh2001/TestAug28GH/master/booksGH3.html" %>%
  read_html() %>%
  html_nodes("table") %>%
  html_table(fill = TRUE)
books_html <- as.data.frame(books_html)
books_html
##                                                                                           Title
## 1                             Start with Why: How Great Leaders Inspire Everyone to Take Action
## 2                               The Four: The Hidden DNA of Amazon, Apple, Facebook, and Google
## 3                                   Change or Die: The Three Keys to Change at Work and in Life
## 4                                                    Crushing it! How Great Entrepreneurs Build
## 5 Data Science for Business: What You Need to Know about Data Mining and Data-Analytic Thinking
## 6                                                                                          <NA>
##                                     Authors              Type Length
## 1                               SImon Sinek Audible Audiobook    288
## 2                            Scott Galloway            Kindle    320
## 3                           Alan Deutschman         Paperpack    256
## 4                           Gary Vaynerchuk Audible AUdiobook    420
## 5 Foster Provost; Karen Dillon; Tom Fawcett         Paperpack    414
## 6                                      <NA>              <NA>     NA
##   AmazonRating
## 1          4.5
## 2          4.5
## 3          4.5
## 4          4.5
## 5          4.5
## 6           NA