Libraries

library(tidyverse)
library(RCurl)
library(XML)
library(xml2)

Strategy

I will use a mix of packages I already know and packages I found through online research to load HTML, JSON, and XML tables from the web as efficiently as possible.

Loading an HTML file

library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
## 
##     xml
## The following object is masked from 'package:purrr':
## 
##     pluck
## The following object is masked from 'package:readr':
## 
##     guess_encoding
# Fetch the HTML page and pull out its tables in one pipeline.
# The value printed below is a list (note the [[1]]); the books table is its
# first element.
df <- "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/html%20books.html" %>%
  read_html() %>%
  html_table()
df
## [[1]]
##                          book                authors         subgenre
## 1 In the Mountains of Madness         H.P. Lovecraft     hard science
## 2        Silence of the Lambs          Thomas Harris crime procedural
## 3             Rosemary's Baby Ira Levin, Jack Wright          mystery
##                 theme         trait
## 1 forbidden knowledge  anti-realist
## 2         psychopathy       baroque
## 3                cult devil worship

Loading a JSON file

library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
# Read the JSON books file directly from its URL. read_json() does not simplify
# by default, so simplifyVector = TRUE is passed explicitly; with it the parsed
# result prints as a data frame (see below).
df <- read_json(
  path = "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/json_books.json",
  simplifyVector = TRUE
)
# Inspect the authors entry of the third book, the one with two authors.
# print() is called only for its output; df itself is left untouched.
print(df$authors[3])
## [1] "Ira Levin, Jack Wright"
df
##                         title                authors         subgenre
## 1 In the Mountains of Madness         H.P. Lovecraft     hard science
## 2        Silence of the Lambs          Thomas Harris crime procedural
## 3             Rosemary's Baby Ira Levin, Jack Wright          mystery
##                theme         trait
## 1 forbidden knowlege  anti-realist
## 2        psychopathy       baroque
## 3               cult devil worship
# Alternative: load the same JSON URL with fromJSON(). The printed result
# matches what read_json() produced for this file.
df <- fromJSON(
  txt = "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/json_books.json",
  simplifyVector = TRUE
)

df
##                         title                authors         subgenre
## 1 In the Mountains of Madness         H.P. Lovecraft     hard science
## 2        Silence of the Lambs          Thomas Harris crime procedural
## 3             Rosemary's Baby Ira Levin, Jack Wright          mystery
##                theme         trait
## 1 forbidden knowlege  anti-realist
## 2        psychopathy       baroque
## 3               cult devil worship

Loading an XML file

# Download the raw XML text with getURL(), then hand that text to
# xmlToDataFrame(), which yields the data frame printed below.
pg_xml <- getURL(
  url = "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/xml_books.xml"
)
df <- xmlToDataFrame(doc = pg_xml)
df
##                         title                authors         subgenre
## 1 In the Mountains of Madness         H.P. Lovecraft     hard science
## 2        Silence of the Lambs          Thomas Harris crime procedural
## 3             Rosemary's Baby Ira Levin, Jack Wright          mystery
##                 theme         trait
## 1 forbidden knowledge  anti-realist
## 2         psychopathy       baroque
## 3                cult devil worship