library(tidyverse)
library(RCurl)
library(XML)
library(xml2)
I will be using some packages I have used before, along with others I found through online research, to load HTML, JSON, and XML tables from the web as efficiently as possible.
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
##
## xml
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
# Fetch the HTML version of the books page and parse every table in it.
# html_table() is applied to the whole document, so `df` ends up as a list
# of tables (the book table is the first element), not a bare data frame.
df <- read_html(
  "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/html%20books.html"
) %>%
  html_table()
df
## [[1]]
## book authors subgenre
## 1 In the Mountains of Madness H.P. Lovecraft hard science
## 2 Silence of the Lambs Thomas Harris crime procedural
## 3 Rosemary's Baby Ira Levin, Jack Wright mystery
## theme trait
## 1 forbidden knowledge anti-realist
## 2 psychopathy baroque
## 3 cult devil worship
library(jsonlite)
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
# Load the JSON version of the books table. simplifyVector = TRUE asks
# jsonlite to collapse the parsed records into a data frame rather than
# leaving them as a nested list.
df <- read_json(
  "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/json_books.json",
  simplifyVector = TRUE
)
# Inspect the authors field of the third book on its own. Printing does not
# alter the value, so nothing is assigned back into the data frame.
print(df$authors[3])
## [1] "Ira Levin, Jack Wright"
df
## title authors subgenre
## 1 In the Mountains of Madness H.P. Lovecraft hard science
## 2 Silence of the Lambs Thomas Harris crime procedural
## 3 Rosemary's Baby Ira Levin, Jack Wright mystery
## theme trait
## 1 forbidden knowlege anti-realist
## 2 psychopathy baroque
## 3 cult devil worship
# Same JSON source, read with fromJSON() this time; the result replaces `df`.
df <- fromJSON(
  txt = "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/json_books.json",
  simplifyVector = TRUE
)
df
## title authors subgenre
## 1 In the Mountains of Madness H.P. Lovecraft hard science
## 2 Silence of the Lambs Thomas Harris crime procedural
## 3 Rosemary's Baby Ira Levin, Jack Wright mystery
## theme trait
## 1 forbidden knowlege anti-realist
## 2 psychopathy baroque
## 3 cult devil worship
# Download the XML version of the books table as raw text (RCurl), then let
# the XML package convert the downloaded document into a data frame.
pg_xml <- getURL(
  url = "https://raw.githubusercontent.com/JackJosephWright/Data-Collection-Homework/master/xml_books.xml"
)
df <- xmlToDataFrame(doc = pg_xml)
df
## title authors subgenre
## 1 In the Mountains of Madness H.P. Lovecraft hard science
## 2 Silence of the Lambs Thomas Harris crime procedural
## 3 Rosemary's Baby Ira Levin, Jack Wright mystery
## theme trait
## 1 forbidden knowledge anti-realist
## 2 psychopathy baroque
## 3 cult devil worship