Justin Kaplan
Workshop 3
library(robotstxt)
library(rvest)
Address ethical concerns (Question 5): confirm that the site permits scraping this page
# Ask whether the site's robots.txt permits bots to access this page
# before any scraping is attempted.
paths_allowed(
  paths = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)
## www.imdb.com
## [1] TRUE
Read the HTML Webpage
# Download the full-credits page and parse it into an HTML document.
credits_url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
IMDB <- read_html(credits_url)
Select the HTML Elements
# Collect every <table> node found in the parsed page.
table_HTML <- html_elements(x = IMDB, css = "table")
# Inspect the third table; the printed node shows it carries the
# "cast_list" class. Single-bracket indexing keeps it as a node set.
table_HTML[3]
## {xml_nodeset (1)}
## [1] <table class="cast_list">\n<tr><td colspan="4" class="castlist_label"></t ...
Parse the cast table into a tibble
# Convert the selected table node set into tibbles. Because the input is
# a node set, the result is a list with one tibble per table (here, one).
tibble_list <- html_table(x = table_HTML[3])
# Print the list to preview the parsed table.
tibble_list
## [[1]]
## # A tibble: 3,152 × 4
## X1 X2 X3 X4
## <lgl> <chr> <chr> <chr>
## 1 NA "" "" ""
## 2 NA "Angela Bassett" "..." "Athena Grant\n / ... \n …
## 3 NA "" "" ""
## 4 NA "Peter Krause" "..." "Bobby Nash\n 115 episodes, 20…
## 5 NA "" "" ""
## 6 NA "Oliver Stark" "..." "Evan 'Buck' Buckley\n 115 epi…
## 7 NA "" "" ""
## 8 NA "Aisha Hinds" "..." "Henrietta 'Hen' Wilson\n 115 …
## 9 NA "" "" ""
## 10 NA "Kenneth Choi" "..." "Howie 'Chimney' Han\n 115 epi…
## # ℹ 3,142 more rows