Justin Kaplan

Workshop 3

library(robotstxt)
library(rvest)

Address ethical concerns: check that robots.txt permits scraping this page (Question 5)

# Ask whether the site's robots.txt permits bots to fetch the full-credits page.
paths_allowed(
  paths = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)
##  www.imdb.com
## [1] TRUE

Read the HTML Webpage

# Download the full-credits page and parse it into an HTML document object.
IMDB <- read_html(
  x = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)

Select the HTML Elements

# Collect every <table> node on the page, then display only the third match.
table_HTML <- IMDB %>% html_elements(css = "table")
print(table_HTML[3])
## {xml_nodeset (1)}
## [1] <table class="cast_list">\n<tr><td colspan="4" class="castlist_label"></t ...

Parse the third table (the cast list) into a tibble

# Convert the third table node into a tibble. html_table() on a node set
# yields a list, so the result is a one-element list holding that tibble.
tibble_list <- table_HTML[3] %>% html_table()
print(tibble_list)
## [[1]]
## # A tibble: 3,152 × 4
##    X1    X2               X3    X4                                              
##    <lgl> <chr>            <chr> <chr>                                           
##  1 NA    ""               ""    ""                                              
##  2 NA    "Angela Bassett" "..." "Athena Grant\n         / ...  \n              …
##  3 NA    ""               ""    ""                                              
##  4 NA    "Peter Krause"   "..." "Bobby Nash\n                  115 episodes, 20…
##  5 NA    ""               ""    ""                                              
##  6 NA    "Oliver Stark"   "..." "Evan 'Buck' Buckley\n                  115 epi…
##  7 NA    ""               ""    ""                                              
##  8 NA    "Aisha Hinds"    "..." "Henrietta 'Hen' Wilson\n                  115 …
##  9 NA    ""               ""    ""                                              
## 10 NA    "Kenneth Choi"   "..." "Howie 'Chimney' Han\n                  115 epi…
## # ℹ 3,142 more rows