library(rvest)
## Warning: package 'rvest' was built under R version 4.4.2
library(robotstxt)
## Warning: package 'robotstxt' was built under R version 4.4.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()         masks stats::filter()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag()            masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Question 5

# IMDb full-credits page for title tt7235466. The address is defined once and
# reused for both the robots.txt check and the download.
credits_url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"

# Ask robotstxt whether this path may be scraped.
paths_allowed(paths = credits_url)
##  www.imdb.com
## [1] TRUE
# Download the page and parse it into an HTML document.
imbd_html <- read_html(x = credits_url)
imbd_html
## {html_document}
## <html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body id="styleguide-v2" class="fixed">\n            <img height="1" widt ...
# Collect every <table> node found in the document.
table_html <- html_elements(x = imbd_html, css = "table")
table_html
## {xml_nodeset (30)}
##  [1] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [2] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [3] <table class="cast_list">\n<tr><td colspan="4" class="castlist_label"></ ...
##  [4] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [5] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [6] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [7] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [8] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
##  [9] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [10] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [11] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [12] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [13] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [14] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [15] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [16] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [17] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [18] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [19] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## [20] <table class="simpleTable simpleCreditsTable">\n<colgroup>\n<col class=" ...
## ...

Question 6

# Parse the third table node (class "cast_list" in the node listing) into a
# list holding one tibble; single-bracket indexing keeps it a node set.
tibble_list <- html_table(x = table_html[3])
tibble_list
## [[1]]
## # A tibble: 3,152 × 4
##    X1    X2               X3    X4                                              
##    <lgl> <chr>            <chr> <chr>                                           
##  1 NA    ""               ""    ""                                              
##  2 NA    "Angela Bassett" "..." "Athena Grant\n         / ...  \n              …
##  3 NA    ""               ""    ""                                              
##  4 NA    "Peter Krause"   "..." "Bobby Nash\n                  115 episodes, 20…
##  5 NA    ""               ""    ""                                              
##  6 NA    "Oliver Stark"   "..." "Evan 'Buck' Buckley\n                  115 epi…
##  7 NA    ""               ""    ""                                              
##  8 NA    "Aisha Hinds"    "..." "Henrietta 'Hen' Wilson\n                  115 …
##  9 NA    ""               ""    ""                                              
## 10 NA    "Kenneth Choi"   "..." "Howie 'Chimney' Han\n                  115 epi…
## # ℹ 3,142 more rows
# Pull the lone tibble out of the list so it can be handled as a data frame.
series_cast <- pluck(tibble_list, 1)
series_cast
## # A tibble: 3,152 × 4
##    X1    X2               X3    X4                                              
##    <lgl> <chr>            <chr> <chr>                                           
##  1 NA    ""               ""    ""                                              
##  2 NA    "Angela Bassett" "..." "Athena Grant\n         / ...  \n              …
##  3 NA    ""               ""    ""                                              
##  4 NA    "Peter Krause"   "..." "Bobby Nash\n                  115 episodes, 20…
##  5 NA    ""               ""    ""                                              
##  6 NA    "Oliver Stark"   "..." "Evan 'Buck' Buckley\n                  115 epi…
##  7 NA    ""               ""    ""                                              
##  8 NA    "Aisha Hinds"    "..." "Henrietta 'Hen' Wilson\n                  115 …
##  9 NA    ""               ""    ""                                              
## 10 NA    "Kenneth Choi"   "..." "Howie 'Chimney' Han\n                  115 epi…
## # ℹ 3,142 more rows

Question 7

# Keep only the actor-name (X2) and character (X4) columns, addressed by name
# so the selection does not depend on column position.
cleaned_tibble <- select(series_cast, X2, X4)
# Drop the blank spacer rows. Each condition compares a plain column vector
# (indexing a tibble with [, 1] would give a one-column tibble and hence a
# logical matrix); filter() keeps rows where both conditions are TRUE.
cleaned_cast <- filter(cleaned_tibble, X2 != "", X4 != "")
tail(cleaned_cast)
## # A tibble: 6 × 2
##   X2                  X4                                                        
##   <chr>               <chr>                                                     
## 1 Aly Fabrizio        "Trick or Treater\n  \n  \n  (uncredited)\n  \n          …
## 2 Buffy Milner        "Volleyball Player\n  \n  \n  (uncredited)\n  \n         …
## 3 Ithaka Darin Pappas "Migrant\n  \n  \n  (uncredited)\n  \n                  1…
## 4 Bryce Schmidt       "Police Bugler\n  \n  \n  (uncredited)\n  \n             …
## 5 Timothy T Tyler     "Patient\n  \n  \n  (uncredited)\n  \n                  1…
## 6 Jeffrey Viner       "Car\n         / ...  \n  \n  \n  (uncredited)\n  \n     …

Question 9

# Locate the visual-effects credits table by its position inside the
# #fullcredits_content container.
# NOTE(review): the :nth-child(38) index is tied to the page layout at the time
# of scraping — re-check it if the credits sections change.
vfx_selector <- "#fullcredits_content > table:nth-child(38)"
visual_effects_html <- html_element(x = imbd_html, css = vfx_selector)
visual_effects_html
## {html_node}
## <table class="simpleTable simpleCreditsTable">
## [1] <colgroup>\n<col class="column1">\n<col class="column2">\n<col class="col ...
## [2] <tbody>\n<tr>\n<td class="name">\n<a href="/name/nm3824642/?ref_=ttfc_fc_ ...
# Convert that single table node into a tibble of credits.
visual_effects <- html_table(x = visual_effects_html)
visual_effects
## # A tibble: 196 × 3
##    X1                     X2    X3                                              
##    <chr>                  <chr> <chr>                                           
##  1 Christian Zeiler       ...   digital compositor / digital compositor: FuseFX…
##  2 Katrina Duclos         ...   visual effects editor / visual effects editor: …
##  3 Bryant Reif            ...   cg supervisor (50 episodes, 2019-2022)          
##  4 Tony Pirzadeh          ...   visual effects producer: FuseFX / visual effect…
##  5 Ezra Christian         ...   managing producer (46 episodes, 2021-2024)      
##  6 Timothy Michael Cairns ...   compositing supervisor (44 episodes, 2019-2022) 
##  7 Luciano DiGeronimo     ...   compositing supervisor: FuseFX / digital effect…
##  8 Zachary Goodson        ...   visual effects supervisor / visual effects arti…
##  9 Esmeralda Ramirez      ...   vfx coordinator: FuseFX / vfx coordinator: Fuse…
## 10 Brigitte Bourque       ...   digital effects supervisor: Fuse FX / composite…
## # ℹ 186 more rows