library(robotstxt)
## Warning: package 'robotstxt' was built under R version 4.4.2
library(rvest)

5) Is scraping this web page allowed?

# Ask robotstxt whether the full-credits page may be fetched by a bot;
# the call prints the domain it checked and returns TRUE/FALSE.
paths_allowed(
  paths = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)
##  www.imdb.com
## [1] TRUE

6) How many rows and columns are in the table?

# Download the full-credits page and parse it into an HTML document object.
# The bare name on the last line auto-prints a short summary of the document.
bas_html <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm" %>%
  read_html()
bas_html
## {html_document}
## <html xmlns:og="http://ogp.me/ns#" xmlns:fb="http://www.facebook.com/2008/fbml">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body id="styleguide-v2" class="fixed">\n            <img height="1" widt ...
# Every <table> node on the page (kept so later code can still inspect them).
table_html <- html_elements(bas_html, "table")
# Select the cast table by its CSS class instead of by position: a positional
# index such as [3] silently picks the wrong table if the page layout shifts.
cast_table_html <- html_elements(bas_html, "table.cast_list")
# Fail loudly here, rather than with an obscure subscript error further down,
# if the page no longer contains exactly one cast table.
stopifnot(
  "expected exactly one <table class='cast_list'> on the page" =
    length(cast_table_html) == 1L
)
cast_table_html
## {xml_nodeset (1)}
## [1] <table class="cast_list">\n<tr><td colspan="4" class="castlist_label"></t ...
# html_table() on a node set returns a list holding one tibble per table.
tibble_list <- html_table(cast_table_html)
tibble_list
## [[1]]
## # A tibble: 3,150 × 4
##    X1    X2               X3    X4                                              
##    <lgl> <chr>            <chr> <chr>                                           
##  1 NA    ""               ""    ""                                              
##  2 NA    "Angela Bassett" "..." "Athena Grant\n         / ...  \n              …
##  3 NA    ""               ""    ""                                              
##  4 NA    "Peter Krause"   "..." "Bobby Nash\n                  115 episodes, 20…
##  5 NA    ""               ""    ""                                              
##  6 NA    "Oliver Stark"   "..." "Evan 'Buck' Buckley\n                  115 epi…
##  7 NA    ""               ""    ""                                              
##  8 NA    "Aisha Hinds"    "..." "Henrietta 'Hen' Wilson\n                  115 …
##  9 NA    ""               ""    ""                                              
## 10 NA    "Kenneth Choi"   "..." "Howie 'Chimney' Han\n                  115 epi…
## # ℹ 3,140 more rows
# tibble_list is a one-element list; unwrap it to work with the tibble itself,
# then auto-print it to inspect its dimensions and first rows.
eastern_tibble <- tibble_list[[1L]]
eastern_tibble

7) Clean the data and find the exact number of people in the cast.

# Keep only the 2nd and 4th columns of the scraped table (per the printed
# preview: the performer name and the character/episode text).
cast_df <- eastern_tibble[, c(2, 4)]
# The scraped table alternates real entries with blank spacer rows. A row is
# a cast member only when both fields are present and non-empty. NA is treated
# as "not present" explicitly, so the mask itself never contains NA.
has_name <- !is.na(cast_df[[1]]) & nzchar(cast_df[[1]])
has_role <- !is.na(cast_df[[2]]) & nzchar(cast_df[[2]])
# A single vectorised filter is enough: once both fields are required to be
# non-empty, a further "drop rows that are entirely empty" pass cannot remove
# anything, so it is omitted.
cast_df <- cast_df[has_name & has_role, ]
# Rows = number of cast entries, columns = 2.
dim(cast_df)
## [1] 1574    2

8) Series Visual Effects staff names.

# Count the staff listed under the "Series Visual Effects" heading.
# Prints the number of non-empty rows in the table that follows the heading,
# or a diagnostic message if the heading or the table cannot be located.
library(rvest)
# Reuse the document parsed earlier instead of downloading the page again:
# this avoids a second request and guarantees both analyses see the same HTML.
page <- bas_html
# Any element with a text node containing the heading text.
effects_section <- page %>%
  html_elements(xpath = "//*[contains(text(), 'Series Visual Effects')]")
if (length(effects_section) > 0) {
  # First <table> that comes after the heading in document order.
  effects_table <- effects_section[[1]] %>%
    html_element(xpath = "./following::table[1]")
  # html_element() never returns NULL: when nothing matches it returns an
  # `xml_missing` placeholder, so that is what has to be tested for.
  if (!inherits(effects_table, "xml_missing")) {
    # `fill` is deprecated in html_table(); ragged rows are always padded.
    visual_effects_df <- html_table(effects_table)
    # Drop spacer rows, i.e. rows in which every cell is empty or NA.
    # NA is checked explicitly because nzchar(NA) is TRUE, which would make
    # an all-blank row look populated as soon as one of its cells is NA.
    cell_filled <- !is.na(visual_effects_df) & visual_effects_df != ""
    visual_effects_df <- visual_effects_df[rowSums(cell_filled) > 0, ]
    num_staff <- nrow(visual_effects_df)
    print(num_staff)
  } else {
    print("Table not found. Check the webpage structure.")
  }
} else {
  print("Section 'Series Visual Effects' not found. Verify XPath.")
}
## [1] 196