# Ask robots.txt whether the full-credits page may be scraped before any
# request for the page itself is made.
library(robotstxt)
paths_allowed(
  paths = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)

 www.imdb.com                      
[1] TRUE

Question 6

# Question 6: download the full-credits page and parse the tables it contains.
library(rvest)

url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)

# Calling html_table() on the whole document parses every table on the page;
# the result is indexed per table further down.
tables <- html_table(page)

# How many tables were found on the page.
length(tables)
[1] 30
# The third parsed table is the one treated as the series cast table.
series_cast_table <- tables[[3]]

# Row and column counts, read from the table's dimensions.
rows <- dim(series_cast_table)[1]
columns <- dim(series_cast_table)[2]

# Report the raw size of the table before any cleaning.
cat("The table has", rows, "rows and", columns, "columns.\n")
The table has 2978 rows and 4 columns.
# Preview the first six rows of the raw cast table.
print(head(series_cast_table, n = 6L))

Question 7

# Question 7: reduce the cast table to columns 2 and 4 and drop every row
# that has a missing or empty value in either of them.
library(rvest)

url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)
tables <- page %>% html_table()

# The third parsed table is the one treated as the series cast table.
series_cast_table <- tables[[3]]

# Select by position with base indexing. select() is a dplyr function and this
# chunk only loads rvest, so the previous call failed unless dplyr happened to
# be attached already.
cleaned_table <- series_cast_table[, c(2, 4)]

# Compare the column vectors ([[ ]]) instead of one-column tables ([, 1]),
# which is what [, 1] yields when the table is a tibble. NA cells are treated
# like empty ones, matching how subset() discarded rows with an NA condition.
first_col <- cleaned_table[[1]]
second_col <- cleaned_table[[2]]
keep <- !is.na(first_col) & first_col != "" &
  !is.na(second_col) & second_col != ""
cleaned_table <- cleaned_table[keep, ]

# No separate "all cells empty" pass is needed: any such row has already been
# removed by the filter above.

final_rows <- nrow(cleaned_table)
final_columns <- ncol(cleaned_table)

cat("The cleaned cast table has", final_rows, "observations and", final_columns, "columns.\n")
The cleaned cast table has 1488 observations and 2 columns.
# Preview the first six rows of the cleaned cast table.
print(head(cleaned_table, n = 6L))
NA

# Question 9

<!-- rnb-text-end -->


<!-- rnb-chunk-begin -->


<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxubGlicmFyeShydmVzdClcbmxpYnJhcnkoZHBseXIpXG5cblxudXJsIDwtICdodHRwczovL3d3dy5pbWRiLmNvbS90aXRsZS90dDcyMzU0NjYvZnVsbGNyZWRpdHM/cmVmXz10dF9jbF9zbSdcblxuXG5wYWdlIDwtIHJlYWRfaHRtbCh1cmwpXG5cblxudmlzdWFsX2VmZmVjdHNfdGFibGUgPC0gcGFnZSAlPiVcbiAgaHRtbF9ub2Rlcyh4cGF0aCA9IFwiLy9oNFtjb250YWlucyh0ZXh0KCksICdTZXJpZXMgVmlzdWFsIEVmZmVjdHMnKV0vZm9sbG93aW5nLXNpYmxpbmc6OnRhYmxlWzFdXCIpICU+JVxuICBodG1sX3RhYmxlKClcblxudmlzdWFsX2VmZmVjdHNfdGFibGUgPC0gdmlzdWFsX2VmZmVjdHNfdGFibGVbWzFdXVxuXG52aXN1YWxfZWZmZWN0c190YWJsZSA8LSB2aXN1YWxfZWZmZWN0c190YWJsZVshaXMubmEodmlzdWFsX2VmZmVjdHNfdGFibGVbWzFdXSkgJiB2aXN1YWxfZWZmZWN0c190YWJsZVtbMV1dICE9IFwiXCIsIF1cblxuXG5udW1fc3RhZmYgPC0gbnJvdyh2aXN1YWxfZWZmZWN0c190YWJsZSlcblxuXG5jYXQoXCJUaGUgbnVtYmVyIG9mIHN0YWZmIHdobyB3b3JrZWQgb24gU2VyaWVzIFZpc3VhbCBFZmZlY3RzIGlzXCIsIG51bV9zdGFmZiwgXCIuXFxuXCIpXG5cblxucHJpbnQoaGVhZCh2aXN1YWxfZWZmZWN0c190YWJsZSkpXG5cbmBgYCJ9 -->

```r
# Question 9: count the staff listed under "Series Visual Effects".
library(rvest)
library(dplyr)

url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)

# Find the table through its section heading instead of its position on the
# page: the first <table> sibling that follows the matching <h4>.
visual_effects_tables <- page %>%
  html_nodes(xpath = "//h4[contains(text(), 'Series Visual Effects')]/following-sibling::table[1]") %>%
  html_table()

# Stop with a readable message when nothing matched; indexing an empty result
# with [[1]] would otherwise fail with "subscript out of bounds".
if (length(visual_effects_tables) == 0) {
  stop("No 'Series Visual Effects' table was found on the page.", call. = FALSE)
}
visual_effects_table <- visual_effects_tables[[1]]

# Keep rows whose first column is neither NA nor empty. drop = FALSE keeps the
# result a table even if it has a single column, so nrow() stays meaningful.
first_col <- visual_effects_table[[1]]
has_value <- !is.na(first_col) & first_col != ""
visual_effects_table <- visual_effects_table[has_value, , drop = FALSE]

num_staff <- nrow(visual_effects_table)

# sep = "" with explicit spaces avoids the stray blank that cat() would
# otherwise print between the count and the final period.
cat(
  "The number of staff who worked on Series Visual Effects is ",
  num_staff, ".\n",
  sep = ""
)

print(head(visual_effects_table))
LS0tCnRpdGxlOiAiQXNzaWdubWVudCAzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeShyb2JvdHN0eHQpCnBhdGhzX2FsbG93ZWQoImh0dHBzOi8vd3d3LmltZGIuY29tL3RpdGxlL3R0NzIzNTQ2Ni9mdWxsY3JlZGl0cz9yZWZfPXR0X2NsX3NtIikKYGBgCgpRdWVzdGlvbiA2IApgYGB7cn0KbGlicmFyeShydmVzdCkKCnVybCA8LSAiaHR0cHM6Ly93d3cuaW1kYi5jb20vdGl0bGUvdHQ3MjM1NDY2L2Z1bGxjcmVkaXRzP3JlZl89dHRfY2xfc20iCgpwYWdlIDwtIHJlYWRfaHRtbCh1cmwpCgp0YWJsZXMgPC0gcGFnZSAlPiUgaHRtbF90YWJsZSgpCgoKbGVuZ3RoKHRhYmxlcykKCnNlcmllc19jYXN0X3RhYmxlIDwtIHRhYmxlc1tbM11dCgpyb3dzIDwtIG5yb3coc2VyaWVzX2Nhc3RfdGFibGUpCmNvbHVtbnMgPC0gbmNvbChzZXJpZXNfY2FzdF90YWJsZSkKCmNhdCgnVGhlIHRhYmxlIGhhcycsIHJvd3MsICdyb3dzIGFuZCcsIGNvbHVtbnMsICdjb2x1bW5zLlxuJykKCnByaW50KGhlYWQoc2VyaWVzX2Nhc3RfdGFibGUpKQpgYGAKCiMgUXVlc3Rpb24gNwpgYGB7cn0KbGlicmFyeShydmVzdCkKCnVybCA8LSAiaHR0cHM6Ly93d3cuaW1kYi5jb20vdGl0bGUvdHQ3MjM1NDY2L2Z1bGxjcmVkaXRzP3JlZl89dHRfY2xfc20iCgpwYWdlIDwtIHJlYWRfaHRtbCh1cmwpCgp0YWJsZXMgPC0gcGFnZSAlPiUgaHRtbF90YWJsZSgpCgpzZXJpZXNfY2FzdF90YWJsZSA8LSB0YWJsZXNbWzNdXQoKY2xlYW5lZF90YWJsZSA8LSBzZXJpZXNfY2FzdF90YWJsZSAlPiUgc2VsZWN0KDIsIDQpCgpjbGVhbmVkX3RhYmxlIDwtIHN1YnNldChjbGVhbmVkX3RhYmxlLCBjbGVhbmVkX3RhYmxlWywgMV0gIT0gIiIgJiBjbGVhbmVkX3RhYmxlWywgMl0gIT0gIiIpCgoKY2xlYW5lZF90YWJsZSA8LSBjbGVhbmVkX3RhYmxlWyFhcHBseShjbGVhbmVkX3RhYmxlID09ICIiLCAxLCBhbGwpLCBdCgoKZmluYWxfcm93cyA8LSBucm93KGNsZWFuZWRfdGFibGUpCmZpbmFsX2NvbHVtbnMgPC0gbmNvbChjbGVhbmVkX3RhYmxlKQoKY2F0KCJUaGUgY2xlYW5lZCBjYXN0IHRhYmxlIGhhcyIsIGZpbmFsX3Jvd3MsICJvYnNlcnZhdGlvbnMgYW5kIiwgZmluYWxfY29sdW1ucywgImNvbHVtbnMuXG4iKQoKcHJpbnQoaGVhZChjbGVhbmVkX3RhYmxlKSkKCmBgYAoKCmBgYAoKIyBRdWVzdGlvbiA5CmBgYHtyfQpsaWJyYXJ5KHJ2ZXN0KQpsaWJyYXJ5KGRwbHlyKQoKCnVybCA8LSAnaHR0cHM6Ly93d3cuaW1kYi5jb20vdGl0bGUvdHQ3MjM1NDY2L2Z1bGxjcmVkaXRzP3JlZl89dHRfY2xfc20nCgoKcGFnZSA8LSByZWFkX2h0bWwodXJsKQoKCnZpc3VhbF9lZmZlY3RzX3RhYmxlIDwtIHBhZ2UgJT4lCiAgaHRtbF9ub2Rlcyh4cGF0aCA9ICIvL2g0W2NvbnRhaW5zKHRleHQoKSwgJ1NlcmllcyBWaXN1YWwgRWZmZWN0cycpXS9mb2xsb3dpbmctc2libGluZzo6dGFibGVbMV0iKSAlPiUKICBodG1sX3RhYmxlKCkKCnZpc3VhbF9lZmZlY3RzX3RhYmxlIDwt
IHZpc3VhbF9lZmZlY3RzX3RhYmxlW1sxXV0KCnZpc3VhbF9lZmZlY3RzX3RhYmxlIDwtIHZpc3VhbF9lZmZlY3RzX3RhYmxlWyFpcy5uYSh2aXN1YWxfZWZmZWN0c190YWJsZVtbMV1dKSAmIHZpc3VhbF9lZmZlY3RzX3RhYmxlW1sxXV0gIT0gIiIsIF0KCgpudW1fc3RhZmYgPC0gbnJvdyh2aXN1YWxfZWZmZWN0c190YWJsZSkKCgpjYXQoIlRoZSBudW1iZXIgb2Ygc3RhZmYgd2hvIHdvcmtlZCBvbiBTZXJpZXMgVmlzdWFsIEVmZmVjdHMgaXMiLCBudW1fc3RhZmYsICIuXG4iKQoKCnByaW50KGhlYWQodmlzdWFsX2VmZmVjdHNfdGFibGUpKQoKYGBgCgoKCg==