library(robotstxt)

# Ask the site's robots.txt whether this path may be fetched.
paths_allowed(
  paths = "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
)
www.imdb.com
[1] TRUE
Question 6
library(rvest)

# Download the full-credits page and parse every <table> on it into a list.
url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)
tables <- html_table(page)

# Number of tables found on the page.
length(tables)
[1] 30
# The third parsed table is taken as the series cast table.
# NOTE(review): the position depends on the page layout -- confirm if the
# page changes.
series_cast_table <- tables[[3]]

# Report the table's dimensions.
table_dims <- dim(series_cast_table)
rows <- table_dims[1]
columns <- table_dims[2]
cat("The table has", rows, "rows and", columns, "columns.\n")
The table has 2978 rows and 4 columns.
# Preview the first rows of the series cast table.
print(head(series_cast_table))
library(rvest)

# Re-download and re-parse the full-credits page so this chunk runs on its own.
url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)
tables <- page %>% html_table()

# NOTE(review): the series cast table is assumed to be the third table on the
# page -- confirm if the page layout changes.
series_cast_table <- tables[[3]]

# Keep columns 2 and 4 by position (presumably the name and role columns --
# confirm against the page). Base indexing is used instead of select() because
# dplyr is not attached at this point in the visible file.
cleaned_table <- series_cast_table[, c(2, 4)]

# Keep only rows where both remaining columns are present and non-empty.
# The is.na() checks make the NA handling explicit, so the mask never holds NA.
first_col <- cleaned_table[[1]]
second_col <- cleaned_table[[2]]
keep_row <- !is.na(first_col) & first_col != "" &
  !is.na(second_col) & second_col != ""
cleaned_table <- cleaned_table[keep_row, ]

# Report the size of the cleaned table.
final_rows <- nrow(cleaned_table)
final_columns <- ncol(cleaned_table)
cat("The cleaned cast table has", final_rows, "observations and", final_columns, "columns.\n")
The cleaned cast table has 1488 observations and 2 columns.
# Preview the first rows of the cleaned cast table.
print(head(cleaned_table))
NA
# Question 9
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxubGlicmFyeShydmVzdClcbmxpYnJhcnkoZHBseXIpXG5cblxudXJsIDwtICdodHRwczovL3d3dy5pbWRiLmNvbS90aXRsZS90dDcyMzU0NjYvZnVsbGNyZWRpdHM/cmVmXz10dF9jbF9zbSdcblxuXG5wYWdlIDwtIHJlYWRfaHRtbCh1cmwpXG5cblxudmlzdWFsX2VmZmVjdHNfdGFibGUgPC0gcGFnZSAlPiVcbiAgaHRtbF9ub2Rlcyh4cGF0aCA9IFwiLy9oNFtjb250YWlucyh0ZXh0KCksICdTZXJpZXMgVmlzdWFsIEVmZmVjdHMnKV0vZm9sbG93aW5nLXNpYmxpbmc6OnRhYmxlWzFdXCIpICU+JVxuICBodG1sX3RhYmxlKClcblxudmlzdWFsX2VmZmVjdHNfdGFibGUgPC0gdmlzdWFsX2VmZmVjdHNfdGFibGVbWzFdXVxuXG52aXN1YWxfZWZmZWN0c190YWJsZSA8LSB2aXN1YWxfZWZmZWN0c190YWJsZVshaXMubmEodmlzdWFsX2VmZmVjdHNfdGFibGVbWzFdXSkgJiB2aXN1YWxfZWZmZWN0c190YWJsZVtbMV1dICE9IFwiXCIsIF1cblxuXG5udW1fc3RhZmYgPC0gbnJvdyh2aXN1YWxfZWZmZWN0c190YWJsZSlcblxuXG5jYXQoXCJUaGUgbnVtYmVyIG9mIHN0YWZmIHdobyB3b3JrZWQgb24gU2VyaWVzIFZpc3VhbCBFZmZlY3RzIGlzXCIsIG51bV9zdGFmZiwgXCIuXFxuXCIpXG5cblxucHJpbnQoaGVhZCh2aXN1YWxfZWZmZWN0c190YWJsZSkpXG5cbmBgYCJ9 -->
```r
library(rvest)
library(dplyr)

# Count the staff listed in the "Series Visual Effects" table of the
# full-credits page.
url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)

# Select the first <table> sibling that follows the <h4> heading whose text
# contains "Series Visual Effects".
visual_effects_nodes <- page %>%
  html_nodes(
    xpath = "//h4[contains(text(), 'Series Visual Effects')]/following-sibling::table[1]"
  )

# Fail with a clear message instead of an opaque "subscript out of bounds"
# when the heading or its table is not present on the page.
if (length(visual_effects_nodes) == 0) {
  stop("No 'Series Visual Effects' table found on the page.", call. = FALSE)
}

visual_effects_table <- html_table(visual_effects_nodes)[[1]]

# Drop rows whose first column is missing or empty.
first_col <- visual_effects_table[[1]]
visual_effects_table <- visual_effects_table[!is.na(first_col) & first_col != "", ]

num_staff <- nrow(visual_effects_table)

# sep = "" keeps the closing period attached to the number ("is 12." rather
# than "is 12 .").
cat(
  "The number of staff who worked on Series Visual Effects is ",
  num_staff, ".\n",
  sep = ""
)

# Preview the first rows of the filtered table.
print(head(visual_effects_table))