Charlie Stevens
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
Question 6:
# Download and parse the IMDb full-credits page for this title
url <- "https://www.imdb.com/title/tt7235466/fullcredits?ref_=tt_cl_sm"
page <- read_html(url)
# Collect every <table> node in the document
tables <- html_elements(page, "table")
# Convert the third table on the page into a data frame
series_cast_table <- html_table(tables[[3]])
# Preview the first rows, then report rows x columns
head(series_cast_table)
dim(series_cast_table)
## [1] 3152 4
Question 7:
# Keep only columns X2 and X4, drop rows where either one is NA or an empty
# string, and collapse duplicate rows.
cleaned_table <- series_cast_table %>%
  select(X2, X4) %>%
  filter(
    !is.na(X2), X2 != "",
    !is.na(X4), X4 != ""
  ) %>%
  distinct()
# Rows x columns remaining after cleaning
dim(cleaned_table)
## [1] 1575 2
Question 8:
# Build a 3-row demo tibble with two integer columns (A = 1..3, B = 4..6),
# then display it
tibble_name <- tibble(
  A = seq_len(3),
  B = A + 3L
)
print(tibble_name)
## # A tibble: 3 × 2
## A B
## <int> <int>
## 1 1 4
## 2 2 5
## 3 3 6
# Replace both column names at once via the colnames<- replacement function
colnames(tibble_name) <- c("v1", "v2")
# Print again to confirm the headers changed while the values stayed the same
print(tibble_name)
## # A tibble: 3 × 2
## v1 v2
## <int> <int>
## 1 1 4
## 2 2 5
## 3 3 6
Answer: A and D
Question 9:
library(rvest)
# Reuse `page`, the parsed full-credits document already downloaded earlier in
# this file from this same URL, rather than requesting it from IMDb a second
# time.
# Select the <tbody> of the table that is the 38th child of
# #fullcredits_content (nth-child counts every sibling element, not only
# tables, so this is not necessarily the 38th table on the page).
vfx_table <- page %>%
  html_element("#fullcredits_content > table:nth-child(38) > tbody")
# html_element() yields a missing node when nothing matches; stop here so an
# outdated selector is not silently reported as a count of zero.
if (inherits(vfx_table, "xml_missing")) {
  stop(
    "Selector for the credits table matched nothing; update the selector.",
    call. = FALSE
  )
}
# Text of the first cell in every row, with surrounding whitespace trimmed
# (adjust the selector if the page layout changes)
vfx_names <- vfx_table %>%
  html_elements("tr td:nth-child(1)") %>%
  html_text(trim = TRUE)
# Count number of extracted names
length(vfx_names)
## [1] 196