library("rvest")
library("robotstxt")
Q5 Check If Data Scraping Is Allowed
# Consult the site's robots.txt before scraping: the call reports whether
# automated access to this page is permitted (TRUE = allowed).
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
## Warning: package 'future' was built under R version 4.4.3
## en.wikipedia.org
## [1] TRUE
Q6 Scrape 2026 Highest-Grossing Movie Data
# Download and parse the Wikipedia "2026 in film" page.
wrld_gross <- read_html("https://en.wikipedia.org/wiki/2026_in_film")

# Collect every <table> element found on the page.
table_wrldgross <- html_elements(wrld_gross, "table")

# The table of interest is picked by position (third <table> on the page).
# NOTE(review): a positional index is fragile -- if the page layout changes,
# this may silently select a different table; re-check when re-running.
target_table <- table_wrldgross[[3]]

# Convert the HTML table to a tibble. The `fill` argument is deprecated since
# rvest 1.0.0 (missing cells are always filled with NA), so it is omitted.
mvs_gross <- html_table(target_table)

# Preview the first rows. `Worldwide gross` is still a character column here
# (dollar signs, thousands separators, footnote markers such as "[3]") and
# must be cleaned before any numeric use.
head(mvs_gross)
## # A tibble: 6 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Cheburashka 2 † Central Partnership $79,500,909
## 2 2 28 Years Later: The Bone Temple † Sony $56,641,191
## 3 3 Send Help † 20th Century Studios $53,524,493
## 4 4 Border 2 † AA Films $51,204,000[3]
## 5 5 Mercy Amazon MGM Studios … $49,508,904
## 6 6 Primate † Paramount Pictures $39,677,703