library("rvest")
library("robotstxt")

Q5: Check Whether Data Scraping Is Allowed

# Ask the site's robots.txt whether this page may be fetched by a bot before
# scraping it; a result of TRUE means the path is not disallowed.
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
## Warning: package 'future' was built under R version 4.4.3
##  en.wikipedia.org
## [1] TRUE

Q6: Scrape Data on the Highest-Grossing Films of 2026

# Download and parse the Wikipedia "2026 in film" page.
wrld_gross <- read_html("https://en.wikipedia.org/wiki/2026_in_film")

# Collect every <table> element found on the page.
table_wrldgross <- html_elements(wrld_gross, "table")

# The table of interest is selected by position (third table on the page).
# A positional index breaks silently if the page layout changes, so fail
# with a clear message when there are not enough tables to index into.
if (length(table_wrldgross) < 3L) {
  stop(
    "Expected at least 3 tables on the page, found ",
    length(table_wrldgross), ".",
    call. = FALSE
  )
}
target_table <- table_wrldgross[[3]]

# Convert the HTML table to a tibble. The `fill` argument is deprecated in
# rvest >= 1.0.0 (missing cells are always filled with NA), so it is omitted.
mvs_gross <- html_table(target_table)

# Sanity check: the selected table should carry a gross-revenue column.
# If it does not, the page was probably reorganised and index 3 now points
# at a different table.
if (!any(grepl("gross", names(mvs_gross), ignore.case = TRUE))) {
  stop(
    "The third table has no 'gross' column; the page layout may have changed.",
    call. = FALSE
  )
}

# Preview the first rows of the scraped table.
head(mvs_gross)
## # A tibble: 6 × 4
##    Rank Title                             Distributor          `Worldwide gross`
##   <int> <chr>                             <chr>                <chr>            
## 1     1 Cheburashka 2 †                   Central Partnership  $79,500,909      
## 2     2 28 Years Later: The Bone Temple † Sony                 $56,641,191      
## 3     3 Send Help †                       20th Century Studios $53,524,493      
## 4     4 Border 2 †                        AA Films             $51,204,000[3]   
## 5     5 Mercy                             Amazon MGM Studios … $49,508,904      
## 6     6 Primate †                         Paramount Pictures   $39,677,703