library(pacman)
pacman::p_load(robotstxt, rvest)
# Q5
# Ask the site's robots.txt whether this page may be scraped before fetching it.
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
##  en.wikipedia.org
## [1] TRUE
# Q6
# Download the 2026-in-film article and collect every <table> element on it.
film_page <- read_html("https://en.wikipedia.org/wiki/2026_in_film")

table_html <- html_elements(film_page, "table")

# The highest-grossing-films table is addressed by position (3rd <table>),
# which depends on the live page layout. Fail with a clear message if the
# page no longer has that many tables.
if (length(table_html) < 3L) {
  stop(
    "Expected at least 3 tables on the page, found ", length(table_html),
    call. = FALSE
  )
}

# Parse the table once: `[[3]]` extracts a single node, so html_table()
# returns the tibble directly instead of a one-element list.
highest_grossing <- html_table(table_html[[3]])
highest_grossing
## # A tibble: 10 × 4
##     Rank Title                             Distributor         `Worldwide gross`
##    <int> <chr>                             <chr>               <chr>            
##  1     1 Cheburashka 2 †                   Central Partnership $79,559,272      
##  2     2 28 Years Later: The Bone Temple † Sony                $56,722,595      
##  3     3 Send Help †                       20th Century Studi… $54,977,893      
##  4     4 Border 2 †                        AA Films            $51,204,000[3]   
##  5     5 Mercy †                           Amazon MGM Studios… $49,802,465      
##  6     6 Return to Silent Hill †           Iconic Events Rele… $41,586,056[4]   
##  7     7 Primate †                         Paramount Pictures  $39,702,818      
##  8     8 Iron Lung †                       Markiplier Studios  $38,965,988      
##  9     9 Dracula †                         SND (France)        $33,593,404      
## 10    10 Mana Shankara Vara Prasad Garu †  Gold Box Entertain… $32,225,000[5][6]
# Q8

library(pacman)
pacman::p_load(robotstxt, rvest, dplyr, purrr)

# Scrape one table from a Wikipedia "<year>_in_film" page.
#
# Arguments:
#   year         Year used to build the page URL.
#   table_index  Position of the wanted <table> among all tables on the page.
#                Defaults to 3, the position this script has always read.
#                NOTE(review): the position depends on the live page layout;
#                confirm it still points at the highest-grossing table.
#
# Returns the parsed table as a tibble with an added `year` column, or NULL
# (after a warning) when robots.txt disallows the page, the page cannot be
# downloaded, or it has fewer than `table_index` tables.
scrape_highest_grossing <- function(year, table_index = 3L) {
  url <- paste0("https://en.wikipedia.org/wiki/", year, "_in_film")

  # Respect robots.txt; anything other than a clear TRUE is treated as "no".
  if (!isTRUE(paths_allowed(url))) {
    warning("Skipping ", year, ": scraping not allowed for ", url,
            call. = FALSE)
    return(NULL)
  }

  # A download failure for one year must not discard the other years.
  page <- tryCatch(
    read_html(url),
    error = function(e) {
      warning("Skipping ", year, ": could not read ", url, " (",
              conditionMessage(e), ")", call. = FALSE)
      NULL
    }
  )
  if (is.null(page)) {
    return(NULL)
  }

  tables <- html_elements(page, "table")
  if (length(tables) < table_index) {
    warning("Skipping ", year, ": page has only ", length(tables),
            " table(s), expected at least ", table_index, call. = FALSE)
    return(NULL)
  }

  # `[[` extracts a single node, so html_table() returns the tibble itself.
  films <- html_table(tables[[table_index]])
  films$year <- year
  films
}

# Stack the per-year tables; bind_rows() drops the NULLs of skipped years.
hg_films <- bind_rows(map(2020:2025, scrape_highest_grossing))
##  en.wikipedia.org                      
## 
##  en.wikipedia.org                      
## 
##  en.wikipedia.org                      
## 
##  en.wikipedia.org                      
## 
##  en.wikipedia.org                      
## 
##  en.wikipedia.org
hg_films
## # A tibble: 60 × 5
##     Rank Title                               Distributor `Worldwide gross`  year
##    <int> <chr>                               <chr>       <chr>             <int>
##  1     1 Demon Slayer: Kimetsu no Yaiba Mug… Toho / Ani… $507,127,293[4]    2020
##  2     2 The Eight Hundred                   CMC Pictur… $461,421,559       2020
##  3     3 My People, My Homeland              China Lion  $433,241,288[5]    2020
##  4     4 Bad Boys for Life                   Sony        $426,505,244       2020
##  5     5 Tenet                               Warner Bro… $365,309,519       2020
##  6     6 Sonic the Hedgehog                  Paramount   $319,715,683       2020
##  7     7 Dolittle                            Universal   $251,410,631       2020
##  8     8 Jiang Ziya                          Beijing En… $243,883,429       2020
##  9     9 A Little Red Flower                 HG Enterta… $238,600,000[6][…  2020
## 10    10 Shock Wave 2                        Universe F… $226,400,000       2020
## # ℹ 50 more rows