library(pacman)
pacman::p_load(robotstxt, rvest)
# Q5
# Ask robots.txt whether the "2026 in film" article may be scraped.
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
## en.wikipedia.org
## [1] TRUE
# Q6
# Download the "2026 in film" article and collect every <table> element on it.
film_page <- read_html(x = "https://en.wikipedia.org/wiki/2026_in_film")
table_html <- html_elements(x = film_page, css = "table")
# Parse the third table; the result auto-prints as a one-element list.
html_table(x = table_html[3])
## [[1]]
## # A tibble: 10 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Cheburashka 2 † Central Partnership $79,559,272
## 2 2 28 Years Later: The Bone Temple † Sony $56,722,595
## 3 3 Send Help † 20th Century Studi… $54,977,893
## 4 4 Border 2 † AA Films $51,204,000[3]
## 5 5 Mercy † Amazon MGM Studios… $49,802,465
## 6 6 Return to Silent Hill † Iconic Events Rele… $41,586,056[4]
## 7 7 Primate † Paramount Pictures $39,702,818
## 8 8 Iron Lung † Markiplier Studios $38,965,988
## 9 9 Dracula † SND (France) $33,593,404
## 10 10 Mana Shankara Vara Prasad Garu † Gold Box Entertain… $32,225,000[5][6]
# Keep the parsed third table itself (not the wrapping list) for later use.
highest_grossing <- html_table(x = table_html[3])[[1L]]
# Q8
library(pacman)
pacman::p_load(robotstxt, rvest, dplyr, purrr)
# Build one data frame from the third table of each "<year>_in_film" Wikipedia
# article for 2020-2025, adding a `year` column to every row.
# A year is skipped (with a warning, so missing rows are visible) when
# robots.txt disallows the page or the page has fewer than three tables.
# map() + bind_rows() replaces map_dfr(), which purrr 1.0.0 marks as
# superseded; bind_rows() ignores the NULLs returned for skipped years.
hg_films <- bind_rows(map(2020:2025, function(y) {
  url <- paste0("https://en.wikipedia.org/wiki/", y, "_in_film")
  if (!paths_allowed(url)) {
    warning("Skipping ", y, ": robots.txt disallows ", url, call. = FALSE)
    return(NULL)
  }

  page <- read_html(url)
  tables <- html_elements(page, "table")

  # NOTE(review): the table is chosen purely by position (3rd on the page);
  # re-check this if the article layout changes.
  if (length(tables) < 3) {
    warning("Skipping ", y, ": fewer than 3 tables on ", url, call. = FALSE)
    return(NULL)
  }

  df <- html_table(tables[3])[[1]]
  df$year <- y
  df
}))
## en.wikipedia.org
##
## en.wikipedia.org
##
## en.wikipedia.org
##
## en.wikipedia.org
##
## en.wikipedia.org
##
## en.wikipedia.org
# Auto-print hg_films to inspect the scraped rows.
hg_films
## # A tibble: 60 × 5
## Rank Title Distributor `Worldwide gross` year
## <int> <chr> <chr> <chr> <int>
## 1 1 Demon Slayer: Kimetsu no Yaiba Mug… Toho / Ani… $507,127,293[4] 2020
## 2 2 The Eight Hundred CMC Pictur… $461,421,559 2020
## 3 3 My People, My Homeland China Lion $433,241,288[5] 2020
## 4 4 Bad Boys for Life Sony $426,505,244 2020
## 5 5 Tenet Warner Bro… $365,309,519 2020
## 6 6 Sonic the Hedgehog Paramount $319,715,683 2020
## 7 7 Dolittle Universal $251,410,631 2020
## 8 8 Jiang Ziya Beijing En… $243,883,429 2020
## 9 9 A Little Red Flower HG Enterta… $238,600,000[6][… 2020
## 10 10 Shock Wave 2 Universe F… $226,400,000 2020
## # ℹ 50 more rows