# Check that Wikipedia's robots.txt permits scraping the target page before
# any requests are made against it.
# Install robotstxt only when it is missing, so re-running the script does not
# re-download the package every time.
if (!requireNamespace("robotstxt", quietly = TRUE)) {
  install.packages("robotstxt")
}
library(robotstxt)
paths_allowed("https://en.wikipedia.org/wiki/2026_in_film")
## en.wikipedia.org
## [1] TRUE
# Scrape the first "wikitable" on the 2026 in film page; the recorded output
# below shows it is the highest-grossing films table.
# Install rvest only when it is missing.
if (!requireNamespace("rvest", quietly = TRUE)) {
  install.packages("rvest")
}
library(rvest)
url <- "https://en.wikipedia.org/wiki/2026_in_film"
page <- read_html(url)
# html_element() (singular) returns only the first node matching the selector.
highest_grossing <- page %>%
  html_element("table.wikitable") %>%
  html_table()
highest_grossing
## # A tibble: 10 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Cheburashka 2 † Central Partnership $79,559,272
## 2 2 28 Years Later: The Bone Temple † Sony $56,794,677
## 3 3 Send Help † 20th Century Studi… $55,900,522
## 4 4 Border 2 † AA Films $51,204,000[3]
## 5 5 Mercy † Amazon MGM Studios… $49,994,297
## 6 6 Iron Lung † Markiplier Studios $42,434,920
## 7 7 Return to Silent Hill † Iconic Events Rele… $41,586,056[4]
## 8 8 Primate † Paramount Pictures $39,722,402
## 9 9 Dracula † SND (France) $33,618,925
## 10 10 Mana Shankara Vara Prasad Garu † Gold Box Entertain… $32,225,000[5][6]
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Collect the first "wikitable" from each year's "<year> in film" page and
# stack the per-year tables into a single tibble tagged with a Year column.
years <- 2020:2025
hg_films <- map(years, function(year) {
  url <- paste0("https://en.wikipedia.org/wiki/", year, "_in_film")
  page <- read_html(url)
  table_nodes <- html_elements(page, "table.wikitable")
  # Fail with the offending URL rather than an opaque "subscript out of bounds"
  # if a page has no matching table.
  if (length(table_nodes) == 0L) {
    stop("No table.wikitable found at ", url, call. = FALSE)
  }
  # Parse only the first table; the remaining tables on the page are not used.
  # The deprecated `fill` argument is dropped: html_table() always fills
  # missing cells with NA.
  hg_table <- html_table(table_nodes[[1]])
  hg_table$Year <- year
  hg_table
}) %>%
  list_rbind()
hg_films
## # A tibble: 60 × 5
## Rank Title Distributor `Worldwide gross` Year
## <int> <chr> <chr> <chr> <int>
## 1 1 Demon Slayer: Kimetsu no Yaiba Mug… Toho / Ani… $507,127,293[4] 2020
## 2 2 The Eight Hundred CMC Pictur… $461,421,559 2020
## 3 3 My People, My Homeland China Lion $433,241,288[5] 2020
## 4 4 Bad Boys for Life Sony $426,505,244 2020
## 5 5 Tenet Warner Bro… $365,309,519 2020
## 6 6 Sonic the Hedgehog Paramount $319,715,683 2020
## 7 7 Dolittle Universal $251,410,631 2020
## 8 8 Jiang Ziya Beijing En… $243,883,429 2020
## 9 9 A Little Red Flower HG Enterta… $238,600,000[6][… 2020
## 10 10 Shock Wave 2 Universe F… $226,400,000 2020
## # ℹ 50 more rows