Question 5
# Ask the site's robots.txt whether this page may be scraped.
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
## en.wikipedia.org
## [1] TRUE
Question 6
# Download and parse the 2026 page, then collect every <table> node on it.
src_html <- read_html("https://en.wikipedia.org/wiki/2026_in_film")
tables_html <- src_html %>% html_elements("table")
# Single-bracket subsetting keeps a node set, so html_table() returns a
# one-element list holding the third table rather than a bare tibble.
highest_grossing <- tables_html[3] %>% html_table()
highest_grossing
## [[1]]
## # A tibble: 10 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Cheburashka 2 † Central Partnership $79,500,909
## 2 2 28 Years Later: The Bone Temple † Sony $56,641,191
## 3 3 Send Help † 20th Century Studi… $53,524,493
## 4 4 Border 2 † AA Films $51,204,000[3]
## 5 5 Mercy Amazon MGM Studios… $49,508,904
## 6 6 Primate † Paramount Pictures $39,677,703
## 7 7 Return to Silent Hill † Iconic Events Rele… $38,606,428
## 8 8 Dracula † SND (France) $33,593,404
## 9 9 Mana Shankara Vara Prasad Garu † Gold Box Entertain… $32,225,000[4][5]
## 10 10 Prostokvashino [ru]† Cinema Atmosphere $31,934,076
Question 8
library(rvest)
library(purrr)
library(dplyr)
Function to scrape highest-grossing films
# Years whose "<year> in film" pages will be scraped.
years <- seq(from = 2020L, to = 2025L)
#' Scrape one table from a Wikipedia "<year> in film" page
#'
#' Builds the URL https://en.wikipedia.org/wiki/<year>_in_film, parses every
#' <table> element on that page and returns the one at position `table_index`
#' with a `year` column appended.
#'
#' @param year A single, non-missing year (e.g. 2020); it is pasted into the
#'   URL and stored unchanged in the `year` column.
#' @param table_index Position of the wanted table among the page's <table>
#'   elements. Defaults to 3, the position this script has always used for the
#'   highest-grossing table. NOTE(review): the position depends on the page
#'   layout, so confirm it before scraping a year that has not been checked.
#' @return The table parsed by `html_table()` with an extra `year` column.
get_highest_grossing <- function(year, table_index = 3L) {
  # Fail early with a readable message instead of an opaque download error.
  if (length(year) != 1L || is.na(year)) {
    stop("`year` must be a single non-missing value.", call. = FALSE)
  }
  if (!is.numeric(table_index) || length(table_index) != 1L ||
      is.na(table_index) || table_index < 1) {
    stop("`table_index` must be a single positive number.", call. = FALSE)
  }

  url <- paste0("https://en.wikipedia.org/wiki/", year, "_in_film")
  page <- read_html(url)
  tables <- html_elements(page, "table")

  # Report a missing table explicitly rather than "subscript out of bounds".
  if (length(tables) < table_index) {
    stop(
      sprintf(
        "Expected at least %s table(s) at %s but found %s.",
        table_index, url, length(tables)
      ),
      call. = FALSE
    )
  }

  hg_table <- html_table(tables[[table_index]])
  hg_table %>%
    mutate(year = year)
}
# Scrape each year in `years` and stack the per-year tables row-wise.
hg_films <- years %>% map_dfr(get_highest_grossing)
hg_films
## # A tibble: 60 × 5
## Rank Title Distributor `Worldwide gross` year
## <int> <chr> <chr> <chr> <int>
## 1 1 Demon Slayer: Kimetsu no Yaiba Mug… Toho / Ani… $507,127,293[4] 2020
## 2 2 The Eight Hundred CMC Pictur… $461,421,559 2020
## 3 3 My People, My Homeland China Lion $433,241,288[5] 2020
## 4 4 Bad Boys for Life Sony $426,505,244 2020
## 5 5 Tenet Warner Bro… $365,309,519 2020
## 6 6 Sonic the Hedgehog Paramount $319,715,683 2020
## 7 7 Dolittle Universal $251,410,631 2020
## 8 8 Jiang Ziya Beijing En… $243,883,429 2020
## 9 9 A Little Red Flower HG Enterta… $238,600,000[6][… 2020
## 10 10 Shock Wave 2 Universe F… $226,400,000 2020
## # ℹ 50 more rows