# Pin a CRAN mirror so package installation also works non-interactively.
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install only the packages that are not already available. Calling
# install.packages() unconditionally re-downloads everything on every run
# and makes the script fail when there is no network connection.
required_pkgs <- c("rvest", "dplyr", "purrr", "robotstxt")
is_installed <- vapply(
  required_pkgs,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
##
## The downloaded binary packages are in
## /var/folders/y3/y4gykf3j2wq3sdb40qkxtfph0000gn/T//RtmpjXUCGn/downloaded_packages
library(rvest)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.5.2
library(robotstxt)
Question 1
No R code required
Question 2
No R code required
Question 3
No R code required
Question 4
No R code required
Question 5
# Ask the site's robots.txt whether this page may be fetched by a bot.
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
## Warning: package 'future' was built under R version 4.5.2
## en.wikipedia.org
## [1] TRUE
Question 6
# Download the page and parse every <table> element it contains.
url_2026 <- "https://en.wikipedia.org/wiki/2026_in_film"
page_2026 <- read_html(url_2026)
tables_2026 <- page_2026 %>%
  html_elements("table")
# `fill` is deprecated since rvest 1.0.0 (ragged rows are filled by default),
# so it is no longer passed.
tables_2026_parsed <- tables_2026 %>%
  html_table()
# HIGHEST-GROSSING FILMS = TABLE #3
hg_2026 <- tables_2026_parsed[[3]]
# The table is picked by position, which can shift when the live page is
# edited; stop early rather than silently carrying on with the wrong table.
stopifnot("Worldwide gross" %in% names(hg_2026))
hg_2026
## # A tibble: 10 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Cheburashka 2 † Central Partnership $79,559,272
## 2 2 28 Years Later: The Bone Temple † Sony $56,794,677
## 3 3 Send Help † 20th Century Studi… $55,900,522
## 4 4 Border 2 † AA Films $51,204,000[3]
## 5 5 Mercy † Amazon MGM Studios… $49,994,297
## 6 6 Iron Lung † Markiplier Studios $42,434,920
## 7 7 Return to Silent Hill † Iconic Events Rele… $41,586,056[4]
## 8 8 Primate † Paramount Pictures $39,722,402
## 9 9 Dracula † SND (France) $33,618,925
## 10 10 Mana Shankara Vara Prasad Garu † Gold Box Entertain… $32,225,000[5][6]
Question 7
No R code required
Question 8: Bonus Question
# Years whose "<year>_in_film" pages are scraped below.
years <- seq.int(from = 2020L, to = 2025L)
print(years) # 2020 2021 2022 2023 2024 2025
## [1] 2020 2021 2022 2023 2024 2025
#' Scrape one table from the Wikipedia "<year>_in_film" page
#'
#' @param year A single, non-missing year (e.g. 2020) used to build the URL.
#' @param table_index Position of the wanted table among all `<table>`
#'   elements on the page. Defaults to 3, the position this script relies on
#'   for the highest-grossing films table.
#' @return The parsed table with an extra `year` column holding `year`.
get_highest_grossing <- function(year, table_index = 3L) {
  stopifnot(length(year) == 1L, !is.na(year))

  url <- paste0("https://en.wikipedia.org/wiki/", year, "_in_film")
  # `fill` is deprecated since rvest 1.0.0 (rows are filled by default).
  tables <- read_html(url) %>%
    html_elements("table") %>%
    html_table()

  # Report a layout change explicitly instead of a bare
  # "subscript out of bounds" error.
  if (table_index > length(tables)) {
    stop(
      "Expected at least ", table_index, " tables on ", url,
      " but found ", length(tables), ".",
      call. = FALSE
    )
  }

  # `.env$year` always refers to the function argument, even if the scraped
  # table happens to contain a column that is itself called `year`.
  tables[[table_index]] %>%
    mutate(year = .env$year)
}
# Scrape every year and stack the per-year tables into one data frame.
# `map_dfr()` is superseded as of purrr 1.0.0; `map()` followed by
# `list_rbind()` is the recommended replacement.
hg_films <- map(years, get_highest_grossing) %>%
  list_rbind()
head(hg_films)
## # A tibble: 6 × 5
## Rank Title Distributor `Worldwide gross` year
## <int> <chr> <chr> <chr> <int>
## 1 1 Demon Slayer: Kimetsu no Yaiba Muge… Toho / Ani… $507,127,293[4] 2020
## 2 2 The Eight Hundred CMC Pictur… $461,421,559 2020
## 3 3 My People, My Homeland China Lion $433,241,288[5] 2020
## 4 4 Bad Boys for Life Sony $426,505,244 2020
## 5 5 Tenet Warner Bro… $365,309,519 2020
## 6 6 Sonic the Hedgehog Paramount $319,715,683 2020