options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages(c("rvest", "dplyr", "purrr", "robotstxt"))
## 
## The downloaded binary packages are in
##  /var/folders/y3/y4gykf3j2wq3sdb40qkxtfph0000gn/T//RtmpjXUCGn/downloaded_packages
library(rvest)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
## Warning: package 'purrr' was built under R version 4.5.2
library(robotstxt)

Question 1

No R code required

Question 2

No R code required

Question 3

No R code required

Question 4

No R code required

Question 5

# Ask robotstxt whether this page may be scraped before fetching it.
paths_allowed(paths = "https://en.wikipedia.org/wiki/2026_in_film")
## Warning: package 'future' was built under R version 4.5.2
##  en.wikipedia.org
## [1] TRUE

Question 6

# Download the Wikipedia "2026 in film" page and extract its
# highest-grossing-films table.
url_2026 <- "https://en.wikipedia.org/wiki/2026_in_film"
page_2026 <- read_html(url_2026)

# Every <table> node found on the page.
tables_2026 <- page_2026 %>%
  html_elements("table")

# Parse each node into a data frame. `fill = TRUE` is no longer passed: the
# argument is deprecated since rvest 1.0.0, which always fills missing cells
# with NA.
tables_2026_parsed <- tables_2026 %>%
  html_table()

# HIGHEST-GROSSING FILMS = TABLE #3
# NOTE(review): this position depends on the page's current layout; fail
# with a readable condition instead of "subscript out of bounds" if the page
# has fewer tables than expected.
stopifnot(length(tables_2026_parsed) >= 3)
hg_2026 <- tables_2026_parsed[[3]]
hg_2026
## # A tibble: 10 × 4
##     Rank Title                             Distributor         `Worldwide gross`
##    <int> <chr>                             <chr>               <chr>            
##  1     1 Cheburashka 2 †                   Central Partnership $79,559,272      
##  2     2 28 Years Later: The Bone Temple † Sony                $56,794,677      
##  3     3 Send Help †                       20th Century Studi… $55,900,522      
##  4     4 Border 2 †                        AA Films            $51,204,000[3]   
##  5     5 Mercy †                           Amazon MGM Studios… $49,994,297      
##  6     6 Iron Lung †                       Markiplier Studios  $42,434,920      
##  7     7 Return to Silent Hill †           Iconic Events Rele… $41,586,056[4]   
##  8     8 Primate †                         Paramount Pictures  $39,722,402      
##  9     9 Dracula †                         SND (France)        $33,618,925      
## 10    10 Mana Shankara Vara Prasad Garu †  Gold Box Entertain… $32,225,000[5][6]

Question 7

No R code required

Question 8: Bonus Question

# Years that are substituted into the "<year>_in_film" page URL below.
years <- seq.int(from = 2020L, to = 2025L)
print(years) # 2020 2021 2022 2023 2024 2025
## [1] 2020 2021 2022 2023 2024 2025
#' Scrape one table from Wikipedia's "<year> in film" page
#'
#' Builds the URL `https://en.wikipedia.org/wiki/<year>_in_film`, parses every
#' `<table>` element on the page and returns the one at `table_index`, with
#' the requested year appended as a `year` column.
#'
#' @param year A single year (e.g. `2024`) used to build the page URL.
#' @param table_index Position of the wanted table among the page's tables.
#'   Defaults to `3`, the position the original script used for the
#'   highest-grossing-films table. NOTE(review): the position depends on the
#'   page layout and may differ between years; verify per page.
#' @return The selected table as returned by `html_table()`, plus a `year`
#'   column holding `year`.
get_highest_grossing <- function(year, table_index = 3L) {
  stopifnot(length(year) == 1L, !is.na(year))

  url <- paste0("https://en.wikipedia.org/wiki/", year, "_in_film")

  # `fill = TRUE` is intentionally not passed: it is deprecated since
  # rvest 1.0.0, which always fills missing cells with NA.
  tables <- read_html(url) %>%
    html_elements("table") %>%
    html_table()

  # Give a readable error instead of "subscript out of bounds" when the page
  # has fewer tables than expected.
  if (table_index > length(tables)) {
    stop(
      "Expected at least ", table_index, " tables at ", url,
      " but found ", length(tables), ".",
      call. = FALSE
    )
  }

  # `.env$year` forces the function argument to be used even if the scraped
  # table happens to contain a column that is itself called `year`.
  tables[[table_index]] %>%
    mutate(year = .env$year)
}

# Scrape every year in turn and row-bind the per-year tables into one
# data frame, then preview the first rows.
hg_films <- years %>%
  map_dfr(get_highest_grossing)
head(hg_films)
## # A tibble: 6 × 5
##    Rank Title                                Distributor `Worldwide gross`  year
##   <int> <chr>                                <chr>       <chr>             <int>
## 1     1 Demon Slayer: Kimetsu no Yaiba Muge… Toho / Ani… $507,127,293[4]    2020
## 2     2 The Eight Hundred                    CMC Pictur… $461,421,559       2020
## 3     3 My People, My Homeland               China Lion  $433,241,288[5]    2020
## 4     4 Bad Boys for Life                    Sony        $426,505,244       2020
## 5     5 Tenet                                Warner Bro… $365,309,519       2020
## 6     6 Sonic the Hedgehog                   Paramount   $319,715,683       2020