Question 5

Loading the Wikipedia “2026 in film” page so that its tables can be scraped

library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Address of the Wikipedia article to scrape.
url <- "https://en.wikipedia.org/wiki/2026_in_film"
# Download the article and parse its HTML into a document object.
page <- url %>% read_html()

Question 6

Scraping the “Highest-grossing films” table from the Wikipedia page

# Parse every <table> element on the page into a list of tibbles.
# `fill` is not passed: it has been deprecated since rvest 1.0.0, where
# missing cells are always padded with NA.
tables <- page %>%
  html_elements("table") %>%
  html_table()
# Number of tables found on the page.
length(tables)
## [1] 7
# Preview the first rows of the third parsed table.
head(tables[[3]])
## # A tibble: 6 × 4
##    Rank Title                             Distributor          `Worldwide gross`
##   <int> <chr>                             <chr>                <chr>            
## 1     1 Cheburashka 2 †                   Central Partnership  $79,559,272      
## 2     2 28 Years Later: The Bone Temple † Sony                 $56,794,677      
## 3     3 Send Help †                       20th Century Studios $55,900,522      
## 4     4 Border 2 †                        AA Films             $51,204,000[3]   
## 5     5 Mercy †                           Amazon MGM Studios … $49,994,297      
## 6     6 Iron Lung †                       Markiplier Studios   $42,434,920
# Keep the third parsed table, whose preview above shows the Rank, Title,
# Distributor and "Worldwide gross" columns.
# NOTE(review): the index is positional, so it depends on the current page
# layout - re-check it if the article changes.
highest_grossing_films <- tables[[3]]

Question 8

Scraping the highest-grossing films table for each year from 2020 to 2025 using a functional approach (a helper function mapped over the years with purrr)

library(rvest)
library(dplyr)
library(purrr)
library(stringr)
#' Scrape the highest-grossing films table for one year from Wikipedia
#'
#' Downloads `https://en.wikipedia.org/wiki/<year>_in_film`, parses its
#' `table.wikitable` elements and returns the highest-grossing films table
#' with the requested year attached.
#'
#' @param year A single, non-missing year (e.g. `2020`) used to build the
#'   article URL.
#' @return A tibble with the columns of the scraped table plus a `year` column.
scrape_year <- function(year) {
  # A vector of years would build several URLs and fail later inside
  # read_html() with an unhelpful message, so reject it up front.
  if (length(year) != 1L || is.na(year)) {
    stop("`year` must be a single, non-missing value.", call. = FALSE)
  }

  url <- paste0("https://en.wikipedia.org/wiki/", year, "_in_film")

  # `fill` is omitted: deprecated since rvest 1.0.0 (cells are always filled).
  tables <- read_html(url) %>%
    html_elements("table.wikitable") %>%
    html_table()

  if (length(tables) == 0L) {
    stop("No `table.wikitable` element found at ", url, call. = FALSE)
  }

  # Identify the table by its "Worldwide gross" column rather than trusting
  # that it is always the first wikitable on the page.
  has_gross <- vapply(
    tables,
    function(tbl) {
      any(grepl("worldwide gross", names(tbl), ignore.case = TRUE))
    },
    logical(1)
  )

  if (any(has_gross)) {
    table_index <- which(has_gross)[1L]
  } else {
    # Keep the previous behaviour (first wikitable) but make it visible.
    warning(
      "No table with a 'Worldwide gross' column at ", url,
      "; falling back to the first wikitable.",
      call. = FALSE
    )
    table_index <- 1L
  }

  tables[[table_index]] %>%
    mutate(year = year)
}
# Scrape each year from 2020 through 2025 and stack the yearly tables row-wise.
hg_films_2020_2025 <- bind_rows(map(2020:2025, scrape_year))
# Preview the first rows of the combined table.
hg_films_2020_2025 %>% head()
## # A tibble: 6 × 5
##    Rank Title                                Distributor `Worldwide gross`  year
##   <int> <chr>                                <chr>       <chr>             <int>
## 1     1 Demon Slayer: Kimetsu no Yaiba Muge… Toho / Ani… $507,127,293[4]    2020
## 2     2 The Eight Hundred                    CMC Pictur… $461,421,559       2020
## 3     3 My People, My Homeland               China Lion  $433,241,288[5]    2020
## 4     4 Bad Boys for Life                    Sony        $426,505,244       2020
## 5     5 Tenet                                Warner Bro… $365,309,519       2020
## 6     6 Sonic the Hedgehog                   Paramount   $319,715,683       2020