pacman::p_load(robotstxt, rvest)

Web scraping

Read in the HTML document

# Ask the site's robots.txt whether this page may be scraped before fetching it.
paths_allowed(
  paths = "https://en.wikipedia.org/wiki/2024_in_film",
  domain = "auto"
)
##  en.wikipedia.org
## [1] TRUE
# Download and parse the Wikipedia page; `basref` holds the parsed document.
basref <- read_html(
  x = "https://en.wikipedia.org/wiki/2024_in_film"
)

Select the table elements

# Collect every <table> node in the parsed page using a CSS selector.
tbls <- html_elements(x = basref, css = "table")

Parse the HTML table into a data frame

# Convert the second <table> on the page into a tibble.
# NOTE(review): the position index 2 depends on the page's current layout;
# confirm it still points at the highest-grossing-films table.
Highest_grossing_movies <- html_table(x = tbls[[2]])

# Display the parsed table.
print(Highest_grossing_movies)
## # A tibble: 10 × 4
##     Rank Title                           Distributor  `Worldwide gross`
##    <int> <chr>                           <chr>        <chr>            
##  1     1 Inside Out 2                    Disney       $1,698,863,816   
##  2     2 Deadpool & Wolverine            Disney       $1,338,073,645   
##  3     3 Moana 2                         Disney       $1,059,242,164   
##  4     4 Despicable Me 4                 Universal    $971,315,095     
##  5     5 Wicked                          Universal    $756,535,681     
##  6     6 Mufasa: The Lion King           Disney       $723,060,982     
##  7     7 Dune: Part Two                  Warner Bros. $715,409,065     
##  8     8 Godzilla x Kong: The New Empire Warner Bros. $572,505,338     
##  9     9 Kung Fu Panda 4                 Universal    $547,689,492     
## 10    10 Sonic the Hedgehog 3            Paramount    $492,162,604