Package Load

# Load the robots.txt checker and the scraping toolkit via pacman.
pacman::p_load(robotstxt, rvest)
# Ask robots.txt whether this page may be fetched before scraping it.
robotstxt::paths_allowed(paths = "https://en.wikipedia.org/wiki/2024_in_film")
##  en.wikipedia.org
## [1] TRUE

Web Scraping

Read in the HTML document

# Fetch the page and parse it into an HTML document object, then display it.
bas_html <- rvest::read_html(x = "https://en.wikipedia.org/wiki/2024_in_film")
print(bas_html)
## {html_document}
## <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 vector-feature-limited-width-content-enabled vector-feature-custom-font-size-clientpref-1 vector-feature-appearance-pinned-clientpref-1 vector-feature-night-mode-enabled skin-theme-clientpref-day vector-sticky-header-enabled vector-toc-available" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="skin--responsive skin-vector skin-vector-search-vue mediawik ...
# Locate the table by its class and caption text instead of by its position.
# A positional selector such as `table:nth-child(11)` silently points at a
# different element whenever content is added or removed above the table,
# whereas the caption identifies the intended table directly. The search is
# still limited to the article body (#mw-content-text), as before.
highest_grossing_films_html <- html_element(
  bas_html,
  xpath = paste0(
    "//*[@id='mw-content-text']",
    "//table[contains(@class, 'wikitable')]",
    "[caption[contains(., 'Highest-grossing films of 2024')]]"
  )
)
highest_grossing_films_html
## {html_node}
## <table class="wikitable sortable" style="margin:auto; text-align:left">
## [1] <caption>Highest-grossing films of 2024<sup id="cite_ref-1" class="refere ...
## [2] <tbody>\n<tr>\n<th>Rank</th>\n<th>Title</th>\n<th>Distributor</th>\n<th>W ...
# Convert the selected <table> node into a tibble and display it.
highest_grossing_films <- rvest::html_table(x = highest_grossing_films_html)
print(highest_grossing_films)
## # A tibble: 10 × 4
##     Rank Title                           Distributor  `Worldwide gross`
##    <int> <chr>                           <chr>        <chr>            
##  1     1 Inside Out 2                    Disney       $1,698,863,816   
##  2     2 Deadpool & Wolverine            Disney       $1,338,073,645   
##  3     3 Moana 2                         Disney       $1,059,242,164   
##  4     4 Despicable Me 4                 Universal    $971,315,095     
##  5     5 Wicked                          Universal    $756,535,681     
##  6     6 Mufasa: The Lion King           Disney       $723,060,982     
##  7     7 Dune: Part Two                  Warner Bros. $715,409,065     
##  8     8 Godzilla x Kong: The New Empire Warner Bros. $572,505,338     
##  9     9 Kung Fu Panda 4                 Universal    $547,689,492     
## 10    10 Sonic the Hedgehog 3            Paramount    $492,162,604