Install and load packages
# One-time setup: uncomment and run these lines if the packages are missing.
# Package names must be passed to install.packages() as ONE character vector;
# a second positional argument is interpreted as the library path, not as
# another package.
# install.packages(c("robotstxt", "rvest"))
# install.packages("pacman")

# Load robotstxt (robots.txt checks) and rvest (HTML scraping) via pacman.
pacman::p_load(robotstxt, rvest)
Check that scraping the page is allowed by robots.txt
# Ask robotstxt whether this URL may be accessed; the result is auto-printed.
paths_allowed(
  paths = "https://en.wikipedia.org/wiki/2024_in_film"
)
## en.wikipedia.org
## [1] TRUE
Scrape the table data
# Download and parse the Wikipedia page. Despite its name, `tbl` holds the
# whole parsed HTML document, not a table.
tbl <- "https://en.wikipedia.org/wiki/2024_in_film" %>%
  read_html()

# Pick out the <table> that is the 11th child of the parser-output <div>.
# NOTE(review): the selector is purely positional, so it presumably breaks if
# the page layout changes -- confirm the right table is still matched.
movie_html <- tbl %>%
  html_elements(
    css = "#mw-content-text > div.mw-content-ltr.mw-parser-output > table:nth-child(11)"
  )

# html_table() on a node set yields a list with one tibble per matched table,
# so the data itself is the first element of `films`.
films <- movie_html %>%
  html_table()

# Display the scraped result.
films
## [[1]]
## # A tibble: 10 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Inside Out 2 Disney $1,698,863,816
## 2 2 Deadpool & Wolverine Disney $1,338,073,645
## 3 3 Moana 2 Disney $1,059,242,164
## 4 4 Despicable Me 4 Universal $971,315,095
## 5 5 Wicked Universal $756,535,681
## 6 6 Mufasa: The Lion King Disney $723,060,982
## 7 7 Dune: Part Two Warner Bros. $715,409,065
## 8 8 Godzilla x Kong: The New Empire Warner Bros. $572,505,338
## 9 9 Kung Fu Panda 4 Universal $547,689,492
## 10 10 Sonic the Hedgehog 3 Paramount $492,162,604