# Set the CRAN mirror that install.packages() will download from.
options(repos = c(CRAN = "https://cloud.r-project.org"))
# Install robotstxt only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("robotstxt", quietly = TRUE)) {
  install.packages("robotstxt")
}
## 
## The downloaded binary packages are in
##  /var/folders/db/t5yc87r97x503sdd2n6m5rjc0000gn/T//RtmpA9CEC2/downloaded_packages
library(robotstxt)
library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Page to scrape.
url <- "https://en.wikipedia.org/wiki/2024_in_film"
# Ask the site's robots.txt whether this path may be fetched, and abort before
# any scraping happens if the answer is anything other than TRUE.
is_allowed <- paths_allowed(url)
if (!isTRUE(is_allowed)) {
  stop("robots.txt does not permit scraping: ", url, call. = FALSE)
}
# Print the result so the permission check stays visible in the output.
is_allowed
##  en.wikipedia.org
## [1] TRUE
# Download and parse the page, then collect every <table> node it contains.
page <- url %>% read_html()
tables <- page %>% html_elements("table")
# Print how many tables were found.
length(tables)
## [1] 7
# The table of interest is taken by position (second <table> on the page).
# NOTE(review): the index may shift if the article is edited -- fail with a
# clear condition instead of an out-of-bounds subscript error.
stopifnot(length(tables) >= 2)
# `fill` is deprecated since rvest 1.0.0; missing cells are always NA-filled.
highest_grossing_df <- html_table(tables[[2]])
# Preview the first rows of the extracted table.
head(highest_grossing_df)
## # A tibble: 6 × 4
##    Rank Title                 Distributor `Worldwide gross`
##   <int> <chr>                 <chr>       <chr>            
## 1     1 Inside Out 2          Disney      $1,698,863,816   
## 2     2 Deadpool & Wolverine  Disney      $1,338,073,645   
## 3     3 Moana 2               Disney      $1,059,242,164   
## 4     4 Despicable Me 4       Universal   $971,315,095     
## 5     5 Wicked                Universal   $756,535,681     
## 6     6 Mufasa: The Lion King Disney      $723,060,982