library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 1. Define the URL of the Wikipedia page to scrape
url <- "https://en.wikipedia.org/wiki/2024_in_film"

# 2. Read the HTML content from the URL
webpage <- read_html(url)

# 3. Extract ALL tables from the page (one tibble per <table> element)
all_tables <- webpage %>%
  html_table(header = TRUE)

# 4. Select the specific table: "Highest-grossing films"
#
# Do NOT select by position. The page is edited frequently, so table order is
# not stable. When this script was originally run with `all_tables[[3]]`, the
# third table was the awards-ceremonies table, not the box-office table:
#
## # A tibble: 37 × 5
## Date Event Host `Location(s)` Ref.
## <chr> <chr> <chr> <chr> <chr>
## 1 January 6 7th Astra Film Awards Hollywood Crea… Los Angeles,… [27]
## 2 January 7 81st Golden Globe Awards Golden Globes,… Beverly Hill… [28]
## 3 January 14 29th Critics' Choice Awards Critics Choice… Santa Monica… [29]
## 4 January 26 11th Feroz Awards Asociación de … Madrid, Spain [30]
## 5 January 27–28 69th Filmfare Awards The Times Group GIFT City, I… [31]
## 6 February 3 3rd Carmen Awards Academia del C… Huelva, Anda… [32]
## 7 February 4 16th Gaudí Awards Catalan Film A… Barcelona, C… [33]
## 8 February 4 51st Saturn Awards Academy of Sci… Los Angeles,… [34]
## 9 February 9 2024 Movieguide Awards Movieguide Los Angeles,… [35]
## 10 February 10 13th AACTA Awards Australian Aca… Gold Coast, … [36]
## # ℹ 27 more rows
#
# Instead, identify the table by its header: keep the first table that has a
# column whose name mentions "gross".
# NOTE(review): assumes the highest-grossing table is the first table on the
# page with such a column -- confirm against the live page.
has_gross_column <- vapply(
  all_tables,
  function(tbl) any(grepl("gross", names(tbl), ignore.case = TRUE)),
  logical(1)
)

# Fail loudly rather than silently analysing the wrong table.
if (!any(has_gross_column)) {
  stop(
    "No table with a 'gross' column was found on ", url,
    "; the page layout may have changed.",
    call. = FALSE
  )
}

highest_grossing_films <- all_tables[[which(has_gross_column)[1]]]

# 5. Print the resulting table for verification (a tibble prints only its
# first rows by default)
print(highest_grossing_films)