Load Packages
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
##
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
##
## guess_encoding
Setting the Website URL and User-Agent
# Wikipedia article that lists the films released in 2024.
url <- "https://en.wikipedia.org/wiki/2024_in_film"

# User-Agent string sent with the request so the client identifies itself.
ua <- "GriffinSchoolProjectBot/1.0 (+contact@example.com)"
Fetching the Page
# Request the page, sending the custom User-Agent header defined in `ua`.
resp <- httr::GET(url, httr::add_headers(`User-Agent` = ua))

# Stop with a clear error on an HTTP failure status (4xx/5xx) instead of
# silently parsing an error page and finding no tables later on.
httr::stop_for_status(resp)

# Parse the response body into an HTML document.
pg <- read_html(resp)
Retrieving Tables
# Select every <table class="wikitable"> element on the page and convert each
# one to a data frame, giving a list with one entry per table.
# html_table() fills ragged rows by default in rvest >= 1.0.0, so the
# deprecated `fill` argument is omitted.
tables <- pg |>
  html_elements("table.wikitable") |>
  html_table()
Inspecting the Number of Tables
# Print how many tables were extracted into the `tables` list.
length(tables)
## [1] 5
Finding the Highest-Grossing Films Table
# Return the first table whose column names mention "Worldwide".
#
# tbls: a list of data frames (e.g. the result of html_table()).
# Returns the first element of `tbls` with at least one column name matching
# "Worldwide" (case-insensitive), or NULL when no table matches.
pick_highest_grossing <- function(tbls) {
  # One TRUE/FALSE flag per table: does any column name contain "Worldwide"?
  has_worldwide <- vapply(
    tbls,
    function(tbl) any(grepl("Worldwide", names(tbl), ignore.case = TRUE)),
    logical(1)
  )
  matches <- which(has_worldwide)

  # No table carries a matching column: signal that with NULL.
  if (length(matches) == 0) {
    return(NULL)
  }

  tbls[[matches[1]]]
}
# Pick the table that has a "Worldwide" column out of the scraped tables.
highest_grossing <- pick_highest_grossing(tables)

# Preview the first rows, but only when a matching table was found.
if (!is.null(highest_grossing)) {
  head(highest_grossing)
}
## # A tibble: 6 × 4
## Rank Title Distributor `Worldwide gross`
## <int> <chr> <chr> <chr>
## 1 1 Inside Out 2 Disney $1,698,863,816
## 2 2 Deadpool & Wolverine Disney $1,338,073,645
## 3 3 Moana 2 Disney $1,059,242,164
## 4 4 Despicable Me 4 Universal $971,315,095
## 5 5 Wicked Universal $756,535,681
## 6 6 Mufasa: The Lion King Disney $723,060,982