library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(robotstxt)
library(jsonlite)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(nycflights13)

###1 GOVERNORS TABLE

# Download the Wikipedia list of current U.S. governors and parse every HTML
# table found on the page.
gov_link <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
gov_page <- gov_link %>% rvest::read_html()
gov_tables <- gov_page %>% rvest::html_table(fill = TRUE)
# The second parsed table is taken as the governors table. "unique" name repair
# renames repeated headers so that every column name is distinct.
governors_df <- gov_tables[[2]] %>%
  as_tibble(.name_repair = "unique")
## New names:
## • `Party[14]` -> `Party[14]...4`
## • `Party[14]` -> `Party[14]...5`
# Preview the first rows of the scraped governors table.
head(governors_df)
## # A tibble: 6 × 10
##   State             Image `Governor[14]`   `Party[14]...4` `Party[14]...5` Born 
##   <chr>             <lgl> <chr>            <lgl>           <chr>           <chr>
## 1 Alabama (list)    NA    Kay Ivey         NA              Republican      (194…
## 2 Alaska (list)     NA    Mike Dunleavy    NA              Republican      (196…
## 3 Arizona (list)    NA    Katie Hobbs      NA              Democratic      (196…
## 4 Arkansas (list)   NA    Sarah Huckabee … NA              Republican      (198…
## 5 California (list) NA    Gavin Newsom     NA              Democratic      (196…
## 6 Colorado (list)   NA    Jared Polis      NA              Democratic      (197…
## # ℹ 4 more variables: `Prior public experience[15]` <chr>,
## #   `Inauguration[14]` <chr>, `End of term[14]` <chr>, Ref. <chr>
# Show the type and leading values of every column.
str(governors_df)
## tibble [50 × 10] (S3: tbl_df/tbl/data.frame)
##  $ State                      : chr [1:50] "Alabama (list)" "Alaska (list)" "Arizona (list)" "Arkansas (list)" ...
##  $ Image                      : logi [1:50] NA NA NA NA NA NA ...
##  $ Governor[14]               : chr [1:50] "Kay Ivey" "Mike Dunleavy" "Katie Hobbs" "Sarah Huckabee Sanders" ...
##  $ Party[14]...4              : logi [1:50] NA NA NA NA NA NA ...
##  $ Party[14]...5              : chr [1:50] "Republican" "Republican" "Democratic" "Republican" ...
##  $ Born                       : chr [1:50] "(1944-10-15) October 15, 1944 (age 80)" "(1961-05-05) May 5, 1961 (age 64)" "(1969-12-28) December 28, 1969 (age 55)" "(1982-08-13) August 13, 1982 (age 43)" ...
##  $ Prior public experience[15]: chr [1:50] "Lieutenant GovernorState Treasurer" "Alaska Senate" "Secretary of StateMinority Leader of the Arizona SenateArizona House" "White House Press Secretary" ...
##  $ Inauguration[14]           : chr [1:50] "April 10, 2017" "December 3, 2018" "January 2, 2023" "January 10, 2023" ...
##  $ End of term[14]            : chr [1:50] "2027 (term limits)" "2026 (term limits)" "2027" "2027" ...
##  $ Ref.                       : chr [1:50] "[17]" "[18]" "[19]" "[20]" ...
# Count the columns that hold nothing but NA. vapply() fixes the per-column
# result to a single logical, so the sum stays well-defined even for a table
# with zero columns (where sapply() would hand sum() an empty list).
sum(vapply(governors_df, function(x) all(is.na(x)), logical(1)))
## [1] 2

###2

# Re-scrape the governors page and tidy the table: drop columns that are
# entirely NA and strip footnote markers such as "[14]" from the column names.
pg <- read_html("https://en.wikipedia.org/wiki/List_of_current_United_States_governors")
tabs <- html_table(pg, fill = TRUE)
# Table 2 is the one section 1 loads as the 50-row governors table. It carries
# two columns named "Party[14]", so the names are made unique on conversion.
governors_df <- as_tibble(tabs[[2]], .name_repair = "unique")

# Keep only the columns that contain at least one non-missing value.
keep_cols <- colSums(!is.na(governors_df)) > 0
governors_df <- governors_df[, keep_cols, drop = FALSE]

# Remove bracketed footnote markers from the column names. The closing bracket
# is listed first inside the class ("[^]]") rather than backslash-escaped,
# because R's default regex engine reads a backslash inside a bracket
# expression as a literal character.
nm <- names(governors_df)
nm <- gsub("\\[[^]]*\\]", "", nm)
# Write the cleaned names back so the table actually uses them.
names(governors_df) <- nm

###3 OPEN HISTORY

library(jsonlite)
library(tibble)
library(dplyr)
library(lubridate)

# Download daily XMR/USD price history from CryptoCompare and report the lowest
# daily open together with the day on which it occurred.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=XMR&tsym=USD&limit=150"
raw <- fromJSON(url)
# Abort unless the API response is flagged as successful.
stopifnot(raw$Response == "Success")
xmr <- as_tibble(raw$Data$Data)
# The numeric `time` column is converted to a date-time in UTC.
xmr <- xmr %>%
  mutate(time_utc = as_datetime(time, tz = "UTC"))
# Locate the minimum once, then read value and timestamp from that same row so
# the two can never disagree.
min_open_row    <- which.min(xmr$open)
min_open_value  <- xmr$open[min_open_row]
min_open_time   <- xmr$time_utc[min_open_row]
cat("Minimum daily OPEN for XMR (USD) over last 150 days:", min_open_value, "\n")
## Minimum daily OPEN for XMR (USD) over last 150 days: 235.68
cat("Occurred at (UTC):", format(min_open_time, "%Y-%m-%d %H:%M:%S %Z"), "\n")
## Occurred at (UTC): 2025-08-16 00:00:00 UTC
# Cross-check from the raw numeric value. tz = "UTC" is given explicitly:
# without it the value prints in the machine's local time zone, which can fall
# on a different calendar day than the UTC line above.
print(as.POSIXct(xmr$time[min_open_row], origin = "1970-01-01", tz = "UTC"))
## [1] "2025-08-16 UTC"

###3 AIR QUALITY

# Load the built-in airquality data set and print summary statistics for each
# of its columns.
data("airquality")
print(summary(airquality))
##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
## 
# Select the days with wind below 8 and temperature above 90, then count them.
data("airquality")
is_hot_and_calm <- airquality$Wind < 8 & airquality$Temp > 90
# which() keeps only rows where the condition is TRUE, dropping any NA results.
hot_days <- airquality[which(is_hot_and_calm), ]
nrow(hot_days)
## [1] 9
# Scatter plot of every temperature reading against its month number.
data("airquality")
with(
  airquality,
  plot(
    Month, Temp,
    xlab = "Month",
    ylab = "Temperature (°F)",
    main = "Temperature by Month"
  )
)

## IT'S MAY

###4 FLIGHTS

library(nycflights13)
library(dplyr)

# Total departure delay per origin airport (missing delays are ignored),
# listed from the largest total to the smallest.
delays_by_origin <- summarise(
  group_by(flights, origin),
  total_dep_delay = sum(dep_delay, na.rm = TRUE)
)
arrange(delays_by_origin, desc(total_dep_delay))
## # A tibble: 3 × 2
##   origin total_dep_delay
##   <chr>            <dbl>
## 1 EWR            1776635
## 2 JFK            1325264
## 3 LGA            1050301
# Number of flights per origin airport whose `hour` value is 17 or later,
# listed from the busiest airport down.
flights %>%
  filter(hour >= 17) %>%
  count(origin, name = "count_after5", sort = TRUE)
## # A tibble: 3 × 2
##   origin count_after5
##   <chr>         <int>
## 1 JFK           38415
## 2 EWR           33290
## 3 LGA           27317