library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(robotstxt)
library(jsonlite)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(nycflights13)
# Download the Wikipedia list of current U.S. governors, parse every HTML
# table on the page, and keep the second table as a tibble. Duplicated column
# headers are made unique by tibble's name repair.
gov_link <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
gov_page <- gov_link %>% rvest::read_html()
gov_tables <- gov_page %>% rvest::html_table(fill = TRUE)
governors_df <- gov_tables[[2]] %>% as_tibble(.name_repair = "unique")
## New names:
## • `Party[14]` -> `Party[14]...4`
## • `Party[14]` -> `Party[14]...5`
# Preview the first six rows of the scraped table.
head(governors_df, n = 6L)
## # A tibble: 6 × 10
## State Image `Governor[14]` `Party[14]...4` `Party[14]...5` Born
## <chr> <lgl> <chr> <lgl> <chr> <chr>
## 1 Alabama (list) NA Kay Ivey NA Republican (194…
## 2 Alaska (list) NA Mike Dunleavy NA Republican (196…
## 3 Arizona (list) NA Katie Hobbs NA Democratic (196…
## 4 Arkansas (list) NA Sarah Huckabee … NA Republican (198…
## 5 California (list) NA Gavin Newsom NA Democratic (196…
## 6 Colorado (list) NA Jared Polis NA Democratic (197…
## # ℹ 4 more variables: `Prior public experience[15]` <chr>,
## # `Inauguration[14]` <chr>, `End of term[14]` <chr>, Ref. <chr>
# Show each column's type together with its leading values.
utils::str(governors_df)
## tibble [50 × 10] (S3: tbl_df/tbl/data.frame)
## $ State : chr [1:50] "Alabama (list)" "Alaska (list)" "Arizona (list)" "Arkansas (list)" ...
## $ Image : logi [1:50] NA NA NA NA NA NA ...
## $ Governor[14] : chr [1:50] "Kay Ivey" "Mike Dunleavy" "Katie Hobbs" "Sarah Huckabee Sanders" ...
## $ Party[14]...4 : logi [1:50] NA NA NA NA NA NA ...
## $ Party[14]...5 : chr [1:50] "Republican" "Republican" "Democratic" "Republican" ...
## $ Born : chr [1:50] "(1944-10-15) October 15, 1944 (age 80)" "(1961-05-05) May 5, 1961 (age 64)" "(1969-12-28) December 28, 1969 (age 55)" "(1982-08-13) August 13, 1982 (age 43)" ...
## $ Prior public experience[15]: chr [1:50] "Lieutenant GovernorState Treasurer" "Alaska Senate" "Secretary of StateMinority Leader of the Arizona SenateArizona House" "White House Press Secretary" ...
## $ Inauguration[14] : chr [1:50] "April 10, 2017" "December 3, 2018" "January 2, 2023" "January 10, 2023" ...
## $ End of term[14] : chr [1:50] "2027 (term limits)" "2026 (term limits)" "2027" "2027" ...
## $ Ref. : chr [1:50] "[17]" "[18]" "[19]" "[20]" ...
# Count the columns that contain nothing but NA. vapply() pins the result to
# exactly one logical per column, so a zero-column table yields 0 instead of
# the error that sum() raises on the empty list sapply() would return.
sum(vapply(governors_df, function(x) all(is.na(x)), logical(1)))
## [1] 2
###2
# Re-scrape the governors page and tidy the state-governors table: drop
# columns that are entirely NA and strip Wikipedia footnote markers such as
# "[14]" from the column names.
pg <- read_html("https://en.wikipedia.org/wiki/List_of_current_United_States_governors")
tabs <- html_table(pg, fill = TRUE)
# The 50-row governors table is the second table on the page (the same index
# the first scrape in this script uses). Its duplicated "Party[14]" headers
# must be repaired to unique names before it can be handled as a tibble.
governors_df <- as_tibble(tabs[[2]], .name_repair = "unique")
# Keep only the columns that hold at least one non-missing value.
keep_cols <- colSums(!is.na(governors_df)) > 0
governors_df <- governors_df[, keep_cols, drop = FALSE]
nm <- names(governors_df)
# In R's default (TRE / POSIX) regex engine a backslash is literal inside a
# bracket expression, so "[^\\]]" does not mean "anything except ]". Listing
# "]" first in the negated class is the portable spelling.
nm <- gsub("\\[[^]]*\\]", "", nm)
# Write the cleaned names back; without this the gsub() result is discarded.
names(governors_df) <- nm
###3 OPEN HISTORY
library(jsonlite)
library(tibble)
library(dplyr)
library(lubridate)
# Fetch daily XMR/USD history from CryptoCompare (the query asks for
# limit = 150) and report the lowest daily opening price and when it occurred.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=XMR&tsym=USD&limit=150"
raw <- fromJSON(url)
# Use identical() rather than `==`: if the payload has no `Response` field,
# `NULL == "Success"` is logical(0), which stopifnot() silently accepts.
if (!identical(raw$Response, "Success")) {
  # `Message` is appended when the payload carries one; NULL adds nothing.
  stop("CryptoCompare request failed: ", raw$Message, call. = FALSE)
}
xmr <- as_tibble(raw$Data$Data)
# `time` is converted as seconds since the Unix epoch into a UTC date-time.
xmr <- xmr %>%
  mutate(time_utc = as_datetime(time, tz = "UTC"))
min_open_value <- min(xmr$open, na.rm = TRUE)
# which.min() skips NA and returns the first row holding the minimum.
min_open_row <- which.min(xmr$open)
min_open_time <- xmr$time_utc[min_open_row]
cat("Minimum daily OPEN for XMR (USD) over last 150 days:", min_open_value, "\n")
## Minimum daily OPEN for XMR (USD) over last 150 days: 235.68
cat("Occurred at (UTC):", format(min_open_time, "%Y-%m-%d %H:%M:%S %Z"), "\n")
## Occurred at (UTC): 2025-08-16 00:00:00 UTC
# Same instant again, but as.POSIXct() without `tz` prints in the session's
# local time zone, so the calendar date shown can differ from the UTC line.
print(as.POSIXct(xmr$time[min_open_row], origin = "1970-01-01"))
## [1] "2025-08-15 20:00:00 EDT"
###3 AIR QUALITY
# Load the built-in airquality data set and print per-column summary
# statistics (including NA counts).
data("airquality")
summary(object = airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# Count the days with Wind below 8 and Temp above 90 in the built-in
# airquality data. which() drops any NA comparison results, matching how
# subset() treats them.
data("airquality")
hot_days <- airquality[
  which(airquality$Temp > 90 & airquality$Wind < 8), ,
  drop = FALSE
]
nrow(hot_days)
## [1] 9
# Scatter plot of each day's temperature against its month number.
data("airquality")
with(
  airquality,
  plot(
    x = Month, y = Temp,
    main = "Temperature by Month",
    xlab = "Month", ylab = "Temperature (°F)"
  )
)
## It's May.
###4 FLIGHTS
library(nycflights13)
library(dplyr)
# Total departure delay per origin airport (missing delays are ignored),
# listed from the largest total to the smallest.
delay_totals <- summarise(
  group_by(flights, origin),
  total_dep_delay = sum(dep_delay, na.rm = TRUE)
)
arrange(delay_totals, desc(total_dep_delay))
## # A tibble: 3 × 2
## origin total_dep_delay
## <chr> <dbl>
## 1 EWR 1776635
## 2 JFK 1325264
## 3 LGA 1050301
# Number of flights per origin whose `hour` is 17 or later, busiest origin
# first.
# NOTE(review): `hour` is presumably the scheduled departure hour — confirm
# against the nycflights13 documentation.
evening_flights <- filter(flights, hour >= 17)
count(evening_flights, origin, name = "count_after5", sort = TRUE)
## # A tibble: 3 × 2
## origin count_after5
## <chr> <int>
## 1 JFK 38415
## 2 EWR 33290
## 3 LGA 27317