library(robotstxt)
# Ask robotstxt whether the governors page may be fetched before scraping it
paths_allowed(
  paths = "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
)
## en.wikipedia.org
## [1] TRUE
library(rvest)
# Download and parse the Wikipedia page listing the current US governors
url <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
page <- read_html(url)
# Step 2: select every "wikitable" on the page & Step 3: parse them all.
# (`fill = TRUE` is omitted: it is deprecated in rvest >= 1.0, which always
# pads ragged rows with NA.)
tables <- html_elements(page, "table.wikitable")
dfs <- html_table(tables)
# Step 4: pick the table by its caption.
# html_element() (singular) yields exactly one result per table -- a missing
# node, read back as NA, when a table has no <caption> -- so `caps` stays
# index-aligned with `dfs`. html_elements() would flatten the captions and
# shift the indices whenever an earlier table lacks one.
caps <- html_text2(html_element(tables, "caption"))
ix <- which(grepl("^Current state governors", caps, ignore.case = TRUE))
# `[[` needs exactly one index; fail with a clear message otherwise
if (length(ix) != 1L) {
  stop(
    "Expected exactly one table captioned 'Current state governors', found ",
    length(ix),
    call. = FALSE
  )
}
governors <- dfs[[ix]]
# Step 5: quick clean/verify (optional)
# Trim stray whitespace from the headers and drop rows whose first column is
# empty (subset() also drops rows where that comparison is NA)
names(governors) <- trimws(names(governors))
governors <- subset(governors, governors[[1]] != "")
# Inspect the scraped table: its class and its first rows
class(governors)
## [1] "tbl_df" "tbl" "data.frame"
head(governors)
## # A tibble: 6 × 10
## State Image `Governor[14]` `Party[14]` `Party[14]` Born
## <chr> <lgl> <chr> <lgl> <chr> <chr>
## 1 Alabama (list) NA Kay Ivey NA Republican (1944-…
## 2 Alaska (list) NA Mike Dunleavy NA Republican (1961-…
## 3 Arizona (list) NA Katie Hobbs NA Democratic (1969-…
## 4 Arkansas (list) NA Sarah Huckabee Sanders NA Republican (1982-…
## 5 California (list) NA Gavin Newsom NA Democratic (1967-…
## 6 Colorado (list) NA Jared Polis NA Democratic (1975-…
## # ℹ 4 more variables: `Prior public experience[15]` <chr>,
## # `Inauguration[14]` <chr>, `End of term[14]` <chr>, Ref. <chr>
# List the columns that hold nothing but NA.
# vapply() guarantees a logical vector even for a zero-column input, where
# sapply() would return an empty list.
names(governors)[vapply(governors, function(x) all(is.na(x)), logical(1))]
## [1] "Image" "Party[14]"
# Step 1: remove columns that are entirely NA
# (drop = FALSE keeps a table even if only one column survives)
governors <- governors[
  ,
  colSums(is.na(governors)) < nrow(governors),
  drop = FALSE
]
# Step 2: rename columns by stripping out footnote markers such as "[14]".
# The pattern matches one bracket pair at a time, so a header carrying two
# markers keeps the text between them; trimws() removes any space left behind.
names(governors) <- trimws(gsub("\\[[^]]*\\]", "", names(governors)))
# Step 3: show all column names for a double-check
names(governors)
## [1] "State" "Governor"
## [3] "Party" "Born"
## [5] "Prior public experience" "Inauguration"
## [7] "End of term" "Ref."
# Packages
library(jsonlite)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Build the URL per docs: Daily Pair OHLCV, XMR vs USD, limit = 150
url <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=XMR&tsym=USD&limit=150"
# Parse JSON → list → the data sublist → data frame/tibble
xmr <- fromJSON(url)$Data$Data
# Fail loudly when the response carries no usable rows: otherwise `xmr` is
# NULL/empty and min() below would silently return Inf with only a warning
if (!is.data.frame(xmr) || nrow(xmr) == 0L || all(is.na(xmr$open))) {
  stop("CryptoCompare returned no usable daily price data", call. = FALSE)
}
# Compute the minimum daily open and when it occurred
min_open <- min(xmr$open, na.rm = TRUE)
# `time` is converted as seconds since 1970-01-01 and shown in UTC
when_min <- xmr %>%
  filter(open == min_open) %>%
  slice(1) %>% # in case of ties
  pull(time) %>%
  as.POSIXct(origin = "1970-01-01", tz = "UTC")
min_open
## [1] 235.68
when_min
## [1] "2025-08-16 UTC"
# Same instant via base R, displayed in US Eastern time (hence the earlier
# calendar date in the printed value)
as.POSIXct(
  xmr$time[which.min(xmr$open)],
  origin = "1970-01-01",
  tz = "America/New_York"
)
## [1] "2025-08-15 20:00:00 EDT"
# Load the built-in dataset and print a per-column summary
data(airquality)
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# Keep the rows with Wind below 8 and Temp above 90, then count them.
# which() discards NA comparisons, matching what subset() does.
data(airquality)
hot_days <- airquality[
  which(airquality$Wind < 8 & airquality$Temp > 90),
]
nrow(hot_days)
## [1] 9
# Load the built-in airquality dataset
data(airquality)
# Scatter plot of Temp against Month, drawn as solid blue points
plot(
  x = airquality[["Month"]],
  y = airquality[["Temp"]],
  main = "Temperature by Month",
  xlab = "Month",
  ylab = "Temperature",
  pch = 19,
  col = "blue"
)

library(nycflights13)
library(dplyr)
# Sum dep_delay (missing values ignored) for each origin, largest total first
flights %>%
  summarise(
    total_delay = sum(dep_delay, na.rm = TRUE),
    .by = origin
  ) %>%
  arrange(desc(total_delay))
## # A tibble: 3 × 2
## origin total_delay
## <chr> <dbl>
## 1 EWR 1776635
## 2 JFK 1325264
## 3 LGA 1050301
library(nycflights13)
library(dplyr)
# Tally the rows of `flights` per origin, then order by that tally, descending
flights %>%
  count(origin) %>%
  arrange(desc(n))
## # A tibble: 3 × 2
## origin n
## <chr> <int>
## 1 EWR 120835
## 2 JFK 111279
## 3 LGA 104662