library(rvest)
library(wbstats)
library(quantmod)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: TTR

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(VIM)

## Loading required package: colorspace

## Loading required package: grid

## VIM is ready to use.

## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues

## 
## Attaching package: 'VIM'

## The following object is masked from 'package:datasets':
## 
##     sleep

library(dplyr)

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:xts':
## 
##     first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(robotstxt)

# Question 2-7

library(robotstxt)

# Check with the robotstxt package whether this page is permitted to be
# fetched before scraping it.
paths_allowed(
  paths = "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
)

##  en.wikipedia.org

## [1] TRUE

# Download the Wikipedia page and collect every <table> node it contains.
webpage <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors" %>%
  read_html()
tables <- webpage %>% html_elements(css = "table")

# Parse the second table on the page into a data frame; that table is the
# one kept as `governors`.
governors <- html_table(tables[[2]])

# Inspect the column names and types of the scraped table.
str(governors)

## tibble [50 × 10] (S3: tbl_df/tbl/data.frame)
##  $ State                      : chr [1:50] "Alabama (list)" "Alaska (list)" "Arizona (list)" "Arkansas (list)" ...
##  $ Image                      : logi [1:50] NA NA NA NA NA NA ...
##  $ Governor[14]               : chr [1:50] "Kay Ivey" "Mike Dunleavy" "Katie Hobbs" "Sarah Huckabee Sanders" ...
##  $ Party[14]                  : logi [1:50] NA NA NA NA NA NA ...
##  $ Party[14]                  : chr [1:50] "Republican" "Republican" "Democratic" "Republican" ...
##  $ Born                       : chr [1:50] "(1944-10-15) October 15, 1944 (age 80)" "(1961-05-05) May 5, 1961 (age 64)" "(1969-12-28) December 28, 1969 (age 55)" "(1982-08-13) August 13, 1982 (age 43)" ...
##  $ Prior public experience[15]: chr [1:50] "Lieutenant GovernorState Treasurer" "Alaska Senate" "Secretary of StateMinority Leader of the Arizona SenateArizona House" "White House Press Secretary" ...
##  $ Inauguration[14]           : chr [1:50] "April 10, 2017" "December 3, 2018" "January 2, 2023" "January 10, 2023" ...
##  $ End of term[14]            : chr [1:50] "2027 (term limits)" "2026 (term limits)" "2027" "2027" ...
##  $ Ref.                       : chr [1:50] "[17]" "[18]" "[19]" "[20]" ...

# Count the columns of `governors` in which every value is missing.
sum(vapply(governors, function(column) all(is.na(column)), logical(1)))

## [1] 2

# Keep only the columns that contain at least one non-missing value.
# `drop = FALSE` guarantees the result stays a data frame even if a single
# column survives.
governors <- governors[, colSums(is.na(governors)) < nrow(governors), drop = FALSE]

# Strip bracketed footnote markers such as "[14]" from the column names.
# `[^]]*` stops at the first closing bracket, so a name carrying two markers
# loses only the markers and not the text between them.
names(governors) <- gsub("\\[[^]]*\\]", "", names(governors))
names(governors)

## [1] "State"                   "Governor"               
## [3] "Party"                   "Born"                   
## [5] "Prior public experience" "Inauguration"           
## [7] "End of term"             "Ref."

# Question 8-9

library(jsonlite)
library(lubridate)
# Request daily XMR/USD price history from the CryptoCompare `histoday`
# endpoint and parse the JSON response.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?tsym=USD&limit=150&fsym=XMR"
data <- fromJSON(url)

# The per-day records are nested under Data$Data in the parsed response.
xmr <- data$Data$Data

# Fail loudly if the response did not carry the expected table; otherwise the
# statements below would run on NULL and produce meaningless results.
if (!is.data.frame(xmr) || nrow(xmr) == 0L ||
    !all(c("time", "open") %in% names(xmr))) {
  stop("CryptoCompare response did not contain daily price data.", call. = FALSE)
}

# Convert the `time` column to date-times (presumably Unix epoch seconds;
# the recorded output is consistent with that).
xmr$time <- as_datetime(xmr$time)

# Lowest opening price in the window and the timestamp of that record.
min_open <- min(xmr$open, na.rm = TRUE)
min_time <- xmr$time[which.min(xmr$open)]
min_open

## [1] 235.68

# Show the timestamp at which the lowest opening price occurred.
print(min_time)

## [1] "2025-08-16 UTC"

# Print the timestamp of the lowest opening price. `xmr$time` has already been
# converted to a date-time above, so it is printed as stored.
print(xmr$time[which.min(xmr$open)])

## [1] "2025-08-16 UTC"

# Print the timestamp of the lowest opening price in New York local time.
# with_tz() changes only the time zone used for display; the underlying
# instant is the same as the UTC value.
print(with_tz(xmr$time[which.min(xmr$open)], tzone = "America/New_York"))

## [1] "2025-08-15 20:00:00 EDT"

# Question 10-13

# Load the built-in airquality data set and print a summary of each column.
data("airquality", package = "datasets")
summary(airquality)

##      Ozone           Solar.R           Wind             Temp      
##  Min.   :  1.00   Min.   :  7.0   Min.   : 1.700   Min.   :56.00  
##  1st Qu.: 18.00   1st Qu.:115.8   1st Qu.: 7.400   1st Qu.:72.00  
##  Median : 31.50   Median :205.0   Median : 9.700   Median :79.00  
##  Mean   : 42.13   Mean   :185.9   Mean   : 9.958   Mean   :77.88  
##  3rd Qu.: 63.25   3rd Qu.:258.8   3rd Qu.:11.500   3rd Qu.:85.00  
##  Max.   :168.00   Max.   :334.0   Max.   :20.700   Max.   :97.00  
##  NA's   :37       NA's   :7                                       
##      Month            Day      
##  Min.   :5.000   Min.   : 1.0  
##  1st Qu.:6.000   1st Qu.: 8.0  
##  Median :7.000   Median :16.0  
##  Mean   :6.993   Mean   :15.8  
##  3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :9.000   Max.   :31.0  
##

# Number of missing values in each column of airquality.
vapply(airquality, function(column) sum(is.na(column)), numeric(1))

##   Ozone Solar.R    Wind    Temp   Month     Day 
##      37       7       0       0       0       0

# Rows where the wind is below 8 and the temperature is above 90.
# which() drops any NA comparison results, matching subset()'s behaviour.
hot_days <- airquality[with(airquality, which(Temp > 90 & Wind < 8)), ]

# How many such days there are.
nrow(hot_days)

## [1] 9

# Scatter plot of temperature against month for the airquality data.
with(
  airquality,
  plot(
    Month, Temp,
    xlab = "Month",
    ylab = "Temperature",
    main = "Temperature by Month"
  )
)

# Question 15-16

library(nycflights13)
library(dplyr)

# Sum the departure delay for each origin airport (missing delays are
# ignored) and list the airports from largest to smallest total.
arrange(
  summarise(
    group_by(flights, origin),
    total_dep_delay = sum(dep_delay, na.rm = TRUE)
  ),
  desc(total_dep_delay)
)

## # A tibble: 3 × 2
##   origin total_dep_delay
##   <chr>            <dbl>
## 1 EWR            1776635
## 2 JFK            1325264
## 3 LGA            1050301

# Count the flights leaving each origin airport, busiest airport first.
count(flights, origin, name = "total_flights", sort = TRUE)

## # A tibble: 3 × 2
##   origin total_flights
##   <chr>          <int>
## 1 EWR           120835
## 2 JFK           111279
## 3 LGA           104662

Midterm

Aidan Schnapf

2025-10-10

Question 2-7

Question 8-9

Question 10-13