library(rvest)
library(wbstats)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(robotstxt)
library(robotstxt)
# Scrape the table of current US governors from Wikipedia, but only after
# confirming that robots.txt permits fetching the page.
governors_url <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
scraping_allowed <- paths_allowed(governors_url)
## en.wikipedia.org
scraping_allowed
## [1] TRUE
# Enforce the robots.txt answer instead of only printing it.
if (!isTRUE(all(scraping_allowed))) {
  stop("robots.txt does not allow scraping ", governors_url, call. = FALSE)
}
webpage <- read_html(governors_url)
tables <- html_elements(webpage, "table")
# Only the second <table> on the page is used, so parse just that node rather
# than converting every table on the page.
governors <- html_table(tables[[2]])
str(governors)
## tibble [50 × 10] (S3: tbl_df/tbl/data.frame)
## $ State : chr [1:50] "Alabama (list)" "Alaska (list)" "Arizona (list)" "Arkansas (list)" ...
## $ Image : logi [1:50] NA NA NA NA NA NA ...
## $ Governor[14] : chr [1:50] "Kay Ivey" "Mike Dunleavy" "Katie Hobbs" "Sarah Huckabee Sanders" ...
## $ Party[14] : logi [1:50] NA NA NA NA NA NA ...
## $ Party[14] : chr [1:50] "Republican" "Republican" "Democratic" "Republican" ...
## $ Born : chr [1:50] "(1944-10-15) October 15, 1944 (age 80)" "(1961-05-05) May 5, 1961 (age 64)" "(1969-12-28) December 28, 1969 (age 55)" "(1982-08-13) August 13, 1982 (age 43)" ...
## $ Prior public experience[15]: chr [1:50] "Lieutenant GovernorState Treasurer" "Alaska Senate" "Secretary of StateMinority Leader of the Arizona SenateArizona House" "White House Press Secretary" ...
## $ Inauguration[14] : chr [1:50] "April 10, 2017" "December 3, 2018" "January 2, 2023" "January 10, 2023" ...
## $ End of term[14] : chr [1:50] "2027 (term limits)" "2026 (term limits)" "2027" "2027" ...
## $ Ref. : chr [1:50] "[17]" "[18]" "[19]" "[20]" ...
# Flag columns holding nothing but NA (image/colour cells carry no text).
# Computed once and reused for both the count and the filter.
all_na_cols <- colSums(is.na(governors)) == nrow(governors)
sum(all_na_cols)
## [1] 2
governors <- governors[, !all_na_cols]
# Strip bracketed footnote markers such as "[14]" from the column headers.
# The negated character class keeps each match inside one pair of brackets, so
# text between two separate markers in the same header is never removed.
names(governors) <- gsub("\\[[^]]*\\]", "", names(governors))
names(governors)
## [1] "State" "Governor"
## [3] "Party" "Born"
## [5] "Prior public experience" "Inauguration"
## [7] "End of term" "Ref."
library(jsonlite)
library(lubridate)
# Download daily XMR/USD price history from CryptoCompare and report the
# lowest opening price together with the day on which it occurred.
url <- "https://min-api.cryptocompare.com/data/v2/histoday?tsym=USD&limit=150&fsym=XMR"
data <- fromJSON(url)
xmr <- data$Data$Data
# Stop with a clear message if the payload does not contain the price table;
# otherwise the column accesses below fail with an unrelated-looking error.
if (!is.data.frame(xmr) || nrow(xmr) == 0L) {
  stop("CryptoCompare returned no price history: ", data$Message, call. = FALSE)
}
# `time` arrives as epoch seconds; as_datetime() converts it to POSIXct in UTC.
xmr$time <- as_datetime(xmr$time)
# Locate the row with the lowest opening price once and reuse the index.
min_idx <- which.min(xmr$open)
min_open <- min(xmr$open, na.rm = TRUE)
min_time <- xmr$time[min_idx]
min_open
## [1] 235.68
min_time
## [1] "2025-08-16 UTC"
# `min_time` is already POSIXct, so it can be printed directly.
print(min_time)
## [1] "2025-08-16 UTC"
# Same instant shown on the New York clock; with_tz() changes only the display
# time zone, not the underlying moment.
print(with_tz(min_time, tzone = "America/New_York"))
## [1] "2025-08-15 20:00:00 EDT"
# Load the built-in airquality data set and print a per-column summary.
data("airquality")
summary(airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
# Number of missing values in each column.
vapply(airquality, function(column) sum(is.na(column)), numeric(1))
## Ozone Solar.R Wind Temp Month Day
## 37 7 0 0 0 0
# Rows where Wind is below 8 and Temp is above 90. which() discards rows whose
# condition is NA, matching what subset() does.
hot_days <- airquality[which(airquality$Wind < 8 & airquality$Temp > 90), ]
nrow(hot_days)
## [1] 9
# Scatter plot of temperature against month number.
with(
  airquality,
  plot(
    Month, Temp,
    xlab = "Month",
    ylab = "Temperature",
    main = "Temperature by Month"
  )
)
# Questions 15-16: departure delay totals and flight counts by origin airport
library(nycflights13)
library(dplyr)
# Sum of departure delays per origin airport (missing delays ignored),
# listed from the largest total to the smallest.
delay_totals <- summarise(
  group_by(flights, origin),
  total_dep_delay = sum(dep_delay, na.rm = TRUE)
)
arrange(delay_totals, desc(total_dep_delay))
## # A tibble: 3 × 2
## origin total_dep_delay
## <chr> <dbl>
## 1 EWR 1776635
## 2 JFK 1325264
## 3 LGA 1050301
# Number of flights per origin airport, busiest first. count() performs the
# group / tally / descending sort in a single step.
flights %>%
  count(origin, name = "total_flights", sort = TRUE)
## # A tibble: 3 × 2
## origin total_flights
## <chr> <int>
## 1 EWR 120835
## 2 JFK 111279
## 3 LGA 104662