##Q2
library(rvest)
library(robotstxt)
# Check the site's robots.txt to see whether this page may be fetched
# before scraping it.
paths_allowed(
  paths = "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
)
## en.wikipedia.org
## [1] TRUE
##Q3
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Download the Wikipedia page listing the current US governors and parse it.
url <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"
webpage <- read_html(url)
# Pick the node matching "table.wikitable" and turn it into a table.
governors_table <- html_table(html_node(webpage, "table.wikitable"))
# The page repeats the "Party[12]" header, so ask tibble to make the
# column names unique (it appends "...<position>" to the duplicates).
governors_tibble <- governors_table %>%
  as_tibble(.name_repair = "unique")
## New names:
## • `Party[12]` -> `Party[12]...4`
## • `Party[12]` -> `Party[12]...5`
print(governors_tibble)
## # A tibble: 50 × 10
## State Image `Governor[12]` `Party[12]...4` `Party[12]...5` Born
## <chr> <lgl> <chr> <lgl> <chr> <chr>
## 1 Alabama (list) NA Kay Ivey NA Republican (194…
## 2 Alaska (list) NA Mike Dunleavy NA Republican (196…
## 3 Arizona (list) NA Katie Hobbs NA Democratic (196…
## 4 Arkansas (list) NA Sarah Huckabe… NA Republican (198…
## 5 California (list) NA Gavin Newsom NA Democratic (196…
## 6 Colorado (list) NA Jared Polis NA Democratic (197…
## 7 Connecticut (list) NA Ned Lamont NA Democratic (195…
## 8 Delaware (list) NA Matt Meyer NA Democratic (197…
## 9 Florida (list) NA Ron DeSantis NA Republican (197…
## 10 Georgia (list) NA Brian Kemp NA Republican (196…
## # ℹ 40 more rows
## # ℹ 4 more variables: `Prior public experience[13]` <chr>,
## # `Inauguration[12]` <chr>, `End of term[12]` <chr>, Ref. <chr>
##Q4
# Show the compact structure (column names, types, first values) of the
# scraped table.
utils::str(governors_tibble)
## tibble [50 × 10] (S3: tbl_df/tbl/data.frame)
## $ State : chr [1:50] "Alabama (list)" "Alaska (list)" "Arizona (list)" "Arkansas (list)" ...
## $ Image : logi [1:50] NA NA NA NA NA NA ...
## $ Governor[12] : chr [1:50] "Kay Ivey" "Mike Dunleavy" "Katie Hobbs" "Sarah Huckabee Sanders" ...
## $ Party[12]...4 : logi [1:50] NA NA NA NA NA NA ...
## $ Party[12]...5 : chr [1:50] "Republican" "Republican" "Democratic" "Republican" ...
## $ Born : chr [1:50] "(1944-10-15) October 15, 1944 (age 80)" "(1961-05-05) May 5, 1961 (age 63)" "(1969-12-28) December 28, 1969 (age 55)" "(1982-08-13) August 13, 1982 (age 42)" ...
## $ Prior public experience[13]: chr [1:50] "Lieutenant GovernorState Treasurer" "Alaska Senate" "Secretary of StateMinority Leader of the Arizona SenateArizona House" "White House Press Secretary" ...
## $ Inauguration[12] : chr [1:50] "April 10, 2017" "December 3, 2018" "January 2, 2023" "January 10, 2023" ...
## $ End of term[12] : chr [1:50] "2027 (term limits)" "2026 (term limits)" "2027" "2027" ...
## $ Ref. : chr [1:50] "[16]" "[17]" "[18]" "[19]" ...
##Q5
# Count the columns of the scraped table that contain nothing but NA.
# vapply() pins each per-column result to a single logical value, so the
# count is well defined (0) even for a table with no columns; sapply()
# would return an empty list in that case and sum() would fail on it.
all_na_columns <- sum(vapply(
  governors_tibble,
  function(column) all(is.na(column)),
  logical(1)
))
print(all_na_columns)
## [1] 2
##Q6
# Drop every column that is entirely NA (the image column and the empty
# party-colour column).
cleaned_governors <- governors_tibble %>%
  select(where(function(column) !all(is.na(column))))

# Tidy the headers. local() keeps the helper variables out of the global
# environment.
colnames(cleaned_governors) <- local({
  # Remove Wikipedia footnote markers such as "[12]".
  without_footnotes <- gsub("\\[\\d+\\]", "", colnames(cleaned_governors))
  # Remove the "...<position>" suffix that unique name repair appended to
  # the duplicated "Party" header. Only do so when the shortened names are
  # still unique, so two surviving columns can never end up sharing a name.
  without_suffix <- sub("\\.\\.\\.\\d+$", "", without_footnotes)
  if (anyDuplicated(without_suffix) == 0L) {
    without_suffix
  } else {
    without_footnotes
  }
})
print(colnames(cleaned_governors))
## [1] "State" "Governor"
## [3] "Party" "Born"
## [5] "Prior public experience" "Inauguration"
## [7] "End of term" "Ref."
##Q8
library(jsonlite)
library(httr)
# Fetch the daily XMR/USD price history from the CryptoCompare API.
url2 <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=XMR&tsym=USD&limit=200"
data <- fromJSON(url2)
# Fail loudly on an API-level error instead of continuing with an empty
# payload and reporting a meaningless minimum.
if (identical(data$Response, "Error")) {
  stop("CryptoCompare request failed: ", data$Message, call. = FALSE)
}
# `time` arrives as seconds since the Unix epoch. Convert it with an explicit
# UTC time zone: the daily timestamps fall on midnight UTC, so printing them
# in the local zone would show the previous calendar day.
xmr_data <- as_tibble(data$Data$Data) %>%
  mutate(time = as.POSIXct(time, origin = "1970-01-01", tz = "UTC"))
# Lowest opening price in the window and the day(s) on which it occurred.
min_open_price <- min(xmr_data$open, na.rm = TRUE)
min_open_time <- xmr_data %>%
  filter(open == min_open_price) %>%
  pull(time)
print(min_open_price)
## [1] 136.72
print(min_open_time)
## [1] "2024-10-03 UTC"
##Q9
# Row index (or indices) of the observation with the lowest opening price.
observation_number <- which(xmr_data$open == min_open_price)
# Turn the timestamp into a regular date-time. The conversion accepts either
# raw epoch seconds or an already-converted POSIXct value.
regular_date_time <- as.POSIXct(
  xmr_data$time[observation_number],
  origin = "1970-01-01",
  tz = "UTC"
)
# Force the display zone to UTC as well: converting a value that is already
# POSIXct may leave its zone untouched, and the local zone would show the
# midnight-UTC timestamp as the previous calendar day.
attr(regular_date_time, "tzone") <- "UTC"
print(regular_date_time)
## [1] "2024-10-03 UTC"
##Q10
# Load the built-in airquality data set and print per-column summary
# statistics (including the NA counts for Ozone and Solar.R).
data("airquality")
summary(object = airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
##Q11
# Days that were both hot (Temp above 90) and calm (Wind below 8).
data("airquality")
# which() keeps only rows where the condition is TRUE, so rows with a
# missing comparison are excluded.
hot_days <- airquality[
  with(airquality, which(Wind < 8 & Temp > 90)), ,
  drop = FALSE
]
num_hot_days <- dim(hot_days)[1L]
num_hot_days
## [1] 9
##Q12
# Scatter plot of every daily temperature reading against its month.
with(
  airquality,
  plot(
    Month, Temp,
    main = "Temperature by Month",
    xlab = "Month",
    ylab = "Temperature",
    pch = 19
  )
)
# Month in which the single lowest temperature was recorded
# (which.min() returns the first position of the minimum).
lowest_temp_month <- with(airquality, Month[which.min(Temp)])
lowest_temp_month
## [1] 5
##Q15
library(nycflights13)
# Sum the departure delays per origin airport (ignoring missing delays)
# and list the airports from the largest total to the smallest.
total_delay_by_airport <- arrange(
  summarise(
    group_by(flights, origin),
    total_delay = sum(dep_delay, na.rm = TRUE)
  ),
  desc(total_delay)
)
total_delay_by_airport
## # A tibble: 3 × 2
## origin total_delay
## <chr> <dbl>
## 1 EWR 1776635
## 2 JFK 1325264
## 3 LGA 1050301