packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(robotstxt)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(nycflights13)
library(rvest)
library(ggplot2)
library(jsonlite)
check paths
# Page to scrape: Wikipedia's list of current U.S. governors.
webpage <- "https://en.wikipedia.org/wiki/List_of_current_United_States_governors"

# Check the site's robots.txt permissions for this path before fetching it.
paths_allowed(paths = webpage)
## en.wikipedia.org
## [1] TRUE
table from webpage
# Download the page and collect every <table> element on it.
page <- read_html(webpage)
tbls <- page %>% html_elements("table")

# Parse the second table on the page into a tibble.
# NOTE(review): the index 2 depends on the page's current layout; confirm it
# still points at the governors table if the article is restructured.
current_gov <- tbls[[2]] %>% html_table()
current_gov
## # A tibble: 50 × 10
## State Image `Governor[14]` `Party[14]` `Party[14]` Born
## <chr> <lgl> <chr> <lgl> <chr> <chr>
## 1 Alabama (list) NA Kay Ivey NA Republican (194…
## 2 Alaska (list) NA Mike Dunleavy NA Republican (196…
## 3 Arizona (list) NA Katie Hobbs NA Democratic (196…
## 4 Arkansas (list) NA Sarah Huckabee Sanders NA Republican (198…
## 5 California (list) NA Gavin Newsom NA Democratic (196…
## 6 Colorado (list) NA Jared Polis NA Democratic (197…
## 7 Connecticut (list) NA Ned Lamont NA Democratic (195…
## 8 Delaware (list) NA Matt Meyer NA Democratic (197…
## 9 Florida (list) NA Ron DeSantis NA Republican (197…
## 10 Georgia (list) NA Brian Kemp NA Republican (196…
## # ℹ 40 more rows
## # ℹ 4 more variables: `Prior public experience[15]` <chr>,
## # `Inauguration[14]` <chr>, `End of term[14]` <chr>, Ref. <chr>
empty columns
# Flag columns that are completely empty, i.e. have zero non-missing values.
# Testing for "at least one NA" instead would also flag columns that merely
# contain a few gaps, which are not empty and should not be dropped.
colSums(!is.na(current_gov)) == 0
## State Image
## FALSE TRUE
## Governor[14] Party[14]
## FALSE TRUE
## Party[14] Born
## FALSE FALSE
## Prior public experience[15] Inauguration[14]
## FALSE FALSE
## End of term[14] Ref.
## FALSE FALSE
select and rename columns
# Drop the image column and the `Party[14]` columns.
# NOTE(review): the scraped table has two columns named `Party[14]` (one
# logical and all NA, one character holding the party name). Selecting by
# name removes both, so the result has 7 of the original 10 columns and the
# party affiliation is lost. Confirm this is intended.
new_current_gov <- select(current_gov, -c(Image, `Party[14]`))
print(new_current_gov)
## # A tibble: 50 × 7
## State `Governor[14]` Born Prior public experie…¹ `Inauguration[14]`
## <chr> <chr> <chr> <chr> <chr>
## 1 Alabama (list) Kay Ivey (194… Lieutenant GovernorSt… April 10, 2017
## 2 Alaska (list) Mike Dunleavy (196… Alaska Senate December 3, 2018
## 3 Arizona (list) Katie Hobbs (196… Secretary of StateMin… January 2, 2023
## 4 Arkansas (lis… Sarah Huckabe… (198… White House Press Sec… January 10, 2023
## 5 California (l… Gavin Newsom (196… Lieutenant GovernorMa… January 7, 2019
## 6 Colorado (lis… Jared Polis (197… U.S. HouseColorado St… January 8, 2019
## 7 Connecticut (… Ned Lamont (195… Chair of the State In… January 9, 2019
## 8 Delaware (lis… Matt Meyer (197… Executive of New Cast… January 21, 2025
## 9 Florida (list) Ron DeSantis (197… U.S. House January 8, 2019
## 10 Georgia (list) Brian Kemp (196… Secretary of StateGeo… January 14, 2019
## # ℹ 40 more rows
## # ℹ abbreviated name: ¹`Prior public experience[15]`
## # ℹ 2 more variables: `End of term[14]` <chr>, Ref. <chr>
# Strip the Wikipedia footnote markers ("[14]", "[15]") from the column names.
# rename() takes new_name = old_name pairs; the inauguration column was
# previously misspelled as "Inaguration".
new_current_gov <- new_current_gov %>%
  rename(
    Governor = `Governor[14]`,
    `Prior public experience` = `Prior public experience[15]`,
    Inauguration = `Inauguration[14]`,
    `End of term` = `End of term[14]`
  )
print(new_current_gov)
## # A tibble: 50 × 7
## State Governor Born Prior public experie…¹ Inaguration `End of term` Ref.
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Alabam… Kay Ivey (194… Lieutenant GovernorSt… April 10, … 2027 (term l… [17]
## 2 Alaska… Mike Du… (196… Alaska Senate December 3… 2026 (term l… [18]
## 3 Arizon… Katie H… (196… Secretary of StateMin… January 2,… 2027 [19]
## 4 Arkans… Sarah H… (198… White House Press Sec… January 10… 2027 [20]
## 5 Califo… Gavin N… (196… Lieutenant GovernorMa… January 7,… 2027 (term l… [21]
## 6 Colora… Jared P… (197… U.S. HouseColorado St… January 8,… 2027 (term l… [22]
## 7 Connec… Ned Lam… (195… Chair of the State In… January 9,… 2027 [23]
## 8 Delawa… Matt Me… (197… Executive of New Cast… January 21… 2029 [24]
## 9 Florid… Ron DeS… (197… U.S. House January 8,… 2027 (term l… [25]
## 10 Georgi… Brian K… (196… Secretary of StateGeo… January 14… 2027 (term l… [26]
## # ℹ 40 more rows
## # ℹ abbreviated name: ¹`Prior public experience`
gather, clean, read crypto data
# Fetch daily history (histoday endpoint) for XMR priced in USD, limit = 150.
data <- "https://min-api.cryptocompare.com/data/v2/histoday?fsym=XMR&tsym=USD&limit=150"
raw_data <- fromJSON(data)
xmr_data <- raw_data$Data$Data

# Fail early with a readable message when the payload is not the expected
# table of rows (e.g. the service returned an error object instead of data).
if (!is.data.frame(xmr_data) || nrow(xmr_data) == 0) {
  stop("CryptoCompare response did not contain any price rows.", call. = FALSE)
}

# `time` arrives as seconds since 1970-01-01. Pin the time zone to UTC so the
# rendered dates do not shift with the local time zone of the machine.
# NOTE(review): assumes the API's daily timestamps are UTC-based; confirm.
xmr_data <- xmr_data %>%
  mutate(time = as.POSIXct(time, origin = "1970-01-01", tz = "UTC"))

# Lowest opening price in the window and the timestamp at which it occurred.
min_open <- min(xmr_data$open, na.rm = TRUE)
min_time <- xmr_data$time[which.min(xmr_data$open)]
read data
# Load the built-in `airquality` data set and print per-column summary
# statistics, including the NA counts.
data("airquality")
summary(object = airquality)
## Ozone Solar.R Wind Temp
## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
## Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
## Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
## 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
## Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
## NA's :37 NA's :7
## Month Day
## Min. :5.000 Min. : 1.0
## 1st Qu.:6.000 1st Qu.: 8.0
## Median :7.000 Median :16.0
## Mean :6.993 Mean :15.8
## 3rd Qu.:8.000 3rd Qu.:23.0
## Max. :9.000 Max. :31.0
##
how many hot days
# Keep the rows where wind is below 8 and temperature is above 90, then
# count them. which() drops any NA comparisons, matching subset()'s handling.
hot_days <- airquality[which(airquality$Wind < 8 & airquality$Temp > 90), ]
nrow(hot_days)
## [1] 9
create plot
# Scatter plot of each observation's temperature against its month number.
plot(
  x = airquality$Month,
  y = airquality$Temp,
  xlab = "Month",
  ylab = "Temperature",
  main = "Temp by Month"
)

total departure delays
# Sum the departure delay per origin airport (missing delays ignored) and
# list the airports from the largest total to the smallest.
flights %>%
  group_by(origin) %>%
  summarise(total_dep_delay = sum(dep_delay, na.rm = TRUE)) %>%
  arrange(desc(total_dep_delay))
## # A tibble: 3 × 2
## origin total_dep_delay
## <chr> <dbl>
## 1 EWR 1776635
## 2 JFK 1325264
## 3 LGA 1050301
total distance and # flights
# Sum the flight distance per origin airport (missing distances ignored) and
# list the airports from the largest total to the smallest.
flights %>%
  group_by(origin) %>%
  summarise(total_distance = sum(distance, na.rm = TRUE)) %>%
  arrange(desc(total_distance))
## # A tibble: 3 × 2
## origin total_distance
## <chr> <dbl>
## 1 JFK 140906931
## 2 EWR 127691515
## 3 LGA 81619161
# Number of flights per origin airport, busiest airport first.
# count() groups, tallies the rows, and (with sort = TRUE) orders the result
# by descending count in a single step.
flights %>%
  count(origin, name = "num_flights", sort = TRUE)
## # A tibble: 3 × 2
## origin num_flights
## <chr> <int>
## 1 EWR 120835
## 2 JFK 111279
## 3 LGA 104662