library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr     1.1.4     v readr     2.1.6
## v forcats   1.0.1     v stringr   1.6.0
## v ggplot2   4.0.1     v tibble    3.3.1
## v lubridate 1.9.4     v tidyr     1.3.2
## v purrr     1.2.1     
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(lubridate)
# A string that cannot be parsed becomes NA and triggers a parse warning.
ymd(c("2010-10-10", "bananas"))
## Warning: 1 failed to parse.
## [1] "2010-10-10" NA
# For each string below, the parser is chosen so that its letters (y, m, d)
# follow the order in which year, month and day appear in the text.
# Month name, day, year -> mdy().
d1 <- "January 1, 2010"
mdy(d1)
## [1] "2010-01-01"
# Year, abbreviated month name, day -> ymd().
d2 <- "2015-Mar-07"
ymd(d2)
## [1] "2015-03-07"
# Day, abbreviated month name, year -> dmy().
d3 <- "06-Jun-2017"
dmy(d3)
## [1] "2017-06-06"
# The parser is vectorised; the parentheses around the year are tolerated.
d4 <- c("August 19 (2015)", "July 1 (2015)")
mdy(d4)
## [1] "2015-08-19" "2015-07-01"
# Numeric month/day with a two-digit year; "14" is read as 2014.
d5 <- "12/30/14"
mdy(d5)
## [1] "2014-12-30"
#' Build a date-time from calendar parts and an HHMM clock value
#'
#' @param year,month,day Calendar components, passed straight through to
#'   `make_datetime()`.
#' @param time Clock time encoded as a number of the form HHMM (for example
#'   517 means 05:17). The hour is `time %/% 100`, the minute `time %% 100`.
#' @param tz Time zone handed to `make_datetime()`. Defaults to `"EST"`.
#' @return The result of `make_datetime()` for these components, with the
#'   seconds fixed at 0.
make_datetime_100 <- function(year, month, day, time, tz = "EST") {
  clock_hour <- time %/% 100
  clock_min <- time %% 100
  make_datetime(
    year, month, day,
    hour = clock_hour,
    min = clock_min,
    sec = 0,
    tz = tz
  )
}
# Keep flights that have both an actual departure and an actual arrival time,
# then replace the four HHMM clock columns with date-times built from each
# row's year/month/day. The helper's default time zone ("EST") is used.
flights %>%
  filter(!(is.na(dep_time) | is.na(arr_time))) %>%
  mutate(
    across(
      c(dep_time, arr_time, sched_dep_time, sched_arr_time),
      function(hhmm) make_datetime_100(year, month, day, hhmm)
    )
  )
## # A tibble: 328,063 x 19
##     year month   day dep_time            sched_dep_time      dep_delay
##    <int> <int> <int> <dttm>              <dttm>                  <dbl>
##  1  2013     1     1 2013-01-01 05:17:00 2013-01-01 05:15:00         2
##  2  2013     1     1 2013-01-01 05:33:00 2013-01-01 05:29:00         4
##  3  2013     1     1 2013-01-01 05:42:00 2013-01-01 05:40:00         2
##  4  2013     1     1 2013-01-01 05:44:00 2013-01-01 05:45:00        -1
##  5  2013     1     1 2013-01-01 05:54:00 2013-01-01 06:00:00        -6
##  6  2013     1     1 2013-01-01 05:54:00 2013-01-01 05:58:00        -4
##  7  2013     1     1 2013-01-01 05:55:00 2013-01-01 06:00:00        -5
##  8  2013     1     1 2013-01-01 05:57:00 2013-01-01 06:00:00        -3
##  9  2013     1     1 2013-01-01 05:57:00 2013-01-01 06:00:00        -3
## 10  2013     1     1 2013-01-01 05:58:00 2013-01-01 06:00:00        -2
## # i 328,053 more rows
## # i 13 more variables: arr_time <dttm>, sched_arr_time <dttm>, arr_delay <dbl>,
## #   carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## #   air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Lookup table: FAA airport code -> time zone name.
airports1 <- airports %>%
  select(faa, tzone)

# Attach the destination and origin time zones to every flight.
# Both lookups run in a single left-assigned pipeline, so `flights1` is
# created once instead of being built and then overwritten with `->`.
# These are left joins: every flight row is kept, and an airport code that is
# absent from `airports1` gets NA for its time zone.
flights1 <- flights %>%
  left_join(airports1, by = join_by(dest == faa)) %>%
  rename(dest_tzone = tzone) %>%
  left_join(airports1, by = join_by(origin == faa)) %>%
  rename(origin_tzone = tzone)
# Build `flights_dt`: drop flights lacking an actual departure or arrival
# time, turn the four HHMM clock columns into date-times, and keep only the
# route, delay, time and time-zone columns.
# Departure-side columns are built with the origin's zone, arrival-side
# columns with the destination's zone.
# NOTE(review): `tz` receives a whole column here, while a POSIXct vector
# carries a single time zone attribute -- confirm per-row zones are honoured.
flights_dt <- flights1 %>%
  filter(!(is.na(dep_time) | is.na(arr_time))) %>%
  mutate(
    across(
      c(dep_time, sched_dep_time),
      function(hhmm) {
        make_datetime_100(year, month, day, hhmm, tz = origin_tzone)
      }
    ),
    across(
      c(arr_time, sched_arr_time),
      function(hhmm) {
        make_datetime_100(year, month, day, hhmm, tz = dest_tzone)
      }
    )
  ) %>%
  select(
    origin, dest,
    ends_with("delay"), ends_with("time"),
    air_time, origin_tzone, dest_tzone
  )

flights_dt
## # A tibble: 328,063 x 11
##    origin dest  dep_delay arr_delay dep_time            sched_dep_time     
##    <chr>  <chr>     <dbl>     <dbl> <dttm>              <dttm>             
##  1 EWR    IAH           2        11 2013-01-01 05:17:00 2013-01-01 05:15:00
##  2 LGA    IAH           4        20 2013-01-01 05:33:00 2013-01-01 05:29:00
##  3 JFK    MIA           2        33 2013-01-01 05:42:00 2013-01-01 05:40:00
##  4 JFK    BQN          -1       -18 2013-01-01 05:44:00 2013-01-01 05:45:00
##  5 LGA    ATL          -6       -25 2013-01-01 05:54:00 2013-01-01 06:00:00
##  6 EWR    ORD          -4        12 2013-01-01 05:54:00 2013-01-01 05:58:00
##  7 EWR    FLL          -5        19 2013-01-01 05:55:00 2013-01-01 06:00:00
##  8 LGA    IAD          -3       -14 2013-01-01 05:57:00 2013-01-01 06:00:00
##  9 JFK    MCO          -3        -8 2013-01-01 05:57:00 2013-01-01 06:00:00
## 10 LGA    ORD          -2         8 2013-01-01 05:58:00 2013-01-01 06:00:00
## # i 328,053 more rows
## # i 5 more variables: arr_time <dttm>, sched_arr_time <dttm>, air_time <dbl>,
## #   origin_tzone <chr>, dest_tzone <chr>
# Flights that departed after midnight on 2013-06-01, earliest first.
# The cutoff is pinned to "America/New_York" (the origins shown are the NYC
# airports EWR/LGA/JFK) instead of Sys.timezone(), so the selected rows do not
# depend on the time zone of the machine running the script.
flights_dt %>%
  filter(dep_time > ymd("2013-06-01", tz = "America/New_York")) %>%
  arrange(dep_time)
## # A tibble: 194,177 x 11
##    origin dest  dep_delay arr_delay dep_time            sched_dep_time     
##    <chr>  <chr>     <dbl>     <dbl> <dttm>              <dttm>             
##  1 JFK    PSE           3        -9 2013-06-01 00:02:00 2013-06-01 23:59:00
##  2 EWR    CLT          -9       -16 2013-06-01 04:51:00 2013-06-01 05:00:00
##  3 EWR    IAH          -9       -45 2013-06-01 05:06:00 2013-06-01 05:15:00
##  4 LGA    IAH         -11       -29 2013-06-01 05:34:00 2013-06-01 05:45:00
##  5 JFK    BQN          -7         3 2013-06-01 05:38:00 2013-06-01 05:45:00
##  6 JFK    MIA          -1        -8 2013-06-01 05:39:00 2013-06-01 05:40:00
##  7 EWR    RSW         -14       -20 2013-06-01 05:46:00 2013-06-01 06:00:00
##  8 LGA    DFW          -9       -22 2013-06-01 05:51:00 2013-06-01 06:00:00
##  9 LGA    PHL          -8        -8 2013-06-01 05:52:00 2013-06-01 06:00:00
## 10 JFK    IAD          -7       -11 2013-06-01 05:53:00 2013-06-01 06:00:00
## # i 194,167 more rows
## # i 5 more variables: arr_time <dttm>, sched_arr_time <dttm>, air_time <dbl>,
## #   origin_tzone <chr>, dest_tzone <chr>
# Same selection without arrange(): rows keep the order they have in
# `flights_dt`, which is not chronological.
# The cutoff is pinned to "America/New_York" (the origins shown are the NYC
# airports EWR/LGA/JFK) instead of Sys.timezone(), so the selected rows do not
# depend on the time zone of the machine running the script.
flights_dt %>%
  filter(dep_time > ymd("2013-06-01", tz = "America/New_York"))
## # A tibble: 194,177 x 11
##    origin dest  dep_delay arr_delay dep_time            sched_dep_time     
##    <chr>  <chr>     <dbl>     <dbl> <dttm>              <dttm>             
##  1 EWR    CLT         -13       -34 2013-10-01 04:47:00 2013-10-01 05:00:00
##  2 EWR    IAH           5       -22 2013-10-01 05:22:00 2013-10-01 05:17:00
##  3 JFK    MIA          -9       -46 2013-10-01 05:36:00 2013-10-01 05:45:00
##  4 LGA    IAH          -6       -26 2013-10-01 05:39:00 2013-10-01 05:45:00
##  5 JFK    SJU          -6       -16 2013-10-01 05:39:00 2013-10-01 05:45:00
##  6 JFK    BQN          -6       -20 2013-10-01 05:44:00 2013-10-01 05:50:00
##  7 JFK    IAD         -11       -23 2013-10-01 05:49:00 2013-10-01 06:00:00
##  8 LGA    PHL         -10       -12 2013-10-01 05:50:00 2013-10-01 06:00:00
##  9 LGA    DCA         -10       -10 2013-10-01 05:50:00 2013-10-01 06:00:00
## 10 EWR    ORD          -9        -3 2013-10-01 05:51:00 2013-10-01 06:00:00
## # i 194,167 more rows
## # i 5 more variables: arr_time <dttm>, sched_arr_time <dttm>, air_time <dbl>,
## #   origin_tzone <chr>, dest_tzone <chr>

10; the first index is 1. The observations may appear in an unsorted or irregular order.

# Tag every flight with the hour component of its actual departure time.
flights_dt <- mutate(flights_dt, hour = hour(dep_time))

# Average arrival delay per departure hour (missing delays ignored),
# listed from the largest average delay to the smallest.
flights_dt %>%
  group_by(hour) %>%
  summarise(
    mean_arr_delay = mean(arr_delay, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_arr_delay))
## # A tibble: 24 x 2
##     hour mean_arr_delay
##    <int>          <dbl>
##  1     3          288. 
##  2     2          225. 
##  3     1          196. 
##  4     0          119. 
##  5    23           90.7
##  6    22           62.1
##  7    21           35.6
##  8    20           21.6
##  9    19           14.6
## 10    18           13.4
## # i 14 more rows
# Average air time per departure hour (missing values ignored),
# longest average first.
flights_dt %>%
  group_by(hour) %>%
  summarise(
    mean_air_time = mean(air_time, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_air_time))
## # A tibble: 24 x 2
##     hour mean_air_time
##    <int>         <dbl>
##  1    18          170.
##  2    17          169.
##  3     7          166.
##  4     9          161.
##  5    10          161.
##  6    19          156.
##  7    16          153.
##  8     6          151.
##  9    13          149.
## 10    15          149.
## # i 14 more rows
# Average air time per departure hour (missing values ignored),
# shortest average first.
flights_dt %>%
  group_by(hour) %>%
  summarise(
    mean_air_time = mean(air_time, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(mean_air_time)
## # A tibble: 24 x 2
##     hour mean_air_time
##    <int>         <dbl>
##  1     4          84.2
##  2     3          99  
##  3    22         104. 
##  4    23         129. 
##  5     1         130. 
##  6    21         132. 
##  7    14         133. 
##  8     5         133. 
##  9     2         135. 
## 10    12         136. 
## # i 14 more rows
# Example timestamp, interpreted in the time zone of the machine running the
# script (so the zone labels in the printed results are machine-dependent).
dt2 <- ymd_hms("2023-04-05 03:12:34pm", tz = Sys.timezone())

# Round down to the start of the year containing dt2.
floor_date(dt2, "year") 
## [1] "2023-01-01 EST"
# Round to the nearest week boundary; for dt2 that is 2023-04-09.
round_date(dt2, "week")
## [1] "2023-04-09 EDT"

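The result is the first day of a week: `round_date(dt2, "week")` returns the nearest week start, which here is Sunday 2023-04-09.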

# Elapsed time since a birth timestamp, with both instants expressed in
# Shanghai local time.
birth <- ymd_hm("2007-05-17 09:36", tz = "Asia/Shanghai")
# The function is called with its namespace because the result is stored
# under the same name, `now`.
now <- lubridate::now(tzone = "Asia/Shanghai")

# Convert the time difference into a lubridate Duration.
diff <- as.duration(now - birth)


# Elapsed time expressed in days (duration divided by a one-day duration).
diff / ddays(1)
## [1] 6910.519
# Printing the Duration itself shows the elapsed seconds.
diff
## [1] "597068804.077531s (~18.92 years)"