#1. Sections: Introduction, Prerequisites, Creating Date / Times, From Strings, From Individual Components, From other types; Exercises: 3 
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(nycflights13)
# Creating date/times ----
# Current date and current date-time; the recorded outputs reflect the moment
# this script was run.
today()
## [1] "2020-07-13"
now()
## [1] "2020-07-13 18:28:08 CDT"
# From strings ----
# The helper's name spells the order of the components in the input
# (y = year, m = month, d = day).
ymd("2017-01-31")
## [1] "2017-01-31"
mdy("January 31st, 2017")
## [1] "2017-01-31"
dmy("31-Jan-2017")
## [1] "2017-01-31"
# An unquoted number is accepted as well.
ymd(20170131)
## [1] "2017-01-31"
# The _hms / _hm variants also parse a time of day; results print in UTC.
ymd_hms("2017-01-31 20:11:59")
## [1] "2017-01-31 20:11:59 UTC"
mdy_hm("01/31/2017 08:01")
## [1] "2017-01-31 08:01:00 UTC"
# With tz supplied, the printed result carries a time zone.
ymd(20170131, tz = "UTC")
## [1] "2017-01-31 UTC"
# From individual components ----
# Show the separate date and time columns of the flights table.
select(flights, year, month, day, hour, minute)
# Keep the same columns and combine them into one `departure` date-time.
flights %>% 
  transmute(
    year, month, day, hour, minute,
    departure = make_datetime(year, month, day, hour, minute)
  )
# Build a date-time from a calendar date plus a clock time stored as a single
# number: the integer quotient by 100 is used as the hour and the remainder as
# the minute (e.g. 517 becomes hour 5, minute 17).
make_datetime_100 <- function(year, month, day, time) {
  hour_part <- time %/% 100
  minute_part <- time %% 100
  make_datetime(year, month, day, hour_part, minute_part)
}

# Drop flights lacking a departure or arrival time, convert the four clock-time
# columns to date-times on the flight's calendar date, and keep only the
# origin/destination plus the delay and time columns.
flights_dt <- flights %>% 
  filter(!(is.na(dep_time) | is.na(arr_time))) %>% 
  mutate(
    across(
      c(dep_time, arr_time, sched_dep_time, sched_arr_time),
      ~ make_datetime_100(year, month, day, .x)
    )
  ) %>% 
  select(origin, dest, ends_with("delay"), ends_with("time"))

flights_dt
# Distribution of departure times over the whole data set, one bin per day.
ggplot(flights_dt, aes(dep_time)) + 
  geom_freqpoly(binwidth = 86400) # 86400 seconds = 1 day

# Same plot restricted to departures before 2013-01-02, in 10-minute bins.
ggplot(
  filter(flights_dt, dep_time < ymd(20130102)),
  aes(dep_time)
) + 
  geom_freqpoly(binwidth = 600) # 600 s = 10 minutes

 #From other types
# Convert a date to a date-time and a date-time to a date.
as_datetime(today())
## [1] "2020-07-13 UTC"
as_date(now())
## [1] "2020-07-13"
# Plain numbers are read as offsets from 1970-01-01: seconds for
# as_datetime(), days for as_date() (see the recorded outputs).
as_datetime(60 * 60 * 10)
## [1] "1970-01-01 10:00:00 UTC"
as_date(365 * 10 + 2)
## [1] "1980-01-01"
# Exercises ----
# Parse each string with the helper whose letters match the order of the
# date components in that string.
d1 <- "January 1, 2010"
mdy(d1)
## [1] "2010-01-01"
d2 <- "2015-Mar-07"
ymd(d2)
## [1] "2015-03-07"
d3 <- "06-Jun-2017"
dmy(d3)
## [1] "2017-06-06"
# The parentheses around the year do not prevent parsing, and the helper is
# applied to every element of the vector.
d4 <- c("August 19 (2015)", "July 1 (2015)")
mdy(d4)
## [1] "2015-08-19" "2015-07-01"
# The two-digit year "14" is read as 2014.
d5 <- "12/30/14" # Dec 30, 2014
mdy(d5)
## [1] "2014-12-30"
#2. Sections: Date Time Components, Getting Components, Rounding, Setting Components; Exercises:  2, 3, 4, 5

# Reference date-time used by the accessor calls below.
datetime <- ymd_hms("2016-07-08 12:34:56")

# Each accessor extracts one component of the date-time.
year(datetime)
## [1] 2016
month(datetime)
## [1] 7
# Day of the month.
mday(datetime)
## [1] 8
# Day of the year.
yday(datetime)
## [1] 190
# Day of the week as a number; this Friday is 6, so numbering starts on Sunday.
wday(datetime)
## [1] 6
# label = TRUE returns an ordered factor of names instead of a number;
# abbr = FALSE gives the full name.
month(datetime, label = TRUE)
## [1] Jul
## 12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec
wday(datetime, label = TRUE, abbr = FALSE) 
## [1] Friday
## 7 Levels: Sunday < Monday < Tuesday < Wednesday < Thursday < ... < Saturday
# Number of departures on each day of the week.
ggplot(
  mutate(flights_dt, wday = wday(dep_time, label = TRUE)),
  aes(x = wday)
) +
  geom_bar()

# Average arrival delay by the minute of the hour of the actual departure.
flights_dt %>% 
  group_by(minute = minute(dep_time)) %>% 
  summarise(
    avg_delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  ) %>% 
  ggplot(aes(minute, avg_delay)) +
  geom_line()
## `summarise()` ungrouping output (override with `.groups` argument)

# Same summary keyed on the minute of the scheduled departure, kept for the
# two plots that follow.
sched_dep <- flights_dt %>% 
  group_by(minute = minute(sched_dep_time)) %>% 
  summarise(
    avg_delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  )
## `summarise()` ungrouping output (override with `.groups` argument)
sched_dep %>% 
  ggplot(aes(minute, avg_delay)) +
  geom_line()

# Number of flights scheduled at each minute of the hour.
sched_dep %>% 
  ggplot(aes(minute, n)) +
  geom_line()

# Rounding ----
# Flights per week: floor each departure to the start of its week, then count
# the rows in each week.
flights_dt %>% 
  mutate(week = floor_date(dep_time, "week")) %>% 
  count(week) %>% 
  ggplot(aes(week, n)) +
  geom_line()

# Setting components ----
# An accessor on the left of `<-` overwrites that component of `datetime`.
(datetime <- ymd_hms("2016-07-08 12:34:56"))
## [1] "2016-07-08 12:34:56 UTC"
year(datetime) <- 2020
datetime
## [1] "2020-07-08 12:34:56 UTC"
month(datetime) <- 01
datetime
## [1] "2020-01-08 12:34:56 UTC"
hour(datetime) <- hour(datetime) + 1
datetime
## [1] "2020-01-08 13:34:56 UTC"
# update() sets several components in a single call.
update(datetime, year = 2020, month = 2, mday = 2, hour = 2) 
## [1] "2020-02-02 02:34:56 UTC"
# Values too large for the unit roll over: day 30 of February 2015 becomes
# March 2nd, and hour 400 lands 16 days and 16 hours later.
ymd("2015-02-01") %>% 
  update(mday = 30)
## [1] "2015-03-02"
ymd("2015-02-01") %>% 
  update(hour = 400) 
## [1] "2015-02-17 16:00:00 UTC"
# Move every departure onto day 1 of the year so that only the time of day
# varies, then plot its distribution (binwidth is in seconds: 300 s = 5 min).
 flights_dt %>% 
  mutate(dep_hour = update(dep_time, yday = 1)) %>% 
  ggplot(aes(dep_hour)) +
    geom_freqpoly(binwidth = 300)

 #Exercise-2
# Exercise 2: list flights whose recorded dep_delay differs from
# dep_time - sched_dep_time. The difference is requested in minutes through
# difftime(units = "mins"): subtracting two date-times with `-` lets R pick
# the units (secs, mins, hours or days) from the data, so a fixed "/ 60" is
# only correct when seconds happen to be chosen.
flights_dt %>%
  select(contains("dep")) %>%
  mutate(
    cal_delay = as.numeric(difftime(dep_time, sched_dep_time, units = "mins"))
  ) %>%
  filter(dep_delay != cal_delay)
# Exercise 2-1: take the mismatching flights, move dep_time one day later
# (update() rolls over at the end of a month), recompute the delay in minutes,
# and list whatever still disagrees with dep_delay.
flights_dt %>%
  select(contains("dep")) %>%
  mutate(
    cal_delay = as.numeric(difftime(dep_time, sched_dep_time, units = "mins"))
  ) %>%
  filter(dep_delay != cal_delay) %>%
  mutate(
    dep_time = update(dep_time, mday = mday(dep_time) + 1),
    # mutate() evaluates sequentially, so this uses the shifted dep_time.
    cal_delay = as.numeric(difftime(dep_time, sched_dep_time, units = "mins"))
  ) %>%
  filter(dep_delay != cal_delay)
 #Exercise-3
# Exercise 3: put air_time next to the gap between departure and arrival.
# The gap is requested in minutes explicitly so it does not depend on the
# units `-` would choose automatically for the difference
# (air_time is presumably recorded in minutes; confirm in ?flights).
flights_dt %>%
  mutate(
    cal_air_time = as.numeric(difftime(arr_time, dep_time, units = "mins"))
  ) %>%
  select(contains("air_time"))
# Join the airport table once for the origin and once for the destination so
# the time zone columns of both ends can be inspected; the suffixes tell the
# two sets of airport columns apart.
flights_dt %>%
  left_join(airports, by = c("origin" = "faa")) %>%
  left_join(
    airports,
    by = c("dest" = "faa"),
    suffix = c(".origin", ".dest")
  ) %>%
  select(dep_time, arr_time, air_time, contains("tzone"))
# Exercise 4: average departure delay for each scheduled departure hour,
# drawn as points with a smoothed trend line.
flights_dt %>%
  group_by(hour = hour(sched_dep_time)) %>%
  summarise(avg_dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
  ggplot(aes(x = hour, y = avg_dep_delay)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    x = "Hour of the day",
    y = "Average departure delay (in minutes)"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

 #Exercise-5
# Exercise 5: average departure and arrival delay per day of the week, shown
# as side-by-side bars. The two averages are stacked into long form with
# pivot_longer(), naming the columns explicitly instead of relying on their
# positions (gather() is superseded).
flights_dt %>%
  mutate(dayweek = wday(sched_dep_time, label = TRUE)) %>%
  group_by(dayweek) %>%
  summarize(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    avg_arr_delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  pivot_longer(
    cols = c(avg_dep_delay, avg_arr_delay),
    names_to = "delay",
    values_to = "minutes"
  ) %>%
  ggplot() +
  geom_col(
    mapping = aes(x = dayweek, y = minutes, fill = delay),
    position = "dodge"
  )
## `summarise()` ungrouping output (override with `.groups` argument)

#3. Sections: Time Spans, Durations, Periods, Intervals; Exercises: 3, 5

# Subtracting two dates gives a difftime; as.duration() re-expresses it as a
# number of seconds.
h_age <- today() - ymd(19791014)
h_age 
## Time difference of 14883 days
as.duration(h_age) 
## [1] "1285891200s (~40.75 years)"
# Duration constructors (d-prefixed); every result is stored in seconds.
dseconds(15)
## [1] "15s"
dminutes(10)
## [1] "600s (~10 minutes)"
dhours(c(12, 24)) 
## [1] "43200s (~12 hours)" "86400s (~1 days)"
ddays(0:5)
## [1] "0s"                "86400s (~1 days)"  "172800s (~2 days)"
## [4] "259200s (~3 days)" "345600s (~4 days)" "432000s (~5 days)"
dweeks(3)
## [1] "1814400s (~3 weeks)"
# One duration year is 31557600 s, i.e. 365.25 days.
dyears(1) 
## [1] "31557600s (~1 years)"
# Durations can be multiplied and added.
 2 * dyears(1) 
## [1] "63115200s (~2 years)"
dyears(1) + dweeks(12) + dhours(15)
## [1] "38869200s (~1.23 years)"
# Durations can be added to and subtracted from dates.
 tomorrow <- today() + ddays(1)
last_year <- today() - dyears(1)
one_pm <- ymd_hms("2016-03-12 13:00:00", tz = "America/New_York")

# Adding exactly one day's worth of seconds lands on 14:00 rather than 13:00:
# the zone abbreviation changes from EST to EDT between the two instants.
one_pm
## [1] "2016-03-12 13:00:00 EST"
one_pm + ddays(1) 
## [1] "2016-03-13 14:00:00 EDT"
# Periods ----
# Adding a one-day period keeps the clock time at 13:00 across the EST -> EDT
# change.
one_pm
## [1] "2016-03-12 13:00:00 EST"
one_pm + days(1)
## [1] "2016-03-13 13:00:00 EDT"
# Period constructors; results print as year/month/day/hour/minute/second
# fields rather than as a number of seconds.
 seconds(15)
## [1] "15S"
minutes(10)
## [1] "10M 0S"
hours(c(12, 24))
## [1] "12H 0M 0S" "24H 0M 0S"
days(7)
## [1] "7d 0H 0M 0S"
months(1:6)
## [1] "1m 0d 0H 0M 0S" "2m 0d 0H 0M 0S" "3m 0d 0H 0M 0S" "4m 0d 0H 0M 0S"
## [5] "5m 0d 0H 0M 0S" "6m 0d 0H 0M 0S"
weeks(3)
## [1] "21d 0H 0M 0S"
years(1)
## [1] "1y 0m 0d 0H 0M 0S"
# Periods can be multiplied and added; the fields are not normalised
# (60 months stays 60m, 25 hours stays 25H).
10 * (months(6) + days(1))
## [1] "60m 10d 0H 0M 0S"
days(50) + hours(25) + minutes(2) 
## [1] "50d 25H 2M 0S"
# Duration vs. period on the same start date: the duration year of 365.25
# days ends on 2016-12-31 06:00, the period year on the same calendar day of
# 2017.
ymd("2016-01-01") + dyears(1)
## [1] "2016-12-31 06:00:00 UTC"
ymd("2016-01-01") + years(1)
## [1] "2017-01-01"
one_pm + ddays(1)
## [1] "2016-03-13 14:00:00 EDT"
one_pm + days(1) 
## [1] "2016-03-13 13:00:00 EDT"
# Flights whose arrival date-time is earlier than their departure date-time.
flights_dt %>% 
  filter(arr_time < dep_time)  
# Flag those flights as overnight and push both arrival columns forward by one
# day. `overnight` must be computed first, from the unmodified arr_time;
# `overnight * 1` turns TRUE/FALSE into 1/0, so days() adds one day or none.
 flights_dt <- flights_dt %>% 
  mutate(
    overnight = arr_time < dep_time,
    arr_time = arr_time + days(overnight * 1),
    sched_arr_time = sched_arr_time + days(overnight * 1)
  )
# Check: list any overnight flights that still arrive before they depart.
 flights_dt %>% 
  filter(overnight, arr_time < dep_time) 
# Intervals ----
# Dividing one period by another is not tied to any particular dates; the
# recorded result is 365.25.
years(1) / days(1)
## [1] 365.25
# An interval (%--%) has a concrete start and end, so the division is exact
# for that span: 365 days for the year starting on the run date.
 next_year <- today() + years(1)
(today() %--% next_year) / ddays(1)
## [1] 365
# %/% gives the whole number of one-day periods that fit into the interval.
(today() %--% next_year) %/% days(1) 
## [1] 365
 #Exercises-3
# Exercise 3: first day of every month in 2015 and in the current year.
# Start from January 1st and add 0 to 11 month periods so the results are
# Date values; summing years() + months() + days() alone only yields Period
# objects (spans such as "2015y 1m 1d"), not dates.
year_2015 <- ymd("2015-01-01") + months(0:11)
year_2015
##  [1] "2015-01-01" "2015-02-01" "2015-03-01" "2015-04-01" "2015-05-01"
##  [6] "2015-06-01" "2015-07-01" "2015-08-01" "2015-09-01" "2015-10-01"
## [11] "2015-11-01" "2015-12-01"
# floor_date(today(), "year") is January 1st of the year the script runs in;
# the output shown is for a run during 2020.
year_current <- floor_date(today(), "year") + months(0:11)
year_current
##  [1] "2020-01-01" "2020-02-01" "2020-03-01" "2020-04-01" "2020-05-01"
##  [6] "2020-06-01" "2020-07-01" "2020-08-01" "2020-09-01" "2020-10-01"
## [11] "2020-11-01" "2020-12-01"
 #Exercises-5
# Interval from today to one year ahead, divided by a one-month period; the
# recorded result is 12.
(today() %--% (today() + years(1))) / months(1)
## [1] 12
 #4. Sections: Time Zones; No exercises

# Time zone of the machine that ran the script, and the list of known zone
# names.
Sys.timezone()
## [1] "America/Chicago"
 length(OlsonNames())
## [1] 594
head(OlsonNames())
## [1] "Africa/Abidjan"     "Africa/Accra"       "Africa/Addis_Ababa"
## [4] "Africa/Algiers"     "Africa/Asmara"      "Africa/Asmera"
# Three wall-clock times in three zones that denote the same instant: the
# differences below are all zero.
 (x1 <- ymd_hms("2015-06-01 12:00:00", tz = "America/New_York"))
## [1] "2015-06-01 12:00:00 EDT"
(x2 <- ymd_hms("2015-06-01 18:00:00", tz = "Europe/Copenhagen"))
## [1] "2015-06-01 18:00:00 CEST"
(x3 <- ymd_hms("2015-06-02 04:00:00", tz = "Pacific/Auckland"))
## [1] "2015-06-02 04:00:00 NZST"
x1 - x2
## Time difference of 0 secs
x1 - x3 
## Time difference of 0 secs
# Combining them with c() prints every element in a single zone (EDT here).
x4 <- c(x1, x2, x3)
x4
## [1] "2015-06-01 12:00:00 EDT" "2015-06-01 12:00:00 EDT"
## [3] "2015-06-01 12:00:00 EDT"
# with_tz() changes only how the instant is displayed: the difference from
# the original is zero.
x4a <- with_tz(x4, tzone = "Australia/Lord_Howe")
x4a
## [1] "2015-06-02 02:30:00 +1030" "2015-06-02 02:30:00 +1030"
## [3] "2015-06-02 02:30:00 +1030"
x4a - x4 
## Time differences in secs
## [1] 0 0 0
# force_tz() keeps the clock reading and changes the underlying instant: the
# result is 14.5 hours earlier than the original.
x4b <- force_tz(x4, tzone = "Australia/Lord_Howe")
x4b
## [1] "2015-06-01 12:00:00 +1030" "2015-06-01 12:00:00 +1030"
## [3] "2015-06-01 12:00:00 +1030"
x4b - x4 
## Time differences in hours
## [1] -14.5 -14.5 -14.5