#1. Sections: Introduction, Prerequisites, Creating Date / Times, From Strings, From Individual Components, From other types; Exercises: 3
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(nycflights13)
# Creating date/times ----
# today() returns the current date; now() returns the current date-time.
today()
## [1] "2020-07-13"
now()
## [1] "2020-07-13 18:28:08 CDT"
# From strings ----
# The parser name spells out the order of the components in the input:
# y = year, m = month, d = day.
ymd("2017-01-31")
## [1] "2017-01-31"
mdy("January 31st, 2017")
## [1] "2017-01-31"
dmy("31-Jan-2017")
## [1] "2017-01-31"
# Unquoted numbers are accepted as well.
ymd(20170131)
## [1] "2017-01-31"
# A _hms / _hm suffix on the parser name parses a date-time instead of a date.
ymd_hms("2017-01-31 20:11:59")
## [1] "2017-01-31 20:11:59 UTC"
mdy_hm("01/31/2017 08:01")
## [1] "2017-01-31 08:01:00 UTC"
# Supplying tz makes a date parser return a value that carries a time zone
# (see the output below).
ymd(20170131, tz = "UTC")
## [1] "2017-01-31 UTC"
# From individual components ----
# In `flights` the pieces of the departure date-time live in separate columns.
select(flights, year, month, day, hour, minute)

# Assemble those pieces into a single `departure` date-time column.
flights %>%
  select(year, month, day, hour, minute) %>%
  mutate(departure = make_datetime(year, month, day, hour, minute))
#' Build a date-time from date parts and an HHMM-encoded clock time
#'
#' @param year,month,day Date components, passed unchanged to make_datetime().
#' @param time Clock time encoded as hour * 100 + minute (e.g. 517 is 05:17).
#' @return The value of make_datetime() for the decoded components.
make_datetime_100 <- function(year, month, day, time) {
  hour_part <- time %/% 100
  minute_part <- time %% 100
  make_datetime(year, month, day, hour_part, minute_part)
}
# Turn the HHMM-encoded time columns of `flights` into date-times, dropping
# rows where the departure or arrival time is missing, and keep only the
# origin/destination plus the delay and time columns.
flights_dt <- flights %>%
  filter(!(is.na(dep_time) | is.na(arr_time))) %>%
  mutate(
    sched_dep_time = make_datetime_100(year, month, day, sched_dep_time),
    dep_time = make_datetime_100(year, month, day, dep_time),
    sched_arr_time = make_datetime_100(year, month, day, sched_arr_time),
    arr_time = make_datetime_100(year, month, day, arr_time)
  ) %>%
  select(origin, dest, ends_with("delay"), ends_with("time"))

flights_dt
# Distribution of departure times, one bin per day.
ggplot(flights_dt, aes(dep_time)) +
  geom_freqpoly(binwidth = 86400) # 86400 seconds = 1 day

# Same plot restricted to departures before 2013-01-02, in 10-minute bins.
flights_dt %>%
  filter(dep_time < ymd(20130102)) %>%
  ggplot(aes(dep_time)) +
  geom_freqpoly(binwidth = 600) # 600 s = 10 minutes

# From other types ----
# as_datetime() and as_date() convert between dates and date-times.
as_datetime(today())
## [1] "2020-07-13 UTC"
as_date(now())
## [1] "2020-07-13"
# Numeric input is an offset from 1970-01-01: seconds for as_datetime(),
# days for as_date() (see the outputs below).
as_datetime(60 * 60 * 10)
## [1] "1970-01-01 10:00:00 UTC"
as_date(365 * 10 + 2)
## [1] "1980-01-01"
# Exercises ----
# Parse each string with the parser whose name matches the order of its
# year / month / day components.
d1 <- "January 1, 2010"
mdy(d1)
## [1] "2010-01-01"
d2 <- "2015-Mar-07"
ymd(d2)
## [1] "2015-03-07"
d3 <- "06-Jun-2017"
dmy(d3)
## [1] "2017-06-06"
# Parsers are vectorised; the parentheses around the year do not get in the way.
d4 <- c("August 19 (2015)", "July 1 (2015)")
mdy(d4)
## [1] "2015-08-19" "2015-07-01"
d5 <- "12/30/14" # Dec 30, 2014
mdy(d5)
## [1] "2014-12-30"
# 2. Sections: Date Time Components, Getting Components, Rounding, Setting Components; Exercises: 2, 3, 4, 5
# Getting components ----
# Accessor functions pull a single component out of a date-time.
datetime <- ymd_hms("2016-07-08 12:34:56")
year(datetime)
## [1] 2016
month(datetime)
## [1] 7
# mday() = day of the month, yday() = day of the year, wday() = day of the week.
mday(datetime)
## [1] 8
yday(datetime)
## [1] 190
# 2016-07-08 is a Friday and wday() reports 6, i.e. the week is counted
# from Sunday here.
wday(datetime)
## [1] 6
# label = TRUE returns an ordered factor of names instead of a number;
# abbr = FALSE gives the full name.
month(datetime, label = TRUE)
## [1] Jul
## 12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec
wday(datetime, label = TRUE, abbr = FALSE)
## [1] Friday
## 7 Levels: Sunday < Monday < Tuesday < Wednesday < Thursday < ... < Saturday
# Number of departures per day of the week.
ggplot(
  mutate(flights_dt, wday = wday(dep_time, label = TRUE)),
  aes(x = wday)
) +
  geom_bar()

# Average arrival delay for each minute-of-the-hour of the actual departure.
flights_dt %>%
  group_by(minute = minute(dep_time)) %>%
  summarise(
    avg_delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  ) %>%
  ggplot(aes(minute, avg_delay)) +
  geom_line()
## `summarise()` ungrouping output (override with `.groups` argument)

# Same summary keyed on the minute of the *scheduled* departure time.
sched_dep <- flights_dt %>%
  group_by(minute = minute(sched_dep_time)) %>%
  summarise(
    avg_delay = mean(arr_delay, na.rm = TRUE),
    n = n()
  )
## `summarise()` ungrouping output (override with `.groups` argument)

# Average delay per scheduled minute.
sched_dep %>%
  ggplot(aes(minute, avg_delay)) +
  geom_line()

# Number of flights per scheduled minute.
sched_dep %>%
  ggplot(aes(minute, n)) +
  geom_line()

# Rounding ----
# Round every departure down to the start of its week and count flights
# per week.
ggplot(
  count(flights_dt, week = floor_date(dep_time, "week")),
  aes(week, n)
) +
  geom_line()

# Setting components ----
# An accessor on the left-hand side of an assignment modifies that component
# in place. The outer parentheses print the value that was just assigned.
(datetime <- ymd_hms("2016-07-08 12:34:56"))
## [1] "2016-07-08 12:34:56 UTC"
year(datetime) <- 2020
datetime
## [1] "2020-07-08 12:34:56 UTC"
month(datetime) <- 01
datetime
## [1] "2020-01-08 12:34:56 UTC"
hour(datetime) <- hour(datetime) + 1
datetime
## [1] "2020-01-08 13:34:56 UTC"
# update() sets several components at once and returns the new value.
update(datetime, year = 2020, month = 2, mday = 2, hour = 2)
## [1] "2020-02-02 02:34:56 UTC"
# Values that are too large roll over into the next unit: day 30 of
# February 2015 becomes March 2, and hour 400 lands 16 days and 16 hours later.
ymd("2015-02-01") %>%
update(mday = 30)
## [1] "2015-03-02"
ymd("2015-02-01") %>%
update(hour = 400)
## [1] "2015-02-17 16:00:00 UTC"
# Move every departure onto day 1 of its year so that only the time of day
# varies, then plot the distribution in 5-minute (300 s) bins.
ggplot(
  mutate(flights_dt, dep_hour = update(dep_time, yday = 1)),
  aes(dep_hour)
) +
  geom_freqpoly(binwidth = 300)

# Exercise 2 ----
# Compare the recorded dep_delay with the delay implied by
# dep_time - sched_dep_time and list the rows where they disagree.
# units = "mins" pins the unit of the difference: without it the unit is
# picked automatically from the data, so the bare number could mean seconds,
# minutes, hours or days depending on which rows are present.
flights_dt %>%
  select(contains("dep")) %>%
  mutate(
    cal_delay = as.numeric(dep_time - sched_dep_time, units = "mins")
  ) %>%
  filter(dep_delay != cal_delay)
# Exercise 2-1 ----
# For the mismatching rows, shift dep_time forward by one day and recompute
# the implied delay (again explicitly in minutes); any rows still returned
# disagree with dep_delay even after the shift.
flights_dt %>%
  select(contains("dep")) %>%
  mutate(
    cal_delay = as.numeric(dep_time - sched_dep_time, units = "mins")
  ) %>%
  filter(dep_delay != cal_delay) %>%
  mutate(dep_time = update(dep_time, mday = mday(dep_time) + 1)) %>%
  mutate(
    cal_delay = as.numeric(dep_time - sched_dep_time, units = "mins")
  ) %>%
  filter(dep_delay != cal_delay)
# Exercise 3 ----
# Compare the recorded air_time with arr_time - dep_time. The difference is
# requested explicitly in minutes; left to itself, the unit of a date-time
# difference is chosen automatically from the data.
flights_dt %>%
  mutate(cal_air_time = as.numeric(arr_time - dep_time, units = "mins")) %>%
  select(contains("air_time"))
# Attach the airport records for origin and destination so the time zone of
# each end of the flight can be inspected next to the times.
flights_dt %>%
  left_join(airports, by = c("origin" = "faa")) %>%
  left_join(
    airports,
    by = c("dest" = "faa"),
    suffix = c(".origin", ".dest")
  ) %>%
  select(dep_time, arr_time, air_time, contains("tzone"))
# Exercise 4 ----
# Average departure delay for each scheduled departure hour.
flights_dt %>%
  group_by(hour = hour(sched_dep_time)) %>%
  summarize(avg_dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
  ggplot(aes(hour, avg_dep_delay)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  labs(
    x = "Hour of the day",
    y = "Average departure delay (in minutes)"
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Exercise 5 ----
# Average departure and arrival delay per day of the week (by scheduled
# departure), shown as side-by-side bars.
flights_dt %>%
  mutate(dayweek = wday(sched_dep_time, label = TRUE)) %>%
  group_by(dayweek) %>%
  summarize(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    avg_arr_delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  # Reshape to one row per (day, delay type). Columns are named explicitly
  # rather than selected by position so the step survives column reordering.
  pivot_longer(
    cols = c(avg_dep_delay, avg_arr_delay),
    names_to = "delay",
    values_to = "minutes"
  ) %>%
  ggplot() +
  geom_col(
    mapping = aes(x = dayweek, y = minutes, fill = delay),
    position = "dodge"
  )
## `summarise()` ungrouping output (override with `.groups` argument)

# 3. Sections: Time Spans, Durations, Periods, Intervals; Exercises: 3, 5
# Durations ----
# Subtracting two dates yields a difftime; as.duration() turns it into a
# duration, which is always expressed in seconds.
h_age <- today() - ymd(19791014)
h_age
## Time difference of 14883 days
as.duration(h_age)
## [1] "1285891200s (~40.75 years)"
# Duration constructors (d-prefixed) are vectorised.
dseconds(15)
## [1] "15s"
dminutes(10)
## [1] "600s (~10 minutes)"
dhours(c(12, 24))
## [1] "43200s (~12 hours)" "86400s (~1 days)"
ddays(0:5)
## [1] "0s" "86400s (~1 days)" "172800s (~2 days)"
## [4] "259200s (~3 days)" "345600s (~4 days)" "432000s (~5 days)"
dweeks(3)
## [1] "1814400s (~3 weeks)"
# Here one duration-year is 31557600 s, i.e. 365.25 days.
dyears(1)
## [1] "31557600s (~1 years)"
# Durations can be multiplied and added.
2 * dyears(1)
## [1] "63115200s (~2 years)"
dyears(1) + dweeks(12) + dhours(15)
## [1] "38869200s (~1.23 years)"
# Durations can be added to and subtracted from dates.
tomorrow <- today() + ddays(1)
last_year <- today() - dyears(1)
# A duration is a fixed number of seconds, so adding one day across the
# EST -> EDT switch shown below moves the clock time from 13:00 to 14:00.
one_pm <- ymd_hms("2016-03-12 13:00:00", tz = "America/New_York")
one_pm
## [1] "2016-03-12 13:00:00 EST"
one_pm + ddays(1)
## [1] "2016-03-13 14:00:00 EDT"
# Periods ----
# Periods work in calendar units rather than seconds: adding days(1) keeps
# the 13:00 clock time across the EST -> EDT switch.
one_pm
## [1] "2016-03-12 13:00:00 EST"
one_pm + days(1)
## [1] "2016-03-13 13:00:00 EDT"
# Period constructors (no d prefix) are vectorised as well.
seconds(15)
## [1] "15S"
minutes(10)
## [1] "10M 0S"
hours(c(12, 24))
## [1] "12H 0M 0S" "24H 0M 0S"
days(7)
## [1] "7d 0H 0M 0S"
months(1:6)
## [1] "1m 0d 0H 0M 0S" "2m 0d 0H 0M 0S" "3m 0d 0H 0M 0S" "4m 0d 0H 0M 0S"
## [5] "5m 0d 0H 0M 0S" "6m 0d 0H 0M 0S"
weeks(3)
## [1] "21d 0H 0M 0S"
years(1)
## [1] "1y 0m 0d 0H 0M 0S"
# Periods can be multiplied and added; components are kept per unit rather
# than being normalised (note "25H" below).
10 * (months(6) + days(1))
## [1] "60m 10d 0H 0M 0S"
days(50) + hours(25) + minutes(2)
## [1] "50d 25H 2M 0S"
# Duration vs. period on a leap year: the duration-year does not land on
# the same calendar date one year later, the period-year does.
ymd("2016-01-01") + dyears(1)
## [1] "2016-12-31 06:00:00 UTC"
ymd("2016-01-01") + years(1)
## [1] "2017-01-01"
# Duration vs. period across a daylight-saving change.
one_pm + ddays(1)
## [1] "2016-03-13 14:00:00 EDT"
one_pm + days(1)
## [1] "2016-03-13 13:00:00 EDT"
# Flights whose arrival time is earlier than their departure time.
filter(flights_dt, arr_time < dep_time)

# Flag those flights as overnight and push their (scheduled) arrival one day
# forward. `overnight` must be computed first: the following expressions
# use it, and arr_time is overwritten right after.
flights_dt <- mutate(
  flights_dt,
  overnight = arr_time < dep_time,
  arr_time = arr_time + days(overnight * 1),
  sched_arr_time = sched_arr_time + days(overnight * 1)
)

# Check: overnight flights that still arrive before they depart.
filter(flights_dt, overnight, arr_time < dep_time)
# Intervals ----
# Dividing one period by another has no exact answer because the length of
# a year depends on which year it is; the recorded result is 365.25.
years(1) / days(1)
## [1] 365.25
# An interval (%--%) has a concrete start and end, so the division is exact
# for that particular span.
next_year <- today() + years(1)
(today() %--% next_year) / ddays(1)
## [1] 365
# %/% gives the whole number of periods that fit into the interval.
(today() %--% next_year) %/% days(1)
## [1] 365
# Exercise 3 ----
# First day of every month in 2015. Month periods are added to a Date so
# the result is a vector of dates; summing periods on their own only yields
# Period objects such as "2015y 1m 1d 0H 0M 0S".
year_2015 <- ymd("2015-01-01") + months(0:11)
year_2015
## [1] "2015-01-01" "2015-02-01" "2015-03-01" "2015-04-01" "2015-05-01"
## [6] "2015-06-01" "2015-07-01" "2015-08-01" "2015-09-01" "2015-10-01"
## [11] "2015-11-01" "2015-12-01"
# First day of every month in the current year: round today's date down to
# January 1 and add the same month offsets.
year_current <- floor_date(today(), unit = "year") + months(0:11)
year_current
## [1] "2020-01-01" "2020-02-01" "2020-03-01" "2020-04-01" "2020-05-01"
## [6] "2020-06-01" "2020-07-01" "2020-08-01" "2020-09-01" "2020-10-01"
## [11] "2020-11-01" "2020-12-01"
# Exercise 5 ----
# Number of one-month periods in the interval from today to one year
# from today.
(today() %--% (today() + years(1))) / months(1)
## [1] 12
# 4. Sections: Time Zones; No exercises
# Time zone of the current session, and the list of known zone names.
Sys.timezone()
## [1] "America/Chicago"
length(OlsonNames())
## [1] 594
head(OlsonNames())
## [1] "Africa/Abidjan" "Africa/Accra" "Africa/Addis_Ababa"
## [4] "Africa/Algiers" "Africa/Asmara" "Africa/Asmera"
# Three different clock times in three zones; the outer parentheses print
# each value as it is assigned.
(x1 <- ymd_hms("2015-06-01 12:00:00", tz = "America/New_York"))
## [1] "2015-06-01 12:00:00 EDT"
(x2 <- ymd_hms("2015-06-01 18:00:00", tz = "Europe/Copenhagen"))
## [1] "2015-06-01 18:00:00 CEST"
(x3 <- ymd_hms("2015-06-02 04:00:00", tz = "Pacific/Auckland"))
## [1] "2015-06-02 04:00:00 NZST"
# All three are the same instant: the differences are zero.
x1 - x2
## Time difference of 0 secs
x1 - x3
## Time difference of 0 secs
# Combining them with c() leaves a single time zone for the whole vector;
# in the recorded output everything is displayed in the zone of x1.
x4 <- c(x1, x2, x3)
x4
## [1] "2015-06-01 12:00:00 EDT" "2015-06-01 12:00:00 EDT"
## [3] "2015-06-01 12:00:00 EDT"
# with_tz() changes only how the instant is displayed (difference is 0).
x4a <- with_tz(x4, tzone = "Australia/Lord_Howe")
x4a
## [1] "2015-06-02 02:30:00 +1030" "2015-06-02 02:30:00 +1030"
## [3] "2015-06-02 02:30:00 +1030"
x4a - x4
## Time differences in secs
## [1] 0 0 0
# force_tz() keeps the clock time and relabels the zone, which changes the
# underlying instant (difference is -14.5 hours).
x4b <- force_tz(x4, tzone = "Australia/Lord_Howe")
x4b
## [1] "2015-06-01 12:00:00 +1030" "2015-06-01 12:00:00 +1030"
## [3] "2015-06-01 12:00:00 +1030"
x4b - x4
## Time differences in hours
## [1] -14.5 -14.5 -14.5