1. Purpose.

The purpose of this notebook is to illustrate how the lubridate package can be used to work with dates and times.

2. Load libraries and view practice dataset.

library(dplyr)
library(lubridate)
library(nycflights13)
library(ggplot2)
# Print the flights tibble to preview its columns and first rows.
flights
## # A tibble: 336,776 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

3. Convert vectors to dates and datetimes.

# Parse character strings into Date objects. The function name spells out the
# order in which the year (y), month (m) and day (d) appear in the input.
ymd("2017-01-31")
## [1] "2017-01-31"
mdy("January 31st, 2017")
## [1] "2017-01-31"
dmy("31-Jan-2017")
## [1] "2017-01-31"
# Parse a date-time string into a POSIXct value in a given time zone. The
# canonical IANA name "Pacific/Auckland" is used rather than the legacy "NZ"
# alias, which is only a backward-compatibility link in the tz database.
ymd_hms("2011-06-04 12:00:00", tz = "Pacific/Auckland")
## [1] "2011-06-04 12:00:00 NZST"
class(ymd_hms("2011-06-04 12:00:00", tz = "Pacific/Auckland"))
## [1] "POSIXct" "POSIXt"
# Build a Date or a date-time from individual numeric components.
make_date(year = 1982, month = 7, day = 24)
## [1] "1982-07-24"
make_datetime(
  year = 1982, month = 7, day = 24,
  hour = 16, min = 30, sec = 30,
  tz = "Pacific/Auckland"
)
## [1] "1982-07-24 16:30:30 NZST"
# Keep the date/time component columns and append a Date column assembled
# from the year, month and day of each flight.
flights %>%
  select(year:day, hour, minute) %>%
  mutate(
    dept_date = make_date(year = year, month = month, day = day)
  )
## # A tibble: 336,776 x 6
##     year month   day  hour minute dept_date 
##    <int> <int> <int> <dbl>  <dbl> <date>    
##  1  2013     1     1     5     15 2013-01-01
##  2  2013     1     1     5     29 2013-01-01
##  3  2013     1     1     5     40 2013-01-01
##  4  2013     1     1     5     45 2013-01-01
##  5  2013     1     1     6      0 2013-01-01
##  6  2013     1     1     5     58 2013-01-01
##  7  2013     1     1     6      0 2013-01-01
##  8  2013     1     1     6      0 2013-01-01
##  9  2013     1     1     6      0 2013-01-01
## 10  2013     1     1     6      0 2013-01-01
## # ... with 336,766 more rows
# Keep the date/time component columns and append a date-time column
# assembled from the year, month, day, hour and minute of each flight.
flights %>%
  select(year:day, hour, minute) %>%
  mutate(
    dept_datetime = make_datetime(
      year = year, month = month, day = day, hour = hour, min = minute
    )
  )
## # A tibble: 336,776 x 6
##     year month   day  hour minute dept_datetime      
##    <int> <int> <int> <dbl>  <dbl> <dttm>             
##  1  2013     1     1     5     15 2013-01-01 05:15:00
##  2  2013     1     1     5     29 2013-01-01 05:29:00
##  3  2013     1     1     5     40 2013-01-01 05:40:00
##  4  2013     1     1     5     45 2013-01-01 05:45:00
##  5  2013     1     1     6      0 2013-01-01 06:00:00
##  6  2013     1     1     5     58 2013-01-01 05:58:00
##  7  2013     1     1     6      0 2013-01-01 06:00:00
##  8  2013     1     1     6      0 2013-01-01 06:00:00
##  9  2013     1     1     6      0 2013-01-01 06:00:00
## 10  2013     1     1     6      0 2013-01-01 06:00:00
## # ... with 336,766 more rows

4. Extract day or time information from dates and datetimes.

# today() returns a Date.
class(today())
## [1] "Date"
# Read the clock once and reuse the value, so every component below is
# extracted from the same instant instead of a fresh reading per call.
current_time <- now()
class(current_time)
## [1] "POSIXct" "POSIXt"
# Accessors for the individual components of a date-time.
year(current_time)
## [1] 2018
month(current_time)
## [1] 6
day(current_time)
## [1] 18
hour(current_time)
## [1] 14
minute(current_time)
## [1] 19
second(current_time)
## [1] 34.9199
# Day of the year.
yday(current_time)
## [1] 169
# Day of the week as a number; week_start falls back to 1 when the
# "lubridate.week.start" option is not set.
wday(current_time, week_start = getOption("lubridate.week.start", 1))
## [1] 1
# Round to the nearest hour, down to the hour, and up to the next hour.
round_date(current_time, unit = "hour")
## [1] "2018-06-18 14:00:00 NZST"
floor_date(current_time, unit = "hour")
## [1] "2018-06-18 14:00:00 NZST"
ceiling_date(current_time, unit = "hour")
## [1] "2018-06-18 15:00:00 NZST"

5. Work with periods and durations.

# Why two classes for spans? Because the timeline is not as reliable as the number line. The Duration class will always supply mathematically precise results. A duration year will always equal 365 days. Periods, on the other hand, fluctuate the same way the timeline does to give intuitive results. This makes them useful for modeling clock times. For example, durations will be honest in the face of a leap year, but periods may return what you want: adding a one-year duration to 2016-01-01 lands on 2016-12-31, whereas adding a one-year period lands on 2017-01-01.

# Express an age both as a Duration (an exact number of seconds) and as a
# Period (clock/calendar units). The recorded outputs correspond to a run on
# 2018-06-18, i.e. 13113 days after the start date.
birth_date <- ymd(19820724)
age_duration <- as.duration(today() - birth_date)
age_period <- as.period(today() - birth_date)
class(age_duration)
## [1] "Duration"
## attr(,"package")
## [1] "lubridate"
class(age_period)
## [1] "Period"
## attr(,"package")
## [1] "lubridate"
# seconds(), hours() and months() are constructors that build a NEW Period
# from a number; they do not convert an existing span into that unit. Use
# time_length() to express the age in a chosen unit instead.
time_length(age_duration, unit = "second")
## [1] 1132963200
time_length(age_duration, unit = "hour")
## [1] 314712
# Months vary in length, so measure them over the actual calendar interval
# rather than over a fixed number of seconds.
time_length(interval(birth_date, today()), unit = "month")
## [1] 430.8065
# Histogram of departures in January 2013 with one bin per calendar day.
window_start <- ymd(20130101)
window_end <- ymd(20130201)
# Subtracting two Dates yields a classed difftime; convert it to a plain
# integer so `bins` receives an unambiguous bin count.
days_in_window <- as.integer(
  difftime(window_end, window_start, units = "days")
)
flights %>%
  select(year, month, day, hour, minute) %>%
  mutate(dept_date = make_date(year, month, day)) %>%
  # Half-open window: includes window_start, excludes window_end.
  filter(dept_date >= window_start, dept_date < window_end) %>%
  ggplot(aes(x = dept_date)) +
  geom_histogram(bins = days_in_window)