# lubridate training
# The purpose of this notebook is to illustrate how the lubridate package can be used to work with dates and times.
library(dplyr)
library(lubridate)
library(nycflights13)
library(ggplot2)
# Preview the flights tibble (presumably the data set attached by
# library(nycflights13) above). The year, month, day, hour and minute
# columns shown here are the inputs to the date/date-time examples below.
flights
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Parse dates from strings. The function name spells out the order in which
# the year (y), month (m) and day (d) appear in the input.
ymd("2017-01-31")
## [1] "2017-01-31"
mdy("January 31st, 2017")
## [1] "2017-01-31"
dmy("31-Jan-2017")
## [1] "2017-01-31"
# Parse a date-time in a given time zone. "Pacific/Auckland" is the canonical
# tz database name for New Zealand; the short alias "NZ" is only a
# backward-compatibility link and is not installed on every system.
ymd_hms("2011-06-04 12:00:00", tz = "Pacific/Auckland")
## [1] "2011-06-04 12:00:00 NZST"
# Parsed date-times are POSIXct objects.
class(ymd_hms("2011-06-04 12:00:00", tz = "Pacific/Auckland"))
## [1] "POSIXct" "POSIXt"
# Build a date / date-time from individual numeric components.
make_date(year = 1982, month = 7, day = 24)
## [1] "1982-07-24"
make_datetime(
  year = 1982, month = 7, day = 24,
  hour = 16, min = 30, sec = 30,
  tz = "Pacific/Auckland"
)
## [1] "1982-07-24 16:30:30 NZST"
# Combine the separate year/month/day columns into a single Date column.
flights %>%
  select(year, month, day, hour, minute) %>%
  mutate(
    dept_date = make_date(year = year, month = month, day = day)
  )
## # A tibble: 336,776 x 6
## year month day hour minute dept_date
## <int> <int> <int> <dbl> <dbl> <date>
## 1 2013 1 1 5 15 2013-01-01
## 2 2013 1 1 5 29 2013-01-01
## 3 2013 1 1 5 40 2013-01-01
## 4 2013 1 1 5 45 2013-01-01
## 5 2013 1 1 6 0 2013-01-01
## 6 2013 1 1 5 58 2013-01-01
## 7 2013 1 1 6 0 2013-01-01
## 8 2013 1 1 6 0 2013-01-01
## 9 2013 1 1 6 0 2013-01-01
## 10 2013 1 1 6 0 2013-01-01
## # ... with 336,766 more rows
# Combine year/month/day/hour/minute into a single date-time column.
# NOTE(review): no tz is passed, so make_datetime()'s default time zone
# applies (UTC per the lubridate documentation) -- confirm this is intended
# before comparing the result with other date-time columns.
flights %>%
  select(year, month, day, hour, minute) %>%
  mutate(
    dept_datetime = make_datetime(
      year = year, month = month, day = day,
      hour = hour, min = minute
    )
  )
## # A tibble: 336,776 x 6
## year month day hour minute dept_datetime
## <int> <int> <int> <dbl> <dbl> <dttm>
## 1 2013 1 1 5 15 2013-01-01 05:15:00
## 2 2013 1 1 5 29 2013-01-01 05:29:00
## 3 2013 1 1 5 40 2013-01-01 05:40:00
## 4 2013 1 1 5 45 2013-01-01 05:45:00
## 5 2013 1 1 6 0 2013-01-01 06:00:00
## 6 2013 1 1 5 58 2013-01-01 05:58:00
## 7 2013 1 1 6 0 2013-01-01 06:00:00
## 8 2013 1 1 6 0 2013-01-01 06:00:00
## 9 2013 1 1 6 0 2013-01-01 06:00:00
## 10 2013 1 1 6 0 2013-01-01 06:00:00
## # ... with 336,766 more rows
# today() returns a Date; now() returns a date-time (POSIXct).
class(today())
## [1] "Date"
class(now())
## [1] "POSIXct" "POSIXt"
# Capture the current instant once so that every component below is taken
# from the same moment. Calling now() separately for each accessor could
# straddle a second/minute/hour boundary and yield inconsistent parts.
current_time <- now()
# Accessor functions extract individual components of a date-time.
year(current_time)
## [1] 2018
month(current_time)
## [1] 6
day(current_time)
## [1] 18
hour(current_time)
## [1] 14
minute(current_time)
## [1] 19
second(current_time)
## [1] 34.9199
# Day of the year.
yday(current_time)
## [1] 169
# Day of the week; week_start falls back to 1 when the
# lubridate.week.start option is not set.
wday(current_time, week_start = getOption("lubridate.week.start", 1))
## [1] 1
# Round to the nearest hour, down to the hour, and up to the hour.
round_date(current_time, unit = "hour")
## [1] "2018-06-18 14:00:00 NZST"
floor_date(current_time, unit = "hour")
## [1] "2018-06-18 14:00:00 NZST"
ceiling_date(current_time, unit = "hour")
## [1] "2018-06-18 15:00:00 NZST"
# Why two classes for spans? Because the timeline is not as reliable as the
# number line. The Duration class will always supply mathematically precise
# results. A duration year will always equal 365 days. Periods, on the other
# hand, fluctuate the same way the timeline does to give intuitive results.
# This makes them useful for modeling clock times. For example, durations
# will be honest in the face of a leap year, but periods may return what you
# want.
# Express an age (time elapsed since a birth date) as a Duration and as a
# Period, then convert it to different units.
birth_date <- ymd(19820724)
age_duration <- as.duration(today() - birth_date)
age_period <- as.period(today() - birth_date)
class(age_duration)
## [1] "Duration"
## attr(,"package")
## [1] "lubridate"
class(age_period)
## [1] "Period"
## attr(,"package")
## [1] "lubridate"
# seconds(), hours() and months() are Period *constructors*: they reuse the
# number they are given as a count of that unit instead of converting it
# (hours(age_period) printed "1132963200H 0M 0S", i.e. the age in seconds
# relabelled as hours). Use time_length() to convert a span to a unit.
time_length(age_duration, unit = "second")
## [1] 1132963200
time_length(age_duration, unit = "hour")
## [1] 314712
# Calendar months have no fixed length, so count whole months over the
# actual interval between the two dates.
interval(birth_date, today()) %/% months(1)
## [1] 430
# Histogram of departures per day for January 2013, one bin per day.
period_start <- ymd(20130101)
period_end <- ymd(20130201) # exclusive upper bound
# Subtracting two Dates yields a difftime object; geom_histogram() expects
# a plain whole number of bins, so convert the day count explicitly.
n_days <- as.integer(difftime(period_end, period_start, units = "days"))
flights %>%
  # Only the columns needed to build the date are kept.
  select(year, month, day) %>%
  mutate(dept_date = make_date(year, month, day)) %>%
  filter(dept_date >= period_start, dept_date < period_end) %>%
  ggplot(aes(x = dept_date)) +
  geom_histogram(bins = n_days)