# Each string is parsed with the lubridate helper whose name spells out the
# order of the date components in the input (m = month, d = day, y = year).

# Full month name, day, year.
d1 <- "January 1, 2010"
mdy(d1)
## [1] "2010-01-01"

# Year, abbreviated month name, day.
d2 <- "2015-Mar-07"
ymd(d2)
## [1] "2015-03-07"

# Day, abbreviated month name, year.
d3 <- "06-Jun-2017"
dmy(d3)
## [1] "2017-06-06"

# A character vector is parsed element-wise; the parentheses around the year
# do not prevent parsing.
d4 <- c("August 19 (2015)", "July 1 (2015)")
mdy(d4)
## [1] "2015-08-19" "2015-07-01"

# Numeric month/day with a two-digit year.
d5 <- "12/30/14"
mdy(d5)
## [1] "2014-12-30"
#' Build a date-time from date parts and a clock time encoded as HHMM
#'
#' @param year,month,day Date components, passed straight to
#'   `make_datetime()`.
#' @param time Clock time stored as a single number: the integer quotient by
#'   100 is the hour and the remainder is the minute (e.g. 517 means 05:17).
#' @return Whatever `make_datetime()` returns for the decoded hour and minute.
make_datetime_100 <- function(year, month, day, time) {
  hour_part <- time %/% 100
  minute_part <- time %% 100
  make_datetime(year, month, day, hour = hour_part, min = minute_part)
}
# Rebuild the four HHMM-encoded clock columns of `flights` as date-times and
# keep only the route, delay and time columns.
flights_dt <- flights %>%
  # Drop rows where the departure or the arrival time is missing.
  filter(!(is.na(dep_time) | is.na(arr_time))) %>%
  mutate(
    dep_time       = make_datetime_100(year, month, day, dep_time),
    arr_time       = make_datetime_100(year, month, day, arr_time),
    sched_dep_time = make_datetime_100(year, month, day, sched_dep_time),
    sched_arr_time = make_datetime_100(year, month, day, sched_arr_time)
  ) %>%
  # ends_with("time") keeps every column whose name ends in "time", which
  # includes air_time as well as the four rebuilt columns.
  select(origin, dest, ends_with("delay"), ends_with("time"))
# If the data are consistent, dep_time should equal sched_dep_time + dep_delay.
flights_dt %>%
  # dep_delay is in minutes, while adding a plain number to a date-time adds
  # seconds, hence the factor of 60.
  mutate(dep_time_ = sched_dep_time + 60 * dep_delay) %>%
  # Keep only rows where the reconstructed and recorded departures disagree.
  filter(dep_time != dep_time_) %>%
  select(dep_time_, dep_time, sched_dep_time, dep_delay)
## # A tibble: 1,205 × 4
## dep_time_ dep_time sched_dep_time dep_delay
## <dttm> <dttm> <dttm> <dbl>
## 1 2013-01-02 08:48:00 2013-01-01 08:48:00 2013-01-01 18:35:00 853
## 2 2013-01-03 00:42:00 2013-01-02 00:42:00 2013-01-02 23:59:00 43
## 3 2013-01-03 01:26:00 2013-01-02 01:26:00 2013-01-02 22:50:00 156
## 4 2013-01-04 00:32:00 2013-01-03 00:32:00 2013-01-03 23:59:00 33
## 5 2013-01-04 00:50:00 2013-01-03 00:50:00 2013-01-03 21:45:00 185
## 6 2013-01-04 02:35:00 2013-01-03 02:35:00 2013-01-03 23:59:00 156
## 7 2013-01-05 00:25:00 2013-01-04 00:25:00 2013-01-04 23:59:00 26
## 8 2013-01-05 01:06:00 2013-01-04 01:06:00 2013-01-04 22:45:00 141
## 9 2013-01-06 00:14:00 2013-01-05 00:14:00 2013-01-05 23:59:00 15
## 10 2013-01-06 00:37:00 2013-01-05 00:37:00 2013-01-05 22:30:00 127
## # … with 1,195 more rows
# Compare the elapsed time between the departure and arrival date-times with
# the recorded air time.
flights_dt %>%
  mutate(
    # Request minutes explicitly: `arr_time - dep_time` yields a difftime whose
    # unit is chosen automatically from the data, so a bare as.numeric() could
    # silently return seconds, hours or days instead of minutes.
    flight_duration = as.numeric(arr_time - dep_time, units = "mins"),
    air_time_mins = air_time,
    # Positive: the clock-time difference exceeds the recorded air time.
    diff = flight_duration - air_time_mins
  ) %>%
  select(origin, dest, flight_duration, air_time_mins, diff)
## # A tibble: 328,063 × 5
## origin dest flight_duration air_time_mins diff
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 EWR IAH 193 227 -34
## 2 LGA IAH 197 227 -30
## 3 JFK MIA 221 160 61
## 4 JFK BQN 260 183 77
## 5 LGA ATL 138 116 22
## 6 EWR ORD 106 150 -44
## 7 EWR FLL 198 158 40
## 8 LGA IAD 72 53 19
## 9 JFK MCO 161 140 21
## 10 LGA ORD 115 138 -23
## # … with 328,053 more rows
# Mean departure delay for each scheduled departure hour, plotted as points
# with a smoothed trend line.
flights_dt %>%
  mutate(sched_dep_hour = hour(sched_dep_time)) %>%
  group_by(sched_dep_hour) %>%
  summarise(dep_delay = mean(dep_delay)) %>%
  ggplot(aes(x = sched_dep_hour, y = dep_delay)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Mean departure and arrival delay for each day of the week of the scheduled
# departure; `dow` is the numeric day index returned by wday().
flights_dt %>%
  mutate(dow = wday(sched_dep_time)) %>%
  group_by(dow) %>%
  summarise(
    dep_delay = mean(dep_delay),
    # Missing arrival delays are dropped before averaging.
    arr_delay = mean(arr_delay, na.rm = TRUE)
  ) %>%
  # Print every row rather than the default truncated view.
  print(n = Inf)
## # A tibble: 7 × 3
## dow dep_delay arr_delay
## <dbl> <dbl> <dbl>
## 1 1 11.5 4.82
## 2 2 14.7 9.65
## 3 3 10.6 5.39
## 4 4 11.7 7.05
## 5 5 16.1 11.7
## 6 6 14.7 9.07
## 7 7 7.62 -1.45
# Bar chart of the mean departure delay by labelled day of the week of the
# actual departure.
flights_dt %>%
  mutate(wday = wday(dep_time, label = TRUE)) %>%
  group_by(wday) %>%
  summarize(ave_dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
  ggplot(aes(x = wday, y = ave_dep_delay)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col()
# Bar chart of the mean arrival delay by labelled day of the week of the
# actual departure.
flights_dt %>%
  mutate(wday = wday(dep_time, label = TRUE)) %>%
  group_by(wday) %>%
  summarize(ave_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  ggplot(aes(x = wday, y = ave_arr_delay)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col()
# First day of every month in 2015: shift January 1st by 0 to 11 whole months.
ymd("2015-01-01") + months(seq(0, 11))
## [1] "2015-01-01" "2015-02-01" "2015-03-01" "2015-04-01" "2015-05-01"
## [6] "2015-06-01" "2015-07-01" "2015-08-01" "2015-09-01" "2015-10-01"
## [11] "2015-11-01" "2015-12-01"
# First day of every month in the current year: round today's date down to
# January 1st, then shift it by 0 to 11 whole months.
floor_date(today(), "year") + months(seq(0, 11))
## [1] "2022-01-01" "2022-02-01" "2022-03-01" "2022-04-01" "2022-05-01"
## [6] "2022-06-01" "2022-07-01" "2022-08-01" "2022-09-01" "2022-10-01"
## [11] "2022-11-01" "2022-12-01"