# ymd() parses year-month-day strings; an element it cannot parse comes back
# as NA together with a warning, as the output below shows.
ymd(c("2010-10-10", "bananas"))
## Warning: 1 failed to parse.
## [1] "2010-10-10" NA
# Sample date strings, each written with a different component order and
# separator style.
d1 <- "January 1, 2010"
d2 <- "2015-Mar-07"
d3 <- "06-Jun-2017"
d4 <- c("August 19 (2015)", "July 1 (2015)")
d5 <- "12/30/14" # Dec 30, 2014
# Each string is parsed with the function whose name spells out the order of
# its components: mdy = month-day-year, ymd = year-month-day,
# dmy = day-month-year.
mdy(d1)
## [1] "2010-01-01"
ymd(d2)
## [1] "2015-03-07"
dmy(d3)
## [1] "2017-06-06"
# Vector input is parsed element by element; the parentheses around the year
# do not prevent parsing (see output).
mdy(d4)
## [1] "2015-08-19" "2015-07-01"
# The two-digit year "14" is read as 2014 (see output).
mdy(d5)
## [1] "2014-12-30"
#' Build a date-time from date parts and an HHMM-style clock value
#'
#' @param year,month,day Date components, passed to `make_datetime()` as is.
#' @param time Clock value split into an hour (`time %/% 100`) and a minute
#'   (`time %% 100`), e.g. 517 gives hour 5 and minute 17.
#' @param tz Time zone handed to `make_datetime()`; defaults to "EST".
#' @return The result of `make_datetime()` for those components, with the
#'   seconds fixed at 0.
make_datetime_100 <- function(year, month, day, time, tz = "EST") {
  hour_part <- time %/% 100
  minute_part <- time %% 100
  make_datetime(year, month, day, hour_part, minute_part, 0, tz)
}
# Two-column lookup table: airport FAA code and its time zone name.
airports1 <- dplyr::select(airports, faa, tzone)

# Attach the destination airport's time zone to every flight, then print a
# column overview; the result of the pipeline is still assigned to flights1.
flights1 <- flights %>%
  left_join(airports1, by = c("dest" = "faa")) %>%
  rename(dest_tzone = tzone) %>%
  glimpse()
## Rows: 336,776
## Columns: 20
## $ year <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2…
## $ month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ day <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ dep_time <int> 517, 533, 542, 544, 554, 554, 555, 557, 557, 558, 558, …
## $ sched_dep_time <int> 515, 529, 540, 545, 600, 558, 600, 600, 600, 600, 600, …
## $ dep_delay <dbl> 2, 4, 2, -1, -6, -4, -5, -3, -3, -2, -2, -2, -2, -2, -1…
## $ arr_time <int> 830, 850, 923, 1004, 812, 740, 913, 709, 838, 753, 849,…
## $ sched_arr_time <int> 819, 830, 850, 1022, 837, 728, 854, 723, 846, 745, 851,…
## $ arr_delay <dbl> 11, 20, 33, -18, -25, 12, 19, -14, -8, 8, -2, -3, 7, -1…
## $ carrier <chr> "UA", "UA", "AA", "B6", "DL", "UA", "B6", "EV", "B6", "…
## $ flight <int> 1545, 1714, 1141, 725, 461, 1696, 507, 5708, 79, 301, 4…
## $ tailnum <chr> "N14228", "N24211", "N619AA", "N804JB", "N668DN", "N394…
## $ origin <chr> "EWR", "LGA", "JFK", "JFK", "LGA", "EWR", "EWR", "LGA",…
## $ dest <chr> "IAH", "IAH", "MIA", "BQN", "ATL", "ORD", "FLL", "IAD",…
## $ air_time <dbl> 227, 227, 160, 183, 116, 150, 158, 53, 140, 138, 149, 1…
## $ distance <dbl> 1400, 1416, 1089, 1576, 762, 719, 1065, 229, 944, 733, …
## $ hour <dbl> 5, 5, 5, 5, 6, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 6, 6…
## $ minute <dbl> 15, 29, 40, 45, 0, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59, 0…
## $ time_hour <dttm> 2013-01-01 05:00:00, 2013-01-01 05:00:00, 2013-01-01 0…
## $ dest_tzone <chr> "America/Chicago", "America/Chicago", "America/New_York…
# Attach the origin airport's time zone as well. The earlier join's `tzone`
# column was renamed to `dest_tzone`, so this join adds a fresh `tzone` that
# is renamed to `origin_tzone`. The assignment is written up front with `<-`
# so it is obvious that flights1 is being overwritten.
flights1 <- flights1 %>%
  left_join(airports1, by = c("origin" = "faa")) %>%
  rename(origin_tzone = tzone)
# Convert the four HHMM clock columns into date-times and keep only the
# columns needed for the analysis. Rows lacking an actual departure or
# arrival time are dropped first.
# NOTE(review): `origin_tzone` / `dest_tzone` are whole columns, so `tz`
# receives a vector here, while a date-time column presumably carries a single
# time zone. Confirm make_datetime() treats this as intended.
flights_dt <- flights1 %>%
  filter(!(is.na(dep_time) | is.na(arr_time))) %>%
  mutate(
    dep_time = make_datetime_100(
      year, month, day, dep_time,
      tz = origin_tzone
    ),
    arr_time = make_datetime_100(
      year, month, day, arr_time,
      tz = dest_tzone
    ),
    sched_dep_time = make_datetime_100(
      year, month, day, sched_dep_time,
      tz = origin_tzone
    ),
    sched_arr_time = make_datetime_100(
      year, month, day, sched_arr_time,
      tz = dest_tzone
    )
  ) %>%
  dplyr::select(
    origin, dest,
    ends_with("delay"), ends_with("time"),
    air_time, hour,
    origin_tzone, dest_tzone
  )
# Flights whose actual departure is later than midnight on 2013-06-01, with
# the cutoff expressed in the session's time zone.
filter(flights_dt, dep_time > ymd("2013-06-01", tz = Sys.timezone()))
## # A tibble: 194,177 × 12
## origin dest dep_delay arr_delay dep_time sched_dep_time
## <chr> <chr> <dbl> <dbl> <dttm> <dttm>
## 1 EWR CLT -13 -34 2013-10-01 04:47:00 2013-10-01 05:00:00
## 2 EWR IAH 5 -22 2013-10-01 05:22:00 2013-10-01 05:17:00
## 3 JFK MIA -9 -46 2013-10-01 05:36:00 2013-10-01 05:45:00
## 4 LGA IAH -6 -26 2013-10-01 05:39:00 2013-10-01 05:45:00
## 5 JFK SJU -6 -16 2013-10-01 05:39:00 2013-10-01 05:45:00
## 6 JFK BQN -6 -20 2013-10-01 05:44:00 2013-10-01 05:50:00
## 7 JFK IAD -11 -23 2013-10-01 05:49:00 2013-10-01 06:00:00
## 8 LGA PHL -10 -12 2013-10-01 05:50:00 2013-10-01 06:00:00
## 9 LGA DCA -10 -10 2013-10-01 05:50:00 2013-10-01 06:00:00
## 10 EWR ORD -9 -3 2013-10-01 05:51:00 2013-10-01 06:00:00
## # ℹ 194,167 more rows
## # ℹ 6 more variables: arr_time <dttm>, sched_arr_time <dttm>, air_time <dbl>,
## # hour <dbl>, origin_tzone <chr>, dest_tzone <chr>
# Mean arrival delay per `hour`, restricted to flights with a non-negative
# arrival delay. The data is already reduced to one row per hour, so the bars
# are drawn directly with geom_col(). The previous stat_summary(geom = "bar")
# re-summarised single values with the implicit mean_se() default and printed
# a "No summary function supplied" message.
flights_dt %>%
  filter(arr_delay >= 0) %>%
  group_by(hour) %>%
  summarise(mean_arr_delay = mean(arr_delay, na.rm = TRUE)) %>%
  ggplot(aes(x = hour, y = mean_arr_delay)) +
  geom_col()
Answer: Among flights that arrived on time or late (arr_delay >= 0), those in the 7 pm hour had the worst arrival delay on average.
# Mean air time per `hour`, restricted to flights with a non-negative arrival
# delay. The data is already reduced to one row per hour, so the bars are
# drawn directly with geom_col(). The previous stat_summary(geom = "bar")
# re-summarised single values with the implicit mean_se() default and printed
# a "No summary function supplied" message.
flights_dt %>%
  filter(arr_delay >= 0) %>%
  group_by(hour) %>%
  summarise(mean_air_time = mean(air_time, na.rm = TRUE)) %>%
  ggplot(aes(x = hour, y = mean_air_time)) +
  geom_col()
Answer: Among flights that arrived on time or late (arr_delay >= 0), those in the 5 am hour had the longest air time on average and those in the 10 pm hour had the shortest.
# Parse a timestamp written with a "pm" suffix in the session's time zone,
# then round it to the nearest week boundary.
dt2 <- ymd_hms("2023-04-05 03:12:34pm", tz = Sys.timezone())
round_date(dt2, unit = "week")
## [1] "2023-04-09 EDT"
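Answer: It rounds to the first day of the following week (Sunday, 2023-04-09). When rounding to the week, the midpoint of the week is Wednesday at noon; 2023-04-05 is a Wednesday and 3:12 pm is already past that midpoint, so the date-time rounds up to the start of the next week rather than down to the start of the current one.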