Show the code
library(tidyverse)
library(nycflights13)
library(ggh4x)
library(ggforce)
- 1
- A collection of packages, including the lubridate and ggplot2 packages.
- 2
- A package that includes the flights and airlines data used in this tutorial.
library(tidyverse)
library(nycflights13)
library(ggh4x)
library(ggforce)
The `nycflights13` package comes with a number of data sets, including `flights` and `airlines`, which will be used in this exercise.
# Keep only the columns needed for the delay analysis: the carrier code,
# the flight's time_hour date-time, and the arrival delay.
flights <- flights %>%
  select(carrier, time_hour, arr_delay)

# Preview the first ten rows of the trimmed table.
flights %>%
  head(10)
# A tibble: 10 × 3
carrier time_hour arr_delay
<chr> <dttm> <dbl>
1 UA 2013-01-01 05:00:00 11
2 UA 2013-01-01 05:00:00 20
3 AA 2013-01-01 05:00:00 33
4 B6 2013-01-01 05:00:00 -18
5 DL 2013-01-01 06:00:00 -25
6 UA 2013-01-01 05:00:00 12
7 B6 2013-01-01 06:00:00 19
8 EV 2013-01-01 06:00:00 -14
9 B6 2013-01-01 06:00:00 -8
10 AA 2013-01-01 06:00:00 8
# Preview the airlines lookup table (carrier code -> airline name).
airlines %>%
  head(10)
# A tibble: 10 × 2
carrier name
<chr> <chr>
1 9E Endeavor Air Inc.
2 AA American Airlines Inc.
3 AS Alaska Airlines Inc.
4 B6 JetBlue Airways
5 DL Delta Air Lines Inc.
6 EV ExpressJet Airlines Inc.
7 F9 Frontier Airlines Inc.
8 FL AirTran Airways Corporation
9 HA Hawaiian Airlines Inc.
10 MQ Envoy Air
# Attach the airline name to every flight by matching on the carrier code.
# A left join keeps every flight row, even if a carrier had no match.
flights <- flights %>%
  left_join(airlines, by = "carrier")

# Preview the joined table; it now carries the extra `name` column.
flights %>%
  head(10)
# A tibble: 10 × 4
carrier time_hour arr_delay name
<chr> <dttm> <dbl> <chr>
1 UA 2013-01-01 05:00:00 11 United Air Lines Inc.
2 UA 2013-01-01 05:00:00 20 United Air Lines Inc.
3 AA 2013-01-01 05:00:00 33 American Airlines Inc.
4 B6 2013-01-01 05:00:00 -18 JetBlue Airways
5 DL 2013-01-01 06:00:00 -25 Delta Air Lines Inc.
6 UA 2013-01-01 05:00:00 12 United Air Lines Inc.
7 B6 2013-01-01 06:00:00 19 JetBlue Airways
8 EV 2013-01-01 06:00:00 -14 ExpressJet Airlines Inc.
9 B6 2013-01-01 06:00:00 -8 JetBlue Airways
10 AA 2013-01-01 06:00:00 8 American Airlines Inc.
# Restrict the analysis to four airlines, selected by their full names.
flights <- flights %>%
  filter(name %in% c(
    "American Airlines Inc.",
    "Delta Air Lines Inc.",
    "JetBlue Airways",
    "United Air Lines Inc."
  ))

# Preview the filtered table.
flights %>%
  head(10)
# A tibble: 10 × 4
carrier time_hour arr_delay name
<chr> <dttm> <dbl> <chr>
1 UA 2013-01-01 05:00:00 11 United Air Lines Inc.
2 UA 2013-01-01 05:00:00 20 United Air Lines Inc.
3 AA 2013-01-01 05:00:00 33 American Airlines Inc.
4 B6 2013-01-01 05:00:00 -18 JetBlue Airways
5 DL 2013-01-01 06:00:00 -25 Delta Air Lines Inc.
6 UA 2013-01-01 05:00:00 12 United Air Lines Inc.
7 B6 2013-01-01 06:00:00 19 JetBlue Airways
8 B6 2013-01-01 06:00:00 -8 JetBlue Airways
9 AA 2013-01-01 06:00:00 8 American Airlines Inc.
10 B6 2013-01-01 06:00:00 -2 JetBlue Airways
# Derive the month from the time_hour date-time. label = TRUE returns the
# month as an ordered factor, and abbr = FALSE spells the name out in full
# ("January" rather than "Jan").
flights <- flights %>%
  mutate(month = month(time_hour, label = TRUE, abbr = FALSE))

# Preview the table with the new `month` column.
flights %>%
  head(10)
# A tibble: 10 × 5
carrier time_hour arr_delay name month
<chr> <dttm> <dbl> <chr> <ord>
1 UA 2013-01-01 05:00:00 11 United Air Lines Inc. January
2 UA 2013-01-01 05:00:00 20 United Air Lines Inc. January
3 AA 2013-01-01 05:00:00 33 American Airlines Inc. January
4 B6 2013-01-01 05:00:00 -18 JetBlue Airways January
5 DL 2013-01-01 06:00:00 -25 Delta Air Lines Inc. January
6 UA 2013-01-01 05:00:00 12 United Air Lines Inc. January
7 B6 2013-01-01 06:00:00 19 JetBlue Airways January
8 B6 2013-01-01 06:00:00 -8 JetBlue Airways January
9 AA 2013-01-01 06:00:00 8 American Airlines Inc. January
10 B6 2013-01-01 06:00:00 -2 JetBlue Airways January
# Average arrival delay per airline and month. Missing arrival delays are
# ignored (na.rm = TRUE) so they do not turn the whole mean into NA.
# .groups = "drop_last" spells out summarise()'s default for this grouping:
# the result stays grouped by `name` only, and the regrouping message is
# silenced.
delay_monthly <- flights %>%
  group_by(name, month) %>%
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    .groups = "drop_last"
  )

# Preview the first rows of the monthly summary.
delay_monthly %>%
  head()
# A tibble: 6 × 3
# Groups: name [1]
name month avg_arr_delay
<chr> <ord> <dbl>
1 American Airlines Inc. January 0.982
2 American Airlines Inc. February 1.09
3 American Airlines Inc. March -2.98
4 American Airlines Inc. April 8.26
5 American Airlines Inc. May -4.68
6 American Airlines Inc. June 6.48
# Store a fill colour per row: green when the average arrival delay is
# negative, red otherwise.
delay_monthly <- delay_monthly %>%
  mutate(delay_color = ifelse(avg_arr_delay < 0, "darkgreen", "darkred"))

# Preview the summary with the new `delay_color` column.
delay_monthly %>%
  head(10)
# A tibble: 10 × 4
# Groups: name [1]
name month avg_arr_delay delay_color
<chr> <ord> <dbl> <chr>
1 American Airlines Inc. January 0.982 darkred
2 American Airlines Inc. February 1.09 darkred
3 American Airlines Inc. March -2.98 darkgreen
4 American Airlines Inc. April 8.26 darkred
5 American Airlines Inc. May -4.68 darkgreen
6 American Airlines Inc. June 6.48 darkred
7 American Airlines Inc. July 4.32 darkred
8 American Airlines Inc. August -2.30 darkgreen
9 American Airlines Inc. September -8.57 darkgreen
10 American Airlines Inc. October -3.79 darkgreen
# First pass: one bar per month showing the average arrival delay.
# The airlines are not separated yet, so their bars are stacked on top of
# each other within every month.
delay_monthly %>%
  ggplot(aes(x = month, y = avg_arr_delay)) +
  geom_col()
# Same monthly bar chart, now split into one panel per airline.
delay_monthly %>%
  ggplot(mapping = aes(x = month, y = avg_arr_delay)) +
  geom_col() +
  facet_wrap(vars(name))
# Faceted bar chart with bars filled from the precomputed delay_color column.
# scale_fill_identity() uses the colour names stored in the data as-is
# instead of mapping them onto a palette.
delay_monthly %>%
  ggplot(mapping = aes(x = month, y = avg_arr_delay, fill = delay_color)) +
  geom_col(alpha = 0.6) +
  facet_wrap(vars(name)) +
  scale_fill_identity() +
  theme_bw() +
  # Tilt the month labels so the full month names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Coloured, faceted bar chart with a title and a descriptive y-axis label.
# The x-axis title is blanked because the month names speak for themselves.
delay_monthly %>%
  ggplot(mapping = aes(x = month, y = avg_arr_delay, fill = delay_color)) +
  geom_col(alpha = 0.6) +
  facet_wrap(vars(name)) +
  labs(
    title = "Arrival Delay per Month by Carrier (2013)",
    x = "",
    y = "Average Arrival Delay (minutes)"
  ) +
  scale_fill_identity() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Position of the text annotation. The `name` column matches the facet
# variable, so the label is drawn only in the American Airlines panel.
annotation_text <- data.frame(
  x = 8,
  y = 15,
  name = "American Airlines Inc."
)
# Start (x, y) and end (xend, yend) of the annotation arrow. The `name`
# column matches the facet variable, so the arrow is drawn only in the
# American Airlines panel.
annotation_arrow <- data.frame(
  x = 9.1,
  y = 11,
  xend = 9,
  yend = 0.2,
  name = "American Airlines Inc."
)
# Final chart: coloured, faceted and labelled bars plus a text label and a
# curved arrow. Both annotation layers read from their own one-row data
# frames and set inherit.aes = FALSE so they ignore the bar chart's
# aesthetics (month, avg_arr_delay, delay_color).
delay_monthly %>%
  ggplot(mapping = aes(x = month, y = avg_arr_delay, fill = delay_color)) +
  geom_col(alpha = 0.6) +
  facet_wrap(vars(name)) +
  labs(
    title = "Arrival Delay per Month by Carrier (2013)",
    x = "",
    y = "Average Arrival Delay (minutes)"
  ) +
  scale_fill_identity() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Text label placed at the coordinates stored in annotation_text.
  geom_text(
    data = annotation_text,
    mapping = aes(x = x, y = y, label = "Good month\n to travel"),
    color = "darkgreen",
    fontface = "bold",
    size = 3.5,
    inherit.aes = FALSE
  ) +
  # Curved arrow from (x, y) to (xend, yend) as stored in annotation_arrow.
  geom_curve(
    data = annotation_arrow,
    mapping = aes(x = x, y = y, xend = xend, yend = yend),
    arrow = arrow(length = unit(0.33, "cm")),
    color = "darkgreen",
    curvature = -0.25,
    linewidth = 1.2,
    inherit.aes = FALSE
  )
The lubridate package in R simplifies working with date-times by providing intuitive and user-friendly functions. It is part of the tidyverse ecosystem and makes it easy to parse, manipulate, extract, and perform arithmetic operations on date-time data.
Parse character or numeric date-time data into R date-time formats.
Manipulate dates and times through arithmetic and transformations.
Extract components like years, months, days, hours, minutes, and seconds.
Handle time zones effectively.
Perform interval and duration calculations seamlessly.