Show the code
library(tidyverse)
library(nycflights13)
library(ggh4x)
library(ggforce)
- 1
- A collection of packages including the lubridate and ggplot2 packages.
- 2
- A package that includes the flights and airlines data used in this tutorial.
library(tidyverse)
library(nycflights13)
library(ggh4x)
library(ggforce)
The nycflights13 package comes with a number of data sets, including flights and airlines, which will be used in this exercise.
# Narrow the flights table to the carrier code, the time_hour timestamp, and
# the arrival delay; the later steps shown here read only these three columns.
flights <- select(flights, carrier, time_hour, arr_delay)
flights %>%
head(10)# A tibble: 10 × 3
carrier time_hour arr_delay
<chr> <dttm> <dbl>
1 UA 2013-01-01 05:00:00 11
2 UA 2013-01-01 05:00:00 20
3 AA 2013-01-01 05:00:00 33
4 B6 2013-01-01 05:00:00 -18
5 DL 2013-01-01 06:00:00 -25
6 UA 2013-01-01 05:00:00 12
7 B6 2013-01-01 06:00:00 19
8 EV 2013-01-01 06:00:00 -14
9 B6 2013-01-01 06:00:00 -8
10 AA 2013-01-01 06:00:00 8
airlines %>%
head(10)# A tibble: 10 × 2
carrier name
<chr> <chr>
1 9E Endeavor Air Inc.
2 AA American Airlines Inc.
3 AS Alaska Airlines Inc.
4 B6 JetBlue Airways
5 DL Delta Air Lines Inc.
6 EV ExpressJet Airlines Inc.
7 F9 Frontier Airlines Inc.
8 FL AirTran Airways Corporation
9 HA Hawaiian Airlines Inc.
10 MQ Envoy Air
# Attach each carrier's full airline name by matching on the carrier code.
# A left join keeps every flight row, even if a code has no match in airlines.
flights <- left_join(flights, airlines, by = "carrier")
flights %>%
head(10)# A tibble: 10 × 4
carrier time_hour arr_delay name
<chr> <dttm> <dbl> <chr>
1 UA 2013-01-01 05:00:00 11 United Air Lines Inc.
2 UA 2013-01-01 05:00:00 20 United Air Lines Inc.
3 AA 2013-01-01 05:00:00 33 American Airlines Inc.
4 B6 2013-01-01 05:00:00 -18 JetBlue Airways
5 DL 2013-01-01 06:00:00 -25 Delta Air Lines Inc.
6 UA 2013-01-01 05:00:00 12 United Air Lines Inc.
7 B6 2013-01-01 06:00:00 19 JetBlue Airways
8 EV 2013-01-01 06:00:00 -14 ExpressJet Airlines Inc.
9 B6 2013-01-01 06:00:00 -8 JetBlue Airways
10 AA 2013-01-01 06:00:00 8 American Airlines Inc.
# Restrict the analysis to four carriers, matched by their full airline name.
flights <- filter(
  flights,
  name %in% c(
    "American Airlines Inc.",
    "Delta Air Lines Inc.",
    "JetBlue Airways",
    "United Air Lines Inc."
  )
)
flights %>%
head(10)# A tibble: 10 × 4
carrier time_hour arr_delay name
<chr> <dttm> <dbl> <chr>
1 UA 2013-01-01 05:00:00 11 United Air Lines Inc.
2 UA 2013-01-01 05:00:00 20 United Air Lines Inc.
3 AA 2013-01-01 05:00:00 33 American Airlines Inc.
4 B6 2013-01-01 05:00:00 -18 JetBlue Airways
5 DL 2013-01-01 06:00:00 -25 Delta Air Lines Inc.
6 UA 2013-01-01 05:00:00 12 United Air Lines Inc.
7 B6 2013-01-01 06:00:00 19 JetBlue Airways
8 B6 2013-01-01 06:00:00 -8 JetBlue Airways
9 AA 2013-01-01 06:00:00 8 American Airlines Inc.
10 B6 2013-01-01 06:00:00 -2 JetBlue Airways
# Derive the month from time_hour as a full-name ordered label ("January", ...)
# instead of a number (label = TRUE) or an abbreviation (abbr = FALSE).
flights <- mutate(
  flights,
  month = month(time_hour, label = TRUE, abbr = FALSE)
)
flights %>%
head(10)# A tibble: 10 × 5
carrier time_hour arr_delay name month
<chr> <dttm> <dbl> <chr> <ord>
1 UA 2013-01-01 05:00:00 11 United Air Lines Inc. January
2 UA 2013-01-01 05:00:00 20 United Air Lines Inc. January
3 AA 2013-01-01 05:00:00 33 American Airlines Inc. January
4 B6 2013-01-01 05:00:00 -18 JetBlue Airways January
5 DL 2013-01-01 06:00:00 -25 Delta Air Lines Inc. January
6 UA 2013-01-01 05:00:00 12 United Air Lines Inc. January
7 B6 2013-01-01 06:00:00 19 JetBlue Airways January
8 B6 2013-01-01 06:00:00 -8 JetBlue Airways January
9 AA 2013-01-01 06:00:00 8 American Airlines Inc. January
10 B6 2013-01-01 06:00:00 -2 JetBlue Airways January
# Average arrival delay for every carrier/month pair; na.rm = TRUE skips
# flights whose arr_delay is missing.
# .groups = "drop_last" spells out what summarise() does by default here: the
# result stays grouped by name, and the "has grouped output by" message is no
# longer printed.
delay_monthly <- flights %>%
  group_by(name, month) %>%
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    .groups = "drop_last"
  )
delay_monthly %>%
head()# A tibble: 6 × 3
# Groups: name [1]
name month avg_arr_delay
<chr> <ord> <dbl>
1 American Airlines Inc. January 0.982
2 American Airlines Inc. February 1.09
3 American Airlines Inc. March -2.98
4 American Airlines Inc. April 8.26
5 American Airlines Inc. May -4.68
6 American Airlines Inc. June 6.48
# Store a literal colour name per row: "darkgreen" when the average arrival
# delay is negative, "darkred" otherwise. The plots feed this column to the
# fill aesthetic.
delay_monthly <- mutate(
  delay_monthly,
  delay_color = ifelse(avg_arr_delay < 0, "darkgreen", "darkred")
)
delay_monthly %>%
head(10)# A tibble: 10 × 4
# Groups: name [1]
name month avg_arr_delay delay_color
<chr> <ord> <dbl> <chr>
1 American Airlines Inc. January 0.982 darkred
2 American Airlines Inc. February 1.09 darkred
3 American Airlines Inc. March -2.98 darkgreen
4 American Airlines Inc. April 8.26 darkred
5 American Airlines Inc. May -4.68 darkgreen
6 American Airlines Inc. June 6.48 darkred
7 American Airlines Inc. July 4.32 darkred
8 American Airlines Inc. August -2.30 darkgreen
9 American Airlines Inc. September -8.57 darkgreen
10 American Airlines Inc. October -3.79 darkgreen
# Step 1: plain bar chart of average arrival delay per month (all carriers
# drawn in a single panel).
delay_monthly %>%
  ggplot(aes(x = month, y = avg_arr_delay)) +
  geom_col()

# Step 2: one panel per carrier.
ggplot(delay_monthly, aes(x = month, y = avg_arr_delay)) +
  geom_col() +
  facet_wrap(~ name)

# Step 3: colour the bars with the names stored in delay_color.
# scale_fill_identity() uses those values as-is instead of mapping them to a
# palette; the month labels are rotated so they do not overlap.
ggplot(delay_monthly, aes(x = month, y = avg_arr_delay, fill = delay_color)) +
  geom_col(alpha = 0.6) +
  facet_wrap(~ name) +
  scale_fill_identity() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Step 4: add a title and axis labels.
ggplot(delay_monthly, aes(x = month, y = avg_arr_delay, fill = delay_color)) +
  geom_col(alpha = 0.6) +
  facet_wrap(~ name) +
  labs(
    title = "Arrival Delay per Month by Carrier (2013)",
    x = "",
    y = "Average Arrival Delay (minutes)"
  ) +
  scale_fill_identity() +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Annotation data for the final plot. Both data frames carry a `name` column
# equal to "American Airlines Inc." so that, under facet_wrap(~ name), the
# annotation is drawn in that carrier's panel only.
# x/y give the position of the text label on the month/delay axes.
annotation_text <- data.frame(
  x = 8,
  y = 15,
  name = "American Airlines Inc."
)

# Start (x, y) and end (xend, yend) of the arrow; it ends just above zero at
# position 9 on the month axis.
annotation_arrow <- data.frame(
  x = 9.1,
  y = 11,
  xend = 9,
  yend = 0.2,
  name = "American Airlines Inc."
)
ggplot(delay_monthly, aes(x = month, y = avg_arr_delay, fill = delay_color)) +
geom_col(alpha= 0.6) +
facet_wrap(~ name)+
labs(title = "Arrival Delay per Month by Carrier (2013)",
x = "",
y = "Average Arrival Delay (minutes)") +
scale_fill_identity() +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
geom_text(data = annotation_text,
aes(x = x,
y = y,
label = "Good month\n to travel"),
color = "darkgreen",
fontface = "bold",
size = 3.5,
inherit.aes = FALSE) +
geom_curve(data = annotation_arrow,
aes(x = x,
y = y,
xend = xend,
yend = yend),
arrow = arrow(length = unit(0.33, "cm")),
color = "darkgreen",
curvature = -0.25,
linewidth = 1.2,
inherit.aes = FALSE)
The lubridate package in R simplifies working with date-times by providing intuitive and user-friendly functions. It is part of the tidyverse ecosystem and makes it easy to parse and manipulate date-time data, extract its components, and perform arithmetic operations on it.
Parse character or numeric date-time data into R date-time formats.
Manipulate dates and times through arithmetic and transformations.
Extract components like years, months, days, hours, minutes, and seconds.
Handle time zones effectively.
Perform interval and duration calculations seamlessly.