library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
# Mean arrival delay (minutes) for each carrier, largest mean delay first.
# `na.rm = TRUE` leaves missing `arr_delay` values out of each mean.
carrier_avg_delays <- flights |>
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    .by = carrier
  ) |>
  arrange(desc(avg_arr_delay))

carrier_avg_delays
## # A tibble: 16 × 2
## carrier avg_arr_delay
## <chr> <dbl>
## 1 F9 21.9
## 2 FL 20.1
## 3 EV 15.8
## 4 YV 15.6
## 5 OO 11.9
## 6 MQ 10.8
## 7 WN 9.65
## 8 B6 9.46
## 9 9E 7.38
## 10 UA 3.56
## 11 US 2.13
## 12 VX 1.76
## 13 DL 1.64
## 14 AA 0.364
## 15 HA -6.92
## 16 AS -9.93
# Lookup table from two-character carrier code (element name) to the airline's
# display name (element value). Codes that are not syntactic R names are
# backtick-quoted.
airline_name <- c(
  F9   = "Frontier Airlines",
  FL   = "AirTran Airways",
  EV   = "ExpressJet Airlines",
  YV   = "Mesa Airlines",
  OO   = "SkyWest Airlines",
  MQ   = "Envoy Air",
  WN   = "Southwest Airlines",
  B6   = "JetBlue Airways",
  `9E` = "Endeavor Air",
  UA   = "United Airlines",
  US   = "US Airways",
  VX   = "Virgin America",
  DL   = "Delta Air Lines",
  AA   = "American Airlines",
  HA   = "Hawaiian Airlines",
  AS   = "Alaska Airlines"
)
# Bar chart of mean arrival delay per carrier, bars ordered from the largest
# mean delay to the smallest, with a legend translating carrier codes into
# airline names.
carrier_avg_delays |>
  # Build the delay-ordered factor once so the x-axis and the fill legend
  # share the same level order.
  mutate(carrier = fct_reorder(carrier, avg_arr_delay, .desc = TRUE)) |>
  ggplot(aes(x = carrier, y = avg_arr_delay, fill = carrier)) +
  # geom_col() draws bar heights straight from the y values.
  geom_col() +
  labs(
    title = "Average Arrival Delay by Carrier",
    x = "Carrier Code",
    y = "Average Arrival Delay (in minutes)"
  ) +
  scale_fill_discrete(
    name = "Airline Name",
    # Look labels up by carrier code rather than by position, so the legend
    # stays correct however `airline_name` or the data happen to be ordered.
    labels = \(codes) unname(airline_name[codes])
  )
This visualization illustrates the average arrival delay for
each carrier in the `flights` dataset. The x-axis arranges
the carrier codes in descending order of their average arrival delays,
while the y-axis represents the mean delay in minutes. The bars are
color-coded by carrier, and the legend provides the names of the
respective airlines for reference. One noteworthy aspect of this plot is
the consistency of the legend’s ordering with the carrier codes’
ordering on the x-axis. This helps the viewer associate the codes with
their respective airline names and easily identify the airlines with the
highest and lowest mean delay times.