1st Assignment

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights23)
data(flights)

Full Names of the Airlines

# Lookup table from two-character carrier code to full airline name.
# Index it with a character vector of codes, e.g. airline_names[c("AA", "DL")].
airline_names <- c(
  AA   = "American Airlines",
  AS   = "Alaska Airlines",
  B6   = "JetBlue Airways",
  DL   = "Delta Air Lines",
  F9   = "Frontier Airlines",
  HA   = "Hawaiian Airlines",
  NK   = "Spirit Airlines",
  UA   = "United Airlines",
  WN   = "Southwest Airlines",
  `9E` = "Endeavor Air",  # backticks: a name starting with a digit is not syntactic
  MQ   = "Envoy Air",
  OH   = "PSA Airlines",
  OO   = "SkyWest Airlines",
  YX   = "Republic Airways",
  G4   = "Allegiant Air"
)

Average Delay

# Mean departure delay per carrier, labelled with the full airline name and
# bucketed into a delay category that the chart uses for its fill colour.
avg_delay <- flights %>%
  filter(!is.na(dep_delay)) %>%          # drop rows with a missing dep_delay
  group_by(carrier) %>%                  # one group per carrier code
  summarize(avg_dep_delay = mean(dep_delay), .groups = "drop") %>%
  mutate(
    # unname() keeps the lookup vector's names out of the column; coalesce()
    # falls back to the raw carrier code when a carrier has no entry in
    # airline_names, so it is not silently dropped from the plot as NA.
    airline = coalesce(unname(airline_names[carrier]), carrier),
    # Carriers averaging more than 10 minutes late count as "Higher Delay".
    delay_category = if_else(avg_dep_delay > 10, "Higher Delay", "Lower Delay")
  )

The Bar Graph

# Horizontal bar chart of average departure delay per airline, ordered from
# shortest to longest delay and coloured by delay category.
ggplot(
  avg_delay,
  aes(
    x = reorder(airline, avg_dep_delay),  # order bars by their delay value
    y = avg_dep_delay,
    fill = delay_category
  )
) +
  geom_col() +    # bar heights come straight from avg_dep_delay
  coord_flip() +  # horizontal bars keep long airline names readable
  # Red marks the worse (higher-delay) group, green the better one.
  scale_fill_manual(
    values = c("Higher Delay" = "darkred", "Lower Delay" = "darkgreen")
  ) +
  labs(
    title = "Average Departure Delay by Airline (NYC, 2023)",
    x = "Airline",
    y = "Average Departure Delay (minutes)",
    fill = "Delay Category",
    caption = "Data source: nycflights23 package"
  ) +
  theme_minimal()