flights <- read_csv("arrival_delays.csv")
flights
## # A tibble: 4 × 7
##   Airline Status  `Los Angeles` Phoenix `San Diego` `San Francisco` Seattle
##   <chr>   <chr>           <dbl>   <dbl>       <dbl>           <dbl>   <dbl>
## 1 Alaska  on time           497     221         212             503    1841
## 2 Alaska  delayed            62      12          20             102     305
## 3 AM West on time           694    4840         383             320     201
## 4 AM West delayed           117     415          65             129      61
## Transform from wide to long
flights_long <- flights %>%
  pivot_longer(
    cols = c(`Los Angeles`, Phoenix, `San Diego`, `San Francisco`, Seattle),
    names_to = "City",
    values_to = "Count"
  )

flights_long
## # A tibble: 20 × 4
##    Airline Status  City          Count
##    <chr>   <chr>   <chr>         <dbl>
##  1 Alaska  on time Los Angeles     497
##  2 Alaska  on time Phoenix         221
##  3 Alaska  on time San Diego       212
##  4 Alaska  on time San Francisco   503
##  5 Alaska  on time Seattle        1841
##  6 Alaska  delayed Los Angeles      62
##  7 Alaska  delayed Phoenix          12
##  8 Alaska  delayed San Diego        20
##  9 Alaska  delayed San Francisco   102
## 10 Alaska  delayed Seattle         305
## 11 AM West on time Los Angeles     694
## 12 AM West on time Phoenix        4840
## 13 AM West on time San Diego       383
## 14 AM West on time San Francisco   320
## 15 AM West on time Seattle         201
## 16 AM West delayed Los Angeles     117
## 17 AM West delayed Phoenix         415
## 18 AM West delayed San Diego        65
## 19 AM West delayed San Francisco   129
## 20 AM West delayed Seattle          61
## Count analysis
flights_long %>%
  group_by(Airline, Status) %>%
  summarise(Total = sum(Count), .groups="drop")
## # A tibble: 4 × 3
##   Airline Status  Total
##   <chr>   <chr>   <dbl>
## 1 AM West delayed   787
## 2 AM West on time  6438
## 3 Alaska  delayed   501
## 4 Alaska  on time  3274
## Overall delay percentage by airline
overall <- flights_long %>%
  group_by(Airline, Status) %>%
  summarise(Total = sum(Count), .groups="drop") %>%
  pivot_wider(names_from = Status, values_from = Total) %>%
  mutate(
    AllFlights = `on time` + delayed,
    DelayRate = delayed / AllFlights
  )
ggplot(overall, aes(x = Airline, y = DelayRate)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent) +
  labs(title="Overall Delay Rate by Airline", y="Delay %", x=NULL)

## Delay percentage by city
by_city <- flights_long %>%
  group_by(Airline, City, Status) %>%
  summarise(Total = sum(Count), .groups="drop") %>%
  pivot_wider(names_from = Status, values_from = Total) %>%
  mutate(
    AllFlights = `on time` + delayed,
    DelayRate = delayed / AllFlights
  )
ggplot(by_city, aes(x = City, y = DelayRate, group = Airline, color = Airline)) +
  geom_line() +
  geom_point(size = 3) +
  scale_y_continuous(labels = scales::percent) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title="Delay Rate by City", y="Delay %", x=NULL)