Question 1: Longest departure delays

# Ten flights with the largest departure delay.
# Rows whose dep_delay is NA are excluded before ranking.
longest_delays <- flights %>%
  filter(!is.na(dep_delay)) %>%
  arrange(desc(dep_delay)) %>%
  slice_head(n = 10) %>%
  select(year, month, day, carrier, flight, origin, dest, dep_delay)

print(longest_delays)
## # A tibble: 10 × 8
##     year month   day carrier flight origin dest  dep_delay
##    <int> <int> <int> <chr>    <int> <chr>  <chr>     <dbl>
##  1  2013     1     9 HA          51 JFK    HNL        1301
##  2  2013     6    15 MQ        3535 JFK    CMH        1137
##  3  2013     1    10 MQ        3695 EWR    ORD        1126
##  4  2013     9    20 AA         177 JFK    SFO        1014
##  5  2013     7    22 MQ        3075 JFK    CVG        1005
##  6  2013     4    10 DL        2391 JFK    TPA         960
##  7  2013     3    17 DL        2119 LGA    MSP         911
##  8  2013     6    27 DL        2007 JFK    PDX         899
##  9  2013     7    22 DL        2047 LGA    ATL         898
## 10  2013    12     5 AA         172 EWR    MIA         896

Graph 1: Distribution of departure delays

# Histogram of departure delays (10-minute bins), zoomed to -50..300 minutes.
# Rows with a missing dep_delay are filtered out explicitly instead of being
# dropped by ggplot2 with a warning.
flights %>%
  filter(!is.na(dep_delay)) %>%
  ggplot(aes(x = dep_delay)) +
  geom_histogram(binwidth = 10, color = "black", fill = "steelblue") +
  # coord_cartesian() only zooms the view. xlim() sets scale limits, which
  # discards out-of-range rows before binning and drops the bars that straddle
  # either edge of the window.
  coord_cartesian(xlim = c(-50, 300)) +
  labs(
    title = "Distribution of Flight Departure Delays",
    x = "Departure Delay (minutes)",
    y = "Number of Flights"
  )

Question 2: Was there a flight every day in 2013?

# One row per distinct (year, month, day) combination present in flights;
# the n column holds the number of flight records on that date.
days_with_flights <- count(flights, year, month, day)

# Number of distinct dates that have at least one flight record.
total_days <- nrow(days_with_flights)

print(total_days)
## [1] 365

Graph 2: Average departure delay by airline

# Mean departure delay per carrier, ignoring missing delays, largest first.
avg_delay_by_carrier <- flights %>%
  group_by(carrier) %>%
  summarise(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_dep_delay))

# Horizontal bar chart; reorder() orders the carriers by their mean delay.
avg_delay_by_carrier %>%
  ggplot(aes(x = reorder(carrier, avg_dep_delay), y = avg_dep_delay)) +
  geom_col(fill = "darkred") +
  coord_flip() +
  labs(
    title = "Average Departure Delay by Airline",
    x = "Airline",
    y = "Average Delay (minutes)"
  )