Question 1: Longest departure delays
# Ten flights with the largest recorded departure delay.
# Only the identifying columns are kept; rows whose dep_delay is NA are
# discarded before ranking so they cannot appear in the result.
longest_delays <- flights %>%
  select(year, month, day, carrier, flight, origin, dest, dep_delay) %>%
  filter(!is.na(dep_delay)) %>%
  arrange(desc(dep_delay)) %>%
  head(n = 10)
print(longest_delays)
## # A tibble: 10 × 8
## year month day carrier flight origin dest dep_delay
## <int> <int> <int> <chr> <int> <chr> <chr> <dbl>
## 1 2013 1 9 HA 51 JFK HNL 1301
## 2 2013 6 15 MQ 3535 JFK CMH 1137
## 3 2013 1 10 MQ 3695 EWR ORD 1126
## 4 2013 9 20 AA 177 JFK SFO 1014
## 5 2013 7 22 MQ 3075 JFK CVG 1005
## 6 2013 4 10 DL 2391 JFK TPA 960
## 7 2013 3 17 DL 2119 LGA MSP 911
## 8 2013 6 27 DL 2007 JFK PDX 899
## 9 2013 7 22 DL 2047 LGA ATL 898
## 10 2013 12 5 AA 172 EWR MIA 896
Graph 1: Distribution of departure delays
# Histogram of departure delays in 10-unit bins.
# The x scale is limited to [-50, 300]; because the limit is set on the
# scale, rows outside that range (and NA delays) are dropped before
# binning, which is what triggers the "Removed ... rows" warnings.
ggplot(data = flights, mapping = aes(x = dep_delay)) +
  geom_histogram(binwidth = 10, colour = "black", fill = "steelblue") +
  scale_x_continuous(limits = c(-50, 300)) +
  labs(
    title = "Distribution of Flight Departure Delays",
    x = "Departure Delay (minutes)",
    y = "Number of Flights"
  )
## Warning: Removed 8865 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

Question 2: Was there a flight every day in 2013?
# One row per (year, month, day) combination present in flights, with the
# number of flights on that day in column n. The row count is therefore
# the number of distinct calendar days that had at least one flight.
days_with_flights <- count(flights, year, month, day)
total_days <- nrow(days_with_flights)
print(total_days)
## [1] 365
Graph 2: Average departure delay by airline
# Mean departure delay per carrier (NA delays ignored), sorted from the
# highest average to the lowest.
avg_delay_by_carrier <- flights %>%
  group_by(carrier) %>%
  summarise(
    avg_dep_delay = mean(dep_delay, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(avg_dep_delay))

# Horizontal bar chart; carriers are ordered along the axis by their
# average delay via reorder().
ggplot(data = avg_delay_by_carrier) +
  geom_col(
    mapping = aes(x = reorder(carrier, avg_dep_delay), y = avg_dep_delay),
    fill = "darkred"
  ) +
  labs(
    title = "Average Departure Delay by Airline",
    x = "Airline",
    y = "Average Delay (minutes)"
  ) +
  coord_flip()
