# Data loading
# Load the `flights` and `airlines` datasets into the workspace with one call.
# NOTE(review): presumably provided by the nycflights13 package -- confirm it
# is attached before this line runs.
data(list = c("flights", "airlines"))
# Create an initial scatterplot with a loess smoother relating distance to delays.
# Use `group_by()` together with the summarize functions.
# Keep only the flights for which distance, arrival delay, and departure delay
# are all recorded; a row with NA in any of the three columns is dropped.
flights_nona <- flights |>
  filter(
    !is.na(distance),
    !is.na(arr_delay),
    !is.na(dep_delay)
  )
# Join the NA-filtered flights data (`flights_nona`) with the `airlines` dataset.
# Also remove "Inc." or "Co." from the carrier name.
# Add the columns of `airlines` to every flight row, matching on `carrier`.
# Flights whose carrier has no match keep their row (left join).
flights2 <- left_join(flights_nona, airlines, by = "carrier")

# Strip the "Inc." / "Co." suffixes from the carrier name. Removing the suffix
# alone leaves the space that preceded it (e.g. "Acme Air Inc." -> "Acme Air "),
# so trim surrounding whitespace to keep grouping keys and chart labels clean.
flights2$name <- trimws(gsub("Inc\\.|Co\\.", "", flights2$name))
# Calculate the percentage of flights delayed by 10 minutes or less (on-time performance, OTP)
# On-time performance (OTP) per carrier name: the percentage of flights whose
# delay is at most 10 minutes, computed separately for departures and arrivals.
# The result has one row per carrier name.
delay_OTP <- flights2 |>
  group_by(name) |>
  summarise(
    Departure_Percentage = 100 * (sum(dep_delay <= 10) / n()),
    Arrival_Percentage = 100 * (sum(arr_delay <= 10) / n())
  )
# Create a bidirectional horizontal bar chart (departures to the left, arrivals to the right)
# Mirrored horizontal bar chart of on-time performance per carrier.
# Departure percentages are negated so their bars extend left of zero, while
# arrival percentages extend to the right; axis tick labels go through abs()
# so both sides read as positive values. Carriers are ordered on the y axis by
# their departure percentage.
ggplot(
  delay_OTP,
  aes(
    x = -Departure_Percentage,
    y = reorder(name, Departure_Percentage)
  )
) +
  # Departure % labels: anchored at the bar end, pushed slightly further left.
  geom_text(
    aes(label = paste0(round(Departure_Percentage, 0), "%")),
    hjust = 1.1,
    size = 3.5
  ) +
  # The fill aesthetics are constant strings used as keys into the manual
  # fill scale below, not data columns.
  geom_col(aes(fill = "Departure_Percentage"), width = 0.75) +
  geom_col(
    aes(x = Arrival_Percentage, fill = "Arrival_Percentage"),
    width = 0.75
  ) +
  # Arrival % labels: anchored at the bar end, pushed slightly further right.
  geom_text(
    aes(
      x = Arrival_Percentage,
      label = paste0(round(Arrival_Percentage, 0), "%")
    ),
    hjust = -0.1,
    size = 3.5
  ) +
  labs(
    x = "Departures < On-Time Performance > Arrivals",
    y = "Carrier",
    title = "On-Time Performance of Airline Carriers \n (Percent of Flights < 10 Minutes Delay)",
    caption = "Source: FAA"
  ) +
  scale_fill_manual(
    name = "Performance",
    # Legend order: departures first, then arrivals.
    breaks = c("Departure_Percentage", "Arrival_Percentage"),
    values = c(
      "Departure_Percentage" = "#8bd3c7",
      "Arrival_Percentage" = "#beb9db"
    ),
    labels = c(
      "Departure_Percentage" = "Departure",
      "Arrival_Percentage" = "Arrival"
    )
  ) +
  # Limits reach past +/-100, presumably to leave room for the text labels
  # drawn beyond the bar ends.
  scale_x_continuous(labels = abs, limits = c(-120, 120)) +
  theme_minimal()