#Using the Flights dataset, I created a data visualization that shows the proportion of
#late departures that arrived at their
#destination either late or on-time/early. I chose to do this analysis because I was
#curious to see if airlines could make up for
#the late departures by still getting the passengers to their destination
#on-time or early. I selected only five popular airlines
#(Delta, Alaska Airlines, American Airlines, Frontier, and United Airlines)
#to review in order to keep the data visualization digestible.
#This analysis can give customers
#insight into their chances of early/on-time arrivals despite
#difficulties that may result in late departures.
#PS: I am not surprised that Frontier is the lowest.
#Chart title: Proportion of Late Departures that Arrived Late or On-time/Early by Carrier
#Load in R packages and dataset
library(tidyverse)
# ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
# ✔ dplyr 1.1.2 ✔ readr 2.1.4
# ✔ forcats 1.0.0 ✔ stringr 1.5.0
# ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
# ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
# ✔ purrr 1.0.1
# ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
# ✖ dplyr::filter() masks stats::filter()
# ✖ dplyr::lag() masks stats::lag()
# ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(psych)
# Attaching package: 'psych'
# The following objects are masked from 'package:ggplot2':
# %+%, alpha
# Bind the `flights` table (presumably the nycflights13 dataset attached
# above) to a name in the current environment.
flights <- flights

# Carriers kept for the chart, as code = display name. Selecting by this
# explicit allow-list (instead of excluding every other code) guarantees that
# each remaining row has a display name in the lookup below.
carrier_names <- c(
  AA = "American Airlines",
  AS = "Alaska Airlines",
  DL = "Delta Airlines",
  F9 = "Frontier Airlines",
  UA = "United Airlines"
)

# Remove the other carriers and rename the remaining carrier codes.
flights2 <- flights %>%
  filter(carrier %in% names(carrier_names)) %>%
  # Look each code up in the named vector; unname() drops the codes that
  # would otherwise be carried along as element names.
  mutate(carrier = unname(carrier_names[carrier]))

# Remove NAs from delay columns
flights3 <- flights2 %>%
  filter(!is.na(dep_delay), !is.na(arr_delay))

# Flights that departed late but arrived early/on time
# Late departures (dep_delay > 0) that still arrived on time or early
# (arr_delay <= 0). flights3 already excludes rows with missing delays.
flights4 <- flights3 %>%
  filter(dep_delay > 0, arr_delay <= 0)

# Create column for arrival statuses (On-time/Early)
flights4ontime <- mutate(flights4, Status = "On-time/Early")

# Flights that departed and arrived late (both delays > 0)
flights5 <- flights3 %>%
  filter(dep_delay > 0, arr_delay > 0)

# Create column for arrival statuses (Late)
flights5late <- mutate(flights5, Status = "Late")

# Merge on-time/early & late datasets, on-time/early rows first
final <- rbind(flights4ontime, flights5late)

# Create stacked bar chart
# One bar per carrier, split by Status. position = "fill" rescales every bar
# to a total height of 1, so each segment is that Status's share of the
# carrier's rows in `final`.
ggplot(data = final, mapping = aes(x = carrier, fill = Status)) +
  geom_bar(position = "fill") +
  # geom_bar() labels the y axis "count" by default, which is misleading once
  # the bars are normalised; name the axis for what it actually shows.
  labs(
    title = "Proportion of Late Departures that Arrived Late or On-time/Early by Carrier",
    y = "Proportion of late departures"
  ) +
  theme(
    panel.background = element_rect(fill = "white"),
    # hjust = 0.5 centres the title over the plot.
    plot.title = element_text(hjust = 0.5)
  )