Analysis
Total Frequency
In this code block, I create a new data frame that shows the total
number of on time and delayed flights for each airline, and display this
data in a stacked bar graph.
airline_total_frequency <- airline_df %>%
group_by(airline, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0)
kable(airline_total_frequency, format = "pipe", col.names = c("Airline", "Number Delayed", "Number On Time"), caption = "Total Numbers of Delayed and On Time Flights by Airline", align = "c")
Total Numbers of Delayed and On Time Flights by
Airline
AM West |
787 |
6438 |
Alaska |
501 |
3274 |
ggplot(airline_df, aes(x = airline, y = frequency, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Total Numbers of Delayed and On Time Flights by Airline", x = "Airline", y = "Frequency") +
scale_fill_manual(values = c("on time" = "blue", "delayed" = "red")) +
labs (fill = "")

Relative Frequencies by Airline
At a glance the two airlines appear to have similar rates of delay,
but calculating the relative frequencies will confirm this. Below, I
calculate the relative frequencies by airline, and display these
relative frequencies in a stacked bar graph.
airline_relative_frequency <- airline_total_frequency %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(airline_relative_frequency, format = "pipe", digits = 1, col.names = c("Airline", "Percent Delayed", "Percent On Time"), caption = "Relative Frequencies of Delayed and On Time Flights by Airline", align = "c")
Relative Frequencies of Delayed and On Time Flights by
Airline
AM West |
10.9 |
89.1 |
Alaska |
13.3 |
86.7 |
airline_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = airline, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Relative Frequencies of Delayed and On Time Flights by Airline", x = "Airline", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "", )

Below, I repeat the above process of calculating relative
frequencies, filtering by destination.
## Creating new data frames and kables
la_relative_frequency <- airline_df %>%
filter(destination == "Los Angeles") %>%
group_by(airline, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(la_relative_frequency, format = "pipe", digits = 1, col.names = c("Airline", "Percent Delayed", "Percent On Time"), caption = "Los Angeles: Relative Frequencies of Delayed and On Time Flights by Airline", align = "c")
Los Angeles: Relative Frequencies of Delayed and On Time
Flights by Airline
AM West |
14.4 |
85.6 |
Alaska |
11.1 |
88.9 |
phoenix_relative_frequency <- airline_df %>%
filter(destination == "Phoenix") %>%
group_by(airline, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(phoenix_relative_frequency, format = "pipe", digits = 1, col.names = c("Airline", "Percent Delayed", "Percent On Time"), caption = "Phoenix: Relative Frequencies of Delayed and On Time Flights by Airline", align = "c")
Phoenix: Relative Frequencies of Delayed and On Time Flights by
Airline
AM West |
7.9 |
92.1 |
Alaska |
5.2 |
94.8 |
sd_relative_frequency <- airline_df %>%
filter(destination == "San Diego") %>%
group_by(airline, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(sd_relative_frequency, format = "pipe", digits = 1, col.names = c("Airline", "Percent Delayed", "Percent On Time"), caption = "San Diego: Relative Frequencies of Delayed and On Time Flights by Airline", align = "c")
San Diego: Relative Frequencies of Delayed and On Time Flights
by Airline
AM West |
14.5 |
85.5 |
Alaska |
8.6 |
91.4 |
sf_relative_frequency <- airline_df %>%
filter(destination == "San Francisco") %>%
group_by(airline, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(sf_relative_frequency, format = "pipe", digits = 1, col.names = c("Airline", "Percent Delayed", "Percent On Time"), caption = "San Francisco: Relative Frequencies of Delayed and On Time Flights by Airline", align = "c")
San Francisco: Relative Frequencies of Delayed and On Time
Flights by Airline
AM West |
28.7 |
71.3 |
Alaska |
16.9 |
83.1 |
seattle_relative_frequency <- airline_df %>%
filter(destination == "Seattle") %>%
group_by(airline, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(seattle_relative_frequency, format = "pipe", digits = 1, col.names = c("Airline", "Percent Delayed", "Percent On Time"), caption = "Seattle: Relative Frequencies of Delayed and On Time Flights by Airline", align = "c")
Seattle: Relative Frequencies of Delayed and On Time Flights by
Airline
AM West |
23.3 |
76.7 |
Alaska |
14.2 |
85.8 |
##Creating bar graphs
la_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = airline, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Los Angeles: Relative Frequencies of Delayed and On Time Flights by Airline", x = "Airline", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "", )

phoenix_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = airline, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Phoenix: Relative Frequencies of Delayed and On Time Flights by Airline", x = "Airline", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "", )

sd_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = airline, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "San Diego: Relative Frequencies of Delayed and On Time Flights by Airline", x = "Airline", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "", )

sf_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = airline, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "San Francisco: Relative Frequencies of Delayed and On Time Flights by Airline", x = "Airline", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "", )

seattle_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = airline, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Seattle: Relative Frequencies of Delayed and On Time Flights by Airline", x = "Airline", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "")

Relative Frequencies by Destination
Finally, I compute and display the relative frequencies for each
destination, and then filter by airline.
destination_relative_frequency <- airline_df %>%
group_by(destination, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(destination_relative_frequency, format = "pipe", digits = 1, col.names = c("Destination", "Percent Delayed", "Percent On Time"), caption = "Relative Frequencies of Delayed and On Time Flights by Destination", align = "c")
Relative Frequencies of Delayed and On Time Flights by
Destination
Los Angeles |
13.1 |
86.9 |
Phoenix |
7.8 |
92.2 |
San Diego |
12.5 |
87.5 |
San Francisco |
21.9 |
78.1 |
Seattle |
15.2 |
84.8 |
destination_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = destination, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Relative Frequencies of Delayed and On Time Flights by Destination", x = "Destination", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "")

alaska_relative_frequency <- airline_df %>%
filter(airline == "Alaska") %>%
group_by(destination, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(alaska_relative_frequency, format = "pipe", digits = 1, col.names = c("Destination", "Percent Delayed", "Percent On Time"), caption = "Alaska Airlines: Relative Frequencies of Delayed and On Time Flights by Destination", align = "c")
Alaska Airlines: Relative Frequencies of Delayed and On Time
Flights by Destination
Los Angeles |
11.1 |
88.9 |
Phoenix |
5.2 |
94.8 |
San Diego |
8.6 |
91.4 |
San Francisco |
16.9 |
83.1 |
Seattle |
14.2 |
85.8 |
am_west_relative_frequency <- airline_df %>%
filter(airline == "AM West") %>%
group_by(destination, on_time) %>%
summarize(total_frequency = sum(frequency), .groups = "drop") %>%
pivot_wider(names_from = on_time, values_from = total_frequency, values_fill = 0) %>%
mutate(percent_delayed = `delayed`/(`delayed`+`on time`)*100, percent_on_time = `on time`/(`delayed`+`on time`)*100) %>%
select(-c(2:3))
kable(am_west_relative_frequency, format = "pipe", digits = 1, col.names = c("Destination", "Percent Delayed", "Percent On Time"), caption = "AM West: Relative Frequencies of Delayed and On Time Flights by Destination", align = "c")
AM West: Relative Frequencies of Delayed and On Time Flights by
Destination
Los Angeles |
14.4 |
85.6 |
Phoenix |
7.9 |
92.1 |
San Diego |
14.5 |
85.5 |
San Francisco |
28.7 |
71.3 |
Seattle |
23.3 |
76.7 |
alaska_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = destination, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "Alaska: Relative Frequencies of Delayed and On Time Flights by Destination", x = "Destination", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "")

am_west_relative_frequency %>%
pivot_longer(cols = 2:3, names_to = "on_time", values_to = "percentage") %>%
ggplot(aes(x = destination, y = percentage, fill = on_time)) +
geom_bar(stat = "identity", position = "stack", alpha = 0.6) +
labs(title = "AM West: Relative Frequencies of Delayed and On Time Flights by Destination", x = "Destination", y = "Percentage") +
scale_fill_manual(values = c("percent_on_time" = "blue", "percent_delayed" = "red"), labels = c("delayed", "on time")) +
labs (fill = "")

Findings & Recommendations
Overall, the two airlines have similar rates of on time arrivals,
with AM West having a slightly greater rate of on time arrivals.
However, when filtering by destination, Alaska has a greater rate of on
time arrivals for each of the five cities specified, which is possible
since the two airlines operate different numbers of flights into each
city. The two airlines have similar rates of on time arrivals for both
Los Angeles and Phoenix, while Alaska has more of an advantage for San
Diego, San Francisco, and Seattle. We can also see when grouping by
destination that San Francisco and Seattle have lower rates of on time
arrivals overall, and each airline has lower rates of on time arrivals
for those two destinations. Further analysis could look at the departure
cities and rates of on time departure for the two airlines.