# Loading Packages
library(readr)
library(tidyverse)
library(tidytext)
library(knitr)
library(ggthemr)
library(ggplot2)
# Loading Datasets
read.csv("alljoined_airlines.csv") -> alljoined
read.csv("airline_key.csv")-> airline_key
# Adding New Column to alljoined with airline name instead of code
alljoined %>%
full_join(airline_key, by = "OP_UNIQUE_CARRIER") -> alljoined2
# Color Theme
ggthemr('dust', type = 'outer')
set_swatch(c("#fc8d27", "#f28422", "#e97c1f", "#df741b", "#d66d18", "#cd6517", "#c45d10", "#ba550f", "#b14d0c", "#a74507", "#9e3d03"))
#### Actual Analysis of Finding Cancellation & Delay Percentages (adjusted for # of flights)
# All Flights Counted by Airline
alljoined2 %>%
group_by(airline_name) %>%
summarize(count = n()) %>%
arrange(desc(count)) -> all_flights
# How Many flight cancellations compared to number of flights by airline
alljoined2 %>%
group_by(airline_name) %>%
summarize(cancel_count = sum(CANCELLED)) %>%
arrange(desc(cancel_count)) -> all_canceled
# How many flight delays compared to number of flights by airline
alljoined2 %>%
group_by(airline_name) %>%
filter(ARR_DELAY > 0) %>%
summarize(count = n()) %>%
arrange(desc(count)) -> all_delayed
# How many flight early compared to number of flights by airline
alljoined2 %>%
group_by(airline_name) %>%
filter(ARR_DELAY < 0) %>%
summarize(count = n()) %>%
arrange(desc(count)) -> all_early
# How many flight on-time compared to number of flights by airline
alljoined2 %>%
group_by(airline_name) %>%
filter(ARR_DELAY == 0) %>%
summarize(count = n()) %>%
arrange(desc(count)) -> all_ontime
# How many flights on-time or early compared to number of flights by airline
alljoined2 %>%
group_by(airline_name) %>%
filter(ARR_DELAY <= 0) %>%
summarize(count = n()) %>%
arrange(desc(count)) -> all_combo
## Made combined dataset of flight, delay, and cancellation counts by airline
all_flights %>%
full_join(all_delayed, by = "airline_name") %>%
full_join(all_canceled, by = "airline_name") %>%
full_join(all_early, by = "airline_name") %>%
full_join(all_ontime, by = "airline_name") %>%
full_join(all_combo, by = "airline_name") -> fly_del_can_combo
colnames(fly_del_can_combo)[2] = "flight_count"
colnames(fly_del_can_combo)[3] = "delay_count"
colnames(fly_del_can_combo)[5] = "early_count"
colnames(fly_del_can_combo)[6] = "ontime_count"
colnames(fly_del_can_combo)[7] = "early_ontime_count"
## Adding Rows with math to find the counts/total flights
# percent cancellations
fly_del_can_combo$percent_cancellations <- (fly_del_can_combo$cancel_count / fly_del_can_combo$flight_count)
# percent delayed
fly_del_can_combo$percent_delayed <- (fly_del_can_combo$delay_count / fly_del_can_combo$flight_count)
# percent early
fly_del_can_combo$percent_early <- (fly_del_can_combo$early_count / fly_del_can_combo$flight_count)
# percent on-time
fly_del_can_combo$percent_ontime <- (fly_del_can_combo$ontime_count / fly_del_can_combo$flight_count)
# percent early or on_time
fly_del_can_combo$percent_early_ontime <- (fly_del_can_combo$early_ontime_count / fly_del_can_combo$flight_count)
This part of this airline analysis project includes an analysis of the top ten domestic airlines on-time performance. Using the same flight data as the market share analysis an explanation of how the data was collected is located on the home page of this project.
The first step in the analysis of on-time performance was to find what percentage of an airlines flights arrived late by at least 1 minute. These two plots show this delay percentage and its inverse early/on-time percentage. From this analysis the airline with the best on-time performance is Delta Airlines with 71.5% early/on-time flights and the worst Allegiant Airlines with 55.5% early/on-time flights.
Delta airlines is significantly the best on-time performance of any airline over the past five years with its second closest competitor having a 5.4% worse early/on-time performance. This significant insight was used to build a comprehensive advertising campaign including a TV commercial spot and digital posters for the Atlanta Airport. To learn more about this project click here.
ggplot(fly_del_can_combo, aes(reorder(airline_name, percent_delayed), percent_delayed, fill = airline_name)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Percent of Flights Delayed") + labs(title = "Percentage of Flights Delayed by Airline") + theme(legend.position = "none")
ggplot(fly_del_can_combo, aes(reorder(airline_name, percent_early_ontime), percent_early_ontime, fill = airline_name)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Percent Early or On-Time") + labs(title = "Percentage of Flights Early or On-Time") + theme(legend.position = "none")
fly_del_can_combo[c("airline_name", "percent_delayed", "percent_early_ontime")] %>%
arrange(desc(percent_delayed)) -> delayed_table
kable(delayed_table)
airline_name | percent_delayed | percent_early_ontime |
---|---|---|
Allegiant | 0.3952445 | 0.5557034 |
JetBlue | 0.3812002 | 0.5907242 |
Frontier | 0.3728175 | 0.6019841 |
Alaska | 0.3395544 | 0.6404478 |
Southwest | 0.3337388 | 0.6328287 |
American | 0.3335979 | 0.6352442 |
Spirit | 0.3310017 | 0.6472707 |
Hawaiian | 0.3286193 | 0.6604214 |
United | 0.3260125 | 0.6512034 |
Delta | 0.2703147 | 0.7150261 |
When looking at the average arrival delays for each of the airlines Allegiant stood out as having the longest arrival delay time. This means that when Allegiant had a delay it lasted the longest amount of time on average. Generally the top three worst on-time performance airlines also had the longest average arrival delays which is an interesting relationship. Another result from this analysis is that both Delta Airlines and Alaska Airlines have a net positive average arrival delay, meaning that their customers on average get to their destination early. This is an unexpected result for Alaska Airlines because they have the fourth worst on-time performance but the best average arrival delay.
alljoined2 %>%
group_by(airline_name) %>%
summarize(average = mean(ARR_DELAY, na.rm = TRUE)) -> arrdelay
arrdelay %>%
ggplot(aes(reorder(airline_name, average), average, fill = airline_name)) + geom_col() + coord_flip() + theme(legend.position = "none")-> arrdelay_plot
arrdelay_plot + labs(x = "Airlines", y = "Average Arrival Delay") + labs(title = "Average Arrival Delays by Airline")
kable(arrdelay)
airline_name | average |
---|---|
Alaska | -0.2692295 |
Allegiant | 10.9571051 |
American | 4.3925949 |
Delta | -0.2109149 |
Frontier | 7.9127737 |
Hawaiian | 0.2750022 |
JetBlue | 9.8862939 |
Southwest | 2.2528094 |
Spirit | 4.8800388 |
United | 4.5088740 |
This graph shows the percentage of flights of each airline that were canceled over the past five years. Allegiant again performed terribly and had significantly the most amount of cancellations by almost a 1.5% difference to its next closest competitor. Delta Airlines on the other hand performed very well as they ranked the second best in cancellation percentage only behind Hawaiian Airlines.
ggplot(fly_del_can_combo, aes(reorder(airline_name, percent_cancellations), percent_cancellations, fill = airline_name)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Percent of Flights Cancelled") + labs(title = "Percentage of Flights Cancelled by Airline") + theme(legend.position = "none")
fly_del_can_combo[c("airline_name", "percent_cancellations")] %>%
arrange(desc(percent_cancellations)) -> cancel_table
kable(cancel_table)
airline_name | percent_cancellations |
---|---|
Allegiant | 0.0461494 |
Southwest | 0.0315498 |
American | 0.0286841 |
JetBlue | 0.0248924 |
Frontier | 0.0238102 |
United | 0.0202040 |
Spirit | 0.0198401 |
Alaska | 0.0175777 |
Delta | 0.0127503 |
Hawaiian | 0.0100537 |
Throughout this analysis some constants held true. Delta Airlines performed well across the board and earned itself the title as the best on-time performance over the past five years. On the other hand Allegiant significantly underperformed in all three different categories of on-time performance, average arrival delays, and cancellations. This again has supported the fact that Allegiant airline is a unique airline when combined with that they do not control any top 1000 routes which was found in the market-share analysis.
To continue to the next analysis of the airlines twitter sentiment click here