# Loading Packages
library(readr)
library(tidyverse)
library(tidytext)
library(knitr)
library(ggthemr)
library(ggplot2)

# Loading Datasets
read.csv("alljoined_airlines.csv") -> alljoined
read.csv("airline_key.csv")-> airline_key

# Adding New Column to alljoined with airline name instead of code
alljoined %>% 
  full_join(airline_key, by = "OP_UNIQUE_CARRIER") -> alljoined2

# Color Theme
ggthemr('dust', type = 'outer')
set_swatch(c("#fc8d27", "#f28422", "#e97c1f", "#df741b", "#d66d18", "#cd6517", "#c45d10", "#ba550f", "#b14d0c", "#a74507", "#9e3d03"))
#### Actual Analysis of Finding Cancellation & Delay Percentages (adjusted for # of flights)

# All Flights Counted by Airline
alljoined2 %>% 
  group_by(airline_name) %>% 
  summarize(count = n()) %>% 
  arrange(desc(count)) -> all_flights

# How Many flight cancellations compared to number of flights by airline
alljoined2 %>% 
  group_by(airline_name) %>% 
  summarize(cancel_count = sum(CANCELLED)) %>% 
  arrange(desc(cancel_count)) -> all_canceled

# How many flight delays compared to number of flights by airline
alljoined2 %>% 
  group_by(airline_name) %>% 
  filter(ARR_DELAY > 0) %>% 
  summarize(count = n()) %>% 
  arrange(desc(count)) -> all_delayed

# How many flight early compared to number of flights by airline
alljoined2 %>% 
  group_by(airline_name) %>% 
  filter(ARR_DELAY < 0) %>% 
  summarize(count = n()) %>% 
  arrange(desc(count)) -> all_early

# How many flight on-time compared to number of flights by airline
alljoined2 %>% 
  group_by(airline_name) %>% 
  filter(ARR_DELAY == 0) %>% 
  summarize(count = n()) %>% 
  arrange(desc(count)) -> all_ontime

# How many flights on-time or early compared to number of flights by airline
alljoined2 %>% 
  group_by(airline_name) %>% 
  filter(ARR_DELAY <= 0) %>% 
  summarize(count = n()) %>% 
  arrange(desc(count)) -> all_combo

## Made combined dataset of flight, delay, and cancellation counts by airline
all_flights %>% 
  full_join(all_delayed, by = "airline_name") %>% 
  full_join(all_canceled, by = "airline_name") %>% 
  full_join(all_early, by = "airline_name") %>% 
  full_join(all_ontime, by = "airline_name") %>%
  full_join(all_combo, by = "airline_name") -> fly_del_can_combo
  
colnames(fly_del_can_combo)[2] = "flight_count"
colnames(fly_del_can_combo)[3] = "delay_count"
colnames(fly_del_can_combo)[5] = "early_count"
colnames(fly_del_can_combo)[6] = "ontime_count"
colnames(fly_del_can_combo)[7] = "early_ontime_count"


## Adding Rows with math to find the counts/total flights

# percent cancellations
fly_del_can_combo$percent_cancellations <- (fly_del_can_combo$cancel_count / fly_del_can_combo$flight_count)  

# percent delayed
fly_del_can_combo$percent_delayed <- (fly_del_can_combo$delay_count / fly_del_can_combo$flight_count)  

# percent early
fly_del_can_combo$percent_early <- (fly_del_can_combo$early_count / fly_del_can_combo$flight_count)  

# percent on-time
fly_del_can_combo$percent_ontime <- (fly_del_can_combo$ontime_count / fly_del_can_combo$flight_count)

# percent early or on_time
fly_del_can_combo$percent_early_ontime <- (fly_del_can_combo$early_ontime_count / fly_del_can_combo$flight_count)

This part of this airline analysis project includes an analysis of the top ten domestic airlines on-time performance. Using the same flight data as the market share analysis an explanation of how the data was collected is located on the home page of this project.

General On-Time Performance Analysis

The first step in the analysis of on-time performance was to find what percentage of an airlines flights arrived late by at least 1 minute. These two plots show this delay percentage and its inverse early/on-time percentage. From this analysis the airline with the best on-time performance is Delta Airlines with 71.5% early/on-time flights and the worst Allegiant Airlines with 55.5% early/on-time flights.

Delta airlines is significantly the best on-time performance of any airline over the past five years with its second closest competitor having a 5.4% worse early/on-time performance. This significant insight was used to build a comprehensive advertising campaign including a TV commercial spot and digital posters for the Atlanta Airport. To learn more about this project click here.

ggplot(fly_del_can_combo, aes(reorder(airline_name, percent_delayed), percent_delayed, fill = airline_name)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Percent of Flights Delayed") + labs(title = "Percentage of Flights Delayed by Airline") + theme(legend.position = "none")

ggplot(fly_del_can_combo, aes(reorder(airline_name, percent_early_ontime), percent_early_ontime, fill = airline_name)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Percent Early or On-Time") + labs(title = "Percentage of Flights Early or On-Time") + theme(legend.position = "none")

fly_del_can_combo[c("airline_name", "percent_delayed", "percent_early_ontime")] %>% 
  arrange(desc(percent_delayed)) -> delayed_table
kable(delayed_table)
airline_name percent_delayed percent_early_ontime
Allegiant 0.3952445 0.5557034
JetBlue 0.3812002 0.5907242
Frontier 0.3728175 0.6019841
Alaska 0.3395544 0.6404478
Southwest 0.3337388 0.6328287
American 0.3335979 0.6352442
Spirit 0.3310017 0.6472707
Hawaiian 0.3286193 0.6604214
United 0.3260125 0.6512034
Delta 0.2703147 0.7150261


Analyzing the Mean of Arrival Delays

When looking at the average arrival delays for each of the airlines Allegiant stood out as having the longest arrival delay time. This means that when Allegiant had a delay it lasted the longest amount of time on average. Generally the top three worst on-time performance airlines also had the longest average arrival delays which is an interesting relationship. Another result from this analysis is that both Delta Airlines and Alaska Airlines have a net positive average arrival delay, meaning that their customers on average get to their destination early. This is an unexpected result for Alaska Airlines because they have the fourth worst on-time performance but the best average arrival delay.

alljoined2 %>% 
  group_by(airline_name) %>% 
  summarize(average = mean(ARR_DELAY, na.rm = TRUE)) -> arrdelay

arrdelay %>% 
  ggplot(aes(reorder(airline_name, average), average, fill = airline_name)) + geom_col() + coord_flip() + theme(legend.position = "none")-> arrdelay_plot

arrdelay_plot + labs(x = "Airlines", y = "Average Arrival Delay") + labs(title = "Average Arrival Delays by Airline")
kable(arrdelay) 
airline_name average
Alaska -0.2692295
Allegiant 10.9571051
American 4.3925949
Delta -0.2109149
Frontier 7.9127737
Hawaiian 0.2750022
JetBlue 9.8862939
Southwest 2.2528094
Spirit 4.8800388
United 4.5088740


Analyzing Flight Cancellations

This graph shows the percentage of flights of each airline that were canceled over the past five years. Allegiant again performed terribly and had significantly the most amount of cancellations by almost a 1.5% difference to its next closest competitor. Delta Airlines on the other hand performed very well as they ranked the second best in cancellation percentage only behind Hawaiian Airlines.

ggplot(fly_del_can_combo, aes(reorder(airline_name, percent_cancellations), percent_cancellations, fill = airline_name)) + geom_col() + coord_flip() + labs(x = "Airlines", y = "Percent of Flights Cancelled") + labs(title = "Percentage of Flights Cancelled by Airline") + theme(legend.position = "none")
fly_del_can_combo[c("airline_name", "percent_cancellations")] %>% 
  arrange(desc(percent_cancellations)) -> cancel_table
kable(cancel_table)
airline_name percent_cancellations
Allegiant 0.0461494
Southwest 0.0315498
American 0.0286841
JetBlue 0.0248924
Frontier 0.0238102
United 0.0202040
Spirit 0.0198401
Alaska 0.0175777
Delta 0.0127503
Hawaiian 0.0100537


Conclusion of On-Time Performance

Throughout this analysis some constants held true. Delta Airlines performed well across the board and earned itself the title as the best on-time performance over the past five years. On the other hand Allegiant significantly underperformed in all three different categories of on-time performance, average arrival delays, and cancellations. This again has supported the fact that Allegiant airline is a unique airline when combined with that they do not control any top 1000 routes which was found in the market-share analysis.

To continue to the next analysis of the airlines twitter sentiment click here