Airline Arrival Status
# Load library
library(dplyr)
library(tidyr)
library(ggplot2)
# Read data from github
airline_arrival <- read.csv('https://raw.githubusercontent.com/saayedalam/Data/master/airline_arrival.csv',
header = TRUE, sep = ",") # csv was created via excel
# Tidy data
colnames(airline_arrival)[1:2] <- c("Airline", "Status") # assign column names
airline_arrival <- na.omit(airline_arrival) # remove NA
airline_arrival[2, 1] <- 'ALASKA' # fill in missing values
airline_arrival[4, 1] <- 'AM WEST'
# Transform data
long_df <- gather(airline_arrival, City, Counts, 3:7)
long_df
## Airline Status City Counts
## 1 ALASKA on time Los.Angeles 497
## 2 ALASKA delayed Los.Angeles 62
## 3 AM WEST on time Los.Angeles 694
## 4 AM WEST delayed Los.Angeles 117
## 5 ALASKA on time Pheonix 221
## 6 ALASKA delayed Pheonix 12
## 7 AM WEST on time Pheonix 4840
## 8 AM WEST delayed Pheonix 415
## 9 ALASKA on time San.Diego 212
## 10 ALASKA delayed San.Diego 20
## 11 AM WEST on time San.Diego 383
## 12 AM WEST delayed San.Diego 65
## 13 ALASKA on time San.Francisco 503
## 14 ALASKA delayed San.Francisco 102
## 15 AM WEST on time San.Francisco 320
## 16 AM WEST delayed San.Francisco 129
## 17 ALASKA on time Seattle 1841
## 18 ALASKA delayed Seattle 305
## 19 AM WEST on time Seattle 201
## 20 AM WEST delayed Seattle 61
## Data Analysis
# AM West had 286 more flights delayed than Alaska.
# 13% of Alaska's flights were delayed.
spread_df <- spread(long_df, Status, Counts)
spread_df %>%
group_by(Airline) %>%
summarise(Total_Delayed = sum(delayed),
Total_OnTime = sum(`on time`),
Percent_Delayed = round((Total_Delayed / (Total_Delayed + Total_OnTime)) * 100, 2))
## # A tibble: 2 x 4
## Airline Total_Delayed Total_OnTime Percent_Delayed
## <fct> <int> <int> <dbl>
## 1 ALASKA 501 3274 13.3
## 2 AM WEST 787 6438 10.9
## Visual Analysis
# Pheonix's airport had the most AM WEST airline arriving.
# Pheonix also had the higest number of delayed flights.
ggplot(long_df, aes(City, Counts, color = Status, shape = Airline)) +
geom_point(stat = "identity", position = "dodge")