Assignment Overview
The chart below describes arrival delays for two airlines across five destinations.
knitr::include_graphics('/Users/katieevers/Desktop/airlineTable.png')

Your task is to:
1. Create a .CSV file that includes all of the information above. You’re encouraged to use a “wide” structure similar to how the information appears above, so that you can practice tidying and transformations as described below.
2. Read the information from your.CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data.
3. Perform analysis to compare the arrival delays for the two airlines.
Load libraries and import data
library("tidyr")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("kableExtra")
## Warning: package 'kableExtra' was built under R version 3.5.2
library("ggplot2")
airlines <- read.csv("https://raw.githubusercontent.com/KatherineEvers/airlines/master/airlines.csv", header = TRUE)
airlines
## Airline Status Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 ALASKA delayed 62 12 20 102 305
## 3 AM WEST on time 694 4840 383 320 201
## 4 AM WEST delayed 117 415 54 129 61
Tidy Data
Tidy data so that each variable forms a column and each observation forms a row:
tidyDf <- airlines %>%
#separate data into destination and count columns
gather(Destination, Count, 3:7) %>%
#spread flight status into separate columns
spread(2,4)
#manipulate table style
tidyDf %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Airline
|
Destination
|
delayed
|
on time
|
ALASKA
|
Los.Angeles
|
62
|
497
|
ALASKA
|
Phoenix
|
12
|
221
|
ALASKA
|
San.Diego
|
20
|
212
|
ALASKA
|
San.Francisco
|
102
|
503
|
ALASKA
|
Seattle
|
305
|
1841
|
AM WEST
|
Los.Angeles
|
117
|
694
|
AM WEST
|
Phoenix
|
415
|
4840
|
AM WEST
|
San.Diego
|
54
|
383
|
AM WEST
|
San.Francisco
|
129
|
320
|
AM WEST
|
Seattle
|
61
|
201
|
#arrange rows by destination
tidyDf2 <- tidyDf %>%
arrange(Destination, Airline)
tidyDf2 %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Airline
|
Destination
|
delayed
|
on time
|
ALASKA
|
Los.Angeles
|
62
|
497
|
AM WEST
|
Los.Angeles
|
117
|
694
|
ALASKA
|
Phoenix
|
12
|
221
|
AM WEST
|
Phoenix
|
415
|
4840
|
ALASKA
|
San.Diego
|
20
|
212
|
AM WEST
|
San.Diego
|
54
|
383
|
ALASKA
|
San.Francisco
|
102
|
503
|
AM WEST
|
San.Francisco
|
129
|
320
|
ALASKA
|
Seattle
|
305
|
1841
|
AM WEST
|
Seattle
|
61
|
201
|
Analyze data
#calculate the percentages of delayed and on time flights for each airline and destination
tidyDf3 <- tidyDf2 %>%
mutate(total = delayed + `on time`, percentDelayed = round(delayed / total * 100, 2),
percentOnTime = round(100 * `on time` / total, 2))
tidyDf3 %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Airline
|
Destination
|
delayed
|
on time
|
total
|
percentDelayed
|
percentOnTime
|
ALASKA
|
Los.Angeles
|
62
|
497
|
559
|
11.09
|
88.91
|
AM WEST
|
Los.Angeles
|
117
|
694
|
811
|
14.43
|
85.57
|
ALASKA
|
Phoenix
|
12
|
221
|
233
|
5.15
|
94.85
|
AM WEST
|
Phoenix
|
415
|
4840
|
5255
|
7.90
|
92.10
|
ALASKA
|
San.Diego
|
20
|
212
|
232
|
8.62
|
91.38
|
AM WEST
|
San.Diego
|
54
|
383
|
437
|
12.36
|
87.64
|
ALASKA
|
San.Francisco
|
102
|
503
|
605
|
16.86
|
83.14
|
AM WEST
|
San.Francisco
|
129
|
320
|
449
|
28.73
|
71.27
|
ALASKA
|
Seattle
|
305
|
1841
|
2146
|
14.21
|
85.79
|
AM WEST
|
Seattle
|
61
|
201
|
262
|
23.28
|
76.72
|
#create summary table comparing percentage of total delayed flights for each airline
summaryDf <- tidyDf3 %>%
group_by(Airline) %>%
summarise(totalDelayed = sum(delayed), totalOnTime = sum(`on time`), totalFlights = sum(total)) %>%
mutate(percentDelayed = round(totalDelayed/totalFlights * 100, 2))
summaryDf %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Airline
|
totalDelayed
|
totalOnTime
|
totalFlights
|
percentDelayed
|
ALASKA
|
501
|
3274
|
3775
|
13.27
|
AM WEST
|
776
|
6438
|
7214
|
10.76
|
#order destinations by decreasing percentage of total delayed flights
destinationDelays <- tidyDf3 %>%
group_by(Destination) %>%
summarise(averageDelayPercent = round(mean(percentDelayed),2)) %>%
arrange(desc(averageDelayPercent))
destinationDelays %>%
kable(row.names = TRUE) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
|
Destination
|
averageDelayPercent
|
1
|
San.Francisco
|
22.80
|
2
|
Seattle
|
18.75
|
3
|
Los.Angeles
|
12.76
|
4
|
San.Diego
|
10.49
|
5
|
Phoenix
|
6.53
|
Visualize data
#create bar graphs comparing the number of flights for each airline by destination
plot <- ggplot(tidyDf3, aes(y = total, x = Airline, color = Airline, fill = Airline)) +
geom_bar( stat = "identity") +
geom_text(aes(label = total), vjust = -.3) +
facet_wrap(~Destination) +
ylim(0, 5500) +
ylab("Number of Flights") +
ggtitle("Number of Fights by Airline and Destination")
plot

plot1 <- ggplot(summaryDf, aes(Airline, percentDelayed)) +
geom_bar(aes(fill = Airline), position = "dodge", stat = "identity") +
geom_text(aes(label = percentDelayed), vjust = -.3) +
ylab("Percentage of Delayed Flights") +
ggtitle("Total Percentage of Delayed Flights by Airline")
plot1

#create bar graphs comparing the percentages of delayed flights for each airline by destination
plot2 <- ggplot(tidyDf3, aes(y=percentDelayed, x = Airline, color = Airline, fill = Airline)) +
geom_bar( stat = "identity") +
geom_text(aes(label = percentDelayed), vjust = -.3) +
facet_wrap(~Destination) +
ylim(0, 35) +
ylab("Percentage of Delayed Flights") +
ggtitle("Percentage of Delayed Flights by Airline and Destination")
plot2

#create a line graph comparing the percentages of delayed flights for each airline by destination
plot3 <- ggplot(tidyDf3, aes(x = Destination, y = percentDelayed, group = Airline, color = Airline, shape = Airline)) +
geom_point(aes(shape = Airline)) +
geom_line() +
geom_text(aes(label = percentDelayed), vjust = -.7) +
labs(x = "Destination", y = "Percentage of Delayed flights") +
theme(legend.title=element_blank()) +
ggtitle("Percentage of Delayed Flights by Airline and Destination")
plot3

Based on this analysis, America West Airlines has a smaller percentage of total delayed flights than Alaska Airlines. However, Alaska has smaller percentages of delayed flights than America West by destination. It is notable that American West has 5255 flights to Phoenix with 7.9% of those flights being delayed, while Alaska has only 233 Phoenix flights with 5.15% of flights delayed. In addition, America West has only 262 flights to Seattle with 23.28% of flights delayed, while Alaska has 2146 Seattle flights with 14.21% delayed.