Create

“Arrival.csv” is created and written to the working directory.

rm(list=ls())

LosAngeles<-c(497,62,694,117)
Phoenix<-c(221,12,4840,415)
SanDiego<-c(212,20,383,65)
SanFrancisco<-c(503,102,320,129)
Seattle<-c(1841,305,201,61)
status<-c("ontime","delayed","ontime","delayed")
airline<-c("ALASKA","ALASKA","AMWEST","AMWEST")
Arrival<-data.frame(airline,status,LosAngeles,Phoenix,SanDiego,SanFrancisco,Seattle)
#create .csv file.
write.csv(Arrival,"Arrival.csv")

Read

“Arrival.csv” loads, “kableExtra” renders the table.

#clear objects
rm(list=ls())

#read .csv, omit numbered column
Arrival<-read.csv("Arrival.csv")[ ,2:8]

#display
require(kableExtra)
arr<-knitr::kable(Arrival,"html",align='lcccccc')%>%
  kable_styling("striped", full_width = F)
arr
airline status LosAngeles Phoenix SanDiego SanFrancisco Seattle
ALASKA ontime 497 221 212 503 1841
ALASKA delayed 62 12 20 102 305
AMWEST ontime 694 4840 383 320 201
AMWEST delayed 117 415 65 129 61

Transform

Table 1 transforms via the gather function from tidyr, setting key as city and value as frequency; each city becomes a row with unique frequency values. Table 2 is created using the spread function, resulting in a column for each airline and pairs of observations for each city. Table 3 and Table 4 display on-time and delayed subsets of data built by the filter command. Various sums are calculated via mutate and sum commands and displayed in Table 5: flight sums by airline (linetotal), city (citytotal), and status (statustot), as well the combinations airline by city (linecitytot), status by city (statcitytot), and airline by status (linestattot).

Table 1

require(tidyr)

Arrival3<-gather(Arrival,'LosAngeles','Phoenix','SanDiego','SanFrancisco','Seattle',key='city',value='frequency')
arr3<-knitr::kable(Arrival3,"html",align='lccc')%>%
  kable_styling("striped", full_width = F)%>%
  scroll_box(height="195px")
arr3
airline status city frequency
ALASKA ontime LosAngeles 497
ALASKA delayed LosAngeles 62
AMWEST ontime LosAngeles 694
AMWEST delayed LosAngeles 117
ALASKA ontime Phoenix 221
ALASKA delayed Phoenix 12
AMWEST ontime Phoenix 4840
AMWEST delayed Phoenix 415
ALASKA ontime SanDiego 212
ALASKA delayed SanDiego 20
AMWEST ontime SanDiego 383
AMWEST delayed SanDiego 65
ALASKA ontime SanFrancisco 503
ALASKA delayed SanFrancisco 102
AMWEST ontime SanFrancisco 320
AMWEST delayed SanFrancisco 129
ALASKA ontime Seattle 1841
ALASKA delayed Seattle 305
AMWEST ontime Seattle 201
AMWEST delayed Seattle 61

Table 2

require(dplyr)

Arrival4<-spread(Arrival3,airline,frequency)
arr4<-knitr::kable(Arrival4,"html",align='lccc')%>%
  kable_styling("striped",full_width = F)%>%
  scroll_box(height="195px")
arr4
status city ALASKA AMWEST
delayed LosAngeles 62 117
delayed Phoenix 12 415
delayed SanDiego 20 65
delayed SanFrancisco 102 129
delayed Seattle 305 61
ontime LosAngeles 497 694
ontime Phoenix 221 4840
ontime SanDiego 212 383
ontime SanFrancisco 503 320
ontime Seattle 1841 201
#Calculate percentage of flights which are delayed.
delayednum<-sum(Arrival4$ALASKA[ which(Arrival4$status=="delayed") ])+sum(Arrival4$AMWEST[ which(Arrival4$status=="delayed") ])
delayedden<-(sum(Arrival4$ALASKA)+sum(Arrival4$AMWEST))
delayedpct<-round((delayednum/delayedden)*100,1)

Table 3

Arrival4ontime<-filter(Arrival4,status=="ontime")

arr4ontime<-knitr::kable(Arrival4ontime,"html",align='lccc')%>%
  kable_styling("striped",full_width = F)

arr4ontime
status city ALASKA AMWEST
ontime LosAngeles 497 694
ontime Phoenix 221 4840
ontime SanDiego 212 383
ontime SanFrancisco 503 320
ontime Seattle 1841 201

Table 4

Arrival4delayed<-filter(Arrival4,status=="delayed")

arr4delayed<-knitr::kable(Arrival4delayed,"html",align='lccc')%>%
  kable_styling("striped",full_width = F)

arr4delayed
status city ALASKA AMWEST
delayed LosAngeles 62 117
delayed Phoenix 12 415
delayed SanDiego 20 65
delayed SanFrancisco 102 129
delayed Seattle 305 61

Table 5

#add frequency total by airline
Arrival3.5<-Arrival3%>%
  group_by(airline)%>%
  mutate(linetotal=sum(frequency))
#add frequency total by city
Arrival3.5<-Arrival3.5%>%
  group_by(city)%>%
  mutate(citytotal=sum(frequency))
#add frequency total by city and airline
Arrival3.5<-Arrival3.5%>%
  group_by(airline,city)%>%
  mutate(linecitytot=sum(frequency))
#add frequency total by status
Arrival3.5<-Arrival3.5%>%
  group_by(status)%>%
  mutate(statustot=sum(frequency))
#add frequency total by status
Arrival3.5<-Arrival3.5%>%
  group_by(status,city)%>%
  mutate(statcitytot=sum(frequency))
#add frequency total by status and city
Arrival3.5<-Arrival3.5%>%
  group_by(status,city)%>%
  mutate(statcitytot=sum(frequency))
#add frequency total by status and airline
Arrival3.5<-Arrival3.5%>%
  group_by(status,airline)%>%
  mutate(linestattot=sum(frequency))

arr3<-knitr::kable(Arrival3.5,"html",align='lccccccccc')%>%
  kable_styling("striped", full_width = T, font_size = 12)%>%
  scroll_box(height="195px")
arr3
airline status city frequency linetotal citytotal linecitytot statustot statcitytot linestattot
ALASKA ontime LosAngeles 497 3775 1370 559 9712 1191 3274
ALASKA delayed LosAngeles 62 3775 1370 559 1288 179 501
AMWEST ontime LosAngeles 694 7225 1370 811 9712 1191 6438
AMWEST delayed LosAngeles 117 7225 1370 811 1288 179 787
ALASKA ontime Phoenix 221 3775 5488 233 9712 5061 3274
ALASKA delayed Phoenix 12 3775 5488 233 1288 427 501
AMWEST ontime Phoenix 4840 7225 5488 5255 9712 5061 6438
AMWEST delayed Phoenix 415 7225 5488 5255 1288 427 787
ALASKA ontime SanDiego 212 3775 680 232 9712 595 3274
ALASKA delayed SanDiego 20 3775 680 232 1288 85 501
AMWEST ontime SanDiego 383 7225 680 448 9712 595 6438
AMWEST delayed SanDiego 65 7225 680 448 1288 85 787
ALASKA ontime SanFrancisco 503 3775 1054 605 9712 823 3274
ALASKA delayed SanFrancisco 102 3775 1054 605 1288 231 501
AMWEST ontime SanFrancisco 320 7225 1054 449 9712 823 6438
AMWEST delayed SanFrancisco 129 7225 1054 449 1288 231 787
ALASKA ontime Seattle 1841 3775 2408 2146 9712 2042 3274
ALASKA delayed Seattle 305 3775 2408 2146 1288 366 501
AMWEST ontime Seattle 201 7225 2408 262 9712 2042 6438
AMWEST delayed Seattle 61 7225 2408 262 1288 366 787

Analyze

Figure 1 shows that 13.3% of ALASKA flights were delayed, while 10.9% of AMWEST flights were delayed, with 11.7% of flights delayed overall.In Figure 2 we see that flying into San Franciso provedmost troublesome for both airlines, with 28.7% of AMWEST flights into San Francisco delayed and 16.9% of ALASKA flights into San Francisco delayed. Phoenix proved to be the least challanging to schedules with 5.2% and 7.9% of flights delayed for ALASKA and AMWEST, respectively. Figure 3 illustrates flight delay by city; 1 out of every 5 flights into San Francisco are delayed.

Conclusion: If given the option to choose among these cities for a tightly scheduled flight itinerary, flying into San Francisco should be avoided if possible.

Figure 1

airlinetable<-unique(Arrival3.5[c("airline","status","linetotal","linestattot")])
airlinetable$statpct<-round((airlinetable$linestattot/airlinetable$linetotal)*100,1)
airlinetable2<-airlinetable[ which(airlinetable$status=="delayed"),]

require(ggplot2)
p1<-ggplot(data=airlinetable2, aes(x = airline, y=statpct)) + 
  geom_bar(stat="identity",fill="steelblue")+
  geom_text(aes(label=statpct), vjust=1.6, color="white", size=3.5)+
  ylab("percent delayed")+
  theme_bw()

require(grid)
require(gridExtra)
t1 <- tableGrob(airlinetable[c("airline","status","statpct")], theme=ttheme_default(base_size=10,padding=unit(c(4,4),"mm")),rows=NULL)

grid.arrange(p1, t1, nrow=1)
Flight delay percentage by airline

Flight delay percentage by airline


Figure 2

citytable<-unique(Arrival3.5[c("airline","status","city","frequency","linecitytot")])
citytable$statpct<-round((citytable$frequency/citytable$linecitytot)*100,1)
citytable2<-citytable[ which(citytable$status=="delayed"),]

p1.1<-ggplot(data=citytable2, aes(x = airline, y=statpct,group=city)) + 
  geom_bar(stat="identity",fill="steelblue")+
  geom_text(aes(label=statpct), vjust=1.6, color="white", size=3.5)+
  ylab("percent delayed")+
  facet_grid(~city)+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 45,hjust=1))

t1.1 <- tableGrob(citytable[c("airline","status","city","statpct")], theme=ttheme_default(base_size=8,padding=unit(c(1,3),"mm")),rows=NULL)

grid.arrange(p1.1, t1.1, nrow=1, widths = 2:1)     
Flight delay percentage by airline and city

Flight delay percentage by airline and city


Figure 3

totcitytable<-unique(Arrival3.5[c("city","status","citytotal","statcitytot")])
totcitytable$citystatpct<-round((totcitytable$statcitytot/totcitytable$citytotal)*100,1)
totcitytable2<-totcitytable[ which(totcitytable$status=="delayed"),]

p1.2<-ggplot(data=totcitytable2, aes(x = city, y=citystatpct,group=city)) + 
  geom_bar(stat="identity",fill="steelblue")+
  geom_text(aes(label=citystatpct), vjust=1.6, color="white", size=3.5)+
  ylab("percent delayed")+
  theme_bw()

t1.2<-tableGrob(totcitytable[c("city","status","citystatpct")],
                theme=ttheme_default(base_size=10,
                                     padding=unit(c(2,3),"mm")),rows=NULL)

grid.arrange(p1.2, t1.2, nrow=1, widths = 2:1)   
Percent of flights delayed by city

Percent of flights delayed by city