“Arrival.csv” is created and written to the working directory.
rm(list=ls())
LosAngeles<-c(497,62,694,117)
Phoenix<-c(221,12,4840,415)
SanDiego<-c(212,20,383,65)
SanFrancisco<-c(503,102,320,129)
Seattle<-c(1841,305,201,61)
status<-c("ontime","delayed","ontime","delayed")
airline<-c("ALASKA","ALASKA","AMWEST","AMWEST")
Arrival<-data.frame(airline,status,LosAngeles,Phoenix,SanDiego,SanFrancisco,Seattle)
#create .csv file.
write.csv(Arrival,"Arrival.csv")“Arrival.csv” loads, “kableExtra” renders the table.
#clear objects
rm(list=ls())
#read .csv, omit numbered column
Arrival<-read.csv("Arrival.csv")[ ,2:8]
#display
require(kableExtra)
arr<-knitr::kable(Arrival,"html",align='lcccccc')%>%
kable_styling("striped", full_width = F)
arr| airline | status | LosAngeles | Phoenix | SanDiego | SanFrancisco | Seattle |
|---|---|---|---|---|---|---|
| ALASKA | ontime | 497 | 221 | 212 | 503 | 1841 |
| ALASKA | delayed | 62 | 12 | 20 | 102 | 305 |
| AMWEST | ontime | 694 | 4840 | 383 | 320 | 201 |
| AMWEST | delayed | 117 | 415 | 65 | 129 | 61 |
Table 1 transforms via the gather function from tidyr, setting key as city and value as frequency; each city becomes a row with unique frequency values. Table 2 is created using the spread function, resulting in a column for each airline and pairs of observations for each city. Table 3 and Table 4 display on-time and delayed subsets of data built by the filter command. Various sums are calculated via mutate and sum commands and displayed in Table 5: flight sums by airline (linetotal), city (citytotal), and status (statustot), as well the combinations airline by city (linecitytot), status by city (statcitytot), and airline by status (linestattot).
require(tidyr)
Arrival3<-gather(Arrival,'LosAngeles','Phoenix','SanDiego','SanFrancisco','Seattle',key='city',value='frequency')
arr3<-knitr::kable(Arrival3,"html",align='lccc')%>%
kable_styling("striped", full_width = F)%>%
scroll_box(height="195px")
arr3| airline | status | city | frequency |
|---|---|---|---|
| ALASKA | ontime | LosAngeles | 497 |
| ALASKA | delayed | LosAngeles | 62 |
| AMWEST | ontime | LosAngeles | 694 |
| AMWEST | delayed | LosAngeles | 117 |
| ALASKA | ontime | Phoenix | 221 |
| ALASKA | delayed | Phoenix | 12 |
| AMWEST | ontime | Phoenix | 4840 |
| AMWEST | delayed | Phoenix | 415 |
| ALASKA | ontime | SanDiego | 212 |
| ALASKA | delayed | SanDiego | 20 |
| AMWEST | ontime | SanDiego | 383 |
| AMWEST | delayed | SanDiego | 65 |
| ALASKA | ontime | SanFrancisco | 503 |
| ALASKA | delayed | SanFrancisco | 102 |
| AMWEST | ontime | SanFrancisco | 320 |
| AMWEST | delayed | SanFrancisco | 129 |
| ALASKA | ontime | Seattle | 1841 |
| ALASKA | delayed | Seattle | 305 |
| AMWEST | ontime | Seattle | 201 |
| AMWEST | delayed | Seattle | 61 |
require(dplyr)
Arrival4<-spread(Arrival3,airline,frequency)
arr4<-knitr::kable(Arrival4,"html",align='lccc')%>%
kable_styling("striped",full_width = F)%>%
scroll_box(height="195px")
arr4| status | city | ALASKA | AMWEST |
|---|---|---|---|
| delayed | LosAngeles | 62 | 117 |
| delayed | Phoenix | 12 | 415 |
| delayed | SanDiego | 20 | 65 |
| delayed | SanFrancisco | 102 | 129 |
| delayed | Seattle | 305 | 61 |
| ontime | LosAngeles | 497 | 694 |
| ontime | Phoenix | 221 | 4840 |
| ontime | SanDiego | 212 | 383 |
| ontime | SanFrancisco | 503 | 320 |
| ontime | Seattle | 1841 | 201 |
#Calculate percentage of flights which are delayed.
delayednum<-sum(Arrival4$ALASKA[ which(Arrival4$status=="delayed") ])+sum(Arrival4$AMWEST[ which(Arrival4$status=="delayed") ])
delayedden<-(sum(Arrival4$ALASKA)+sum(Arrival4$AMWEST))
delayedpct<-round((delayednum/delayedden)*100,1)Arrival4ontime<-filter(Arrival4,status=="ontime")
arr4ontime<-knitr::kable(Arrival4ontime,"html",align='lccc')%>%
kable_styling("striped",full_width = F)
arr4ontime| status | city | ALASKA | AMWEST |
|---|---|---|---|
| ontime | LosAngeles | 497 | 694 |
| ontime | Phoenix | 221 | 4840 |
| ontime | SanDiego | 212 | 383 |
| ontime | SanFrancisco | 503 | 320 |
| ontime | Seattle | 1841 | 201 |
Arrival4delayed<-filter(Arrival4,status=="delayed")
arr4delayed<-knitr::kable(Arrival4delayed,"html",align='lccc')%>%
kable_styling("striped",full_width = F)
arr4delayed| status | city | ALASKA | AMWEST |
|---|---|---|---|
| delayed | LosAngeles | 62 | 117 |
| delayed | Phoenix | 12 | 415 |
| delayed | SanDiego | 20 | 65 |
| delayed | SanFrancisco | 102 | 129 |
| delayed | Seattle | 305 | 61 |
#add frequency total by airline
Arrival3.5<-Arrival3%>%
group_by(airline)%>%
mutate(linetotal=sum(frequency))
#add frequency total by city
Arrival3.5<-Arrival3.5%>%
group_by(city)%>%
mutate(citytotal=sum(frequency))
#add frequency total by city and airline
Arrival3.5<-Arrival3.5%>%
group_by(airline,city)%>%
mutate(linecitytot=sum(frequency))
#add frequency total by status
Arrival3.5<-Arrival3.5%>%
group_by(status)%>%
mutate(statustot=sum(frequency))
#add frequency total by status
Arrival3.5<-Arrival3.5%>%
group_by(status,city)%>%
mutate(statcitytot=sum(frequency))
#add frequency total by status and city
Arrival3.5<-Arrival3.5%>%
group_by(status,city)%>%
mutate(statcitytot=sum(frequency))
#add frequency total by status and airline
Arrival3.5<-Arrival3.5%>%
group_by(status,airline)%>%
mutate(linestattot=sum(frequency))
arr3<-knitr::kable(Arrival3.5,"html",align='lccccccccc')%>%
kable_styling("striped", full_width = T, font_size = 12)%>%
scroll_box(height="195px")
arr3| airline | status | city | frequency | linetotal | citytotal | linecitytot | statustot | statcitytot | linestattot |
|---|---|---|---|---|---|---|---|---|---|
| ALASKA | ontime | LosAngeles | 497 | 3775 | 1370 | 559 | 9712 | 1191 | 3274 |
| ALASKA | delayed | LosAngeles | 62 | 3775 | 1370 | 559 | 1288 | 179 | 501 |
| AMWEST | ontime | LosAngeles | 694 | 7225 | 1370 | 811 | 9712 | 1191 | 6438 |
| AMWEST | delayed | LosAngeles | 117 | 7225 | 1370 | 811 | 1288 | 179 | 787 |
| ALASKA | ontime | Phoenix | 221 | 3775 | 5488 | 233 | 9712 | 5061 | 3274 |
| ALASKA | delayed | Phoenix | 12 | 3775 | 5488 | 233 | 1288 | 427 | 501 |
| AMWEST | ontime | Phoenix | 4840 | 7225 | 5488 | 5255 | 9712 | 5061 | 6438 |
| AMWEST | delayed | Phoenix | 415 | 7225 | 5488 | 5255 | 1288 | 427 | 787 |
| ALASKA | ontime | SanDiego | 212 | 3775 | 680 | 232 | 9712 | 595 | 3274 |
| ALASKA | delayed | SanDiego | 20 | 3775 | 680 | 232 | 1288 | 85 | 501 |
| AMWEST | ontime | SanDiego | 383 | 7225 | 680 | 448 | 9712 | 595 | 6438 |
| AMWEST | delayed | SanDiego | 65 | 7225 | 680 | 448 | 1288 | 85 | 787 |
| ALASKA | ontime | SanFrancisco | 503 | 3775 | 1054 | 605 | 9712 | 823 | 3274 |
| ALASKA | delayed | SanFrancisco | 102 | 3775 | 1054 | 605 | 1288 | 231 | 501 |
| AMWEST | ontime | SanFrancisco | 320 | 7225 | 1054 | 449 | 9712 | 823 | 6438 |
| AMWEST | delayed | SanFrancisco | 129 | 7225 | 1054 | 449 | 1288 | 231 | 787 |
| ALASKA | ontime | Seattle | 1841 | 3775 | 2408 | 2146 | 9712 | 2042 | 3274 |
| ALASKA | delayed | Seattle | 305 | 3775 | 2408 | 2146 | 1288 | 366 | 501 |
| AMWEST | ontime | Seattle | 201 | 7225 | 2408 | 262 | 9712 | 2042 | 6438 |
| AMWEST | delayed | Seattle | 61 | 7225 | 2408 | 262 | 1288 | 366 | 787 |
Figure 1 shows that 13.3% of ALASKA flights were delayed, while 10.9% of AMWEST flights were delayed, with 11.7% of flights delayed overall.In Figure 2 we see that flying into San Franciso provedmost troublesome for both airlines, with 28.7% of AMWEST flights into San Francisco delayed and 16.9% of ALASKA flights into San Francisco delayed. Phoenix proved to be the least challanging to schedules with 5.2% and 7.9% of flights delayed for ALASKA and AMWEST, respectively. Figure 3 illustrates flight delay by city; 1 out of every 5 flights into San Francisco are delayed.
Conclusion: If given the option to choose among these cities for a tightly scheduled flight itinerary, flying into San Francisco should be avoided if possible.
airlinetable<-unique(Arrival3.5[c("airline","status","linetotal","linestattot")])
airlinetable$statpct<-round((airlinetable$linestattot/airlinetable$linetotal)*100,1)
airlinetable2<-airlinetable[ which(airlinetable$status=="delayed"),]
require(ggplot2)
p1<-ggplot(data=airlinetable2, aes(x = airline, y=statpct)) +
geom_bar(stat="identity",fill="steelblue")+
geom_text(aes(label=statpct), vjust=1.6, color="white", size=3.5)+
ylab("percent delayed")+
theme_bw()
require(grid)
require(gridExtra)
t1 <- tableGrob(airlinetable[c("airline","status","statpct")], theme=ttheme_default(base_size=10,padding=unit(c(4,4),"mm")),rows=NULL)
grid.arrange(p1, t1, nrow=1)Flight delay percentage by airline
citytable<-unique(Arrival3.5[c("airline","status","city","frequency","linecitytot")])
citytable$statpct<-round((citytable$frequency/citytable$linecitytot)*100,1)
citytable2<-citytable[ which(citytable$status=="delayed"),]
p1.1<-ggplot(data=citytable2, aes(x = airline, y=statpct,group=city)) +
geom_bar(stat="identity",fill="steelblue")+
geom_text(aes(label=statpct), vjust=1.6, color="white", size=3.5)+
ylab("percent delayed")+
facet_grid(~city)+
theme_bw()+
theme(axis.text.x = element_text(angle = 45,hjust=1))
t1.1 <- tableGrob(citytable[c("airline","status","city","statpct")], theme=ttheme_default(base_size=8,padding=unit(c(1,3),"mm")),rows=NULL)
grid.arrange(p1.1, t1.1, nrow=1, widths = 2:1) Flight delay percentage by airline and city
totcitytable<-unique(Arrival3.5[c("city","status","citytotal","statcitytot")])
totcitytable$citystatpct<-round((totcitytable$statcitytot/totcitytable$citytotal)*100,1)
totcitytable2<-totcitytable[ which(totcitytable$status=="delayed"),]
p1.2<-ggplot(data=totcitytable2, aes(x = city, y=citystatpct,group=city)) +
geom_bar(stat="identity",fill="steelblue")+
geom_text(aes(label=citystatpct), vjust=1.6, color="white", size=3.5)+
ylab("percent delayed")+
theme_bw()
t1.2<-tableGrob(totcitytable[c("city","status","citystatpct")],
theme=ttheme_default(base_size=10,
padding=unit(c(2,3),"mm")),rows=NULL)
grid.arrange(p1.2, t1.2, nrow=1, widths = 2:1) Percent of flights delayed by city