Load packages.

library(knitr)
library(stringr)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.4
library(RCurl)
## Loading required package: bitops
## 
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
## 
##     complete

Import CSV, column header = True.

x <- getURL("https://raw.githubusercontent.com/spsstudent15/2016-01-607-06/master/flights.csv")
flights<-data.frame(read.csv(text=x, header=T))
flights
##   Airline Arrival Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2  ALASKA delayed          62      12        20           102     305
## 3 AM WEST on time         694    4840       383           320     201
## 4 AM WEST delayed         117     415        65           129      61
#str(flights) #check structure

Reshape data with city columns as factors.

flights2 <- flights %>% 
  gather("City","NumFlights",3:7)
kable(flights2)
Airline Arrival City NumFlights
ALASKA on time Los.Angeles 497
ALASKA delayed Los.Angeles 62
AM WEST on time Los.Angeles 694
AM WEST delayed Los.Angeles 117
ALASKA on time Phoenix 221
ALASKA delayed Phoenix 12
AM WEST on time Phoenix 4840
AM WEST delayed Phoenix 415
ALASKA on time San.Diego 212
ALASKA delayed San.Diego 20
AM WEST on time San.Diego 383
AM WEST delayed San.Diego 65
ALASKA on time San.Francisco 503
ALASKA delayed San.Francisco 102
AM WEST on time San.Francisco 320
AM WEST delayed San.Francisco 129
ALASKA on time Seattle 1841
ALASKA delayed Seattle 305
AM WEST on time Seattle 201
AM WEST delayed Seattle 61

Reshape data with Arrival factors as columns.

flights3 <- flights2 %>% 
  spread(Arrival,NumFlights,fill=NA)
colnames(flights3)<-c("Airline","City","Delayed","OnTime")
kable(flights3)
Airline City Delayed OnTime
ALASKA Los.Angeles 62 497
ALASKA Phoenix 12 221
ALASKA San.Diego 20 212
ALASKA San.Francisco 102 503
ALASKA Seattle 305 1841
AM WEST Los.Angeles 117 694
AM WEST Phoenix 415 4840
AM WEST San.Diego 65 383
AM WEST San.Francisco 129 320
AM WEST Seattle 61 201

Add calculated column for Total Flights.

flights4<-mutate(flights3, TotalFlights=(Delayed+OnTime))
kable(flights4)
Airline City Delayed OnTime TotalFlights
ALASKA Los.Angeles 62 497 559
ALASKA Phoenix 12 221 233
ALASKA San.Diego 20 212 232
ALASKA San.Francisco 102 503 605
ALASKA Seattle 305 1841 2146
AM WEST Los.Angeles 117 694 811
AM WEST Phoenix 415 4840 5255
AM WEST San.Diego 65 383 448
AM WEST San.Francisco 129 320 449
AM WEST Seattle 61 201 262

Add calculated column for Percent On Time.

flights5<-mutate(flights4, PercentOnTime=round((OnTime/TotalFlights),2))
kable(flights5)
Airline City Delayed OnTime TotalFlights PercentOnTime
ALASKA Los.Angeles 62 497 559 0.89
ALASKA Phoenix 12 221 233 0.95
ALASKA San.Diego 20 212 232 0.91
ALASKA San.Francisco 102 503 605 0.83
ALASKA Seattle 305 1841 2146 0.86
AM WEST Los.Angeles 117 694 811 0.86
AM WEST Phoenix 415 4840 5255 0.92
AM WEST San.Diego 65 383 448 0.85
AM WEST San.Francisco 129 320 449 0.71
AM WEST Seattle 61 201 262 0.77

Show percent delay by airline.

sumairline<-aggregate(cbind(Delayed,OnTime,TotalFlights) ~ Airline, data = flights5, sum)
sumairline<-mutate(sumairline,PercentOnTime=round((OnTime/TotalFlights),3))
kable(sumairline)
Airline Delayed OnTime TotalFlights PercentOnTime
ALASKA 501 3274 3775 0.867
AM WEST 787 6438 7225 0.891

Show percent delay by city, ordered by percent on time ascending.

sumcity<-aggregate(cbind(Delayed,OnTime,TotalFlights) ~ City, data = flights5, sum)
sumcity<-mutate(sumcity,PercentOnTime=round((OnTime/TotalFlights),3))
sumcity<-sumcity[ order(sumcity[,5]), ]
kable(sumcity)
City Delayed OnTime TotalFlights PercentOnTime
4 San.Francisco 231 823 1054 0.781
5 Seattle 366 2042 2408 0.848
1 Los.Angeles 179 1191 1370 0.869
3 San.Diego 85 595 680 0.875
2 Phoenix 427 5061 5488 0.922

Show bar plots.

ggplot(flights5, aes(x = Airline, y = PercentOnTime, fill = Airline)) + geom_bar(stat = "identity", color="black") + ggtitle("Percent On Time Airline Arrivals by Airline and City") + facet_wrap(~ City, nrow = 2)+ ylim(0, 1) + theme(panel.grid.minor.x=element_blank(), panel.grid.major.x=element_blank()) +scale_fill_manual(values=c("#56B4E9", "#0000CC"))

ggplot(sumcity, aes(x = City, y = PercentOnTime)) + geom_bar(stat = "identity",fill="#9999CC",color="black") + ggtitle("Percent On Time Airline Arrivals by City")+ theme(panel.grid.minor.x=element_blank(), panel.grid.major.x=element_blank()) + coord_cartesian(ylim = c(.5, 1)) 

ggplot(sumairline, aes(x = Airline, y = TotalFlights)) + geom_bar(stat = "identity",fill="#ADFF2F",color="black") + ggtitle("Total Flights by Airline")+ theme(panel.grid.minor.x=element_blank(), panel.grid.major.x=element_blank())

Conclusion

Alaska Airlines has better on-time arrival percentages for these five cities.

A city comparison shows that San Francisco has the most delays of these cities and Phoenix has the least delays.

The gap between the two airlines is most significant for San Francisco and Seattle. We would need more information on what factors cause a flight to be delayed.

For example, San Francisco and Seattle might have the worst weather of all of these cities, and Alaska airlines may have special equipment or training which helps with poor weather. Alaska also has a smaller total number of flights to these cities, which may contribute to better on-time percentages.