library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
flt <- read.csv("Assignment5.csv", na.strings = "")
flt
##       ï..       X Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2    <NA> delayed          62      12        20           102     305
## 3    <NA>    <NA>          NA      NA        NA            NA      NA
## 4 AM WEST on time         694    4840       383           302     201
## 5    <NA> delayed         117     415        65           129      61

Use tidyr and dplyr to clean up the table in one fell swoop, and add a column showing on-time performance percentage.

tidy.flt <- flt %>% 
  
  drop_na(Los.Angeles) %>% 
  
  rename("Airline"=ï.., "Status"=X) %>% 

  fill(Airline) %>% 
  
  gather(City, No.Flights, Los.Angeles:Seattle) %>%
  
  spread(Status, No.Flights) %>% 
  
  rename("Delayed"=delayed, "On.Time"= "on time") %>% 
  
  mutate(PercentOn.Time = round(
                              (On.Time / (On.Time + Delayed))*100,1))
## Warning: package 'bindrcpp' was built under R version 3.3.3
tidy.flt
##    Airline          City Delayed On.Time PercentOn.Time
## 1   ALASKA   Los.Angeles      62     497           88.9
## 2   ALASKA       Phoenix      12     221           94.8
## 3   ALASKA     San.Diego      20     212           91.4
## 4   ALASKA San.Francisco     102     503           83.1
## 5   ALASKA       Seattle     305    1841           85.8
## 6  AM WEST   Los.Angeles     117     694           85.6
## 7  AM WEST       Phoenix     415    4840           92.1
## 8  AM WEST     San.Diego      65     383           85.5
## 9  AM WEST San.Francisco     129     302           70.1
## 10 AM WEST       Seattle      61     201           76.7

See total flights for each airline

tidy.flt %>% 
  group_by(Airline) %>%  
  
  summarise(Total.Flights = sum(Delayed) + sum(On.Time))
## # A tibble: 2 x 2
##   Airline Total.Flights
##    <fctr>         <int>
## 1  ALASKA          3775
## 2 AM WEST          7207

See total on-time flights for each airline

tidy.flt %>% 
  group_by(Airline) %>% 
  
  summarise(On.Time = sum(On.Time))
## # A tibble: 2 x 2
##   Airline On.Time
##    <fctr>   <int>
## 1  ALASKA    3274
## 2 AM WEST    6420

See total delayed flights for each airline

tidy.flt %>% 
  group_by(Airline) %>% 
  
  summarise(Delayed = sum(Delayed))
## # A tibble: 2 x 2
##   Airline Delayed
##    <fctr>   <int>
## 1  ALASKA     501
## 2 AM WEST     787

Grab the weighted average of on-time performance to see which airline has been better overall.

At first I only averaged the OnTimePerformance percentages by City and Alaska Airlines actually looked like it had better on-time performance. However, since weighted average is better suited and since AM West was so strong and voluminous in Phoenix (92.1%), it pulled their weighted average up.

It also appears that Alaska Airlines had one of their worst on-time performances in Seattle, which had more than triple the number of flights compared to the other citys where the company flew.

tidy.flt %>% 
  group_by(Airline) %>% 
  
  summarise(AvgPercentOnTime = round(sum(On.Time) / sum(sum(On.Time)+sum(Delayed))*100,1))
## # A tibble: 2 x 2
##   Airline AvgPercentOnTime
##    <fctr>            <dbl>
## 1  ALASKA             86.7
## 2 AM WEST             89.1