library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
flt <- read.csv("Assignment5.csv", na.strings = "")
flt
## ï.. X Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1 ALASKA on time 497 221 212 503 1841
## 2 <NA> delayed 62 12 20 102 305
## 3 <NA> <NA> NA NA NA NA NA
## 4 AM WEST on time 694 4840 383 302 201
## 5 <NA> delayed 117 415 65 129 61
Use tidyr and dplyr to clean up the table in one fell swoop, and add a column showing on-time performance percentage.
tidy.flt <- flt %>%
drop_na(Los.Angeles) %>%
rename("Airline"=ï.., "Status"=X) %>%
fill(Airline) %>%
gather(City, No.Flights, Los.Angeles:Seattle) %>%
spread(Status, No.Flights) %>%
rename("Delayed"=delayed, "On.Time"= "on time") %>%
mutate(PercentOn.Time = round(
(On.Time / (On.Time + Delayed))*100,1))
## Warning: package 'bindrcpp' was built under R version 3.3.3
tidy.flt
## Airline City Delayed On.Time PercentOn.Time
## 1 ALASKA Los.Angeles 62 497 88.9
## 2 ALASKA Phoenix 12 221 94.8
## 3 ALASKA San.Diego 20 212 91.4
## 4 ALASKA San.Francisco 102 503 83.1
## 5 ALASKA Seattle 305 1841 85.8
## 6 AM WEST Los.Angeles 117 694 85.6
## 7 AM WEST Phoenix 415 4840 92.1
## 8 AM WEST San.Diego 65 383 85.5
## 9 AM WEST San.Francisco 129 302 70.1
## 10 AM WEST Seattle 61 201 76.7
See total flights for each airline
tidy.flt %>%
group_by(Airline) %>%
summarise(Total.Flights = sum(Delayed) + sum(On.Time))
## # A tibble: 2 x 2
## Airline Total.Flights
## <fctr> <int>
## 1 ALASKA 3775
## 2 AM WEST 7207
See total on-time flights for each airline
tidy.flt %>%
group_by(Airline) %>%
summarise(On.Time = sum(On.Time))
## # A tibble: 2 x 2
## Airline On.Time
## <fctr> <int>
## 1 ALASKA 3274
## 2 AM WEST 6420
See total delayed flights for each airline
tidy.flt %>%
group_by(Airline) %>%
summarise(Delayed = sum(Delayed))
## # A tibble: 2 x 2
## Airline Delayed
## <fctr> <int>
## 1 ALASKA 501
## 2 AM WEST 787
Grab the weighted average of on-time performance to see which airline has been better overall.
At first I only averaged the OnTimePerformance percentages by City and Alaska Airlines actually looked like it had better on-time performance. However, since weighted average is better suited and since AM West was so strong and voluminous in Phoenix (92.1%), it pulled their weighted average up.
It also appears that Alaska Airlines had one of their worst on-time performances in Seattle, which had more than triple the number of flights compared to the other citys where the company flew.
tidy.flt %>%
group_by(Airline) %>%
summarise(AvgPercentOnTime = round(sum(On.Time) / sum(sum(On.Time)+sum(Delayed))*100,1))
## # A tibble: 2 x 2
## Airline AvgPercentOnTime
## <fctr> <dbl>
## 1 ALASKA 86.7
## 2 AM WEST 89.1