# Move into the sibling "data" directory (one level up, then down into "data")
# so the CSV can be referenced by its bare file name.
# NOTE(review): setwd() changes the working directory for everything that runs
# afterwards; consider reading via a relative path instead - confirm nothing
# else depends on the changed directory first.
setwd("../")
setwd("data")

# Read the raw flight-count table and print it for inspection.
raw <- readit::readit("raw_data.csv")
raw
## # A tibble: 5 x 7
##   X1      X2      `Los Angeles` Phoenix `San Diego` `San Francisco` Seattle
##   <chr>   <chr>           <int>   <int>       <int>           <int>   <int>
## 1 ALASKA  on time           497     221         212             503    1841
## 2 <NA>    delayed            62      12          20             102     305
## 3 <NA>    <NA>               NA      NA          NA              NA      NA
## 4 AM WEST on time           694    4840         383             320     201
## 5 <NA>    delayed           117     415          65             129      61
# Clean the raw table:
#   1. give the two unnamed leading columns meaningful names;
#   2. drop the blank separator row (identified by its missing status rather
#      than by a hard-coded row number);
#   3. carry each airline name down onto the "delayed" row beneath it, where
#      the source file leaves the airline cell empty.
# Driving the cleanup from the data instead of fixed row/column positions keeps
# it correct if rows are added to or reordered in the CSV.
df <- raw %>%
  dplyr::rename(airline = X1, status = X2) %>%
  dplyr::filter(!is.na(status)) %>%
  tidyr::fill(airline)

# Reshape to long format (one row per airline x status x destination) and add,
# for every airline/destination pair:
#   flights_total          - flights summed over both statuses
#   percent_by_destination - this status's share of flights_total
# Rows are then ordered by airline, status, and descending flight count.
dfTidy <- df %>%
  tidyr::gather(
    destination,
    flights,
    one_of(c("Los Angeles", "Phoenix", "San Diego", "San Francisco", "Seattle"))
  ) %>%
  dplyr::group_by(airline, destination) %>%
  dplyr::mutate(
    flights_total = sum(flights),
    percent_by_destination = flights / flights_total
  ) %>%
  dplyr::ungroup() %>%
  dplyr::arrange(airline, status, dplyr::desc(flights))

# Print the tidy table.
dfTidy
## # A tibble: 20 x 6
##    airline status  destination  flights flights_total percent_by_destinat~
##    <chr>   <chr>   <chr>          <int>         <int>                <dbl>
##  1 ALASKA  delayed Seattle          305          2146               0.142 
##  2 ALASKA  delayed San Francis~     102           605               0.169 
##  3 ALASKA  delayed Los Angeles       62           559               0.111 
##  4 ALASKA  delayed San Diego         20           232               0.0862
##  5 ALASKA  delayed Phoenix           12           233               0.0515
##  6 ALASKA  on time Seattle         1841          2146               0.858 
##  7 ALASKA  on time San Francis~     503           605               0.831 
##  8 ALASKA  on time Los Angeles      497           559               0.889 
##  9 ALASKA  on time Phoenix          221           233               0.948 
## 10 ALASKA  on time San Diego        212           232               0.914 
## 11 AM WEST delayed Phoenix          415          5255               0.0790
## 12 AM WEST delayed San Francis~     129           449               0.287 
## 13 AM WEST delayed Los Angeles      117           811               0.144 
## 14 AM WEST delayed San Diego         65           448               0.145 
## 15 AM WEST delayed Seattle           61           262               0.233 
## 16 AM WEST on time Phoenix         4840          5255               0.921 
## 17 AM WEST on time Los Angeles      694           811               0.856 
## 18 AM WEST on time San Diego        383           448               0.855 
## 19 AM WEST on time San Francis~     320           449               0.713 
## 20 AM WEST on time Seattle          201           262               0.767
# Side-by-side bars of raw flight counts per airline, coloured by status, with
# one facet per destination.
# theme_bw() is a complete theme and replaces every theme setting, so it has to
# come BEFORE theme(legend.position = "top"); in the opposite order the legend
# placement is silently discarded.
plot1 <- ggplot(dfTidy, aes(x = airline, y = flights, fill = status)) +
        geom_bar(stat = "identity", position = "dodge") +
        # coord_flip() +
        theme_bw() +
        theme(legend.position = "top") +
        labs(x = "", y = "") +
        ggtitle("Flight Status by Airlines and 5 Destinations") +
        facet_wrap( ~ destination, ncol = 5)
        # Alternative facet layouts that were tried:
        # facet_wrap(airline ~ destination, ncol = 5, scales = "free_y")
        # facet_grid(airline ~ destination, scales = "free")

# Same data as a 100%-stacked bar chart: position = "fill" rescales each bar to
# 1, and the y axis is labelled as a percentage, so the on-time/delayed split
# can be compared across airlines regardless of flight volume.
# theme_bw() is a complete theme and replaces every theme setting, so it has to
# come BEFORE theme(legend.position = "top"); in the opposite order the legend
# placement is silently discarded.
plot2 <- ggplot(dfTidy, aes(x = airline, y = flights, fill = status)) +
        geom_bar(stat = "identity", position = "fill") +
        # coord_flip() +
        theme_bw() +
        theme(legend.position = "top") +
        labs(x = "", y = "") +
        ggtitle("Flight Status by Airlines and 5 Destinations (in percentage)") +
        scale_y_continuous(labels = scales::percent) +
        facet_wrap(~ destination, ncol = 5)

# Stack the count chart above the percentage chart in a single column.
gridExtra::grid.arrange(plot1, plot2, ncol = 1)

In conclusion, the top flight destination for ALASKA is Seattle, whereas for AM WEST it is Phoenix. For ALASKA, close to 95% of arrivals in Phoenix are on time, which is its highest on-time percentage across the five destinations. For AM WEST, the highest on-time percentage is also at Phoenix, but it is only 92%. The worst destination for delays on ALASKA is San Francisco, where close to 17% of its flights arrive late. San Francisco is also the worst destination for AM WEST, and its share of delayed flights there is considerably higher, at close to 29%. It seems that passengers flying to San Francisco on either airline should expect a high chance of delay and should allow extra time if they need to catch a connecting flight. Overall, ALASKA has a somewhat better on-time rate than AM WEST at all five destinations. It also seems that people flying to Seattle prefer ALASKA, while most prefer AM WEST when flying to Phoenix.