1. import packages
library(tidyr)
library(dplyr,warn.conflicts = FALSE)
options(dplyr.summarise.inform = FALSE)
library(stringr)
library(ggplot2)
  1. create csv in excel and add to github

  2. import csv from github

mydata <- read.csv("https://raw.githubusercontent.com/arinolan/assignment-4/main/airline%20data.csv")
mydata[2,1] <- mydata[1,1]
mydata[5,1] <- mydata[4,1]
mydata[,2] <- sapply(mydata[,2], str_replace, " ", ".")
mydata
##         X     X.1 los.angeles phoenix san.diego san.francisco seattle
## 1  alaska on.time         497     221       212           503    1841
## 2  alaska delayed          62      12        20           102     305
## 3                          NA      NA        NA            NA      NA
## 4 am west on.time         694    4840       383           320     201
## 5 am west delayed         117     415        65           129      61
  1. clean data
clean_mydata <- mydata %>%
  na.omit() %>%
  rename(airline = X, status = X.1) %>%
  gather("dest.city", "n", 3:7) %>%
  spread(status, "n") %>%
  mutate(total.arrivals = delayed + on.time, on.time.percent.rate = on.time/total.arrivals*100) %>%
  arrange(desc(total.arrivals))

clean_mydata[,2] <- sapply(clean_mydata[,2], str_replace, "\\.", " ")
clean_mydata
##    airline     dest.city delayed on.time total.arrivals on.time.percent.rate
## 1  am west       phoenix     415    4840           5255             92.10276
## 2   alaska       seattle     305    1841           2146             85.78751
## 3  am west   los angeles     117     694            811             85.57337
## 4   alaska san francisco     102     503            605             83.14050
## 5   alaska   los angeles      62     497            559             88.90877
## 6  am west san francisco     129     320            449             71.26949
## 7  am west     san diego      65     383            448             85.49107
## 8  am west       seattle      61     201            262             76.71756
## 9   alaska       phoenix      12     221            233             94.84979
## 10  alaska     san diego      20     212            232             91.37931
  1. airline activity

best and worst arrival by destination

best_dest <- clean_mydata %>% 
  filter(on.time.percent.rate == max(on.time.percent.rate))
best_dest
##   airline dest.city delayed on.time total.arrivals on.time.percent.rate
## 1  alaska   phoenix      12     221            233             94.84979
worst_dest <- clean_mydata %>%
  filter(on.time.percent.rate == min(on.time.percent.rate))
worst_dest
##   airline     dest.city delayed on.time total.arrivals on.time.percent.rate
## 1 am west san francisco     129     320            449             71.26949

most and least timely arrival

timely <- clean_mydata %>% 
  group_by(airline) %>% 
  summarise(on.time.airline.perecent.rate = sum(on.time) / sum(total.arrivals)*100) %>% 
  filter(on.time.airline.perecent.rate == max(on.time.airline.perecent.rate))
timely
## # A tibble: 1 × 2
##   airline on.time.airline.perecent.rate
##   <chr>                           <dbl>
## 1 am west                          89.1
least_timely <- clean_mydata %>% 
  group_by(airline) %>% 
  summarise(on.time.airline.perecent.rate = sum(on.time) / sum(total.arrivals)*100) %>% 
  filter(on.time.airline.perecent.rate == min(on.time.airline.perecent.rate))
least_timely
## # A tibble: 1 × 2
##   airline on.time.airline.perecent.rate
##   <chr>                           <dbl>
## 1 alaska                           86.7

highest to lowest rank

ranked <- clean_mydata %>% 
  group_by(dest.city) %>% 
  summarise(on.time.city.percent.rate = sum(on.time) / sum(total.arrivals)*100) %>% 
  mutate(ranking.on.time = min_rank(desc(on.time.city.percent.rate))) %>% 
  arrange(ranking.on.time)
ranked
## # A tibble: 5 × 3
##   dest.city     on.time.city.percent.rate ranking.on.time
##   <chr>                             <dbl>           <int>
## 1 phoenix                            92.2               1
## 2 san diego                          87.5               2
## 3 los angeles                        86.9               3
## 4 seattle                            84.8               4
## 5 san francisco                      78.1               5