## From, Arrival, Los Angeles,Phoenix,San Diego,San Francisco,Seattle
## ALASKA,on time,497,221,212,503,1841
## ALASKA,delayed,62,12,20,102,305
## AM WEST,on time,694,4840,383,320,201
## AM WEST,delayed,117,415,65,129,61
# use tidyr and dplyr as needed to tidy and transform your data. 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

# Load the arrivals table: one row per airline (From) and arrival status
# (Arrival), with one numeric column per destination city.
arrivals <- read.csv(file = "../data/numbersense.csv")

# Preview the first rows of the raw table.
head(arrivals)
##      From Arrival Los.Angeles Phoenix San.Diego San.Francisco Seattle
## 1  ALASKA on time         497     221       212           503    1841
## 2  ALASKA delayed          62      12        20           102     305
## 3 AM WEST on time         694    4840       383           320     201
## 4 AM WEST delayed         117     415        65           129      61
# Keep only the "delayed" rows, then split that subset by airline.
arrival_delayed <- arrivals %>%
  filter(Arrival == "delayed")
arrival_delayed_ALASKA <- arrival_delayed %>%
  filter(From == "ALASKA")
arrival_delayed_AM_WEST <- arrival_delayed %>%
  filter(From == "AM WEST")

# Summarise the "delayed" values of one destination column across airlines.
#
# Replaces five copy-pasted summarise() calls that differed only in the
# column name.
#
# Args:
#   df:   data frame holding the delayed rows (one row per airline).
#   city: name of the destination column as a string, e.g. "San.Diego".
#
# Returns:
#   A one-row data frame with avg_delay (mean), min_delay, max_delay,
#   sd (standard deviation) and total (number of rows in `df`).
#
# NOTE(review): the "delay" names are kept for compatibility, but the
# values look like per-airline flight counts rather than delay durations;
# confirm against the data source.
summarise_city_delays <- function(df, city) {
  df %>%
    summarise(avg_delay = mean(.data[[city]]),
              min_delay = min(.data[[city]]),
              max_delay = max(.data[[city]]),
              sd        = sd(.data[[city]]),
              total     = n())
}

arrival_delayed_LA  <- summarise_city_delays(arrival_delayed, "Los.Angeles")
arrival_delayed_SD  <- summarise_city_delays(arrival_delayed, "San.Diego")
arrival_delayed_SF  <- summarise_city_delays(arrival_delayed, "San.Francisco")
arrival_delayed_SEA <- summarise_city_delays(arrival_delayed, "Seattle")
arrival_delayed_PHX <- summarise_city_delays(arrival_delayed, "Phoenix")


# Print the per-city summaries, one statement per line, each followed by
# its output. The original printed arrival_delayed_LA twice and never
# printed arrival_delayed_SD; the third entry now shows San Diego.
arrival_delayed_LA
##   avg_delay min_delay max_delay       sd total
## 1      89.5        62       117 38.89087     2
arrival_delayed_SF
##   avg_delay min_delay max_delay       sd total
## 1     115.5       102       129 19.09188     2
arrival_delayed_SD
##   avg_delay min_delay max_delay       sd total
## 1      42.5        20        65 31.81981     2
arrival_delayed_PHX
##   avg_delay min_delay max_delay      sd total
## 1     213.5        12       415 284.964     2
arrival_delayed_SEA
##   avg_delay min_delay max_delay       sd total
## 1       183        61       305 172.5341     2
# Reshape each airline's delayed row from wide (one column per city) to
# long: the city name goes into the key column and the value into
# `all_cities_delayed`.
# Note: the key column is named `arrival_delayed_ALASKA` for ALASKA but
# `arrival_delayed_AM_WEST_all` for AM WEST; only `all_cities_delayed` is
# used by the statistics below.
arrival_delayed_ALASKA_all <- gather(
  arrival_delayed_ALASKA,
  key = "arrival_delayed_ALASKA",
  value = "all_cities_delayed",
  Los.Angeles, San.Diego, San.Francisco, Phoenix, Seattle
)

arrival_delayed_AM_WEST_all <- gather(
  arrival_delayed_AM_WEST,
  key = "arrival_delayed_AM_WEST_all",
  value = "all_cities_delayed",
  Los.Angeles, San.Diego, San.Francisco, Phoenix, Seattle
)

# Aggregate the long-format `all_cities_delayed` column of one airline
# into a single row: mean, min, max, standard deviation, sum and the
# number of rows (cities).
summarise_all_cities <- function(long_df) {
  summarise(long_df,
            avg_delay = mean(all_cities_delayed),
            min_delay = min(all_cities_delayed),
            max_delay = max(all_cities_delayed),
            sd        = sd(all_cities_delayed),
            sum       = sum(all_cities_delayed),
            total     = n())
}

arrival_delayed_AM_WEST_all_stats <-
  summarise_all_cities(arrival_delayed_AM_WEST_all)

arrival_delayed_ALASKA_all_stats <-
  summarise_all_cities(arrival_delayed_ALASKA_all)

# Column-wise summaries of the long-format delayed tables, one airline at
# a time; each call is followed by its output.
summary(arrival_delayed_ALASKA_all)
##       From      Arrival  arrival_delayed_ALASKA all_cities_delayed
##  ALASKA :5   delayed:5   Length:5               Min.   : 12.0     
##  AM WEST:0   on time:0   Class :character       1st Qu.: 20.0     
##                          Mode  :character       Median : 62.0     
##                                                 Mean   :100.2     
##                                                 3rd Qu.:102.0     
##                                                 Max.   :305.0
summary(arrival_delayed_AM_WEST_all)
##       From      Arrival  arrival_delayed_AM_WEST_all all_cities_delayed
##  ALASKA :0   delayed:5   Length:5                    Min.   : 61.0     
##  AM WEST:5   on time:0   Class :character            1st Qu.: 65.0     
##                          Mode  :character            Median :117.0     
##                                                      Mean   :157.4     
##                                                      3rd Qu.:129.0     
##                                                      Max.   :415.0
# Show the one-row aggregate statistics for each airline (AM WEST first,
# then ALASKA), each followed by its output.
print(arrival_delayed_AM_WEST_all_stats)
##   avg_delay min_delay max_delay       sd sum total
## 1     157.4        61       415 147.1625 787     5
print(arrival_delayed_ALASKA_all_stats)
##   avg_delay min_delay max_delay       sd sum total
## 1     100.2        12       305 120.0175 501     5

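Comparing the summaries and basic statistics for the delayed subset, it can be inferred that the delays for AM WEST were more substantial than those for ALASKA across the five cities: AM WEST has a higher mean (157.4 vs. 100.2), minimum (61 vs. 12), maximum (415 vs. 305), and standard deviation (147.2 vs. 120.0). Note that this comparison uses only the "delayed" rows; it does not take each airline's "on time" figures into account, so it reflects absolute numbers rather than delay rates.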