Reading

airline_data <- read_csv("Tidying and Transforming Data.csv")
## Warning: Missing column names filled in: 'X8' [8], 'X9' [9], 'X10' [10],
## 'X11' [11], 'X12' [12], 'X13' [13]
## Parsed with column specification:
## cols(
##   AIRLINE = col_character(),
##   STATUS = col_character(),
##   `LOS ANGELES` = col_double(),
##   PHOENIX = col_double(),
##   `SAN DIEGO` = col_double(),
##   `SAN FRANCISCO` = col_double(),
##   SEATTLE = col_number(),
##   X8 = col_logical(),
##   X9 = col_logical(),
##   X10 = col_logical(),
##   X11 = col_logical(),
##   X12 = col_logical(),
##   X13 = col_logical()
## )
str(airline_data)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 5 obs. of  13 variables:
##  $ AIRLINE      : chr  "ALASKA" "ALASKA" NA "AM WEST" ...
##  $ STATUS       : chr  "on time" "delayed" NA "on time" ...
##  $ LOS ANGELES  : num  497 62 NA 694 117
##  $ PHOENIX      : num  221 12 NA 4840 415
##  $ SAN DIEGO    : num  212 20 NA 383 65
##  $ SAN FRANCISCO: num  503 102 NA 320 129
##  $ SEATTLE      : num  1841 305 NA 201 61
##  $ X8           : logi  NA NA NA NA NA
##  $ X9           : logi  NA NA NA NA NA
##  $ X10          : logi  NA NA NA NA NA
##  $ X11          : logi  NA NA NA NA NA
##  $ X12          : logi  NA NA NA NA NA
##  $ X13          : logi  NA NA NA NA NA
##  - attr(*, "spec")=
##   .. cols(
##   ..   AIRLINE = col_character(),
##   ..   STATUS = col_character(),
##   ..   `LOS ANGELES` = col_double(),
##   ..   PHOENIX = col_double(),
##   ..   `SAN DIEGO` = col_double(),
##   ..   `SAN FRANCISCO` = col_double(),
##   ..   SEATTLE = col_number(),
##   ..   X8 = col_logical(),
##   ..   X9 = col_logical(),
##   ..   X10 = col_logical(),
##   ..   X11 = col_logical(),
##   ..   X12 = col_logical(),
##   ..   X13 = col_logical()
##   .. )
summary(airline_data)
##    AIRLINE             STATUS           LOS ANGELES       PHOENIX      
##  Length:5           Length:5           Min.   : 62.0   Min.   :  12.0  
##  Class :character   Class :character   1st Qu.:103.2   1st Qu.: 168.8  
##  Mode  :character   Mode  :character   Median :307.0   Median : 318.0  
##                                        Mean   :342.5   Mean   :1372.0  
##                                        3rd Qu.:546.2   3rd Qu.:1521.2  
##                                        Max.   :694.0   Max.   :4840.0  
##                                        NA's   :1       NA's   :1       
##    SAN DIEGO      SAN FRANCISCO      SEATTLE        X8         
##  Min.   : 20.00   Min.   :102.0   Min.   :  61   Mode:logical  
##  1st Qu.: 53.75   1st Qu.:122.2   1st Qu.: 166   NA's:5        
##  Median :138.50   Median :224.5   Median : 253                 
##  Mean   :170.00   Mean   :263.5   Mean   : 602                 
##  3rd Qu.:254.75   3rd Qu.:365.8   3rd Qu.: 689                 
##  Max.   :383.00   Max.   :503.0   Max.   :1841                 
##  NA's   :1        NA's   :1       NA's   :1                    
##     X9            X10            X11            X12         
##  Mode:logical   Mode:logical   Mode:logical   Mode:logical  
##  NA's:5         NA's:5         NA's:5         NA's:5        
##                                                             
##                                                             
##                                                             
##                                                             
##                                                             
##    X13         
##  Mode:logical  
##  NA's:5        
##                
##                
##                
##                
## 

Tidying

table(airline_data$AIRLINE)
## 
##  ALASKA AM WEST 
##       2       2
airline_data$AIRLINE <- factor(airline_data$AIRLINE)
table(airline_data$STATUS)
## 
## delayed on time 
##       2       2
airline_data$STATUS <- factor(airline_data$STATUS)
dim(airline_data)
## [1]  5 13

Preparation for Analysis - Missing values

airline_data_g <- gather(airline_data, CITY, MINUTES, -AIRLINE, -STATUS)
airline_data_g$CITY <- factor(airline_data_g$CITY)
dim(airline_data_g)
## [1] 55  4
head(airline_data_g)
## # A tibble: 6 x 4
##   AIRLINE STATUS  CITY        MINUTES
##   <fct>   <fct>   <fct>         <dbl>
## 1 ALASKA  on time LOS ANGELES     497
## 2 ALASKA  delayed LOS ANGELES      62
## 3 <NA>    <NA>    LOS ANGELES      NA
## 4 AM WEST on time LOS ANGELES     694
## 5 AM WEST delayed LOS ANGELES     117
## 6 ALASKA  on time PHOENIX         221
airline_data_ready <- na.omit(airline_data_g)
airline_data_ready <- spread(airline_data_ready, STATUS, MINUTES)
glimpse(airline_data_ready)
## Observations: 10
## Variables: 4
## $ AIRLINE   <fct> ALASKA, ALASKA, ALASKA, ALASKA, ALASKA, AM WEST, AM ...
## $ CITY      <fct> LOS ANGELES, PHOENIX, SAN DIEGO, SAN FRANCISCO, SEAT...
## $ delayed   <dbl> 62, 12, 20, 102, 305, 117, 415, 65, 129, 61
## $ `on time` <dbl> 497, 221, 212, 503, 1841, 694, 4840, 383, 320, 201
dim(airline_data_ready)
## [1] 10  4
head(airline_data_ready)
## # A tibble: 6 x 4
##   AIRLINE CITY          delayed `on time`
##   <fct>   <fct>           <dbl>     <dbl>
## 1 ALASKA  LOS ANGELES        62       497
## 2 ALASKA  PHOENIX            12       221
## 3 ALASKA  SAN DIEGO          20       212
## 4 ALASKA  SAN FRANCISCO     102       503
## 5 ALASKA  SEATTLE           305      1841
## 6 AM WEST LOS ANGELES       117       694
tail(airline_data_ready)
## # A tibble: 6 x 4
##   AIRLINE CITY          delayed `on time`
##   <fct>   <fct>           <dbl>     <dbl>
## 1 ALASKA  SEATTLE           305      1841
## 2 AM WEST LOS ANGELES       117       694
## 3 AM WEST PHOENIX           415      4840
## 4 AM WEST SAN DIEGO          65       383
## 5 AM WEST SAN FRANCISCO     129       320
## 6 AM WEST SEATTLE            61       201

Analysis-Compare delays across airlines

airline_delay_tbl <- select(airline_data_ready, AIRLINE, delayed)
glimpse(airline_delay_tbl)
## Observations: 10
## Variables: 2
## $ AIRLINE <fct> ALASKA, ALASKA, ALASKA, ALASKA, ALASKA, AM WEST, AM WE...
## $ delayed <dbl> 62, 12, 20, 102, 305, 117, 415, 65, 129, 61
summarize(filter(airline_delay_tbl, AIRLINE=='ALASKA'), Alasks_Avg_delay=mean(delayed))
## # A tibble: 1 x 1
##   Alasks_Avg_delay
##              <dbl>
## 1             100.
summarize(filter(airline_delay_tbl, AIRLINE=='AM WEST'), AM_Avg_delay=mean(delayed))
## # A tibble: 1 x 1
##   AM_Avg_delay
##          <dbl>
## 1         157.