Tidying and Transforming Data

Load libraries

library(tidyr)
## Warning: package 'tidyr' was built under R version 3.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Load Data

fightstatus <- tbl_df(read.csv("sampleFlightData.csv", stringsAsFactors = FALSE, check.names = FALSE))
#all flight status
print(fightstatus)
## # A tibble: 4 x 7
##    Flight  Status `Los Angeles` Phoenix `San Diego` `San Francisco`
##     <chr>   <chr>         <int>   <int>       <int>           <int>
## 1  Alaska On Time           497     221         212             503
## 2  Alaska Delayed            62      12          20             102
## 3 AM WEST On Time           694    4840         383             320
## 4 AM WEST Delayed           117     415          65             129
## # ... with 1 more variables: Seattle <int>

Tidy the data per airline

#all arrival status for Alaska
fightstatus.ALASKA <- fightstatus %>%
        filter(Flight == "Alaska") %>%
        gather(Destination, Arrival, 3:7)
## Warning: package 'bindrcpp' was built under R version 3.4.2
print(fightstatus.ALASKA)
## # A tibble: 10 x 4
##    Flight  Status   Destination Arrival
##     <chr>   <chr>         <chr>   <int>
##  1 Alaska On Time   Los Angeles     497
##  2 Alaska Delayed   Los Angeles      62
##  3 Alaska On Time       Phoenix     221
##  4 Alaska Delayed       Phoenix      12
##  5 Alaska On Time     San Diego     212
##  6 Alaska Delayed     San Diego      20
##  7 Alaska On Time San Francisco     503
##  8 Alaska Delayed San Francisco     102
##  9 Alaska On Time       Seattle    1841
## 10 Alaska Delayed       Seattle     305
#all arrival status for AM WEST
fightstatus.AMWEST <- fightstatus %>%
  filter(Flight == "AM WEST") %>%
  gather(Destination, Arrival, 3:7)

print(fightstatus.AMWEST)
## # A tibble: 10 x 4
##     Flight  Status   Destination Arrival
##      <chr>   <chr>         <chr>   <int>
##  1 AM WEST On Time   Los Angeles     694
##  2 AM WEST Delayed   Los Angeles     117
##  3 AM WEST On Time       Phoenix    4840
##  4 AM WEST Delayed       Phoenix     415
##  5 AM WEST On Time     San Diego     383
##  6 AM WEST Delayed     San Diego      65
##  7 AM WEST On Time San Francisco     320
##  8 AM WEST Delayed San Francisco     129
##  9 AM WEST On Time       Seattle     201
## 10 AM WEST Delayed       Seattle      61
#spread the status column
fightstatus.ALASKA <- fightstatus.ALASKA %>%
  spread(Status, Arrival)

Transform the data and calculate the delay percentage

#calculate and add the delay percentage
fightstatus.ALASKA <- fightstatus.ALASKA %>%
  mutate(
    Arrival_Total = fightstatus.ALASKA$Delayed + fightstatus.ALASKA$`On Time`, 
    Delay_Pct = fightstatus.ALASKA$Delayed /  Arrival_Total     
  )

#spread the status column
fightstatus.AMWEST <- fightstatus.AMWEST %>%
  spread(Status, Arrival)

#calculate and add the delay percentage
fightstatus.AMWEST <- fightstatus.AMWEST %>%
  mutate(
    Arrival_Total = fightstatus.AMWEST$Delayed + fightstatus.AMWEST$`On Time`, 
    Delay_Pct = fightstatus.AMWEST$Delayed /  Arrival_Total     
  )

Print Delay Pct

print(fightstatus.ALASKA)
## # A tibble: 5 x 6
##   Flight   Destination Delayed `On Time` Arrival_Total  Delay_Pct
##    <chr>         <chr>   <int>     <int>         <int>      <dbl>
## 1 Alaska   Los Angeles      62       497           559 0.11091234
## 2 Alaska       Phoenix      12       221           233 0.05150215
## 3 Alaska     San Diego      20       212           232 0.08620690
## 4 Alaska San Francisco     102       503           605 0.16859504
## 5 Alaska       Seattle     305      1841          2146 0.14212488
print(fightstatus.AMWEST)
## # A tibble: 5 x 6
##    Flight   Destination Delayed `On Time` Arrival_Total  Delay_Pct
##     <chr>         <chr>   <int>     <int>         <int>      <dbl>
## 1 AM WEST   Los Angeles     117       694           811 0.14426634
## 2 AM WEST       Phoenix     415      4840          5255 0.07897241
## 3 AM WEST     San Diego      65       383           448 0.14508929
## 4 AM WEST San Francisco     129       320           449 0.28730512
## 5 AM WEST       Seattle      61       201           262 0.23282443