domestic<- read.csv("C:/Users/joyof/Desktop/JOY OFIELU MBA 676/domestic_flights_jan_2016.csv")
head(domestic)
##   FlightDate Carrier TailNum FlightNum Origin        OriginCityName
## 1   1/6/2016      AA  N4YBAA        43    DFW Dallas/Fort Worth, TX
## 2   1/7/2016      AA  N434AA        43    DFW Dallas/Fort Worth, TX
## 3   1/8/2016      AA  N541AA        43    DFW Dallas/Fort Worth, TX
## 4   1/9/2016      AA  N489AA        43    DFW Dallas/Fort Worth, TX
## 5  1/10/2016      AA  N439AA        43    DFW Dallas/Fort Worth, TX
## 6  1/11/2016      AA  N468AA        43    DFW Dallas/Fort Worth, TX
##   OriginState Dest DestCityName DestState CRSDepTime DepTime WheelsOff
## 1          TX  DTW  Detroit, MI        MI       1100    1057      1112
## 2          TX  DTW  Detroit, MI        MI       1100    1056      1110
## 3          TX  DTW  Detroit, MI        MI       1100    1055      1116
## 4          TX  DTW  Detroit, MI        MI       1100    1102      1115
## 5          TX  DTW  Detroit, MI        MI       1100    1240      1300
## 6          TX  DTW  Detroit, MI        MI       1100    1107      1118
##   WheelsOn CRSArrTime ArrTime Cancelled Diverted CRSElapsedTime
## 1     1424       1438    1432         0        0            158
## 2     1416       1438    1426         0        0            158
## 3     1431       1438    1445         0        0            158
## 4     1424       1438    1433         0        0            158
## 5     1617       1438    1631         0        0            158
## 6     1426       1438    1435         0        0            158
##   ActualElapsedTime Distance
## 1               155      986
## 2               150      986
## 3               170      986
## 4               151      986
## 5               171      986
## 6               148      986
library(dplyr)
head(domestic %>% filter(!complete.cases(.)))
##   FlightDate Carrier TailNum FlightNum Origin OriginCityName OriginState
## 1  1/16/2016      AA  N3CXAA        44    SEA    Seattle, WA          WA
## 2  1/23/2016      AA                44    SEA    Seattle, WA          WA
## 3  1/24/2016      AA  N3DGAA        44    SEA    Seattle, WA          WA
## 4  1/25/2016      AA  N3MNAA        44    SEA    Seattle, WA          WA
## 5  1/15/2016      AA  N3JSAA        45    JFK   New York, NY          NY
## 6  1/23/2016      AA                45    JFK   New York, NY          NY
##   Dest DestCityName DestState CRSDepTime DepTime WheelsOff WheelsOn
## 1  JFK New York, NY        NY        640      NA        NA       NA
## 2  JFK New York, NY        NY        640      NA        NA       NA
## 3  JFK New York, NY        NY        645      NA        NA       NA
## 4  JFK New York, NY        NY        645      NA        NA       NA
## 5  SEA  Seattle, WA        WA       1830      NA        NA       NA
## 6  SEA  Seattle, WA        WA       1830      NA        NA       NA
##   CRSArrTime ArrTime Cancelled Diverted CRSElapsedTime ActualElapsedTime
## 1       1501      NA         1        0            321                NA
## 2       1501      NA         1        0            321                NA
## 3       1506      NA         1        0            321                NA
## 4       1506      NA         1        0            321                NA
## 5       2152      NA         1        0            382                NA
## 6       2152      NA         1        0            382                NA
##   Distance
## 1     2422
## 2     2422
## 3     2422
## 4     2422
## 5     2422
## 6     2422
domestic$FlightDate <- as.Date(domestic$FlightDate, format = "%m/%d/%Y")
domestic%>% filter(Cancelled == 0) %>% summarize(maxcrsdep = max(CRSDepTime), maxdep = max(DepTime))
##   maxcrsdep maxdep
## 1      2359   2400
domestic <- domestic %>% 
  mutate(new_CRSDepTime = paste(FlightDate, sprintf("%04d", CRSDepTime)))
domestic$new_CRSDepTime <- as.POSIXct(domestic$new_CRSDepTime, format="%Y-%m-%d %H%M")
head(domestic %>% select(new_CRSDepTime, CRSDepTime))
##        new_CRSDepTime CRSDepTime
## 1 2016-01-06 11:00:00       1100
## 2 2016-01-07 11:00:00       1100
## 3 2016-01-08 11:00:00       1100
## 4 2016-01-09 11:00:00       1100
## 5 2016-01-10 11:00:00       1100
## 6 2016-01-11 11:00:00       1100
domestic <- domestic %>% 
  mutate(new_CRSArrTime = paste(FlightDate, sprintf("%04d", CRSArrTime)))
domestic$new_CRSArrTime <- as.POSIXct(domestic$new_CRSArrTime, format="%Y-%m-%d %H%M")
domestic <- domestic %>% filter(Cancelled == 0) %>% 
  mutate(new_DepTime = paste(FlightDate, sprintf("%04d", DepTime)), new_WheelsOff = paste(FlightDate, sprintf("%04d", WheelsOff)),
         new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn)), new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)))
domestic$new_DepTime <- as.POSIXct(domestic$new_DepTime, format="%Y-%m-%d %H%M")
domestic$new_WheelsOff <- as.POSIXct(domestic$new_WheelsOff, format="%Y-%m-%d %H%M")
domestic$new_WheelsOn <- as.POSIXct(domestic$new_WheelsOn, format="%Y-%m-%d %H%M")
domestic$new_ArrTime <- as.POSIXct(domestic$new_ArrTime, format="%Y-%m-%d %H%M")
head(domestic)
##   FlightDate Carrier TailNum FlightNum Origin        OriginCityName
## 1 2016-01-06      AA  N4YBAA        43    DFW Dallas/Fort Worth, TX
## 2 2016-01-07      AA  N434AA        43    DFW Dallas/Fort Worth, TX
## 3 2016-01-08      AA  N541AA        43    DFW Dallas/Fort Worth, TX
## 4 2016-01-09      AA  N489AA        43    DFW Dallas/Fort Worth, TX
## 5 2016-01-10      AA  N439AA        43    DFW Dallas/Fort Worth, TX
## 6 2016-01-11      AA  N468AA        43    DFW Dallas/Fort Worth, TX
##   OriginState Dest DestCityName DestState CRSDepTime DepTime WheelsOff
## 1          TX  DTW  Detroit, MI        MI       1100    1057      1112
## 2          TX  DTW  Detroit, MI        MI       1100    1056      1110
## 3          TX  DTW  Detroit, MI        MI       1100    1055      1116
## 4          TX  DTW  Detroit, MI        MI       1100    1102      1115
## 5          TX  DTW  Detroit, MI        MI       1100    1240      1300
## 6          TX  DTW  Detroit, MI        MI       1100    1107      1118
##   WheelsOn CRSArrTime ArrTime Cancelled Diverted CRSElapsedTime
## 1     1424       1438    1432         0        0            158
## 2     1416       1438    1426         0        0            158
## 3     1431       1438    1445         0        0            158
## 4     1424       1438    1433         0        0            158
## 5     1617       1438    1631         0        0            158
## 6     1426       1438    1435         0        0            158
##   ActualElapsedTime Distance      new_CRSDepTime      new_CRSArrTime
## 1               155      986 2016-01-06 11:00:00 2016-01-06 14:38:00
## 2               150      986 2016-01-07 11:00:00 2016-01-07 14:38:00
## 3               170      986 2016-01-08 11:00:00 2016-01-08 14:38:00
## 4               151      986 2016-01-09 11:00:00 2016-01-09 14:38:00
## 5               171      986 2016-01-10 11:00:00 2016-01-10 14:38:00
## 6               148      986 2016-01-11 11:00:00 2016-01-11 14:38:00
##           new_DepTime       new_WheelsOff        new_WheelsOn
## 1 2016-01-06 10:57:00 2016-01-06 11:12:00 2016-01-06 14:24:00
## 2 2016-01-07 10:56:00 2016-01-07 11:10:00 2016-01-07 14:16:00
## 3 2016-01-08 10:55:00 2016-01-08 11:16:00 2016-01-08 14:31:00
## 4 2016-01-09 11:02:00 2016-01-09 11:15:00 2016-01-09 14:24:00
## 5 2016-01-10 12:40:00 2016-01-10 13:00:00 2016-01-10 16:17:00
## 6 2016-01-11 11:07:00 2016-01-11 11:18:00 2016-01-11 14:26:00
##           new_ArrTime
## 1 2016-01-06 14:32:00
## 2 2016-01-07 14:26:00
## 3 2016-01-08 14:45:00
## 4 2016-01-09 14:33:00
## 5 2016-01-10 16:31:00
## 6 2016-01-11 14:35:00
domestic <-domestic %>% filter(Cancelled == 0) %>%
  mutate(DepDelay = as.integer(difftime(new_DepTime, new_CRSDepTime, units = "mins")))
head(domestic %>% select(CRSDepTime, DepTime, DepDelay))
##   CRSDepTime DepTime DepDelay
## 1       1100    1057       -3
## 2       1100    1056       -4
## 3       1100    1055       -5
## 4       1100    1102        2
## 5       1100    1240      100
## 6       1100    1107        7
domestic <- domestic %>% filter(Cancelled == 0) %>% 
  mutate(DepDelayMinutes = ifelse(DepDelay < 0, 0, DepDelay), 
         DepDel15 = ifelse(DepDelay >= 15, 1, 0))
head(domestic %>% select(DepDelay, DepDelayMinutes, DepDel15))
##   DepDelay DepDelayMinutes DepDel15
## 1       -3               0        0
## 2       -4               0        0
## 3       -5               0        0
## 4        2               2        0
## 5      100             100        1
## 6        7               7        0
domestic <- domestic %>% filter(Cancelled == 0) %>% 
  mutate(TaxiOut = as.integer(difftime(new_WheelsOff, new_DepTime, units = "mins")),
         TaxiIn = as.integer(difftime(new_ArrTime, new_WheelsOn, units = "mins")),
         ArrDelay = as.integer(difftime(new_ArrTime, new_CRSArrTime, units = "mins")),
         ArrDelayMinutes = ifelse(ArrDelay < 0, 0, ArrDelay), 
         ArrDel15 = ifelse(ArrDelay >= 15, 1, 0),
         FlightTimeBuffer = CRSElapsedTime - ActualElapsedTime)
domestic <- domestic %>% filter(Cancelled == 0) %>% 
  mutate(AirTime = ActualElapsedTime - TaxiOut - TaxiIn)
domestic <- domestic %>% filter(Cancelled == 0) %>% 
  mutate(AirSpeed = Distance / (AirTime / 60))
head(domestic)
##   FlightDate Carrier TailNum FlightNum Origin        OriginCityName
## 1 2016-01-06      AA  N4YBAA        43    DFW Dallas/Fort Worth, TX
## 2 2016-01-07      AA  N434AA        43    DFW Dallas/Fort Worth, TX
## 3 2016-01-08      AA  N541AA        43    DFW Dallas/Fort Worth, TX
## 4 2016-01-09      AA  N489AA        43    DFW Dallas/Fort Worth, TX
## 5 2016-01-10      AA  N439AA        43    DFW Dallas/Fort Worth, TX
## 6 2016-01-11      AA  N468AA        43    DFW Dallas/Fort Worth, TX
##   OriginState Dest DestCityName DestState CRSDepTime DepTime WheelsOff
## 1          TX  DTW  Detroit, MI        MI       1100    1057      1112
## 2          TX  DTW  Detroit, MI        MI       1100    1056      1110
## 3          TX  DTW  Detroit, MI        MI       1100    1055      1116
## 4          TX  DTW  Detroit, MI        MI       1100    1102      1115
## 5          TX  DTW  Detroit, MI        MI       1100    1240      1300
## 6          TX  DTW  Detroit, MI        MI       1100    1107      1118
##   WheelsOn CRSArrTime ArrTime Cancelled Diverted CRSElapsedTime
## 1     1424       1438    1432         0        0            158
## 2     1416       1438    1426         0        0            158
## 3     1431       1438    1445         0        0            158
## 4     1424       1438    1433         0        0            158
## 5     1617       1438    1631         0        0            158
## 6     1426       1438    1435         0        0            158
##   ActualElapsedTime Distance      new_CRSDepTime      new_CRSArrTime
## 1               155      986 2016-01-06 11:00:00 2016-01-06 14:38:00
## 2               150      986 2016-01-07 11:00:00 2016-01-07 14:38:00
## 3               170      986 2016-01-08 11:00:00 2016-01-08 14:38:00
## 4               151      986 2016-01-09 11:00:00 2016-01-09 14:38:00
## 5               171      986 2016-01-10 11:00:00 2016-01-10 14:38:00
## 6               148      986 2016-01-11 11:00:00 2016-01-11 14:38:00
##           new_DepTime       new_WheelsOff        new_WheelsOn
## 1 2016-01-06 10:57:00 2016-01-06 11:12:00 2016-01-06 14:24:00
## 2 2016-01-07 10:56:00 2016-01-07 11:10:00 2016-01-07 14:16:00
## 3 2016-01-08 10:55:00 2016-01-08 11:16:00 2016-01-08 14:31:00
## 4 2016-01-09 11:02:00 2016-01-09 11:15:00 2016-01-09 14:24:00
## 5 2016-01-10 12:40:00 2016-01-10 13:00:00 2016-01-10 16:17:00
## 6 2016-01-11 11:07:00 2016-01-11 11:18:00 2016-01-11 14:26:00
##           new_ArrTime DepDelay DepDelayMinutes DepDel15 TaxiOut TaxiIn
## 1 2016-01-06 14:32:00       -3               0        0      15      8
## 2 2016-01-07 14:26:00       -4               0        0      14     10
## 3 2016-01-08 14:45:00       -5               0        0      21     14
## 4 2016-01-09 14:33:00        2               2        0      13      9
## 5 2016-01-10 16:31:00      100             100        1      20     14
## 6 2016-01-11 14:35:00        7               7        0      11      9
##   ArrDelay ArrDelayMinutes ArrDel15 FlightTimeBuffer AirTime AirSpeed
## 1       -6               0        0                3     132 448.1818
## 2      -12               0        0                8     126 469.5238
## 3        7               7        0              -12     135 438.2222
## 4       -5               0        0                7     129 458.6047
## 5      113             113        1              -13     137 431.8248
## 6       -3               0        0               10     128 462.1875
domestic2 <- domestic %>% filter(Cancelled == 0)
library(ggplot2)
library(lubridate)
q1 <- domestic2 %>% group_by(day = wday(FlightDate, label=TRUE)) %>% 
  mutate(delayed = ifelse(DepDelay > 0, 1, 0)) %>% 
  summarize(perc_delay = sum(delayed) / n())
q1 %>% ggplot(aes(x = day, y = perc_delay)) + geom_col()

domestic2 %>% mutate(delayed = ifelse(DepDelay > 0, 1, 0), time_part = ifelse(CRSDepTime > 1200, "Afternoon/Evening", "Morning")) %>% 
  group_by(time_part) %>%
  summarize(perc_delay = sum(delayed) / n())
## # A tibble: 2 x 2
##   time_part         perc_delay
##   <chr>                  <dbl>
## 1 Afternoon/Evening      0.397
## 2 Morning                0.262
library(chron)
domestic2%>% group_by(is.weekend(FlightDate)) %>% 
  mutate(delayed = ifelse(DepDelay > 0, 1, 0)) %>% 
  summarize(perc_delay = sum(delayed) / n())
## # A tibble: 2 x 2
##   `is.weekend(FlightDate)` perc_delay
##   <lgl>                         <dbl>
## 1 FALSE                         0.335
## 2 TRUE                          0.353
 domestic2 %>% group_by(day = wday(FlightDate, label=TRUE)) %>% filter(ArrDelayMinutes > 0) %>% count(ArrDelayMinutes > 0) 
## # A tibble: 7 x 3
## # Groups:   day [7]
##   day   `ArrDelayMinutes > 0`     n
##   <ord> <lgl>                 <int>
## 1 Sun   TRUE                  23324
## 2 Mon   TRUE                  20702
## 3 Tue   TRUE                  17541
## 4 Wed   TRUE                  17111
## 5 Thu   TRUE                  19014
## 6 Fri   TRUE                  25110
## 7 Sat   TRUE                  17001
# Total Number of delays by day for January
q3 <- domestic %>% 
  group_by(day = wday(FlightDate, label=TRUE))  %>% select(day, ArrDelayMinutes) %>%
  filter(ArrDelayMinutes > 0) %>%
  summarise(delaysByDay = n())
q3
## # A tibble: 7 x 2
##   day   delaysByDay
##   <ord>       <int>
## 1 Sun         23324
## 2 Mon         20702
## 3 Tue         17541
## 4 Wed         17111
## 5 Thu         19014
## 6 Fri         25110
## 7 Sat         17001
# Number of flights delayed by carrier
q4 <-  domestic %>% 
  filter(ArrDelayMinutes > 0) %>% 
  group_by(Carrier) %>% 
  summarise(delaysByCarrier = n())
q4
## # A tibble: 12 x 2
##    Carrier delaysByCarrier
##    <fct>             <int>
##  1 AA                24914
##  2 AS                 3947
##  3 B6                 9695
##  4 DL                18768
##  5 EV                12426
##  6 F9                 1723
##  7 HA                 1892
##  8 NK                 5270
##  9 OO                17513
## 10 UA                 9766
## 11 VX                 2059
## 12 WN                31830
# Number of flights delayed by state
q5 <-  domestic2%>% 
  group_by(OriginState) %>%
  filter(ArrDelayMinutes > 0) %>% 
  count(ArrDelayMinutes > 0)
q5
## # A tibble: 52 x 3
## # Groups:   OriginState [52]
##    OriginState `ArrDelayMinutes > 0`     n
##    <fct>       <lgl>                 <int>
##  1 AK          TRUE                    667
##  2 AL          TRUE                    508
##  3 AR          TRUE                    306
##  4 AZ          TRUE                   5438
##  5 CA          TRUE                  20986
##  6 CO          TRUE                   5926
##  7 CT          TRUE                    464
##  8 FL          TRUE                  12764
##  9 GA          TRUE                   8791
## 10 HI          TRUE                   1917
## # ... with 42 more rows
# Mean average air speed per air carrier when a flight is delayed

q6 <-  domestic2 %>% filter(ArrDelayMinutes > 0) %>%
  group_by(Carrier) %>%
  summarize(MeanAirSpeed = mean(AirSpeed, na.rm = TRUE))
q6
## # A tibble: 12 x 2
##    Carrier MeanAirSpeed
##    <fct>          <dbl>
##  1 AA              413.
##  2 AS              420.
##  3 B6              414.
##  4 DL              407.
##  5 EV              356.
##  6 F9              434.
##  7 HA              338.
##  8 NK              424.
##  9 OO              361.
## 10 UA              437.
## 11 VX              420.
## 12 WN              405.
# Mean average air speed per air carrier when a flight is not delayed
q7 <-  domestic2 %>% filter(ArrDelayMinutes == 0) %>%
  group_by(Carrier) %>%
  summarize(MeanAirSpeedNoDelay = mean(AirSpeed, na.rm = TRUE))
q7
## # A tibble: 12 x 2
##    Carrier MeanAirSpeedNoDelay
##    <fct>                 <dbl>
##  1 AA                     421.
##  2 AS                     430.
##  3 B6                     413.
##  4 DL                     416.
##  5 EV                     360.
##  6 F9                     439.
##  7 HA                     346.
##  8 NK                     435.
##  9 OO                     372.
## 10 UA                     442.
## 11 VX                     442.
## 12 WN                     416.
air_speed_query_results <- merge(q6, q7)
library(ggvis)
q4 %>% 
  ggvis(x =~Carrier, y =~ delaysByCarrier) %>% 
  layer_bars() 
  #add_axis("x", title="Carriers") %>%
  #add_axis("y", title="Mean AirSpeed”)

Looking at the barplot we can see that there is maximun delay in WN,followed by AA and the minimum is F9

library(ggvis)
air_speed_query_results %>%
  ggvis(x =~Carrier, y =~ MeanAirSpeed) %>%
  layer_points(stroke:="red") %>%
  layer_points(x=~Carrier, y=~MeanAirSpeedNoDelay, stroke:= "blue") %>%
  add_axis("x", title="Carriers") %>%
  add_axis("y", title="Mean Air Speed")

looking at the speed we can see that UA and VX have the highest speedand HA have the min.speed.