# Download the January 2016 US domestic flight performance data; text columns
# are kept as plain character vectors rather than factors.
flights <- read.csv(
  url("http://media.usm.maine.edu/~suleiman/mba676/domestic_flights_jan_2016.csv"),
  stringsAsFactors = FALSE
)
# str(flights)
# FlightDate arrives as "month/day/4-digit-year" text; store it as a Date.
flights[["FlightDate"]] <- as.Date(flights[["FlightDate"]], format = "%m/%d/%Y")
# str(flights$FlightDate)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Westbound leg: keep the non-cancelled flights from JFK to LAX and, for
# departure, arrival, wheels-off and wheels-on, build a "<FlightDate> HHMM"
# string (clock time zero-padded to four digits). The new columns are still
# character at this point.
w_flights <- flights %>%
  filter(Cancelled == 0, Origin == "JFK", Dest == "LAX") %>%
  mutate(
    w_new_DepTime = paste(FlightDate, sprintf("%04d", DepTime)),
    w_new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)),
    w_new_WheelsOff = paste(FlightDate, sprintf("%04d", WheelsOff)),
    w_new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn))
  )
str(w_flights)
## 'data.frame':    914 obs. of  25 variables:
##  $ FlightDate       : Date, format: "2016-01-01" "2016-01-02" ...
##  $ Carrier          : chr  "AA" "AA" "AA" "AA" ...
##  $ TailNum          : chr  "N795AA" "N797AA" "N786AA" "N797AA" ...
##  $ FlightNum        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Origin           : chr  "JFK" "JFK" "JFK" "JFK" ...
##  $ OriginCityName   : chr  "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
##  $ OriginState      : chr  "NY" "NY" "NY" "NY" ...
##  $ Dest             : chr  "LAX" "LAX" "LAX" "LAX" ...
##  $ DestCityName     : chr  "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
##  $ DestState        : chr  "CA" "CA" "CA" "CA" ...
##  $ CRSDepTime       : int  900 900 900 900 900 900 900 900 900 900 ...
##  $ DepTime          : int  856 857 913 903 850 855 900 855 858 858 ...
##  $ WheelsOff        : int  922 923 935 935 914 906 917 924 915 951 ...
##  $ WheelsOn         : int  1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
##  $ CRSArrTime       : int  1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
##  $ ArrTime          : int  1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CRSElapsedTime   : int  385 385 385 385 394 394 394 394 394 394 ...
##  $ ActualElapsedTime: int  369 380 368 368 386 373 352 362 350 396 ...
##  $ Distance         : int  2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
##  $ w_new_DepTime    : chr  "2016-01-01 0856" "2016-01-02 0857" "2016-01-03 0913" "2016-01-04 0903" ...
##  $ w_new_ArrTime    : chr  "2016-01-01 1205" "2016-01-02 1217" "2016-01-03 1221" "2016-01-04 1211" ...
##  $ w_new_WheelsOff  : chr  "2016-01-01 0922" "2016-01-02 0923" "2016-01-03 0935" "2016-01-04 0935" ...
##  $ w_new_WheelsOn   : chr  "2016-01-01 1159" "2016-01-02 1207" "2016-01-03 1212" "2016-01-04 1206" ...
# Eastbound leg: the mirror image of the westbound step. Keep the
# non-cancelled flights from LAX to JFK and build the same four
# "<FlightDate> HHMM" character columns, prefixed with e_.
e_flights <- flights %>%
  filter(Cancelled == 0, Origin == "LAX", Dest == "JFK") %>%
  mutate(
    e_new_DepTime = paste(FlightDate, sprintf("%04d", DepTime)),
    e_new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)),
    e_new_WheelsOff = paste(FlightDate, sprintf("%04d", WheelsOff)),
    e_new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn))
  )
str(e_flights)
## 'data.frame':    916 obs. of  25 variables:
##  $ FlightDate       : Date, format: "2016-01-01" "2016-01-02" ...
##  $ Carrier          : chr  "AA" "AA" "AA" "AA" ...
##  $ TailNum          : chr  "N791AA" "N788AA" "N790AA" "N794AA" ...
##  $ FlightNum        : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ Origin           : chr  "LAX" "LAX" "LAX" "LAX" ...
##  $ OriginCityName   : chr  "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
##  $ OriginState      : chr  "CA" "CA" "CA" "CA" ...
##  $ Dest             : chr  "JFK" "JFK" "JFK" "JFK" ...
##  $ DestCityName     : chr  "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
##  $ DestState        : chr  "NY" "NY" "NY" "NY" ...
##  $ CRSDepTime       : int  900 900 900 900 900 900 900 900 900 900 ...
##  $ DepTime          : int  856 859 901 901 901 931 1107 905 1121 1046 ...
##  $ WheelsOff        : int  914 920 931 931 931 948 1122 926 1140 1107 ...
##  $ WheelsOn         : int  1646 1719 1717 1726 1737 1747 1911 1730 1937 1919 ...
##  $ CRSArrTime       : int  1730 1730 1730 1729 1729 1729 1729 1729 1729 1729 ...
##  $ ArrTime          : int  1703 1731 1725 1738 1750 1802 1923 1736 1946 1930 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CRSElapsedTime   : int  330 330 330 329 329 329 329 329 329 329 ...
##  $ ActualElapsedTime: int  307 332 324 337 349 331 316 331 325 344 ...
##  $ Distance         : int  2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
##  $ e_new_DepTime    : chr  "2016-01-01 0856" "2016-01-02 0859" "2016-01-03 0901" "2016-01-05 0901" ...
##  $ e_new_ArrTime    : chr  "2016-01-01 1703" "2016-01-02 1731" "2016-01-03 1725" "2016-01-05 1738" ...
##  $ e_new_WheelsOff  : chr  "2016-01-01 0914" "2016-01-02 0920" "2016-01-03 0931" "2016-01-05 0931" ...
##  $ e_new_WheelsOn   : chr  "2016-01-01 1646" "2016-01-02 1719" "2016-01-03 1717" "2016-01-05 1726" ...
# Convert the "<date> HHMM" strings to POSIXct date-times.
# The time zone is pinned to UTC so that parsing does not depend on the
# machine's local zone (a clock time that falls in a local DST gap would
# otherwise parse as NA). The values are wall-clock times as recorded, not
# true UTC instants.
stamp_format <- "%Y-%m-%d %H%M"
w_stamp_cols <- c(
  "w_new_DepTime", "w_new_WheelsOff", "w_new_WheelsOn", "w_new_ArrTime"
)
e_stamp_cols <- c(
  "e_new_DepTime", "e_new_WheelsOff", "e_new_WheelsOn", "e_new_ArrTime"
)
w_flights[w_stamp_cols] <- lapply(
  w_flights[w_stamp_cols], as.POSIXct,
  tz = "UTC", format = stamp_format
)
e_flights[e_stamp_cols] <- lapply(
  e_flights[e_stamp_cols], as.POSIXct,
  tz = "UTC", format = stamp_format
)
# str() describes a single object; a second data frame would be taken as one
# of its options, so each frame gets its own call. (The transcript captured
# below was produced before this change and shows w_flights only.)
str(w_flights)
str(e_flights)
## 'data.frame':    914 obs. of  25 variables:
##  $ FlightDate       : Date, format: "2016-01-01" "2016-01-02" ...
##  $ Carrier          : chr  "AA" "AA" "AA" "AA" ...
##  $ TailNum          : chr  "N795AA" "N797AA" "N786AA" "N797AA" ...
##  $ FlightNum        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Origin           : chr  "JFK" "JFK" "JFK" "JFK" ...
##  $ OriginCityName   : chr  "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
##  $ OriginState      : chr  "NY" "NY" "NY" "NY" ...
##  $ Dest             : chr  "LAX" "LAX" "LAX" "LAX" ...
##  $ DestCityName     : chr  "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
##  $ DestState        : chr  "CA" "CA" "CA" "CA" ...
##  $ CRSDepTime       : int  900 900 900 900 900 900 900 900 900 900 ...
##  $ DepTime          : int  856 857 913 903 850 855 900 855 858 858 ...
##  $ WheelsOff        : int  922 923 935 935 914 906 917 924 915 951 ...
##  $ WheelsOn         : int  1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
##  $ CRSArrTime       : int  1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
##  $ ArrTime          : int  1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CRSElapsedTime   : int  385 385 385 385 394 394 394 394 394 394 ...
##  $ ActualElapsedTime: int  369 380 368 368 386 373 352 362 350 396 ...
##  $ Distance         : int  2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
##  $ w_new_DepTime    : POSIXct, format: "2016-01-01 08:56:00" "2016-01-02 08:57:00" ...
##  $ w_new_ArrTime    : POSIXct, format: "2016-01-01 12:05:00" "2016-01-02 12:17:00" ...
##  $ w_new_WheelsOff  : POSIXct, format: "2016-01-01 09:22:00" "2016-01-02 09:23:00" ...
##  $ w_new_WheelsOn   : POSIXct, format: "2016-01-01 11:59:00" "2016-01-02 12:07:00" ...
# Taxi-out = wheels-off minus departure; taxi-in = arrival minus wheels-on,
# both in whole minutes.
# All four timestamps of a flight were built from the same FlightDate, so when
# the later event of a pair falls after midnight it parses as ~24 h *earlier*
# than the first one. Taking the difference modulo one day (1440 min) maps
# those negative gaps back to the real taxi time and leaves ordinary values
# (and NAs) untouched.
minutes_per_day <- 1440L

# westbound flights
w_flights <- w_flights %>%
  mutate(
    w_TaxiOut = as.integer(
      difftime(w_new_WheelsOff, w_new_DepTime, units = "mins")
    ) %% minutes_per_day,
    w_TaxiIn = as.integer(
      difftime(w_new_ArrTime, w_new_WheelsOn, units = "mins")
    ) %% minutes_per_day
  )

# eastbound flights
e_flights <- e_flights %>%
  mutate(
    e_TaxiOut = as.integer(
      difftime(e_new_WheelsOff, e_new_DepTime, units = "mins")
    ) %% minutes_per_day,
    e_TaxiIn = as.integer(
      difftime(e_new_ArrTime, e_new_WheelsOn, units = "mins")
    ) %% minutes_per_day
  )

# str() describes a single object; a second data frame would be taken as one
# of its options, so each frame gets its own call. (The transcript captured
# below was produced before this change and shows w_flights only.)
str(w_flights)
str(e_flights)
## 'data.frame':    914 obs. of  27 variables:
##  $ FlightDate       : Date, format: "2016-01-01" "2016-01-02" ...
##  $ Carrier          : chr  "AA" "AA" "AA" "AA" ...
##  $ TailNum          : chr  "N795AA" "N797AA" "N786AA" "N797AA" ...
##  $ FlightNum        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Origin           : chr  "JFK" "JFK" "JFK" "JFK" ...
##  $ OriginCityName   : chr  "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
##  $ OriginState      : chr  "NY" "NY" "NY" "NY" ...
##  $ Dest             : chr  "LAX" "LAX" "LAX" "LAX" ...
##  $ DestCityName     : chr  "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
##  $ DestState        : chr  "CA" "CA" "CA" "CA" ...
##  $ CRSDepTime       : int  900 900 900 900 900 900 900 900 900 900 ...
##  $ DepTime          : int  856 857 913 903 850 855 900 855 858 858 ...
##  $ WheelsOff        : int  922 923 935 935 914 906 917 924 915 951 ...
##  $ WheelsOn         : int  1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
##  $ CRSArrTime       : int  1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
##  $ ArrTime          : int  1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CRSElapsedTime   : int  385 385 385 385 394 394 394 394 394 394 ...
##  $ ActualElapsedTime: int  369 380 368 368 386 373 352 362 350 396 ...
##  $ Distance         : int  2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
##  $ w_new_DepTime    : POSIXct, format: "2016-01-01 08:56:00" "2016-01-02 08:57:00" ...
##  $ w_new_ArrTime    : POSIXct, format: "2016-01-01 12:05:00" "2016-01-02 12:17:00" ...
##  $ w_new_WheelsOff  : POSIXct, format: "2016-01-01 09:22:00" "2016-01-02 09:23:00" ...
##  $ w_new_WheelsOn   : POSIXct, format: "2016-01-01 11:59:00" "2016-01-02 12:07:00" ...
##  $ w_TaxiOut        : int  26 26 22 32 24 11 17 29 17 53 ...
##  $ w_TaxiIn         : int  6 10 9 5 15 11 8 5 6 15 ...
# AirTime: elapsed minutes left after removing both taxi segments.
# AirSpeed: Distance divided by AirTime expressed in hours, truncated to an
# integer by as.integer().
w_flights <- w_flights %>%
  mutate(
    w_AirTime = ActualElapsedTime - w_TaxiOut - w_TaxiIn,
    w_AirSpeed = as.integer(Distance / (w_AirTime / 60))
  )
e_flights <- e_flights %>%
  mutate(
    e_AirTime = ActualElapsedTime - e_TaxiOut - e_TaxiIn,
    e_AirSpeed = as.integer(Distance / (e_AirTime / 60))
  )
# str() describes a single object; a second data frame would be taken as one
# of its options, so each frame gets its own call. (The transcript captured
# below was produced before this change and shows w_flights only.)
str(w_flights)
str(e_flights)
## 'data.frame':    914 obs. of  29 variables:
##  $ FlightDate       : Date, format: "2016-01-01" "2016-01-02" ...
##  $ Carrier          : chr  "AA" "AA" "AA" "AA" ...
##  $ TailNum          : chr  "N795AA" "N797AA" "N786AA" "N797AA" ...
##  $ FlightNum        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Origin           : chr  "JFK" "JFK" "JFK" "JFK" ...
##  $ OriginCityName   : chr  "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
##  $ OriginState      : chr  "NY" "NY" "NY" "NY" ...
##  $ Dest             : chr  "LAX" "LAX" "LAX" "LAX" ...
##  $ DestCityName     : chr  "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
##  $ DestState        : chr  "CA" "CA" "CA" "CA" ...
##  $ CRSDepTime       : int  900 900 900 900 900 900 900 900 900 900 ...
##  $ DepTime          : int  856 857 913 903 850 855 900 855 858 858 ...
##  $ WheelsOff        : int  922 923 935 935 914 906 917 924 915 951 ...
##  $ WheelsOn         : int  1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
##  $ CRSArrTime       : int  1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
##  $ ArrTime          : int  1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CRSElapsedTime   : int  385 385 385 385 394 394 394 394 394 394 ...
##  $ ActualElapsedTime: int  369 380 368 368 386 373 352 362 350 396 ...
##  $ Distance         : int  2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
##  $ w_new_DepTime    : POSIXct, format: "2016-01-01 08:56:00" "2016-01-02 08:57:00" ...
##  $ w_new_ArrTime    : POSIXct, format: "2016-01-01 12:05:00" "2016-01-02 12:17:00" ...
##  $ w_new_WheelsOff  : POSIXct, format: "2016-01-01 09:22:00" "2016-01-02 09:23:00" ...
##  $ w_new_WheelsOn   : POSIXct, format: "2016-01-01 11:59:00" "2016-01-02 12:07:00" ...
##  $ w_TaxiOut        : int  26 26 22 32 24 11 17 29 17 53 ...
##  $ w_TaxiIn         : int  6 10 9 5 15 11 8 5 6 15 ...
##  $ w_AirTime        : int  337 344 337 331 347 351 327 328 327 328 ...
##  $ w_AirSpeed       : int  440 431 440 448 427 423 454 452 454 452 ...
# Average air speed of all e-bound and all w-bound coastal flights: a mean
# with 10% trimmed from each end, ignoring NAs. Each is a single number.
w_mean_air_speed <- mean(w_flights$w_AirSpeed, trim = 0.10, na.rm = TRUE)
e_mean_air_speed <- mean(e_flights$e_AirSpeed, trim = 0.10, na.rm = TRUE)
# Keep the per-row copies of the means that the data frames carried before.
w_flights <- w_flights %>% mutate(w_mean_AirSpeed = w_mean_air_speed)
e_flights <- e_flights %>% mutate(e_mean_AirSpeed = e_mean_air_speed)
# Tailwind effect: e-bound mean minus w-bound mean.
# Subtract the two scalars rather than the two constant columns: the frames
# have different row counts (916 vs 914), so the column version recycled one
# vector against the other, raised a "longer object length is not a multiple
# of shorter object length" warning, and returned 916 copies of one value
# (printed as 63 in the original run).
Speed_diff_between_e_and_w_bound_flights <- e_mean_air_speed - w_mean_air_speed
str(Speed_diff_between_e_and_w_bound_flights)
# Scatter plot of e-bound AirSpeed against DepTime, overlaid with a
# linear-model fit (red line) and its standard-error band.
library(ggvis)
e_flights %>%
  ggvis(~DepTime, ~e_AirSpeed) %>%
  layer_points() %>%
  layer_model_predictions(
    model = "lm",
    formula = e_AirSpeed ~ DepTime,
    se = TRUE,
    stroke := "red"
  )
library(ggvis)
# Bar chart of average taxi-out time per flight number (w-bound).
# Note: w_flights is overwritten here by the per-FlightNum summary, which
# holds the mean taxi-out truncated to whole minutes; only flight numbers
# with a positive average are kept.
w_flights <- w_flights %>%
  group_by(FlightNum) %>%
  summarize(w_avg_TaxiOutTime_min = as.integer(mean(w_TaxiOut))) %>%
  filter(w_avg_TaxiOutTime_min > 0)
w_flights %>%
  ggvis(~FlightNum, ~w_avg_TaxiOutTime_min) %>%
  layer_bars()