# Load the AA performance data for US flights in January 2016 straight from
# the course server; text columns stay character instead of becoming factors.
flights <- read.csv(
  file = url("http://media.usm.maine.edu/~suleiman/mba676/domestic_flights_jan_2016.csv"),
  stringsAsFactors = FALSE
)
#str(flights)
# FlightDate is read in as "month/day/4-digit-year" text; turn it into a
# proper Date column.
flights[["FlightDate"]] <- as.Date(
  flights[["FlightDate"]],
  format = "%m/%d/%Y"
)
# str(flights$FlightDate)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Westbound subset: rows with Cancelled == 0 that go from JFK to LAX.
# Each clock reading (DepTime, ArrTime, WheelsOff, WheelsOn) is zero-padded to
# four digits and appended to FlightDate, giving "YYYY-MM-DD HHMM" strings.
# NOTE(review): all four strings reuse the same FlightDate, so an event that
# falls on the following calendar day still carries FlightDate -- confirm
# that downstream arithmetic accounts for this.
w_flights <- flights %>%
  filter(Cancelled == 0, Origin == "JFK", Dest == "LAX") %>%
  mutate(
    w_new_DepTime = paste(FlightDate, sprintf("%04d", DepTime)),
    w_new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)),
    w_new_WheelsOff = paste(FlightDate, sprintf("%04d", WheelsOff)),
    w_new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn))
  )
str(w_flights)
## 'data.frame': 914 obs. of 25 variables:
## $ FlightDate : Date, format: "2016-01-01" "2016-01-02" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N795AA" "N797AA" "N786AA" "N797AA" ...
## $ FlightNum : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Origin : chr "JFK" "JFK" "JFK" "JFK" ...
## $ OriginCityName : chr "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
## $ OriginState : chr "NY" "NY" "NY" "NY" ...
## $ Dest : chr "LAX" "LAX" "LAX" "LAX" ...
## $ DestCityName : chr "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
## $ DestState : chr "CA" "CA" "CA" "CA" ...
## $ CRSDepTime : int 900 900 900 900 900 900 900 900 900 900 ...
## $ DepTime : int 856 857 913 903 850 855 900 855 858 858 ...
## $ WheelsOff : int 922 923 935 935 914 906 917 924 915 951 ...
## $ WheelsOn : int 1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
## $ CRSArrTime : int 1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
## $ ArrTime : int 1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 385 385 385 385 394 394 394 394 394 394 ...
## $ ActualElapsedTime: int 369 380 368 368 386 373 352 362 350 396 ...
## $ Distance : int 2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
## $ w_new_DepTime : chr "2016-01-01 0856" "2016-01-02 0857" "2016-01-03 0913" "2016-01-04 0903" ...
## $ w_new_ArrTime : chr "2016-01-01 1205" "2016-01-02 1217" "2016-01-03 1221" "2016-01-04 1211" ...
## $ w_new_WheelsOff : chr "2016-01-01 0922" "2016-01-02 0923" "2016-01-03 0935" "2016-01-04 0935" ...
## $ w_new_WheelsOn : chr "2016-01-01 1159" "2016-01-02 1207" "2016-01-03 1212" "2016-01-04 1206" ...
# Eastbound subset: rows with Cancelled == 0 that go from LAX to JFK, with
# the same "YYYY-MM-DD HHMM" strings built for DepTime, ArrTime, WheelsOff
# and WheelsOn.
# NOTE(review): all four strings reuse the same FlightDate, so an event that
# falls on the following calendar day still carries FlightDate -- confirm
# that downstream arithmetic accounts for this.
e_flights <- flights %>%
  filter(Cancelled == 0, Origin == "LAX", Dest == "JFK") %>%
  mutate(
    e_new_DepTime = paste(FlightDate, sprintf("%04d", DepTime)),
    e_new_ArrTime = paste(FlightDate, sprintf("%04d", ArrTime)),
    e_new_WheelsOff = paste(FlightDate, sprintf("%04d", WheelsOff)),
    e_new_WheelsOn = paste(FlightDate, sprintf("%04d", WheelsOn))
  )
str(e_flights)
## 'data.frame': 916 obs. of 25 variables:
## $ FlightDate : Date, format: "2016-01-01" "2016-01-02" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N791AA" "N788AA" "N790AA" "N794AA" ...
## $ FlightNum : int 2 2 2 2 2 2 2 2 2 2 ...
## $ Origin : chr "LAX" "LAX" "LAX" "LAX" ...
## $ OriginCityName : chr "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
## $ OriginState : chr "CA" "CA" "CA" "CA" ...
## $ Dest : chr "JFK" "JFK" "JFK" "JFK" ...
## $ DestCityName : chr "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
## $ DestState : chr "NY" "NY" "NY" "NY" ...
## $ CRSDepTime : int 900 900 900 900 900 900 900 900 900 900 ...
## $ DepTime : int 856 859 901 901 901 931 1107 905 1121 1046 ...
## $ WheelsOff : int 914 920 931 931 931 948 1122 926 1140 1107 ...
## $ WheelsOn : int 1646 1719 1717 1726 1737 1747 1911 1730 1937 1919 ...
## $ CRSArrTime : int 1730 1730 1730 1729 1729 1729 1729 1729 1729 1729 ...
## $ ArrTime : int 1703 1731 1725 1738 1750 1802 1923 1736 1946 1930 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 330 330 330 329 329 329 329 329 329 329 ...
## $ ActualElapsedTime: int 307 332 324 337 349 331 316 331 325 344 ...
## $ Distance : int 2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
## $ e_new_DepTime : chr "2016-01-01 0856" "2016-01-02 0859" "2016-01-03 0901" "2016-01-05 0901" ...
## $ e_new_ArrTime : chr "2016-01-01 1703" "2016-01-02 1731" "2016-01-03 1725" "2016-01-05 1738" ...
## $ e_new_WheelsOff : chr "2016-01-01 0914" "2016-01-02 0920" "2016-01-03 0931" "2016-01-05 0931" ...
## $ e_new_WheelsOn : chr "2016-01-01 1646" "2016-01-02 1719" "2016-01-03 1717" "2016-01-05 1726" ...
# Convert the "YYYY-MM-DD HHMM" strings in the w_new_* / e_new_* columns to
# POSIXct. Columns are replaced in place, so their order in each data frame
# is unchanged.
stamp_format <- "%Y-%m-%d %H%M"
stamp_fields <- c("DepTime", "WheelsOff", "WheelsOn", "ArrTime")
w_stamp_cols <- paste0("w_new_", stamp_fields)
e_stamp_cols <- paste0("e_new_", stamp_fields)
w_flights[w_stamp_cols] <- lapply(
  w_flights[w_stamp_cols], as.POSIXct, format = stamp_format
)
e_flights[e_stamp_cols] <- lapply(
  e_flights[e_stamp_cols], as.POSIXct, format = stamp_format
)
# str() describes a single object; a second data frame passed to it is taken
# as an option value rather than displayed, so each frame gets its own call.
# w_flights goes last because the output recorded below is for w_flights only.
str(e_flights)
str(w_flights)
## 'data.frame': 914 obs. of 25 variables:
## $ FlightDate : Date, format: "2016-01-01" "2016-01-02" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N795AA" "N797AA" "N786AA" "N797AA" ...
## $ FlightNum : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Origin : chr "JFK" "JFK" "JFK" "JFK" ...
## $ OriginCityName : chr "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
## $ OriginState : chr "NY" "NY" "NY" "NY" ...
## $ Dest : chr "LAX" "LAX" "LAX" "LAX" ...
## $ DestCityName : chr "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
## $ DestState : chr "CA" "CA" "CA" "CA" ...
## $ CRSDepTime : int 900 900 900 900 900 900 900 900 900 900 ...
## $ DepTime : int 856 857 913 903 850 855 900 855 858 858 ...
## $ WheelsOff : int 922 923 935 935 914 906 917 924 915 951 ...
## $ WheelsOn : int 1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
## $ CRSArrTime : int 1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
## $ ArrTime : int 1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 385 385 385 385 394 394 394 394 394 394 ...
## $ ActualElapsedTime: int 369 380 368 368 386 373 352 362 350 396 ...
## $ Distance : int 2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
## $ w_new_DepTime : POSIXct, format: "2016-01-01 08:56:00" "2016-01-02 08:57:00" ...
## $ w_new_ArrTime : POSIXct, format: "2016-01-01 12:05:00" "2016-01-02 12:17:00" ...
## $ w_new_WheelsOff : POSIXct, format: "2016-01-01 09:22:00" "2016-01-02 09:23:00" ...
## $ w_new_WheelsOn : POSIXct, format: "2016-01-01 11:59:00" "2016-01-02 12:07:00" ...
# Taxi times in whole minutes, for westbound and then eastbound flights:
#   TaxiOut = WheelsOff timestamp - DepTime timestamp
#   TaxiIn  = ArrTime timestamp   - WheelsOn timestamp
# Both timestamps of a pair were built from the same FlightDate, so when the
# later event falls after midnight the raw difference is off by a whole day
# (e.g. 23:55 -> 00:05 yields -1430 instead of 10). Taking the difference
# modulo 1440 (minutes per day) recovers the elapsed time; differences that
# are already in 0..1439 are unchanged and NA stays NA.
w_flights <- w_flights %>%
  mutate(
    w_TaxiOut = as.integer(
      difftime(w_new_WheelsOff, w_new_DepTime, units = "mins")
    ) %% 1440L,
    w_TaxiIn = as.integer(
      difftime(w_new_ArrTime, w_new_WheelsOn, units = "mins")
    ) %% 1440L
  )
e_flights <- e_flights %>%
  mutate(
    e_TaxiOut = as.integer(
      difftime(e_new_WheelsOff, e_new_DepTime, units = "mins")
    ) %% 1440L,
    e_TaxiIn = as.integer(
      difftime(e_new_ArrTime, e_new_WheelsOn, units = "mins")
    ) %% 1440L
  )
# str() describes a single object; a second data frame passed to it is taken
# as an option value rather than displayed, so each frame gets its own call.
# w_flights goes last because the output recorded below is for w_flights only.
str(e_flights)
str(w_flights)
## 'data.frame': 914 obs. of 27 variables:
## $ FlightDate : Date, format: "2016-01-01" "2016-01-02" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N795AA" "N797AA" "N786AA" "N797AA" ...
## $ FlightNum : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Origin : chr "JFK" "JFK" "JFK" "JFK" ...
## $ OriginCityName : chr "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
## $ OriginState : chr "NY" "NY" "NY" "NY" ...
## $ Dest : chr "LAX" "LAX" "LAX" "LAX" ...
## $ DestCityName : chr "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
## $ DestState : chr "CA" "CA" "CA" "CA" ...
## $ CRSDepTime : int 900 900 900 900 900 900 900 900 900 900 ...
## $ DepTime : int 856 857 913 903 850 855 900 855 858 858 ...
## $ WheelsOff : int 922 923 935 935 914 906 917 924 915 951 ...
## $ WheelsOn : int 1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
## $ CRSArrTime : int 1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
## $ ArrTime : int 1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 385 385 385 385 394 394 394 394 394 394 ...
## $ ActualElapsedTime: int 369 380 368 368 386 373 352 362 350 396 ...
## $ Distance : int 2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
## $ w_new_DepTime : POSIXct, format: "2016-01-01 08:56:00" "2016-01-02 08:57:00" ...
## $ w_new_ArrTime : POSIXct, format: "2016-01-01 12:05:00" "2016-01-02 12:17:00" ...
## $ w_new_WheelsOff : POSIXct, format: "2016-01-01 09:22:00" "2016-01-02 09:23:00" ...
## $ w_new_WheelsOn : POSIXct, format: "2016-01-01 11:59:00" "2016-01-02 12:07:00" ...
## $ w_TaxiOut : int 26 26 22 32 24 11 17 29 17 53 ...
## $ w_TaxiIn : int 6 10 9 5 15 11 8 5 6 15 ...
# Air time and air speed for westbound and eastbound flights:
#   AirTime  = ActualElapsedTime - TaxiOut - TaxiIn
#   AirSpeed = Distance / (AirTime / 60), truncated to an integer
# The division by 60 presumably turns minutes into hours -- this assumes
# ActualElapsedTime is recorded in minutes, like the taxi times.
# mutate() evaluates its arguments in order, so AirSpeed can use the AirTime
# column created just before it in the same call.
w_flights <- w_flights %>%
  mutate(
    w_AirTime = ActualElapsedTime - w_TaxiOut - w_TaxiIn,
    w_AirSpeed = as.integer(Distance / (w_AirTime / 60))
  )
e_flights <- e_flights %>%
  mutate(
    e_AirTime = ActualElapsedTime - e_TaxiOut - e_TaxiIn,
    e_AirSpeed = as.integer(Distance / (e_AirTime / 60))
  )
# str() describes a single object; a second data frame passed to it is taken
# as an option value rather than displayed, so each frame gets its own call.
# w_flights goes last because the output recorded below is for w_flights only.
str(e_flights)
str(w_flights)
## 'data.frame': 914 obs. of 29 variables:
## $ FlightDate : Date, format: "2016-01-01" "2016-01-02" ...
## $ Carrier : chr "AA" "AA" "AA" "AA" ...
## $ TailNum : chr "N795AA" "N797AA" "N786AA" "N797AA" ...
## $ FlightNum : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Origin : chr "JFK" "JFK" "JFK" "JFK" ...
## $ OriginCityName : chr "New York, NY" "New York, NY" "New York, NY" "New York, NY" ...
## $ OriginState : chr "NY" "NY" "NY" "NY" ...
## $ Dest : chr "LAX" "LAX" "LAX" "LAX" ...
## $ DestCityName : chr "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" "Los Angeles, CA" ...
## $ DestState : chr "CA" "CA" "CA" "CA" ...
## $ CRSDepTime : int 900 900 900 900 900 900 900 900 900 900 ...
## $ DepTime : int 856 857 913 903 850 855 900 855 858 858 ...
## $ WheelsOff : int 922 923 935 935 914 906 917 924 915 951 ...
## $ WheelsOn : int 1159 1207 1212 1206 1201 1157 1144 1152 1142 1219 ...
## $ CRSArrTime : int 1225 1225 1225 1225 1234 1234 1234 1234 1234 1234 ...
## $ ArrTime : int 1205 1217 1221 1211 1216 1208 1152 1157 1148 1234 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CRSElapsedTime : int 385 385 385 385 394 394 394 394 394 394 ...
## $ ActualElapsedTime: int 369 380 368 368 386 373 352 362 350 396 ...
## $ Distance : int 2475 2475 2475 2475 2475 2475 2475 2475 2475 2475 ...
## $ w_new_DepTime : POSIXct, format: "2016-01-01 08:56:00" "2016-01-02 08:57:00" ...
## $ w_new_ArrTime : POSIXct, format: "2016-01-01 12:05:00" "2016-01-02 12:17:00" ...
## $ w_new_WheelsOff : POSIXct, format: "2016-01-01 09:22:00" "2016-01-02 09:23:00" ...
## $ w_new_WheelsOn : POSIXct, format: "2016-01-01 11:59:00" "2016-01-02 12:07:00" ...
## $ w_TaxiOut : int 26 26 22 32 24 11 17 29 17 53 ...
## $ w_TaxiIn : int 6 10 9 5 15 11 8 5 6 15 ...
## $ w_AirTime : int 337 344 337 331 347 351 327 328 327 328 ...
## $ w_AirSpeed : int 440 431 440 448 427 423 454 452 454 452 ...
# Average air speed of all westbound and all eastbound coastal flights:
# a 10%-trimmed mean that ignores NA speeds, computed once per direction.
w_mean_air_speed <- mean(w_flights$w_AirSpeed, trim = 0.10, na.rm = TRUE)
e_mean_air_speed <- mean(e_flights$e_AirSpeed, trim = 0.10, na.rm = TRUE)
# Keep the constant per-row copies of each mean as columns, as before.
w_flights <- w_flights %>% mutate(w_mean_AirSpeed = w_mean_air_speed)
e_flights <- e_flights %>% mutate(e_mean_AirSpeed = e_mean_air_speed)
# Tailwind effect: eastbound mean minus westbound mean, as a single number.
# Subtracting the two columns instead would recycle the 914-row westbound
# column against the 916-row eastbound one and raise a
# "longer object length is not a multiple of shorter object length" warning.
Speed_diff_between_e_and_w_bound_flights <- e_mean_air_speed - w_mean_air_speed
str(Speed_diff_between_e_and_w_bound_flights)
# In the recorded January 2016 run the difference printed as 63.
# Scatter plot of e_AirSpeed against DepTime for eastbound flights, overlaid
# with a linear-model fit drawn in red with its standard-error band.
# NOTE(review): DepTime is the raw integer clock reading (e.g. 856, 1107), so
# equal steps on the x axis are not equal steps in time -- confirm this is
# acceptable for the fitted line.
# ggvis is attached once here; a second library(ggvis) after the plot was
# redundant.
library(ggvis)
e_flights %>%
  ggvis(x = ~DepTime, y = ~e_AirSpeed) %>%
  layer_points() %>%
  layer_model_predictions(
    model = "lm",
    se = TRUE,
    stroke := "red",
    formula = e_AirSpeed ~ DepTime
  )
# Bar graph of the average taxi-out time per westbound flight number.
# The mean is truncated to whole minutes and only flight numbers with a
# positive average are kept. na.rm = TRUE (as in the air-speed means) stops a
# single missing taxi-out value from turning a flight number's average into
# NA, which the filter would then silently drop from the chart.
# Note: this overwrites w_flights with the per-FlightNum summary table.
w_flights <- w_flights %>%
  group_by(FlightNum) %>%
  summarize(
    w_avg_TaxiOutTime_min = as.integer(mean(w_TaxiOut, na.rm = TRUE))
  ) %>%
  filter(w_avg_TaxiOutTime_min > 0)
w_flights %>%
  ggvis(x = ~FlightNum, y = ~w_avg_TaxiOutTime_min) %>%
  layer_bars()