NYC Flights Dataset

New York City Flights (2013) Treemap

Source: Defense Visual Information Distribution Service

The NYC Flights Dataset contains data for all flights that departed from New York City area airports (JFK, LGA, and EWR) in 2013. It is available in R through the nycflights13 package.

#install.packages("nycflights13")
library(nycflights13)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
data(flights)

Checking how many airlines are represented in the data.

length(unique(flights$carrier))
[1] 16
?airlines
starting httpd help server ... done

Creating a dataframe of just the carrier, arrival delay, and departure delay.

# Keep only the carrier code and the two delay columns, then print the result.
flights_delay <- select(flights, carrier, arr_delay, dep_delay)
flights_delay
# A tibble: 336,776 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 UA             11         2
 2 UA             20         4
 3 AA             33         2
 4 B6            -18        -1
 5 DL            -25        -6
 6 UA             12        -4
 7 B6             19        -5
 8 EV            -14        -3
 9 B6             -8        -3
10 AA              8        -2
# ℹ 336,766 more rows

Looking at the delays for each carrier:

Calculating the average departure delay

Endeavor Air Inc. (9E)

EndeavorAir <- filter(flights_delay, carrier == "9E")
EndeavorAir
# A tibble: 18,460 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 9E             11         0
 2 9E             -2        -9
 3 9E             -2        -3
 4 9E             -1        -6
 5 9E             -5        -8
 6 9E             -5         0
 7 9E              5         6
 8 9E             13         0
 9 9E             -8        -8
10 9E            -33        -6
# ℹ 18,450 more rows
EndeavorAir_dd_mean <- mean(EndeavorAir$dep_delay, na.rm = TRUE)

American Airlines Inc. (AA)

AmAir <- filter(flights_delay, carrier == "AA")
AmAir
# A tibble: 32,729 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 AA             33         2
 2 AA              8        -2
 3 AA             31        -1
 4 AA            -12        -4
 5 AA              5        13
 6 AA             -3        -2
 7 AA             14        -1
 8 AA             48         0
 9 AA              4        -4
10 AA            -10        -3
# ℹ 32,719 more rows
AmAir_dd_mean <- mean(AmAir$dep_delay, na.rm = TRUE)

Alaska Airlines Inc. (AS)

AlaskaAir <- filter(flights_delay, carrier == "AS")
AlaskaAir
# A tibble: 714 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 AS            -10        -1
 2 AS            -19        -7
 3 AS            -41        -3
 4 AS              1         3
 5 AS            -18        -1
 6 AS             -9         2
 7 AS              1         0
 8 AS            -29        -7
 9 AS            -19         0
10 AS            -12       -12
# ℹ 704 more rows
AlaskaAir_dd_mean <- mean(AlaskaAir$dep_delay, na.rm = TRUE)

JetBlue Airways (B6)

JetBlue <- filter(flights_delay, carrier == "B6")
JetBlue
# A tibble: 54,635 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 B6            -18        -1
 2 B6             19        -5
 3 B6             -8        -3
 4 B6             -2        -2
 5 B6             -3        -2
 6 B6             -4         0
 7 B6             -7         0
 8 B6             -6         1
 9 B6              4         3
10 B6            -21         0
# ℹ 54,625 more rows
JetBlue_dd_mean <- mean(JetBlue$dep_delay, na.rm = TRUE)

Delta Air Lines Inc. (DL)

DeltaAir <- filter(flights_delay, carrier == "DL")
DeltaAir
# A tibble: 48,110 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 DL            -25        -6
 2 DL             -8        -8
 3 DL             -8        -4
 4 DL             -9         0
 5 DL            -33        -7
 6 DL             -9         0
 7 DL             -8        -5
 8 DL            -18        -5
 9 DL            -14        -3
10 DL              5        -2
# ℹ 48,100 more rows
DeltaAir_dd_mean <- mean(DeltaAir$dep_delay, na.rm = TRUE)

ExpressJet Airlines Inc. (EV)

ExpressAir <- filter(flights_delay, carrier == "EV")
ExpressAir
# A tibble: 54,173 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 EV            -14        -3
 2 EV             29        -6
 3 EV             12        24
 4 EV             -7        -6
 5 EV            -26        -4
 6 EV             23        -2
 7 EV             -2        -8
 8 EV            -18        -4
 9 EV             -4         0
10 EV            -14         0
# ℹ 54,163 more rows
ExpressAir_dd_mean <- mean(ExpressAir$dep_delay, na.rm = TRUE)

Frontier Airlines Inc. (F9)

FrontierAir <- filter(flights_delay, carrier == "F9")
FrontierAir
# A tibble: 685 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 F9             32        -2
 2 F9             -6       -14
 3 F9             18        -8
 4 F9             -1        -2
 5 F9              0         0
 6 F9             98       123
 7 F9             -3        -1
 8 F9             36        61
 9 F9             -5         0
10 F9             -5        -4
# ℹ 675 more rows
FrontierAir_dd_mean <- mean(FrontierAir$dep_delay, na.rm = TRUE)

AirTran Airways Corporation (FL)

AirTran <- filter(flights_delay, carrier == "FL")
AirTran
# A tibble: 3,260 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 FL             10        -3
 2 FL             17         4
 3 FL              2        -2
 4 FL              8        -8
 5 FL             -5       -10
 6 FL              6        -3
 7 FL              6        -8
 8 FL             14         0
 9 FL              2       -11
10 FL             -7       -10
# ℹ 3,250 more rows
AirTran_dd_mean <- mean(AirTran$dep_delay, na.rm = TRUE)

Hawaiian Airlines Inc. (HA)

HawaiiAir <- filter(flights_delay, carrier == "HA")
HawaiiAir
# A tibble: 342 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 HA            -14        -3
 2 HA             -5         9
 3 HA            -26        14
 4 HA            -14         0
 5 HA            -11        -2
 6 HA             28        79
 7 HA             50       102
 8 HA            -26         1
 9 HA           1272      1301
10 HA            -41        -1
# ℹ 332 more rows
HawaiiAir_dd_mean <- mean(HawaiiAir$dep_delay, na.rm = TRUE)

Envoy Air (MQ)

EnvoyAir <- filter(flights_delay, carrier == "MQ")
EnvoyAir
# A tibble: 26,397 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 MQ             12         0
 2 MQ             16        -3
 3 MQ             32         8
 4 MQ             10        -6
 5 MQ             27        -9
 6 MQ             49        39
 7 MQ             -6       -10
 8 MQ             -4       -10
 9 MQ            137       101
10 MQ            -13        -4
# ℹ 26,387 more rows
EnvoyAir_dd_mean <- mean(EnvoyAir$dep_delay, na.rm = TRUE)

SkyWest Airlines Inc. (OO)

SkyWest <- filter(flights_delay, carrier == "OO")
SkyWest
# A tibble: 32 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 OO            107        67
 2 OO             -5        -6
 3 OO             27        13
 4 OO            -24        -8
 5 OO             -6         4
 6 OO              3         1
 7 OO            -20        -9
 8 OO            157       131
 9 OO              3       -10
10 OO            140       154
# ℹ 22 more rows
SkyWest_dd_mean <- mean(SkyWest$dep_delay, na.rm = TRUE)

United Air Lines Inc. (UA)

UnitedAir <- filter(flights_delay, carrier == "UA")
UnitedAir
# A tibble: 58,665 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 UA             11         2
 2 UA             20         4
 3 UA             12        -4
 4 UA              7        -2
 5 UA            -14        -2
 6 UA             -8        -1
 7 UA            -17         0
 8 UA             14        11
 9 UA              1        -4
10 UA             29        -2
# ℹ 58,655 more rows
UnitedAir_dd_mean <- mean(UnitedAir$dep_delay, na.rm = TRUE)

US Airways Inc. (US)

USAir <- filter(flights_delay, carrier == "US")
USAir
# A tibble: 20,536 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 US              3        -8
 2 US              0        -3
 3 US             -9        -1
 4 US            -11        -2
 5 US             -4        -7
 6 US             10        -4
 7 US             -5        -2
 8 US            -29        -7
 9 US             11        -5
10 US            -15        -1
# ℹ 20,526 more rows
USAir_dd_mean <- mean(USAir$dep_delay, na.rm = TRUE)

Virgin America (VX)

VirginAir <- filter(flights_delay, carrier == "VX")
VirginAir
# A tibble: 5,162 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 VX              2        -2
 2 VX            -26        -1
 3 VX             -2        -1
 4 VX             -6         2
 5 VX            -22         1
 6 VX             -2         3
 7 VX            -26         3
 8 VX            -17        -3
 9 VX            -40        -3
10 VX             -5         0
# ℹ 5,152 more rows
# Average the Virgin America subset; averaging USAir here would duplicate the
# US Airways mean for Virgin America.
VirginAir_dd_mean <- mean(VirginAir$dep_delay, na.rm = TRUE)

Southwest Airlines Co. (WN)

Southwest <- filter(flights_delay, carrier == "WN")
Southwest
# A tibble: 12,275 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 WN            -19        -1
 2 WN             33        -1
 3 WN             26        -3
 4 WN             30         4
 5 WN             15        -2
 6 WN              7        -3
 7 WN            -14        -1
 8 WN              5        -5
 9 WN             -4        -2
10 WN             23         0
# ℹ 12,265 more rows
Southwest_dd_mean <- mean(Southwest$dep_delay, na.rm = TRUE)

Mesa Airlines Inc. (YV)

MesaAir <- filter(flights_delay, carrier == "YV")
MesaAir
# A tibble: 601 × 3
   carrier arr_delay dep_delay
   <chr>       <dbl>     <dbl>
 1 YV            -20        -7
 2 YV            -23       -11
 3 YV            -13        -5
 4 YV             75        89
 5 YV            -15        -8
 6 YV            -18        -5
 7 YV             -1        -6
 8 YV            -22        -3
 9 YV              5        -7
10 YV            -16        -3
# ℹ 591 more rows
MesaAir_dd_mean <- mean(MesaAir$dep_delay, na.rm = TRUE)

New Dataframe

# Assemble one summary row per carrier: display name, mean departure delay,
# and number of flights (the row count of that carrier's subset).
delays_by_airline <- data.frame(
  carrier = c(
    "Endeavor Air", "American Airlines", "Alaska Airlines",
    "JetBlue Airways", "Delta Air Lines", "ExpressJet Airlines",
    "Frontier Airlines", "AirTran Airways", "Hawaiian Airlines",
    "Envoy Air", "SkyWest Airlines", "United Air Lines",
    "US Airways", "Virgin America", "Southwest Airlines",
    "Mesa Airlines"
  ),
  mean_dep_delay = c(
    EndeavorAir_dd_mean, AmAir_dd_mean, AlaskaAir_dd_mean,
    JetBlue_dd_mean, DeltaAir_dd_mean, ExpressAir_dd_mean,
    FrontierAir_dd_mean, AirTran_dd_mean, HawaiiAir_dd_mean,
    EnvoyAir_dd_mean, SkyWest_dd_mean, UnitedAir_dd_mean,
    USAir_dd_mean, VirginAir_dd_mean, Southwest_dd_mean,
    MesaAir_dd_mean
  ),
  # The subsets are listed in the same order as the names and means above.
  flight_frequency = vapply(
    list(
      EndeavorAir, AmAir, AlaskaAir, JetBlue,
      DeltaAir, ExpressAir, FrontierAir, AirTran,
      HawaiiAir, EnvoyAir, SkyWest, UnitedAir,
      USAir, VirginAir, Southwest, MesaAir
    ),
    nrow,
    integer(1)
  )
)
delays_by_airline
               carrier mean_dep_delay flight_frequency
1         Endeavor Air      16.725769            18460
2    American Airlines       8.586016            32729
3      Alaska Airlines       5.804775              714
4      JetBlue Airways      13.022522            54635
5      Delta Air Lines       9.264505            48110
6  ExpressJet Airlines      19.955390            54173
7    Frontier Airlines      20.215543              685
8      AirTran Airways      18.726075             3260
9    Hawaiian Airlines       4.900585              342
10           Envoy Air      10.552041            26397
11    SkyWest Airlines      12.586207               32
12    United Air Lines      12.106073            58665
13          US Airways       3.782418            20536
14      Virgin America       3.782418             5162
15  Southwest Airlines      17.711744            12275
16       Mesa Airlines      18.996330              601

Treemap

library(treemap)
library(RColorBrewer)
# Draw the treemap: tiles are sized by the number of flights per carrier and
# coloured by mean departure delay using the "PuRd" palette.
treemap(
  delays_by_airline,
  index = "carrier",
  vSize = "flight_frequency",
  vColor = "mean_dep_delay",
  type = "manual",
  palette = "PuRd",
  title = "Mean Departure Delays and Number of Flights (2013)",
  fontsize.title = 18,
  title.legend = "Mean Departure Delay",
  force.print.labels = TRUE,
  aspRatio = NA
)

Source: Federal Aviation Administration

Final Thoughts:

The airlines responsible for the most flights out of NYC in 2013 had mean departure delays closest to the median. The exception was ExpressJet Airlines, which had one of the highest mean departure delays while ranking third in the number of flights departing from NYC airports. Airlines operating fewer flights out of NYC were more likely to have a very high or very low mean departure delay.