Reference: video tutorial on the dplyr package by Kevin Markham.

dplyr package by Hadley Wickham

Options Available in dplyr

# loading packages
library(hflights)
suppressMessages(library(dplyr))

# Preview the first six rows of the raw hflights data frame
head(hflights, n = 6L)
##      Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## 5424 2011     1          1         6    1400    1500            AA
## 5425 2011     1          2         7    1401    1501            AA
## 5426 2011     1          3         1    1352    1502            AA
## 5427 2011     1          4         2    1403    1513            AA
## 5428 2011     1          5         3    1405    1507            AA
## 5429 2011     1          6         4    1359    1503            AA
##      FlightNum TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin
## 5424       428  N576AA                60      40      -10        0    IAH
## 5425       428  N557AA                60      45       -9        1    IAH
## 5426       428  N541AA                70      48       -8       -8    IAH
## 5427       428  N403AA                70      39        3        3    IAH
## 5428       428  N492AA                62      44       -3        5    IAH
## 5429       428  N262AA                64      45       -7       -1    IAH
##      Dest Distance TaxiIn TaxiOut Cancelled CancellationCode Diverted
## 5424  DFW      224      7      13         0                         0
## 5425  DFW      224      6       9         0                         0
## 5426  DFW      224      5      17         0                         0
## 5427  DFW      224      9      22         0                         0
## 5428  DFW      224      9       9         0                         0
## 5429  DFW      224      6      13         0                         0
# Load the hflights dataset into the workspace
data(hflights)
# Convert to a tibble ("local data frame") so that printing shows only the
# rows and columns that fit on screen. `tbl_df()` is deprecated; `as_tibble()`
# is its replacement.
flights <- tibble::as_tibble(hflights)

# Print the tibble; only the first ten rows and the columns that fit are shown
print(flights)
## # A tibble: 227,496 × 21
##     Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## *  <int> <int>      <int>     <int>   <int>   <int>         <chr>
## 1   2011     1          1         6    1400    1500            AA
## 2   2011     1          2         7    1401    1501            AA
## 3   2011     1          3         1    1352    1502            AA
## 4   2011     1          4         2    1403    1513            AA
## 5   2011     1          5         3    1405    1507            AA
## 6   2011     1          6         4    1359    1503            AA
## 7   2011     1          7         5    1359    1509            AA
## 8   2011     1          8         6    1355    1454            AA
## 9   2011     1          9         7    1443    1554            AA
## 10  2011     1         10         1    1443    1553            AA
## # ... with 227,486 more rows, and 14 more variables: FlightNum <int>,
## #   TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## #   DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## #   TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## #   Diverted <int>
# Print a chosen number of rows (here 30) instead of the default
print(flights, n = 30)

# Wrap the first six rows in a plain data frame so every column is printed
data.frame(head(flights, n = 6L))

filter: Keep rows matching criteria

## dplyr filtering of rows of interest

# Flights that flew from Houston airports on 01-Feb-2011
filter(flights, Month == 2 & DayofMonth == 1)
## # A tibble: 577 × 21
##     Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
##    <int> <int>      <int>     <int>   <int>   <int>         <chr>
## 1   2011     2          1         2    1401    1539            AA
## 2   2011     2          1         2      NA      NA            AA
## 3   2011     2          1         2      NA      NA            AA
## 4   2011     2          1         2      NA      NA            AA
## 5   2011     2          1         2    1746    2109            AA
## 6   2011     2          1         2      NA      NA            AA
## 7   2011     2          1         2    1032    1358            AA
## 8   2011     2          1         2      NA      NA            AA
## 9   2011     2          1         2     558     912            AA
## 10  2011     2          1         2    1820    2112            AS
## # ... with 567 more rows, and 14 more variables: FlightNum <int>,
## #   TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## #   DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## #   TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## #   Diverted <int>
# Flights on 01-Feb-2011 operated by carrier AA or CO; the two carrier
# conditions are combined with the `|` (OR) operator
filter(
  flights,
  Month == 2,
  DayofMonth == 1,
  UniqueCarrier == "AA" | UniqueCarrier == "CO"
)
## # A tibble: 188 × 21
##     Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
##    <int> <int>      <int>     <int>   <int>   <int>         <chr>
## 1   2011     2          1         2    1401    1539            AA
## 2   2011     2          1         2      NA      NA            AA
## 3   2011     2          1         2      NA      NA            AA
## 4   2011     2          1         2      NA      NA            AA
## 5   2011     2          1         2    1746    2109            AA
## 6   2011     2          1         2      NA      NA            AA
## 7   2011     2          1         2    1032    1358            AA
## 8   2011     2          1         2      NA      NA            AA
## 9   2011     2          1         2     558     912            AA
## 10  2011     2          1         2    1014    1430            CO
## # ... with 178 more rows, and 14 more variables: FlightNum <int>,
## #   TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## #   DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## #   TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## #   Diverted <int>
# Use the %in% operator instead of chaining `|` (OR) conditions
filter(
  flights,
  UniqueCarrier %in% c("UA", "CO")
)
## # A tibble: 72,104 × 21
##     Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
##    <int> <int>      <int>     <int>   <int>   <int>         <chr>
## 1   2011     1         31         1     924    1413            CO
## 2   2011     1         31         1    1825    1925            CO
## 3   2011     1         31         1    1554    1650            CO
## 4   2011     1         31         1    1522    1632            CO
## 5   2011     1         31         1    1536    1635            CO
## 6   2011     1         31         1    1916    2103            CO
## 7   2011     1         31         1     747     936            CO
## 8   2011     1         31         1    1803    1927            CO
## 9   2011     1         31         1    1206    1631            CO
## 10  2011     1         31         1    1425    1848            CO
## # ... with 72,094 more rows, and 14 more variables: FlightNum <int>,
## #   TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## #   DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## #   TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## #   Diverted <int>
# Flights by carrier AA on 1 or 2 January. DayofMonth is an integer column,
# so match it against integers rather than strings to avoid implicit coercion.
filter(flights, DayofMonth %in% c(1L, 2L), Month == 1, UniqueCarrier == "AA")
## # A tibble: 16 × 21
##     Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
##    <int> <int>      <int>     <int>   <int>   <int>         <chr>
## 1   2011     1          1         6    1400    1500            AA
## 2   2011     1          2         7    1401    1501            AA
## 3   2011     1          1         6     728     840            AA
## 4   2011     1          2         7     719     821            AA
## 5   2011     1          2         7    1959    2106            AA
## 6   2011     1          1         6    1631    1736            AA
## 7   2011     1          2         7    1636    1759            AA
## 8   2011     1          1         6    1756    2112            AA
## 9   2011     1          2         7    1823    2132            AA
## 10  2011     1          1         6    1012    1347            AA
## 11  2011     1          2         7    1008    1321            AA
## 12  2011     1          1         6    1211    1325            AA
## 13  2011     1          2         7    1200    1303            AA
## 14  2011     1          2         7     907    1018            AA
## 15  2011     1          1         6     557     906            AA
## 16  2011     1          2         7     554     912            AA
## # ... with 14 more variables: FlightNum <int>, TailNum <chr>,
## #   ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## #   DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## #   TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## #   Diverted <int>

select: Pick columns by name

# Keep only the departure time, arrival time and flight number columns
select(
  flights,
  DepTime, ArrTime, FlightNum
)
## # A tibble: 227,496 × 3
##    DepTime ArrTime FlightNum
## *    <int>   <int>     <int>
## 1     1400    1500       428
## 2     1401    1501       428
## 3     1352    1502       428
## 4     1403    1513       428
## 5     1405    1507       428
## 6     1359    1503       428
## 7     1359    1509       428
## 8     1355    1454       428
## 9     1443    1554       428
## 10    1443    1553       428
## # ... with 227,486 more rows

“Chaining” or “Pipelining”

# Chain the steps with `%>%`: pick three columns, then keep the AA flights
# whose departure delay exceeded 30
flights %>%
  select(Month, UniqueCarrier, DepDelay) %>%
  filter(
    UniqueCarrier == "AA",
    DepDelay > 30
  )
## # A tibble: 251 × 3
##    Month UniqueCarrier DepDelay
##    <int>         <chr>    <int>
## 1      1            AA       43
## 2      1            AA       43
## 3      1            AA       90
## 4      1            AA       67
## 5      1            AA       41
## 6      1            AA       55
## 7      1            AA       40
## 8      1            AA       74
## 9      1            AA       31
## 10     1            AA       38
## # ... with 241 more rows

arrange: Reorder rows

# Keep the carrier and departure delay columns, then sort rows by DepDelay
# in ascending order (the default for arrange)
flights %>%
  select(UniqueCarrier, DepDelay) %>%
  arrange(DepDelay)
## # A tibble: 227,496 × 2
##    UniqueCarrier DepDelay
##            <chr>    <int>
## 1             OO      -33
## 2             MQ      -23
## 3             XE      -19
## 4             XE      -19
## 5             CO      -18
## 6             EV      -18
## 7             XE      -17
## 8             CO      -17
## 9             XE      -17
## 10            MQ      -17
## # ... with 227,486 more rows
# Restrict to carrier AA and sort rows by DepDelay in descending order
# (largest departure delay first) using desc()
flights %>%
  select(UniqueCarrier, DepDelay, Cancelled) %>%
  filter(UniqueCarrier == "AA") %>%
  arrange(desc(DepDelay))
## # A tibble: 3,244 × 3
##    UniqueCarrier DepDelay Cancelled
##            <chr>    <int>     <int>
## 1             AA      970         0
## 2             AA      677         0
## 3             AA      653         0
## 4             AA      525         0
## 5             AA      286         0
## 6             AA      277         0
## 7             AA      235         0
## 8             AA      234         0
## 9             AA      233         0
## 10            AA      228         0
## # ... with 3,234 more rows

mutate: Add new variables

# Compute a speed column on the fly; the result is only printed, so
# `flights` itself is left unchanged.
# NOTE(review): the hflights documentation appears to give Distance in miles
# and AirTime in minutes, which would make this miles per hour despite the
# column name - confirm before relying on the unit.
flights %>%
  select(Distance, AirTime) %>%
  mutate(Speed_Kmps = Distance / AirTime * 60)
## # A tibble: 227,496 × 3
##    Distance AirTime Speed_Kmps
##       <int>   <int>      <dbl>
## 1       224      40   336.0000
## 2       224      45   298.6667
## 3       224      48   280.0000
## 4       224      39   344.6154
## 5       224      44   305.4545
## 6       224      45   298.6667
## 7       224      43   312.5581
## 8       224      40   336.0000
## 9       224      41   327.8049
## 10      224      45   298.6667
## # ... with 227,486 more rows
# Assign the result back to `flights` so the new column is kept
flights <- mutate(flights, Speed_Kmps = Distance / AirTime * 60)

summarise: Reduce variables to values

# Group by UniqueCarrier, then collapse each group to the mean of ArrDelay.
# NAs are dropped (na.rm = TRUE) so the mean can be computed; carriers are
# listed from the highest average arrival delay to the lowest.
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(
    avgArrival_delay = mean(ArrDelay, na.rm = TRUE)
  ) %>%
  arrange(desc(avgArrival_delay))
## # A tibble: 15 × 2
##    UniqueCarrier avgArrival_delay
##            <chr>            <dbl>
## 1             UA       10.4628628
## 2             B6        9.8588410
## 3             OO        8.6934922
## 4             XE        8.1865242
## 5             F9        7.6682692
## 6             WN        7.5871430
## 7             EV        7.2569543
## 8             MQ        7.1529751
## 9             CO        6.0986983
## 10            DL        6.0841374
## 11            YV        4.0128205
## 12            AS        3.1923077
## 13            FL        1.8536239
## 14            AA        0.8917558
## 15            US       -0.6307692
# Mean of the Cancelled and Diverted columns for each carrier.
# `across()` applies one function to several columns and replaces the
# deprecated `summarize_each()` / `funs()` pair.
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(across(c(Cancelled, Diverted), mean))
## # A tibble: 15 × 3
##    UniqueCarrier   Cancelled    Diverted
##            <chr>       <dbl>       <dbl>
## 1             AA 0.018495684 0.001849568
## 2             AS 0.000000000 0.002739726
## 3             B6 0.025899281 0.005755396
## 4             CO 0.006782614 0.002627370
## 5             DL 0.015903067 0.003029156
## 6             EV 0.034482759 0.003176044
## 7             F9 0.007159905 0.000000000
## 8             FL 0.009817672 0.003272557
## 9             MQ 0.029044750 0.001936317
## 10            OO 0.013946828 0.003486707
## 11            UA 0.016409266 0.002413127
## 12            US 0.011268986 0.001469868
## 13            WN 0.015504047 0.002293629
## 14            XE 0.015495599 0.003449550
## 15            YV 0.012658228 0.000000000
# For each carrier, compute the minimum and maximum of every column whose
# name matches "Delay" (the arrival and departure delays), ignoring NAs.
# `across()` replaces the deprecated `summarize_each()` / `funs()` pair; the
# names of the list entries become the `_min` / `_max` column suffixes.
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(
    across(
      matches("Delay"),
      list(
        min = ~ min(.x, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE)
      )
    )
  ) %>%
  # across() emits the min/max pair column by column; list all minima before
  # all maxima to keep the column order that summarize_each() produced
  select(UniqueCarrier, ends_with("_min"), ends_with("_max"))
## # A tibble: 15 × 5
##    UniqueCarrier ArrDelay_min DepDelay_min ArrDelay_max DepDelay_max
##            <chr>        <int>        <int>        <int>        <int>
## 1             AA          -39          -15          978          970
## 2             AS          -43          -15          183          172
## 3             B6          -44          -14          335          310
## 4             CO          -55          -18          957          981
## 5             DL          -32          -17          701          730
## 6             EV          -40          -18          469          479
## 7             F9          -24          -15          277          275
## 8             FL          -30          -14          500          507
## 9             MQ          -38          -23          918          931
## 10            OO          -57          -33          380          360
## 11            UA          -47          -11          861          869
## 12            US          -42          -17          433          425
## 13            WN          -44          -10          499          548
## 14            XE          -70          -19          634          628
## 15            YV          -32          -11           72           54
# Count the flights for every (Month, DayofMonth) pair and list the
# busiest days first
flights %>%
  group_by(Month, DayofMonth) %>%
  summarise(flight_count = n()) %>%
  arrange(desc(flight_count))
## Source: local data frame [365 x 3]
## Groups: Month [12]
## 
##    Month DayofMonth flight_count
##    <int>      <int>        <int>
## 1      8          4          706
## 2      8         11          706
## 3      8         12          706
## 4      8          5          705
## 5      8          3          704
## 6      8         10          704
## 7      1          3          702
## 8      7          7          702
## 9      7         14          702
## 10     7         28          701
## # ... with 355 more rows
# For each destination, count the flights and the number of distinct
# planes (unique tail numbers) that flew there
flights %>%
  group_by(Dest) %>%
  summarise(
    flight_count = n(),
    plane_count = n_distinct(TailNum)
  )
## # A tibble: 116 × 3
##     Dest flight_count plane_count
##    <chr>        <int>       <int>
## 1    ABQ         2812         716
## 2    AEX          724         215
## 3    AGS            1           1
## 4    AMA         1297         158
## 5    ANC          125          38
## 6    ASE          125          60
## 7    ATL         7886         983
## 8    AUS         5022        1015
## 9    AVL          350         142
## 10   BFL          504          70
## # ... with 106 more rows

Window Functions

# For each carrier, find the three days of the year with the longest
# departure delays. min_rank(desc(DepDelay)) ranks the largest delay as 1
# within each carrier. The grouping column is selected explicitly; leaving it
# out makes dplyr re-add it with an "Adding missing grouping variables"
# message.
flights %>%
  group_by(UniqueCarrier) %>%
  select(UniqueCarrier, Month, DayofMonth, DepDelay) %>%
  filter(min_rank(desc(DepDelay)) <= 3) %>%
  arrange(UniqueCarrier, desc(DepDelay))
## Adding missing grouping variables: `UniqueCarrier`
## Source: local data frame [45 x 4]
## Groups: UniqueCarrier [15]
## 
##    UniqueCarrier Month DayofMonth DepDelay
##            <chr> <int>      <int>    <int>
## 1             AA    12         12      970
## 2             AA    11         19      677
## 3             AA    12         22      653
## 4             AS     2         28      172
## 5             AS     7          6      138
## 6             AS     4          8      102
## 7             B6    10         29      310
## 8             B6     8         19      283
## 9             B6     3         10      278
## 10            CO     8          1      981
## # ... with 35 more rows
# The same kind of query written more simply with `top_n`, here keeping the
# two longest departure delays per carrier. The grouping column and the
# ranking column (DepDelay) are both named explicitly rather than relying on
# dplyr to add the group column and to guess the last column as the weight.
flights %>%
  group_by(UniqueCarrier) %>%
  select(UniqueCarrier, Month, DayofMonth, DepDelay) %>%
  top_n(2, DepDelay) %>%
  arrange(UniqueCarrier, desc(DepDelay))
## Adding missing grouping variables: `UniqueCarrier`
## Selecting by DepDelay
## Source: local data frame [30 x 4]
## Groups: UniqueCarrier [15]
## 
##    UniqueCarrier Month DayofMonth DepDelay
##            <chr> <int>      <int>    <int>
## 1             AA    12         12      970
## 2             AA    11         19      677
## 3             AS     2         28      172
## 4             AS     7          6      138
## 5             B6    10         29      310
## 6             B6     8         19      283
## 7             CO     8          1      981
## 8             CO     1         20      780
## 9             DL    10         25      730
## 10            DL     4          5      497
## # ... with 20 more rows
# Count flights per month, then subtract the previous month's count;
# lag() shifts the counts down one row, so the first month has no
# predecessor and its change is NA
flights %>%
  group_by(Month) %>%
  summarise(flight_count = n()) %>%
  mutate(
    change = flight_count - lag(flight_count)
  )
## # A tibble: 12 × 3
##    Month flight_count change
##    <int>        <int>  <int>
## 1      1        18910     NA
## 2      2        17128  -1782
## 3      3        19470   2342
## 4      4        18593   -877
## 5      5        19172    579
## 6      6        19600    428
## 7      7        20548    948
## 8      8        20176   -372
## 9      9        18065  -2111
## 10    10        18696    631
## 11    11        18021   -675
## 12    12        19117   1096
# Same calculation using `tally`, which counts the rows of each group
# and stores the result in a column named `n`
flights %>%
  group_by(Month) %>%
  tally() %>%
  mutate(
    change = n - lag(n)
  )
## # A tibble: 12 × 3
##    Month     n change
##    <int> <int>  <int>
## 1      1 18910     NA
## 2      2 17128  -1782
## 3      3 19470   2342
## 4      4 18593   -877
## 5      5 19172    579
## 6      6 19600    428
## 7      7 20548    948
## 8      8 20176   -372
## 9      9 18065  -2111
## 10    10 18696    631
## 11    11 18021   -675
## 12    12 19117   1096

Other Useful Convenience Functions

# Draw five random rows; sampling is without replacement
sample_n(flights, 5)
## # A tibble: 5 × 22
##    Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
##   <int> <int>      <int>     <int>   <int>   <int>         <chr>     <int>
## 1  2011     7          3         7    1026    1346            AA      1700
## 2  2011     1         26         3    1203    1448            WN      2761
## 3  2011     7         29         5    2020    2348            CO      1522
## 4  2011    10          2         7    1908    2236            CO      1574
## 5  2011     8         15         1    1514    1834            CO      1699
## # ... with 14 more variables: TailNum <chr>, ActualElapsedTime <int>,
## #   AirTime <int>, ArrDelay <int>, DepDelay <int>, Origin <chr>,
## #   Dest <chr>, Distance <int>, TaxiIn <int>, TaxiOut <int>,
## #   Cancelled <int>, CancellationCode <chr>, Diverted <int>,
## #   Speed_Kmps <dbl>
# Draw a random 25% of the rows, sampling with replacement
sample_frac(flights, 0.25, replace = TRUE)
## # A tibble: 56,874 × 22
##     Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
##    <int> <int>      <int>     <int>   <int>   <int>         <chr>
## 1   2011     5          9         1    1615    1752            EV
## 2   2011     8          9         2     943    1056            CO
## 3   2011    12         15         4     641    1016            CO
## 4   2011     5         27         5    1158    1455            FL
## 5   2011     9          5         1    1910    2234            CO
## 6   2011     1         25         2    1821    1944            WN
## 7   2011     8          8         1    1131    1607            WN
## 8   2011     3         29         2     918    1037            XE
## 9   2011     3         30         3    1435    1737            XE
## 10  2011     4         17         7     800     852            WN
## # ... with 56,864 more rows, and 15 more variables: FlightNum <int>,
## #   TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## #   DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## #   TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## #   Diverted <int>, Speed_Kmps <dbl>
# Base R: show the structure (class, dimensions, column types) of an object
str(object = flights)
## Classes 'tbl_df', 'tbl' and 'data.frame':    227496 obs. of  22 variables:
##  $ Year             : int  2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
##  $ Month            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ DayofMonth       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ DayOfWeek        : int  6 7 1 2 3 4 5 6 7 1 ...
##  $ DepTime          : int  1400 1401 1352 1403 1405 1359 1359 1355 1443 1443 ...
##  $ ArrTime          : int  1500 1501 1502 1513 1507 1503 1509 1454 1554 1553 ...
##  $ UniqueCarrier    : chr  "AA" "AA" "AA" "AA" ...
##  $ FlightNum        : int  428 428 428 428 428 428 428 428 428 428 ...
##  $ TailNum          : chr  "N576AA" "N557AA" "N541AA" "N403AA" ...
##  $ ActualElapsedTime: int  60 60 70 70 62 64 70 59 71 70 ...
##  $ AirTime          : int  40 45 48 39 44 45 43 40 41 45 ...
##  $ ArrDelay         : int  -10 -9 -8 3 -3 -7 -1 -16 44 43 ...
##  $ DepDelay         : int  0 1 -8 3 5 -1 -1 -5 43 43 ...
##  $ Origin           : chr  "IAH" "IAH" "IAH" "IAH" ...
##  $ Dest             : chr  "DFW" "DFW" "DFW" "DFW" ...
##  $ Distance         : int  224 224 224 224 224 224 224 224 224 224 ...
##  $ TaxiIn           : int  7 6 5 9 9 6 12 7 8 6 ...
##  $ TaxiOut          : int  13 9 17 22 9 13 15 12 22 19 ...
##  $ Cancelled        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ CancellationCode : chr  "" "" "" "" ...
##  $ Diverted         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Speed_Kmps       : num  336 299 280 345 305 ...
# dplyr equivalent of str(): tidier formatting that adapts to screen width
flights %>% glimpse()
## Observations: 227,496
## Variables: 22
## $ Year              <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 20...
## $ Month             <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ DayofMonth        <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1...
## $ DayOfWeek         <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6,...
## $ DepTime           <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 13...
## $ ArrTime           <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 14...
## $ UniqueCarrier     <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "A...
## $ FlightNum         <int> 428, 428, 428, 428, 428, 428, 428, 428, 428,...
## $ TailNum           <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N49...
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, ...
## $ AirTime           <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, ...
## $ ArrDelay          <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29,...
## $ DepDelay          <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, ...
## $ Origin            <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "I...
## $ Dest              <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "D...
## $ Distance          <int> 224, 224, 224, 224, 224, 224, 224, 224, 224,...
## $ TaxiIn            <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6...
## $ TaxiOut           <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11...
## $ Cancelled         <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ CancellationCode  <chr> "", "", "", "", "", "", "", "", "", "", "", ...
## $ Diverted          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Speed_Kmps        <dbl> 336.0000, 298.6667, 280.0000, 344.6154, 305....

< END OF DOCUMENT >