Reference: video tutorial on the dplyr package by Kevin Markham.
filter
, select
, arrange
, mutate
, summarise
, group_by
# loading packages
library(hflights)
suppressMessages(library(dplyr))
# Explore the data: show the first six rows of the raw hflights data frame
head(hflights, n = 6)
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## 5424 2011 1 1 6 1400 1500 AA
## 5425 2011 1 2 7 1401 1501 AA
## 5426 2011 1 3 1 1352 1502 AA
## 5427 2011 1 4 2 1403 1513 AA
## 5428 2011 1 5 3 1405 1507 AA
## 5429 2011 1 6 4 1359 1503 AA
## FlightNum TailNum ActualElapsedTime AirTime ArrDelay DepDelay Origin
## 5424 428 N576AA 60 40 -10 0 IAH
## 5425 428 N557AA 60 45 -9 1 IAH
## 5426 428 N541AA 70 48 -8 -8 IAH
## 5427 428 N403AA 70 39 3 3 IAH
## 5428 428 N492AA 62 44 -3 5 IAH
## 5429 428 N262AA 64 45 -7 -1 IAH
## Dest Distance TaxiIn TaxiOut Cancelled CancellationCode Diverted
## 5424 DFW 224 7 13 0 0
## 5425 DFW 224 6 9 0 0
## 5426 DFW 224 5 17 0 0
## 5427 DFW 224 9 22 0 0
## 5428 DFW 224 9 9 0 0
## 5429 DFW 224 6 13 0 0
# Load the hflights dataset into the workspace
data("hflights")
tbl_df
is used to create a local data frame, which prints data in a compact, readable manner.
# Convert to local data frame
# as_tibble() supersedes tbl_df(), which is deprecated in current dplyr releases;
# the resulting local data frame (tibble) is the same.
flights <- as_tibble(hflights)
# Print the flights dataset (only the first 10 rows and the columns that fit are shown)
flights
## # A tibble: 227,496 × 21
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## * <int> <int> <int> <int> <int> <int> <chr>
## 1 2011 1 1 6 1400 1500 AA
## 2 2011 1 2 7 1401 1501 AA
## 3 2011 1 3 1 1352 1502 AA
## 4 2011 1 4 2 1403 1513 AA
## 5 2011 1 5 3 1405 1507 AA
## 6 2011 1 6 4 1359 1503 AA
## 7 2011 1 7 5 1359 1509 AA
## 8 2011 1 8 6 1355 1454 AA
## 9 2011 1 9 7 1443 1554 AA
## 10 2011 1 10 1 1443 1553 AA
## # ... with 227,486 more rows, and 14 more variables: FlightNum <int>,
## # TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## # DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## # TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## # Diverted <int>
# Show the first 30 rows of the flights dataset
print(flights, n = 30)
# Convert the first rows to a plain data frame so that every column is printed
flights %>%
  head() %>%
  data.frame()
## dplyr: filtering the rows of interest
# Keep only the flights that flew from the Houston airports on 01-Feb-2011
flights %>%
  filter(Month == 2, DayofMonth == 1)
## # A tibble: 577 × 21
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## <int> <int> <int> <int> <int> <int> <chr>
## 1 2011 2 1 2 1401 1539 AA
## 2 2011 2 1 2 NA NA AA
## 3 2011 2 1 2 NA NA AA
## 4 2011 2 1 2 NA NA AA
## 5 2011 2 1 2 1746 2109 AA
## 6 2011 2 1 2 NA NA AA
## 7 2011 2 1 2 1032 1358 AA
## 8 2011 2 1 2 NA NA AA
## 9 2011 2 1 2 558 912 AA
## 10 2011 2 1 2 1820 2112 AS
## # ... with 567 more rows, and 14 more variables: FlightNum <int>,
## # TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## # DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## # TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## # Diverted <int>
# Flights on 01-Feb-2011 from the Houston airports operated by AA or CO,
# combining the carrier conditions with the `|` (OR) operator
filter(
  flights,
  Month == 2,
  DayofMonth == 1,
  UniqueCarrier == "AA" | UniqueCarrier == "CO"
)
## # A tibble: 188 × 21
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## <int> <int> <int> <int> <int> <int> <chr>
## 1 2011 2 1 2 1401 1539 AA
## 2 2011 2 1 2 NA NA AA
## 3 2011 2 1 2 NA NA AA
## 4 2011 2 1 2 NA NA AA
## 5 2011 2 1 2 1746 2109 AA
## 6 2011 2 1 2 NA NA AA
## 7 2011 2 1 2 1032 1358 AA
## 8 2011 2 1 2 NA NA AA
## 9 2011 2 1 2 558 912 AA
## 10 2011 2 1 2 1014 1430 CO
## # ... with 178 more rows, and 14 more variables: FlightNum <int>,
## # TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## # DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## # TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## # Diverted <int>
# The %in% operator is a shorter alternative to chaining `|` (OR) conditions
flights %>%
  filter(UniqueCarrier %in% c("UA", "CO"))
## # A tibble: 72,104 × 21
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## <int> <int> <int> <int> <int> <int> <chr>
## 1 2011 1 31 1 924 1413 CO
## 2 2011 1 31 1 1825 1925 CO
## 3 2011 1 31 1 1554 1650 CO
## 4 2011 1 31 1 1522 1632 CO
## 5 2011 1 31 1 1536 1635 CO
## 6 2011 1 31 1 1916 2103 CO
## 7 2011 1 31 1 747 936 CO
## 8 2011 1 31 1 1803 1927 CO
## 9 2011 1 31 1 1206 1631 CO
## 10 2011 1 31 1 1425 1848 CO
## # ... with 72,094 more rows, and 14 more variables: FlightNum <int>,
## # TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## # DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## # TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## # Diverted <int>
# AA flights on the 1st and 2nd of January.
# DayofMonth is an integer column, so match it against integers instead of
# relying on %in% implicitly coercing the column to character.
filter(flights, DayofMonth %in% c(1L, 2L), Month == 1, UniqueCarrier == "AA")
## # A tibble: 16 × 21
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## <int> <int> <int> <int> <int> <int> <chr>
## 1 2011 1 1 6 1400 1500 AA
## 2 2011 1 2 7 1401 1501 AA
## 3 2011 1 1 6 728 840 AA
## 4 2011 1 2 7 719 821 AA
## 5 2011 1 2 7 1959 2106 AA
## 6 2011 1 1 6 1631 1736 AA
## 7 2011 1 2 7 1636 1759 AA
## 8 2011 1 1 6 1756 2112 AA
## 9 2011 1 2 7 1823 2132 AA
## 10 2011 1 1 6 1012 1347 AA
## 11 2011 1 2 7 1008 1321 AA
## 12 2011 1 1 6 1211 1325 AA
## 13 2011 1 2 7 1200 1303 AA
## 14 2011 1 2 7 907 1018 AA
## 15 2011 1 1 6 557 906 AA
## 16 2011 1 2 7 554 912 AA
## # ... with 14 more variables: FlightNum <int>, TailNum <chr>,
## # ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## # DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## # TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## # Diverted <int>
# dplyr approach: keep only the named columns
flights %>%
  select(DepTime, ArrTime, FlightNum)
## # A tibble: 227,496 × 3
## DepTime ArrTime FlightNum
## * <int> <int> <int>
## 1 1400 1500 428
## 2 1401 1501 428
## 3 1352 1502 428
## 4 1403 1513 428
## 5 1405 1507 428
## 6 1359 1503 428
## 7 1359 1509 428
## 8 1355 1454 428
## 9 1443 1554 428
## 10 1443 1553 428
## # ... with 227,486 more rows
%>%
is an infix operator (it can be pronounced as “then”) that passes the result on its left to the function on its right.
# Chaining or pipelining using `%>%`
# Keep AA flights whose DepDelay exceeds 30, showing three columns only
flights %>%
  select(Month, UniqueCarrier, DepDelay) %>%
  filter(
    UniqueCarrier == "AA",
    DepDelay > 30
  )
## # A tibble: 251 × 3
## Month UniqueCarrier DepDelay
## <int> <chr> <int>
## 1 1 AA 43
## 2 1 AA 43
## 3 1 AA 90
## 4 1 AA 67
## 5 1 AA 41
## 6 1 AA 55
## 7 1 AA 40
## 8 1 AA 74
## 9 1 AA 31
## 10 1 AA 38
## # ... with 241 more rows
# Sort by DepDelay in ascending order (smallest value first)
flights %>%
  select(UniqueCarrier, DepDelay) %>%
  arrange(DepDelay)
## # A tibble: 227,496 × 2
## UniqueCarrier DepDelay
## <chr> <int>
## 1 OO -33
## 2 MQ -23
## 3 XE -19
## 4 XE -19
## 5 CO -18
## 6 EV -18
## 7 XE -17
## 8 CO -17
## 9 XE -17
## 10 MQ -17
## # ... with 227,486 more rows
# Arrange AA flights in descending order of DepDelay (largest delay first)
flights %>%
  select(UniqueCarrier, DepDelay, Cancelled) %>%
  filter(UniqueCarrier == "AA") %>%
  arrange(desc(DepDelay))
## # A tibble: 3,244 × 3
## UniqueCarrier DepDelay Cancelled
## <chr> <int> <int>
## 1 AA 970 0
## 2 AA 677 0
## 3 AA 653 0
## 4 AA 525 0
## 5 AA 286 0
## 6 AA 277 0
## 7 AA 235 0
## 8 AA 234 0
## 9 AA 233 0
## 10 AA 228 0
## # ... with 3,234 more rows
# Add a computed column; the result is only printed, flights itself is unchanged
flights %>%
  select(Distance, AirTime) %>%
  mutate(Speed_Kmps = Distance / AirTime * 60)
## # A tibble: 227,496 × 3
## Distance AirTime Speed_Kmps
## <int> <int> <dbl>
## 1 224 40 336.0000
## 2 224 45 298.6667
## 3 224 48 280.0000
## 4 224 39 344.6154
## 5 224 44 305.4545
## 6 224 45 298.6667
## 7 224 43 312.5581
## 8 224 40 336.0000
## 9 224 41 327.8049
## 10 224 45 298.6667
## # ... with 227,486 more rows
# Store the new variable by assigning the mutated tibble back to flights
# NOTE(review): the hflights documentation gives Distance in miles and AirTime in
# minutes, so this value looks like miles per hour despite the "Kmps" name; confirm.
flights <- mutate(flights, Speed_Kmps = Distance / AirTime * 60)
group_by
creates the groups that will be operated on.
summarise
uses the provided aggregation function to summarise each group.
# dplyr approach: create a table grouped by UniqueCarrier, and then summarise each group by taking the mean of ArrDelay. NAs are removed to facilitate calculation of the mean delay
# Mean ArrDelay per carrier (NAs ignored), sorted from the largest mean down
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(
    avgArrival_delay = mean(ArrDelay, na.rm = TRUE)
  ) %>%
  arrange(desc(avgArrival_delay))
## # A tibble: 15 × 2
## UniqueCarrier avgArrival_delay
## <chr> <dbl>
## 1 UA 10.4628628
## 2 B6 9.8588410
## 3 OO 8.6934922
## 4 XE 8.1865242
## 5 F9 7.6682692
## 6 WN 7.5871430
## 7 EV 7.2569543
## 8 MQ 7.1529751
## 9 CO 6.0986983
## 10 DL 6.0841374
## 11 YV 4.0128205
## 12 AS 3.1923077
## 13 FL 1.8536239
## 14 AA 0.8917558
## 15 US -0.6307692
summarise_each
allows applying the same summary function to multiple columns at once.
# Applying summary functions to two columns of flights
# Per-carrier mean of the Cancelled and Diverted columns.
# across() replaces summarise_each()/funs(), which are deprecated in current
# dplyr releases; the summarised columns keep their original names.
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(across(c(Cancelled, Diverted), mean))
## # A tibble: 15 × 3
## UniqueCarrier Cancelled Diverted
## <chr> <dbl> <dbl>
## 1 AA 0.018495684 0.001849568
## 2 AS 0.000000000 0.002739726
## 3 B6 0.025899281 0.005755396
## 4 CO 0.006782614 0.002627370
## 5 DL 0.015903067 0.003029156
## 6 EV 0.034482759 0.003176044
## 7 F9 0.007159905 0.000000000
## 8 FL 0.009817672 0.003272557
## 9 MQ 0.029044750 0.001936317
## 10 OO 0.013946828 0.003486707
## 11 UA 0.016409266 0.002413127
## 12 US 0.011268986 0.001469868
## 13 WN 0.015504047 0.002293629
## 14 XE 0.015495599 0.003449550
## 15 YV 0.012658228 0.000000000
# For each carrier, calculate the minimum and maximum arrival and departure
# delays, using matches() to select every column whose name contains "Delay".
# across() with a named list replaces the deprecated summarise_each()/funs().
# Results are named <column>_<function> and are ordered column by column
# (ArrDelay_min, ArrDelay_max, DepDelay_min, DepDelay_max).
flights %>%
  group_by(UniqueCarrier) %>%
  summarise(across(
    matches("Delay"),
    list(
      min = ~ min(.x, na.rm = TRUE),
      max = ~ max(.x, na.rm = TRUE)
    )
  ))
## # A tibble: 15 × 5
## UniqueCarrier ArrDelay_min DepDelay_min ArrDelay_max DepDelay_max
## <chr> <int> <int> <int> <int>
## 1 AA -39 -15 978 970
## 2 AS -43 -15 183 172
## 3 B6 -44 -14 335 310
## 4 CO -55 -18 957 981
## 5 DL -32 -17 701 730
## 6 EV -40 -18 469 479
## 7 F9 -24 -15 277 275
## 8 FL -30 -14 500 507
## 9 MQ -38 -23 918 931
## 10 OO -57 -33 380 360
## 11 UA -47 -11 861 869
## 12 US -42 -17 433 425
## 13 WN -44 -10 499 548
## 14 XE -70 -19 634 628
## 15 YV -32 -11 72 54
n()
counts the number of rows in a group.
n_distinct(vector)
counts the number of unique items in that vector.
# To get the number of flights on each day of each month
# Count flights per (Month, DayofMonth) and list the busiest days first
flights %>%
  group_by(Month, DayofMonth) %>%
  summarize(
    flight_count = n()
  ) %>%
  arrange(desc(flight_count))
## Source: local data frame [365 x 3]
## Groups: Month [12]
##
## Month DayofMonth flight_count
## <int> <int> <int>
## 1 8 4 706
## 2 8 11 706
## 3 8 12 706
## 4 8 5 705
## 5 8 3 704
## 6 8 10 704
## 7 1 3 702
## 8 7 7 702
## 9 7 14 702
## 10 7 28 701
## # ... with 355 more rows
# For each destination, count the flights and the distinct tail numbers (planes)
flights %>%
  group_by(Dest) %>%
  summarise(
    flight_count = n(),
    plane_count = n_distinct(TailNum)
  )
## # A tibble: 116 × 3
## Dest flight_count plane_count
## <chr> <int> <int>
## 1 ABQ 2812 716
## 2 AEX 724 215
## 3 AGS 1 1
## 4 AMA 1297 158
## 5 ANC 125 38
## 6 ASE 125 60
## 7 ATL 7886 983
## 8 AUS 5022 1015
## 9 AVL 350 142
## 10 BFL 504 70
## # ... with 106 more rows
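An aggregation function (like `mean`) takes n inputs and returns 1 value, whereas a window function (like `min_rank` or `lag`) takes n inputs and returns n values.
# for each carrier, calculate which three days of the year they had their longest departure delays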
# Rank DepDelay within each carrier (largest delay gets rank 1) and keep ranks 1 to 3
flights %>%
  group_by(UniqueCarrier) %>%
  select(Month, DayofMonth, DepDelay) %>%
  filter(
    min_rank(desc(DepDelay)) <= 3
  ) %>%
  arrange(UniqueCarrier, desc(DepDelay))
## Adding missing grouping variables: `UniqueCarrier`
## Source: local data frame [45 x 4]
## Groups: UniqueCarrier [15]
##
## UniqueCarrier Month DayofMonth DepDelay
## <chr> <int> <int> <int>
## 1 AA 12 12 970
## 2 AA 11 19 677
## 3 AA 12 22 653
## 4 AS 2 28 172
## 5 AS 7 6 138
## 6 AS 4 8 102
## 7 B6 10 29 310
## 8 B6 8 19 283
## 9 B6 3 10 278
## 10 CO 8 1 981
## # ... with 35 more rows
# Simpler form using the `top_n` function; note that this call keeps the top
# two delays per carrier. With no weighting column given, top_n() selects by
# the last column, which here is DepDelay.
flights %>%
  group_by(UniqueCarrier) %>%
  select(Month, DayofMonth, DepDelay) %>%
  top_n(n = 2) %>%
  arrange(UniqueCarrier, desc(DepDelay))
## Adding missing grouping variables: `UniqueCarrier`
## Selecting by DepDelay
## Source: local data frame [30 x 4]
## Groups: UniqueCarrier [15]
##
## UniqueCarrier Month DayofMonth DepDelay
## <chr> <int> <int> <int>
## 1 AA 12 12 970
## 2 AA 11 19 677
## 3 AS 2 28 172
## 4 AS 7 6 138
## 5 B6 10 29 310
## 6 B6 8 19 283
## 7 CO 8 1 981
## 8 CO 1 20 780
## 9 DL 10 25 730
## 10 DL 4 5 497
## # ... with 20 more rows
# Monthly flight counts plus the difference from the preceding month
# (lag() has no previous value for the first month, so its change is NA)
flights %>%
  group_by(Month) %>%
  summarise(
    flight_count = n()
  ) %>%
  mutate(
    change = flight_count - lag(flight_count)
  )
## # A tibble: 12 × 3
## Month flight_count change
## <int> <int> <int>
## 1 1 18910 NA
## 2 2 17128 -1782
## 3 3 19470 2342
## 4 4 18593 -877
## 5 5 19172 579
## 6 6 19600 428
## 7 7 20548 948
## 8 8 20176 -372
## 9 9 18065 -2111
## 10 10 18696 631
## 11 11 18021 -675
## 12 12 19117 1096
# Same result using the `tally` function, which counts rows per group into a column named n
flights %>%
  group_by(Month) %>%
  tally() %>%
  mutate(
    change = n - lag(n)
  )
## # A tibble: 12 × 3
## Month n change
## <int> <int> <int>
## 1 1 18910 NA
## 2 2 17128 -1782
## 3 3 19470 2342
## 4 4 18593 -877
## 5 5 19172 579
## 6 6 19600 428
## 7 7 20548 948
## 8 8 20176 -372
## 9 9 18065 -2111
## 10 10 18696 631
## 11 11 18021 -675
## 12 12 19117 1096
# Draw five rows at random, without replacement
sample_n(flights, size = 5)
## # A tibble: 5 × 22
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier FlightNum
## <int> <int> <int> <int> <int> <int> <chr> <int>
## 1 2011 7 3 7 1026 1346 AA 1700
## 2 2011 1 26 3 1203 1448 WN 2761
## 3 2011 7 29 5 2020 2348 CO 1522
## 4 2011 10 2 7 1908 2236 CO 1574
## 5 2011 8 15 1 1514 1834 CO 1699
## # ... with 14 more variables: TailNum <chr>, ActualElapsedTime <int>,
## # AirTime <int>, ArrDelay <int>, DepDelay <int>, Origin <chr>,
## # Dest <chr>, Distance <int>, TaxiIn <int>, TaxiOut <int>,
## # Cancelled <int>, CancellationCode <chr>, Diverted <int>,
## # Speed_Kmps <dbl>
# Draw a random 25% of the rows, sampling with replacement
sample_frac(flights, size = 0.25, replace = TRUE)
## # A tibble: 56,874 × 22
## Year Month DayofMonth DayOfWeek DepTime ArrTime UniqueCarrier
## <int> <int> <int> <int> <int> <int> <chr>
## 1 2011 5 9 1 1615 1752 EV
## 2 2011 8 9 2 943 1056 CO
## 3 2011 12 15 4 641 1016 CO
## 4 2011 5 27 5 1158 1455 FL
## 5 2011 9 5 1 1910 2234 CO
## 6 2011 1 25 2 1821 1944 WN
## 7 2011 8 8 1 1131 1607 WN
## 8 2011 3 29 2 918 1037 XE
## 9 2011 3 30 3 1435 1737 XE
## 10 2011 4 17 7 800 852 WN
## # ... with 56,864 more rows, and 15 more variables: FlightNum <int>,
## # TailNum <chr>, ActualElapsedTime <int>, AirTime <int>, ArrDelay <int>,
## # DepDelay <int>, Origin <chr>, Dest <chr>, Distance <int>,
## # TaxiIn <int>, TaxiOut <int>, Cancelled <int>, CancellationCode <chr>,
## # Diverted <int>, Speed_Kmps <dbl>
# Base R: inspect the structure (column names, types, first values) of an object
str(object = flights)
## Classes 'tbl_df', 'tbl' and 'data.frame': 227496 obs. of 22 variables:
## $ Year : int 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
## $ Month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DayofMonth : int 1 2 3 4 5 6 7 8 9 10 ...
## $ DayOfWeek : int 6 7 1 2 3 4 5 6 7 1 ...
## $ DepTime : int 1400 1401 1352 1403 1405 1359 1359 1355 1443 1443 ...
## $ ArrTime : int 1500 1501 1502 1513 1507 1503 1509 1454 1554 1553 ...
## $ UniqueCarrier : chr "AA" "AA" "AA" "AA" ...
## $ FlightNum : int 428 428 428 428 428 428 428 428 428 428 ...
## $ TailNum : chr "N576AA" "N557AA" "N541AA" "N403AA" ...
## $ ActualElapsedTime: int 60 60 70 70 62 64 70 59 71 70 ...
## $ AirTime : int 40 45 48 39 44 45 43 40 41 45 ...
## $ ArrDelay : int -10 -9 -8 3 -3 -7 -1 -16 44 43 ...
## $ DepDelay : int 0 1 -8 3 5 -1 -1 -5 43 43 ...
## $ Origin : chr "IAH" "IAH" "IAH" "IAH" ...
## $ Dest : chr "DFW" "DFW" "DFW" "DFW" ...
## $ Distance : int 224 224 224 224 224 224 224 224 224 224 ...
## $ TaxiIn : int 7 6 5 9 9 6 12 7 8 6 ...
## $ TaxiOut : int 13 9 17 22 9 13 15 12 22 19 ...
## $ Cancelled : int 0 0 0 0 0 0 0 0 0 0 ...
## $ CancellationCode : chr "" "" "" "" ...
## $ Diverted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Speed_Kmps : num 336 299 280 345 305 ...
# dplyr: glimpse() gives better formatting and adapts to the screen width
flights %>%
  glimpse()
## Observations: 227,496
## Variables: 22
## $ Year <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 20...
## $ Month <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ DayofMonth <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1...
## $ DayOfWeek <int> 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6,...
## $ DepTime <int> 1400, 1401, 1352, 1403, 1405, 1359, 1359, 13...
## $ ArrTime <int> 1500, 1501, 1502, 1513, 1507, 1503, 1509, 14...
## $ UniqueCarrier <chr> "AA", "AA", "AA", "AA", "AA", "AA", "AA", "A...
## $ FlightNum <int> 428, 428, 428, 428, 428, 428, 428, 428, 428,...
## $ TailNum <chr> "N576AA", "N557AA", "N541AA", "N403AA", "N49...
## $ ActualElapsedTime <int> 60, 60, 70, 70, 62, 64, 70, 59, 71, 70, 70, ...
## $ AirTime <int> 40, 45, 48, 39, 44, 45, 43, 40, 41, 45, 42, ...
## $ ArrDelay <int> -10, -9, -8, 3, -3, -7, -1, -16, 44, 43, 29,...
## $ DepDelay <int> 0, 1, -8, 3, 5, -1, -1, -5, 43, 43, 29, 19, ...
## $ Origin <chr> "IAH", "IAH", "IAH", "IAH", "IAH", "IAH", "I...
## $ Dest <chr> "DFW", "DFW", "DFW", "DFW", "DFW", "DFW", "D...
## $ Distance <int> 224, 224, 224, 224, 224, 224, 224, 224, 224,...
## $ TaxiIn <int> 7, 6, 5, 9, 9, 6, 12, 7, 8, 6, 8, 4, 6, 5, 6...
## $ TaxiOut <int> 13, 9, 17, 22, 9, 13, 15, 12, 22, 19, 20, 11...
## $ Cancelled <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ CancellationCode <chr> "", "", "", "", "", "", "", "", "", "", "", ...
## $ Diverted <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ Speed_Kmps <dbl> 336.0000, 298.6667, 280.0000, 344.6154, 305....
< END OF DOCUMENT >