library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
flights2 <- flights %>%
select(year:day, hour, origin, dest, tailnum, carrier)
flights2
## # A tibble: 336,776 × 8
## year month day hour origin dest tailnum carrier
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr>
## 1 2013 1 1 5 EWR IAH N14228 UA
## 2 2013 1 1 5 LGA IAH N24211 UA
## 3 2013 1 1 5 JFK MIA N619AA AA
## 4 2013 1 1 5 JFK BQN N804JB B6
## 5 2013 1 1 6 LGA ATL N668DN DL
## 6 2013 1 1 5 EWR ORD N39463 UA
## 7 2013 1 1 6 EWR FLL N516JB B6
## 8 2013 1 1 6 LGA IAD N829AS EV
## 9 2013 1 1 6 JFK MCO N593JB B6
## 10 2013 1 1 6 LGA ORD N3ALAA AA
## # ℹ 336,766 more rows
#> # A tibble: 336,776 × 8
#> year month day hour origin dest tailnum carrier
#> <int> <int> <int> <dbl> <chr> <chr> <chr> <chr>
#> 1 2013 1 1 5 EWR IAH N14228 UA
#> 2 2013 1 1 5 LGA IAH N24211 UA
#> 3 2013 1 1 5 JFK MIA N619AA AA
#> 4 2013 1 1 5 JFK BQN N804JB B6
#> 5 2013 1 1 6 LGA ATL N668DN DL
#> 6 2013 1 1 5 EWR ORD N39463 UA
#> # ℹ 336,770 more rows
flights2 %>%
select(-origin, -dest) %>%
left_join(airlines, by = "carrier")
## # A tibble: 336,776 × 7
## year month day hour tailnum carrier name
## <int> <int> <int> <dbl> <chr> <chr> <chr>
## 1 2013 1 1 5 N14228 UA United Air Lines Inc.
## 2 2013 1 1 5 N24211 UA United Air Lines Inc.
## 3 2013 1 1 5 N619AA AA American Airlines Inc.
## 4 2013 1 1 5 N804JB B6 JetBlue Airways
## 5 2013 1 1 6 N668DN DL Delta Air Lines Inc.
## 6 2013 1 1 5 N39463 UA United Air Lines Inc.
## 7 2013 1 1 6 N516JB B6 JetBlue Airways
## 8 2013 1 1 6 N829AS EV ExpressJet Airlines Inc.
## 9 2013 1 1 6 N593JB B6 JetBlue Airways
## 10 2013 1 1 6 N3ALAA AA American Airlines Inc.
## # ℹ 336,766 more rows
#> # A tibble: 336,776 × 7
#> year month day hour tailnum carrier name
#> <int> <int> <int> <dbl> <chr> <chr> <chr>
#> 1 2013 1 1 5 N14228 UA United Air Lines Inc.
#> 2 2013 1 1 5 N24211 UA United Air Lines Inc.
#> 3 2013 1 1 5 N619AA AA American Airlines Inc.
#> 4 2013 1 1 5 N804JB B6 JetBlue Airways
#> 5 2013 1 1 6 N668DN DL Delta Air Lines Inc.
#> 6 2013 1 1 5 N39463 UA United Air Lines Inc.
#> # ℹ 336,770 more rows
flights2 %>%
select(-origin, -dest) %>%
mutate(name = airlines$name[match(carrier, airlines$carrier)])
## # A tibble: 336,776 × 7
## year month day hour tailnum carrier name
## <int> <int> <int> <dbl> <chr> <chr> <chr>
## 1 2013 1 1 5 N14228 UA United Air Lines Inc.
## 2 2013 1 1 5 N24211 UA United Air Lines Inc.
## 3 2013 1 1 5 N619AA AA American Airlines Inc.
## 4 2013 1 1 5 N804JB B6 JetBlue Airways
## 5 2013 1 1 6 N668DN DL Delta Air Lines Inc.
## 6 2013 1 1 5 N39463 UA United Air Lines Inc.
## 7 2013 1 1 6 N516JB B6 JetBlue Airways
## 8 2013 1 1 6 N829AS EV ExpressJet Airlines Inc.
## 9 2013 1 1 6 N593JB B6 JetBlue Airways
## 10 2013 1 1 6 N3ALAA AA American Airlines Inc.
## # ℹ 336,766 more rows
#> # A tibble: 336,776 × 7
#> year month day hour tailnum carrier name
#> <int> <int> <int> <dbl> <chr> <chr> <chr>
#> 1 2013 1 1 5 N14228 UA United Air Lines Inc.
#> 2 2013 1 1 5 N24211 UA United Air Lines Inc.
#> 3 2013 1 1 5 N619AA AA American Airlines Inc.
#> 4 2013 1 1 5 N804JB B6 JetBlue Airways
#> 5 2013 1 1 6 N668DN DL Delta Air Lines Inc.
#> 6 2013 1 1 5 N39463 UA United Air Lines Inc.
#> # ℹ 336,770 more rows
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
3, "x3"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
4, "y3"
)
x %>%
inner_join(y, by = "key")
## # A tibble: 2 × 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
#> # A tibble: 2 × 3
#> key val_x val_y
#> <dbl> <chr> <chr>
#> 1 1 x1 y1
#> 2 2 x2 y2
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
1, "x4"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2"
)
left_join(x, y, by = "key")
## # A tibble: 4 × 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 2 x3 y2
## 4 1 x4 y1
#> # A tibble: 4 × 3
#> key val_x val_y
#> <dbl> <chr> <chr>
#> 1 1 x1 y1
#> 2 2 x2 y2
#> 3 2 x3 y2
#> 4 1 x4 y1
x <- tribble(
~key, ~val_x,
1, "x1",
2, "x2",
2, "x3",
3, "x4"
)
y <- tribble(
~key, ~val_y,
1, "y1",
2, "y2",
2, "y3",
3, "y4"
)
left_join(x, y, by = "key")
## Warning in left_join(x, y, by = "key"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 2 of `x` matches multiple rows in `y`.
## ℹ Row 2 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## # A tibble: 6 × 3
## key val_x val_y
## <dbl> <chr> <chr>
## 1 1 x1 y1
## 2 2 x2 y2
## 3 2 x2 y3
## 4 2 x3 y2
## 5 2 x3 y3
## 6 3 x4 y4
#> Warning in left_join(x, y, by = "key"): Detected an unexpected many-to-many relationship between `x` and `y`.
#> ℹ Row 2 of `x` matches multiple rows in `y`.
#> ℹ Row 2 of `y` matches multiple rows in `x`.
#> ℹ If a many-to-many relationship is expected, set `relationship =
#> "many-to-many"` to silence this warning.
#> # A tibble: 6 × 3
#> key val_x val_y
#> <dbl> <chr> <chr>
#> 1 1 x1 y1
#> 2 2 x2 y2
#> 3 2 x2 y3
#> 4 2 x3 y2
#> 5 2 x3 y3
#> 6 3 x4 y4
flights2 %>%
left_join(weather)
## Joining with `by = join_by(year, month, day, hour, origin)`
## # A tibble: 336,776 × 18
## year month day hour origin dest tailnum carrier temp dewp humid
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH N14228 UA 39.0 28.0 64.4
## 2 2013 1 1 5 LGA IAH N24211 UA 39.9 25.0 54.8
## 3 2013 1 1 5 JFK MIA N619AA AA 39.0 27.0 61.6
## 4 2013 1 1 5 JFK BQN N804JB B6 39.0 27.0 61.6
## 5 2013 1 1 6 LGA ATL N668DN DL 39.9 25.0 54.8
## 6 2013 1 1 5 EWR ORD N39463 UA 39.0 28.0 64.4
## 7 2013 1 1 6 EWR FLL N516JB B6 37.9 28.0 67.2
## 8 2013 1 1 6 LGA IAD N829AS EV 39.9 25.0 54.8
## 9 2013 1 1 6 JFK MCO N593JB B6 37.9 27.0 64.3
## 10 2013 1 1 6 LGA ORD N3ALAA AA 39.9 25.0 54.8
## # ℹ 336,766 more rows
## # ℹ 7 more variables: wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
## # precip <dbl>, pressure <dbl>, visib <dbl>, time_hour <dttm>
#> Joining with `by = join_by(year, month, day, hour, origin)`
#> # A tibble: 336,776 × 18
#> year month day hour origin dest tailnum carrier temp dewp humid
#> <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2013 1 1 5 EWR IAH N14228 UA 39.0 28.0 64.4
#> 2 2013 1 1 5 LGA IAH N24211 UA 39.9 25.0 54.8
#> 3 2013 1 1 5 JFK MIA N619AA AA 39.0 27.0 61.6
#> 4 2013 1 1 5 JFK BQN N804JB B6 39.0 27.0 61.6
#> 5 2013 1 1 6 LGA ATL N668DN DL 39.9 25.0 54.8
#> 6 2013 1 1 5 EWR ORD N39463 UA 39.0 28.0 64.4
#> # ℹ 336,770 more rows
#> # ℹ 7 more variables: wind_dir <dbl>, wind_speed <dbl>, wind_gust <dbl>,
#> # precip <dbl>, pressure <dbl>, visib <dbl>, time_hour <dttm>
flights2 %>%
left_join(planes, by = "tailnum")
## # A tibble: 336,776 × 16
## year.x month day hour origin dest tailnum carrier year.y type
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <int> <chr>
## 1 2013 1 1 5 EWR IAH N14228 UA 1999 Fixed wing mult…
## 2 2013 1 1 5 LGA IAH N24211 UA 1998 Fixed wing mult…
## 3 2013 1 1 5 JFK MIA N619AA AA 1990 Fixed wing mult…
## 4 2013 1 1 5 JFK BQN N804JB B6 2012 Fixed wing mult…
## 5 2013 1 1 6 LGA ATL N668DN DL 1991 Fixed wing mult…
## 6 2013 1 1 5 EWR ORD N39463 UA 2012 Fixed wing mult…
## 7 2013 1 1 6 EWR FLL N516JB B6 2000 Fixed wing mult…
## 8 2013 1 1 6 LGA IAD N829AS EV 1998 Fixed wing mult…
## 9 2013 1 1 6 JFK MCO N593JB B6 2004 Fixed wing mult…
## 10 2013 1 1 6 LGA ORD N3ALAA AA NA <NA>
## # ℹ 336,766 more rows
## # ℹ 6 more variables: manufacturer <chr>, model <chr>, engines <int>,
## # seats <int>, speed <int>, engine <chr>
#> # A tibble: 336,776 × 16
#> year.x month day hour origin dest tailnum carrier year.y type
#> <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <int> <chr>
#> 1 2013 1 1 5 EWR IAH N14228 UA 1999 Fixed wing multi…
#> 2 2013 1 1 5 LGA IAH N24211 UA 1998 Fixed wing multi…
#> 3 2013 1 1 5 JFK MIA N619AA AA 1990 Fixed wing multi…
#> 4 2013 1 1 5 JFK BQN N804JB B6 2012 Fixed wing multi…
#> 5 2013 1 1 6 LGA ATL N668DN DL 1991 Fixed wing multi…
#> 6 2013 1 1 5 EWR ORD N39463 UA 2012 Fixed wing multi…
#> # ℹ 336,770 more rows
#> # ℹ 6 more variables: manufacturer <chr>, model <chr>, engines <int>,
#> # seats <int>, speed <int>, engine <chr>
flights2 %>%
left_join(airports, c("dest" = "faa"))
## # A tibble: 336,776 × 15
## year month day hour origin dest tailnum carrier name lat lon alt
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH N14228 UA Georg… 30.0 -95.3 97
## 2 2013 1 1 5 LGA IAH N24211 UA Georg… 30.0 -95.3 97
## 3 2013 1 1 5 JFK MIA N619AA AA Miami… 25.8 -80.3 8
## 4 2013 1 1 5 JFK BQN N804JB B6 <NA> NA NA NA
## 5 2013 1 1 6 LGA ATL N668DN DL Harts… 33.6 -84.4 1026
## 6 2013 1 1 5 EWR ORD N39463 UA Chica… 42.0 -87.9 668
## 7 2013 1 1 6 EWR FLL N516JB B6 Fort … 26.1 -80.2 9
## 8 2013 1 1 6 LGA IAD N829AS EV Washi… 38.9 -77.5 313
## 9 2013 1 1 6 JFK MCO N593JB B6 Orlan… 28.4 -81.3 96
## 10 2013 1 1 6 LGA ORD N3ALAA AA Chica… 42.0 -87.9 668
## # ℹ 336,766 more rows
## # ℹ 3 more variables: tz <dbl>, dst <chr>, tzone <chr>
#> # A tibble: 336,776 × 15
#> year month day hour origin dest tailnum carrier name lat lon alt
#> <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2013 1 1 5 EWR IAH N14228 UA George… 30.0 -95.3 97
#> 2 2013 1 1 5 LGA IAH N24211 UA George… 30.0 -95.3 97
#> 3 2013 1 1 5 JFK MIA N619AA AA Miami … 25.8 -80.3 8
#> 4 2013 1 1 5 JFK BQN N804JB B6 <NA> NA NA NA
#> 5 2013 1 1 6 LGA ATL N668DN DL Hartsf… 33.6 -84.4 1026
#> 6 2013 1 1 5 EWR ORD N39463 UA Chicag… 42.0 -87.9 668
#> # ℹ 336,770 more rows
#> # ℹ 3 more variables: tz <dbl>, dst <chr>, tzone <chr>
flights2 %>%
left_join(airports, c("origin" = "faa"))
## # A tibble: 336,776 × 15
## year month day hour origin dest tailnum carrier name lat lon alt
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH N14228 UA Newar… 40.7 -74.2 18
## 2 2013 1 1 5 LGA IAH N24211 UA La Gu… 40.8 -73.9 22
## 3 2013 1 1 5 JFK MIA N619AA AA John … 40.6 -73.8 13
## 4 2013 1 1 5 JFK BQN N804JB B6 John … 40.6 -73.8 13
## 5 2013 1 1 6 LGA ATL N668DN DL La Gu… 40.8 -73.9 22
## 6 2013 1 1 5 EWR ORD N39463 UA Newar… 40.7 -74.2 18
## 7 2013 1 1 6 EWR FLL N516JB B6 Newar… 40.7 -74.2 18
## 8 2013 1 1 6 LGA IAD N829AS EV La Gu… 40.8 -73.9 22
## 9 2013 1 1 6 JFK MCO N593JB B6 John … 40.6 -73.8 13
## 10 2013 1 1 6 LGA ORD N3ALAA AA La Gu… 40.8 -73.9 22
## # ℹ 336,766 more rows
## # ℹ 3 more variables: tz <dbl>, dst <chr>, tzone <chr>
#> # A tibble: 336,776 × 15
#> year month day hour origin dest tailnum carrier name lat lon alt
#> <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
#> 1 2013 1 1 5 EWR IAH N14228 UA Newark… 40.7 -74.2 18
#> 2 2013 1 1 5 LGA IAH N24211 UA La Gua… 40.8 -73.9 22
#> 3 2013 1 1 5 JFK MIA N619AA AA John F… 40.6 -73.8 13
#> 4 2013 1 1 5 JFK BQN N804JB B6 John F… 40.6 -73.8 13
#> 5 2013 1 1 6 LGA ATL N668DN DL La Gua… 40.8 -73.9 22
#> 6 2013 1 1 5 EWR ORD N39463 UA Newark… 40.7 -74.2 18
#> # ℹ 336,770 more rows
#> # ℹ 3 more variables: tz <dbl>, dst <chr>, tzone <chr>
airports %>% semi_join(flights, c(“faa” = “dest”)) %>% ggplot(aes(lon, lat)) + borders(“state”) + geom_point() + coord_quickmap()
airports %>%
semi_join(flights, c("faa" = "dest")) %>%
ggplot(aes(lon, lat)) +
borders("state") +
geom_point() +
coord_quickmap()
avg_dest_delays <-
flights %>%
group_by(dest) %>%
# arrival delay NA's are cancelled flights
summarise(delay = mean(arr_delay, na.rm = TRUE)) %>%
inner_join(airports, by = c(dest = "faa"))
#> `summarise()` ungrouping output (override with `.groups` argument)
avg_dest_delays %>%
ggplot(aes(lon, lat, colour = delay)) +
borders("state") +
geom_point() +
coord_quickmap()
airport_locations <- airports %>%
select(faa, lat, lon)
flights %>%
select(year:day, hour, origin, dest) %>%
left_join(
airport_locations,
by = c("origin" = "faa")
) %>%
left_join(
airport_locations,
by = c("dest" = "faa")
)
## # A tibble: 336,776 × 10
## year month day hour origin dest lat.x lon.x lat.y lon.y
## <int> <int> <int> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH 40.7 -74.2 30.0 -95.3
## 2 2013 1 1 5 LGA IAH 40.8 -73.9 30.0 -95.3
## 3 2013 1 1 5 JFK MIA 40.6 -73.8 25.8 -80.3
## 4 2013 1 1 5 JFK BQN 40.6 -73.8 NA NA
## 5 2013 1 1 6 LGA ATL 40.8 -73.9 33.6 -84.4
## 6 2013 1 1 5 EWR ORD 40.7 -74.2 42.0 -87.9
## 7 2013 1 1 6 EWR FLL 40.7 -74.2 26.1 -80.2
## 8 2013 1 1 6 LGA IAD 40.8 -73.9 38.9 -77.5
## 9 2013 1 1 6 JFK MCO 40.6 -73.8 28.4 -81.3
## 10 2013 1 1 6 LGA ORD 40.8 -73.9 42.0 -87.9
## # ℹ 336,766 more rows
#> # A tibble: 336,776 x 10
#> year month day hour origin dest lat.x lon.x lat.y lon.y
#> <int> <int> <int> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 2013 1 1 5 EWR IAH 40.7 -74.2 30.0 -95.3
#> 2 2013 1 1 5 LGA IAH 40.8 -73.9 30.0 -95.3
#> 3 2013 1 1 5 JFK MIA 40.6 -73.8 25.8 -80.3
#> 4 2013 1 1 5 JFK BQN 40.6 -73.8 NA NA
#> 5 2013 1 1 6 LGA ATL 40.8 -73.9 33.6 -84.4
#> 6 2013 1 1 5 EWR ORD 40.7 -74.2 42.0 -87.9
#> # … with 336,770 more rows
airport_locations <- airports %>%
select(faa, lat, lon)
flights %>%
select(year:day, hour, origin, dest) %>%
left_join(
airport_locations,
by = c("origin" = "faa")
) %>%
left_join(
airport_locations,
by = c("dest" = "faa"),
suffix = c("_origin", "_dest")
# existing lat and lon variables in tibble gain the _origin suffix
# new lat and lon variables are given _dest suffix
)
## # A tibble: 336,776 × 10
## year month day hour origin dest lat_origin lon_origin lat_dest lon_dest
## <int> <int> <int> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH 40.7 -74.2 30.0 -95.3
## 2 2013 1 1 5 LGA IAH 40.8 -73.9 30.0 -95.3
## 3 2013 1 1 5 JFK MIA 40.6 -73.8 25.8 -80.3
## 4 2013 1 1 5 JFK BQN 40.6 -73.8 NA NA
## 5 2013 1 1 6 LGA ATL 40.8 -73.9 33.6 -84.4
## 6 2013 1 1 5 EWR ORD 40.7 -74.2 42.0 -87.9
## 7 2013 1 1 6 EWR FLL 40.7 -74.2 26.1 -80.2
## 8 2013 1 1 6 LGA IAD 40.8 -73.9 38.9 -77.5
## 9 2013 1 1 6 JFK MCO 40.6 -73.8 28.4 -81.3
## 10 2013 1 1 6 LGA ORD 40.8 -73.9 42.0 -87.9
## # ℹ 336,766 more rows
#> # A tibble: 336,776 x 10
#> year month day hour origin dest lat_origin lon_origin lat_dest lon_dest
#> <int> <int> <int> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 2013 1 1 5 EWR IAH 40.7 -74.2 30.0 -95.3
#> 2 2013 1 1 5 LGA IAH 40.8 -73.9 30.0 -95.3
#> 3 2013 1 1 5 JFK MIA 40.6 -73.8 25.8 -80.3
#> 4 2013 1 1 5 JFK BQN 40.6 -73.8 NA NA
#> 5 2013 1 1 6 LGA ATL 40.8 -73.9 33.6 -84.4
#> 6 2013 1 1 5 EWR ORD 40.7 -74.2 42.0 -87.9
#> # … with 336,770 more rows
3.Is there a relationship between the age of a plane and its delays? To compare the age of the plane to flights delay, I merge flights with the planes, which contains a variable plane_year, with the year in which the plane was built. To look at the relationship between plane age and departure delay
plane_cohorts <- inner_join(flights,
select(planes, tailnum, plane_year = year),
by = "tailnum"
) %>%
mutate(age = year - plane_year) %>%
filter(!is.na(age)) %>%
mutate(age = if_else(age > 25, 25L, age)) %>%
group_by(age) %>%
summarise(
dep_delay_mean = mean(dep_delay, na.rm = TRUE),
dep_delay_sd = sd(dep_delay, na.rm = TRUE),
arr_delay_mean = mean(arr_delay, na.rm = TRUE),
arr_delay_sd = sd(arr_delay, na.rm = TRUE),
n_arr_delay = sum(!is.na(arr_delay)),
n_dep_delay = sum(!is.na(dep_delay))
)
#> `summarise()` ungrouping output (override with `.groups` argument)
ggplot(plane_cohorts, aes(x = age, y = dep_delay_mean)) +
geom_point() +
scale_x_continuous("Age of plane (years)", breaks = seq(0, 30, by = 10)) +
scale_y_continuous("Mean Departure Delay (minutes)")
ggplot(plane_cohorts, aes(x = age, y = arr_delay_mean)) +
geom_point() +
scale_x_continuous("Age of Plane (years)", breaks = seq(0, 30, by = 10)) +
scale_y_continuous("Mean Arrival Delay (minutes)")
flight_weather <-
flights %>%
inner_join(weather, by = c(
"origin" = "origin",
"year" = "year",
"month" = "month",
"day" = "day",
"hour" = "hour"
))
flight_weather %>%
group_by(precip) %>%
summarise(delay = mean(dep_delay, na.rm = TRUE)) %>%
ggplot(aes(x = precip, y = delay)) +
geom_line() + geom_point()
#> `summarise()` ungrouping output (override with `.groups` argument)
flight_weather %>%
ungroup() %>%
mutate(visib_cat = cut_interval(visib, n = 10)) %>%
group_by(visib_cat) %>%
summarise(dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
ggplot(aes(x = visib_cat, y = dep_delay)) +
geom_point()
#> `summarise()` ungrouping output (override with `.groups` argument)
##Filtering Joins
top_dest <- flights %>%
count(dest, sort = TRUE) %>%
head(10)
top_dest
## # A tibble: 10 × 2
## dest n
## <chr> <int>
## 1 ORD 17283
## 2 ATL 17215
## 3 LAX 16174
## 4 BOS 15508
## 5 MCO 14082
## 6 CLT 14064
## 7 SFO 13331
## 8 FLL 12055
## 9 MIA 11728
## 10 DCA 9705
#> # A tibble: 10 × 2
#> dest n
#> <chr> <int>
#> 1 ORD 17283
#> 2 ATL 17215
#> 3 LAX 16174
#> 4 BOS 15508
#> 5 MCO 14082
#> 6 CLT 14064
#> # ℹ 4 more rows
flights %>%
filter(dest %in% top_dest$dest)
## # A tibble: 141,145 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 542 540 2 923 850
## 2 2013 1 1 554 600 -6 812 837
## 3 2013 1 1 554 558 -4 740 728
## 4 2013 1 1 555 600 -5 913 854
## 5 2013 1 1 557 600 -3 838 846
## 6 2013 1 1 558 600 -2 753 745
## 7 2013 1 1 558 600 -2 924 917
## 8 2013 1 1 558 600 -2 923 937
## 9 2013 1 1 559 559 0 702 706
## 10 2013 1 1 600 600 0 851 858
## # ℹ 141,135 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
#> # A tibble: 141,145 × 19
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 2013 1 1 542 540 2 923 850
#> 2 2013 1 1 554 600 -6 812 837
#> 3 2013 1 1 554 558 -4 740 728
#> 4 2013 1 1 555 600 -5 913 854
#> 5 2013 1 1 557 600 -3 838 846
#> 6 2013 1 1 558 600 -2 753 745
#> # ℹ 141,139 more rows
#> # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#> # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#> # hour <dbl>, minute <dbl>, time_hour <dttm>
flights %>%
semi_join(top_dest)
## Joining with `by = join_by(dest)`
## # A tibble: 141,145 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 542 540 2 923 850
## 2 2013 1 1 554 600 -6 812 837
## 3 2013 1 1 554 558 -4 740 728
## 4 2013 1 1 555 600 -5 913 854
## 5 2013 1 1 557 600 -3 838 846
## 6 2013 1 1 558 600 -2 753 745
## 7 2013 1 1 558 600 -2 924 917
## 8 2013 1 1 558 600 -2 923 937
## 9 2013 1 1 559 559 0 702 706
## 10 2013 1 1 600 600 0 851 858
## # ℹ 141,135 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
#> Joining with `by = join_by(dest)`
#> # A tibble: 141,145 × 19
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 2013 1 1 542 540 2 923 850
#> 2 2013 1 1 554 600 -6 812 837
#> 3 2013 1 1 554 558 -4 740 728
#> 4 2013 1 1 555 600 -5 913 854
#> 5 2013 1 1 557 600 -3 838 846
#> 6 2013 1 1 558 600 -2 753 745
#> # ℹ 141,139 more rows
#> # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#> # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#> # hour <dbl>, minute <dbl>, time_hour <dttm>
flights %>%
semi_join(top_dest)
## Joining with `by = join_by(dest)`
## # A tibble: 141,145 × 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 542 540 2 923 850
## 2 2013 1 1 554 600 -6 812 837
## 3 2013 1 1 554 558 -4 740 728
## 4 2013 1 1 555 600 -5 913 854
## 5 2013 1 1 557 600 -3 838 846
## 6 2013 1 1 558 600 -2 753 745
## 7 2013 1 1 558 600 -2 924 917
## 8 2013 1 1 558 600 -2 923 937
## 9 2013 1 1 559 559 0 702 706
## 10 2013 1 1 600 600 0 851 858
## # ℹ 141,135 more rows
## # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
#> Joining with `by = join_by(dest)`
#> # A tibble: 141,145 × 19
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 2013 1 1 542 540 2 923 850
#> 2 2013 1 1 554 600 -6 812 837
#> 3 2013 1 1 554 558 -4 740 728
#> 4 2013 1 1 555 600 -5 913 854
#> 5 2013 1 1 557 600 -3 838 846
#> 6 2013 1 1 558 600 -2 753 745
#> # ℹ 141,139 more rows
#> # ℹ 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
#> # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
#> # hour <dbl>, minute <dbl>, time_hour <dttm>
flights %>%
anti_join(planes, by = "tailnum") %>%
count(tailnum, sort = TRUE)
## # A tibble: 722 × 2
## tailnum n
## <chr> <int>
## 1 <NA> 2512
## 2 N725MQ 575
## 3 N722MQ 513
## 4 N723MQ 507
## 5 N713MQ 483
## 6 N735MQ 396
## 7 N0EGMQ 371
## 8 N534MQ 364
## 9 N542MQ 363
## 10 N531MQ 349
## # ℹ 712 more rows
#> # A tibble: 722 × 2
#> tailnum n
#> <chr> <int>
#> 1 <NA> 2512
#> 2 N725MQ 575
#> 3 N722MQ 513
#> 4 N723MQ 507
#> 5 N713MQ 483
#> 6 N735MQ 396
#> # ℹ 716 more rows
2.Filter flights to only show flights with planes that have flown at least 100 flights.
flights %>%
filter(!is.na(tailnum)) %>%
group_by(tailnum) %>%
mutate(n = n()) %>%
filter(n >= 100)
## # A tibble: 228,390 × 20
## # Groups: tailnum [1,217]
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 544 545 -1 1004 1022
## 4 2013 1 1 554 558 -4 740 728
## 5 2013 1 1 555 600 -5 913 854
## 6 2013 1 1 557 600 -3 709 723
## 7 2013 1 1 557 600 -3 838 846
## 8 2013 1 1 558 600 -2 849 851
## 9 2013 1 1 558 600 -2 853 856
## 10 2013 1 1 558 600 -2 923 937
## # ℹ 228,380 more rows
## # ℹ 12 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>, n <int>
#> # A tibble: 228,390 x 20
#> # Groups: tailnum [1,217]
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> <int> <int> <int> <int> <int> <dbl> <int> <int>
#> 1 2013 1 1 517 515 2 830 819
#> 2 2013 1 1 533 529 4 850 830
#> 3 2013 1 1 544 545 -1 1004 1022
#> 4 2013 1 1 554 558 -4 740 728
#> 5 2013 1 1 555 600 -5 913 854
#> 6 2013 1 1 557 600 -3 709 723
#> # … with 228,384 more rows, and 12 more variables: arr_delay <dbl>,
#> # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
#> # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>,
#> # n <int>
3.Combine fueleconomy::vehicles and fueleconomy::common to find only the records for the most common models.
fueleconomy::vehicles %>%
semi_join(fueleconomy::common, by = c("make", "model"))
## # A tibble: 14,531 × 12
## id make model year class trans drive cyl displ fuel hwy cty
## <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 1833 Acura Integra 1986 Subcompa… Auto… Fron… 4 1.6 Regu… 28 22
## 2 1834 Acura Integra 1986 Subcompa… Manu… Fron… 4 1.6 Regu… 28 23
## 3 3037 Acura Integra 1987 Subcompa… Auto… Fron… 4 1.6 Regu… 28 22
## 4 3038 Acura Integra 1987 Subcompa… Manu… Fron… 4 1.6 Regu… 28 23
## 5 4183 Acura Integra 1988 Subcompa… Auto… Fron… 4 1.6 Regu… 27 22
## 6 4184 Acura Integra 1988 Subcompa… Manu… Fron… 4 1.6 Regu… 28 23
## 7 5303 Acura Integra 1989 Subcompa… Auto… Fron… 4 1.6 Regu… 27 22
## 8 5304 Acura Integra 1989 Subcompa… Manu… Fron… 4 1.6 Regu… 28 23
## 9 6442 Acura Integra 1990 Subcompa… Auto… Fron… 4 1.8 Regu… 24 20
## 10 6443 Acura Integra 1990 Subcompa… Manu… Fron… 4 1.8 Regu… 26 21
## # ℹ 14,521 more rows
#> # A tibble: 14,531 x 12
#> id make model year class trans drive cyl displ fuel hwy cty
#> <dbl> <chr> <chr> <dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
#> 1 1833 Acura Integ… 1986 Subcom… Automa… Front-… 4 1.6 Regu… 28 22
#> 2 1834 Acura Integ… 1986 Subcom… Manual… Front-… 4 1.6 Regu… 28 23
#> 3 3037 Acura Integ… 1987 Subcom… Automa… Front-… 4 1.6 Regu… 28 22
#> 4 3038 Acura Integ… 1987 Subcom… Manual… Front-… 4 1.6 Regu… 28 23
#> 5 4183 Acura Integ… 1988 Subcom… Automa… Front-… 4 1.6 Regu… 27 22
#> 6 4184 Acura Integ… 1988 Subcom… Manual… Front-… 4 1.6 Regu… 28 23
#> # … with 14,525 more rows
fueleconomy::vehicles %>%
distinct(model, make) %>%
group_by(model) %>%
filter(n() > 1) %>%
arrange(model)
## # A tibble: 126 × 2
## # Groups: model [60]
## model make
## <chr> <chr>
## 1 200 Audi
## 2 200 Chrysler
## 3 240 DL/240 GL Wagon Mcevoy Motors
## 4 240 DL/240 GL Wagon Volvo
## 5 300E Lambda Control Systems
## 6 300E Mercedes-Benz
## 7 300SL J.K. Motors
## 8 300SL Mercedes-Benz
## 9 500SE Mercedes-Benz
## 10 500SE Texas Coach Company
## # ℹ 116 more rows
#> # A tibble: 126 x 2
#> # Groups: model [60]
#> make model
#> <chr> <chr>
#> 1 Audi 200
#> 2 Chrysler 200
#> 3 Mcevoy Motors 240 DL/240 GL Wagon
#> 4 Volvo 240 DL/240 GL Wagon
#> 5 Lambda Control Systems 300E
#> 6 Mercedes-Benz 300E
#> # … with 120 more rows
fueleconomy::common %>%
distinct(model, make) %>%
group_by(model) %>%
filter(n() > 1) %>%
arrange(model)
## # A tibble: 8 × 2
## # Groups: model [3]
## model make
## <chr> <chr>
## 1 Colt Dodge
## 2 Colt Plymouth
## 3 Truck 2WD Mitsubishi
## 4 Truck 2WD Nissan
## 5 Truck 2WD Toyota
## 6 Truck 4WD Mitsubishi
## 7 Truck 4WD Nissan
## 8 Truck 4WD Toyota
#> # A tibble: 8 x 2
#> # Groups: model [3]
#> make model
#> <chr> <chr>
#> 1 Dodge Colt
#> 2 Plymouth Colt
#> 3 Mitsubishi Truck 2WD
#> 4 Nissan Truck 2WD
#> 5 Toyota Truck 2WD
#> 6 Mitsubishi Truck 4WD
#> # … with 2 more rows
4.Find the 48 hours (over the course of the whole year) that have the worst delays. Cross-reference it with the weather data. Can you see any patterns?
worst_hours <- flights %>%
mutate(hour = sched_dep_time %/% 100) %>%
group_by(origin, year, month, day, hour) %>%
summarise(dep_delay = mean(dep_delay, na.rm = TRUE)) %>%
ungroup() %>%
arrange(desc(dep_delay)) %>%
slice(1:48)
## `summarise()` has grouped output by 'origin', 'year', 'month', 'day'. You can
## override using the `.groups` argument.
#> `summarise()` regrouping output by 'origin', 'year', 'month', 'day' (override with `.groups` argument)
weather_most_delayed <- semi_join(weather, worst_hours,
by = c("origin", "year",
"month", "day", "hour"))
select(weather_most_delayed, temp, wind_speed, precip) %>%
print(n = 48)
## # A tibble: 48 × 3
## temp wind_speed precip
## <dbl> <dbl> <dbl>
## 1 27.0 13.8 0
## 2 28.0 19.6 0
## 3 28.9 28.8 0
## 4 33.8 9.21 0.06
## 5 34.0 8.06 0.05
## 6 80.1 8.06 0
## 7 86 13.8 0
## 8 73.4 6.90 0.08
## 9 84.0 5.75 0
## 10 78.8 18.4 0.23
## 11 53.6 0 0
## 12 60.8 31.1 0.11
## 13 55.4 17.3 0.14
## 14 53.1 9.21 0.01
## 15 55.9 11.5 0.1
## 16 55.4 8.06 0.15
## 17 57.0 29.9 0
## 18 33.8 20.7 0.02
## 19 34.0 19.6 0.01
## 20 36.0 21.9 0.01
## 21 37.9 16.1 0
## 22 32 13.8 0.12
## 23 60.1 33.4 0.14
## 24 60.8 11.5 0.02
## 25 62.1 17.3 0
## 26 66.9 10.4 0
## 27 66.9 13.8 0
## 28 79.0 10.4 0
## 29 77 16.1 0.07
## 30 75.9 13.8 0
## 31 82.4 8.06 0
## 32 86 9.21 0
## 33 80.1 9.21 0
## 34 80.6 11.5 0
## 35 78.1 6.90 0
## 36 75.2 10.4 0.01
## 37 73.9 5.75 0.03
## 38 73.9 8.06 0
## 39 75.0 4.60 0
## 40 75.0 4.60 0.01
## 41 80.1 0 0.01
## 42 80.1 0 0
## 43 77 10.4 0
## 44 82.0 10.4 0
## 45 72.0 13.8 0.3
## 46 72.0 4.60 0.03
## 47 51.1 4.60 0
## 48 54.0 6.90 0
#> # A tibble: 48 x 3
#> temp wind_speed precip
#> <dbl> <dbl> <dbl>
#> 1 27.0 13.8 0
#> 2 28.0 19.6 0
#> 3 28.9 28.8 0
#> 4 33.8 9.21 0.06
#> 5 34.0 8.06 0.05
#> 6 80.1 8.06 0
#> 7 86 13.8 0
#> 8 73.4 6.90 0.08
#> 9 84.0 5.75 0
#> 10 78.8 18.4 0.23
#> 11 53.6 0 0
#> 12 60.8 31.1 0.11
#> 13 55.4 17.3 0.14
#> 14 53.1 9.21 0.01
#> 15 55.9 11.5 0.1
#> 16 55.4 8.06 0.15
#> 17 57.0 29.9 0
#> 18 33.8 20.7 0.02
#> 19 34.0 19.6 0.01
#> 20 36.0 21.9 0.01
#> 21 37.9 16.1 0
#> 22 32 13.8 0.12
#> 23 60.1 33.4 0.14
#> 24 60.8 11.5 0.02
#> 25 62.1 17.3 0
#> 26 66.9 10.4 0
#> 27 66.9 13.8 0
#> 28 79.0 10.4 0
#> 29 77 16.1 0.07
#> 30 75.9 13.8 0
#> 31 82.4 8.06 0
#> 32 86 9.21 0
#> 33 80.1 9.21 0
#> 34 80.6 11.5 0
#> 35 78.1 6.90 0
#> 36 75.2 10.4 0.01
#> 37 73.9 5.75 0.03
#> 38 73.9 8.06 0
#> 39 75.0 4.60 0
#> 40 75.0 4.60 0.01
#> 41 80.1 0 0.01
#> 42 80.1 0 0
#> 43 77 10.4 0
#> 44 82.0 10.4 0
#> 45 72.0 13.8 0.3
#> 46 72.0 4.60 0.03
#> 47 51.1 4.60 0
#> 48 54.0 6.90 0
ggplot(weather_most_delayed, aes(x = precip, y = wind_speed, color = temp)) +
geom_point()
5.What does anti_join(flights, airports, by = c(“dest” = “faa”)) tell you? What does anti_join(airports, flights, by = c(“faa” = “dest”)) tell you?
anti_join(flights, airports, by = c("dest" = "faa")) %>%
distinct(dest)
## # A tibble: 4 × 1
## dest
## <chr>
## 1 BQN
## 2 SJU
## 3 STT
## 4 PSE
#> # A tibble: 4 x 1
#> dest
#> <chr>
#> 1 BQN
#> 2 SJU
#> 3 STT
#> 4 PSE
anti_join(airports, flights, by = c("faa" = "dest"))
## # A tibble: 1,357 × 8
## faa name lat lon alt tz dst tzone
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A America/…
## 2 06A Moton Field Municipal Airport 32.5 -85.7 264 -6 A America/…
## 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A America/…
## 4 06N Randall Airport 41.4 -74.4 523 -5 A America/…
## 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A America/…
## 6 0A9 Elizabethton Municipal Airport 36.4 -82.2 1593 -5 A America/…
## 7 0G6 Williams County Airport 41.5 -84.5 730 -5 A America/…
## 8 0G7 Finger Lakes Regional Airport 42.9 -76.8 492 -5 A America/…
## 9 0P2 Shoestring Aviation Airfield 39.8 -76.6 1000 -5 U America/…
## 10 0S9 Jefferson County Intl 48.1 -123. 108 -8 A America/…
## # ℹ 1,347 more rows
#> # A tibble: 1,357 x 8
#> faa name lat lon alt tz dst tzone
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 04G Lansdowne Airport 41.1 -80.6 1044 -5 A America/New_Y…
#> 2 06A Moton Field Municipal Airp… 32.5 -85.7 264 -6 A America/Chica…
#> 3 06C Schaumburg Regional 42.0 -88.1 801 -6 A America/Chica…
#> 4 06N Randall Airport 41.4 -74.4 523 -5 A America/New_Y…
#> 5 09J Jekyll Island Airport 31.1 -81.4 11 -5 A America/New_Y…
#> 6 0A9 Elizabethton Municipal Air… 36.4 -82.2 1593 -5 A America/New_Y…
#> # … with 1,351 more rows
planes_carriers <-
flights %>%
filter(!is.na(tailnum)) %>%
distinct(tailnum, carrier)
planes_carriers %>%
count(tailnum) %>%
filter(n > 1) %>%
nrow()
## [1] 17
#> [1] 17
carrier_transfer_tbl <- planes_carriers %>%
# keep only planes which have flown for more than one airline
group_by(tailnum) %>%
filter(n() > 1) %>%
# join with airlines to get airline names
left_join(airlines, by = "carrier") %>%
arrange(tailnum, carrier)
carrier_transfer_tbl
## # A tibble: 34 × 3
## # Groups: tailnum [17]
## tailnum carrier name
## <chr> <chr> <chr>
## 1 N146PQ 9E Endeavor Air Inc.
## 2 N146PQ EV ExpressJet Airlines Inc.
## 3 N153PQ 9E Endeavor Air Inc.
## 4 N153PQ EV ExpressJet Airlines Inc.
## 5 N176PQ 9E Endeavor Air Inc.
## 6 N176PQ EV ExpressJet Airlines Inc.
## 7 N181PQ 9E Endeavor Air Inc.
## 8 N181PQ EV ExpressJet Airlines Inc.
## 9 N197PQ 9E Endeavor Air Inc.
## 10 N197PQ EV ExpressJet Airlines Inc.
## # ℹ 24 more rows
#> # A tibble: 34 x 3
#> # Groups: tailnum [17]
#> carrier tailnum name
#> <chr> <chr> <chr>
#> 1 9E N146PQ Endeavor Air Inc.
#> 2 EV N146PQ ExpressJet Airlines Inc.
#> 3 9E N153PQ Endeavor Air Inc.
#> 4 EV N153PQ ExpressJet Airlines Inc.
#> 5 9E N176PQ Endeavor Air Inc.
#> 6 EV N176PQ ExpressJet Airlines Inc.
#> # … with 28 more rows
airports %>% count(alt, lon) %>% filter(n > 1)
## # A tibble: 0 × 3
## # ℹ 3 variables: alt <dbl>, lon <dbl>, n <int>
#> # A tibble: 0 × 3
#> # ℹ 3 variables: alt <dbl>, lon <dbl>, n <int>
df1 <- tribble(
~x, ~y,
1, 1,
2, 1
)
df2 <- tribble(
~x, ~y,
1, 1,
1, 2
)
intersect(df1, df2)
## # A tibble: 1 × 2
## x y
## <dbl> <dbl>
## 1 1 1
#> # A tibble: 1 × 2
#> x y
#> <dbl> <dbl>
#> 1 1 1
# Note that we get 3 rows, not 4
union(df1, df2)
## # A tibble: 3 × 2
## x y
## <dbl> <dbl>
## 1 1 1
## 2 2 1
## 3 1 2
#> # A tibble: 3 × 2
#> x y
#> <dbl> <dbl>
#> 1 1 1
#> 2 2 1
#> 3 1 2
setdiff(df1, df2)
## # A tibble: 1 × 2
## x y
## <dbl> <dbl>
## 1 2 1
#> # A tibble: 1 × 2
#> x y
#> <dbl> <dbl>
#> 1 2 1
setdiff(df2, df1)
## # A tibble: 1 × 2
## x y
## <dbl> <dbl>
## 1 1 2
#> # A tibble: 1 × 2
#> x y
#> <dbl> <dbl>
#> 1 1 2
Celtics 2025 nba champs