Introduction

nycflights13

Keys

Mutating Joins

Inner Join

# Example table `x`: keys 1, 2 and 3, each with a label in `val_x`.
x <- tribble(
  ~key, ~val_x,
  1,    "x1",
  2,    "x2",
  3,    "x3"
)
# Example table `y`: keys 1, 2 and 4, each with a label in `val_y`.
y <- tribble(
  ~key, ~val_y,
  1,    "y1",
  2,    "y2",
  4,    "y3"
)

Outer Joins

# Left join: keep every row of `x`; a key with no match in `y` (here key 3)
# gets NA in `val_y`. No `by` is given, so the shared column `key` is used.
left_join(x, y)
## Joining with `by = join_by(key)`
## # A tibble: 3 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1   
## 2     2 x2    y2   
## 3     3 x3    <NA>
# Right join: keep every row of `y`; a key with no match in `x` (here key 4)
# gets NA in `val_x`.
right_join(x, y)
## Joining with `by = join_by(key)`
## # A tibble: 3 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1   
## 2     2 x2    y2   
## 3     4 <NA>  y3
# Full join: keep every row from both tables, filling the side without a
# match with NA.
full_join(x, y)
## Joining with `by = join_by(key)`
## # A tibble: 4 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1   
## 2     2 x2    y2   
## 3     3 x3    <NA> 
## 4     4 <NA>  y3

Filtering Joins

# Semi join: keep the rows of `x` whose key has a match in `y`; only the
# columns of `x` are returned.
semi_join(x, y)
## Joining with `by = join_by(key)`
## # A tibble: 2 × 2
##     key val_x
##   <dbl> <chr>
## 1     1 x1   
## 2     2 x2
# Same filter in the other direction: rows of `y` whose key has a match in `x`.
semi_join(y, x)
## Joining with `by = join_by(key)`
## # A tibble: 2 × 2
##     key val_y
##   <dbl> <chr>
## 1     1 y1   
## 2     2 y2
# Anti join: keep the rows of `x` whose key has no match in `y` (here key 3).
anti_join(x, y)
## Joining with `by = join_by(key)`
## # A tibble: 1 × 2
##     key val_x
##   <dbl> <chr>
## 1     3 x3
# Reverse direction: rows of `y` whose key has no match in `x` (here key 4).
anti_join(y, x)
## Joining with `by = join_by(key)`
## # A tibble: 1 × 2
##     key val_y
##   <dbl> <chr>
## 1     4 y3

Join Problems

# Count rows per (lat, lon) pair and list the largest counts first; a top
# count of 1 means no pair occurs more than once.
airports %>%
  count(lat, lon) %>%
  arrange(desc(n))
## # A tibble: 1,458 × 3
##      lat   lon     n
##    <dbl> <dbl> <int>
##  1  19.7 -155.     1
##  2  19.7 -156.     1
##  3  19.8 -156.     1
##  4  19.9 -156.     1
##  5  20.0 -156.     1
##  6  20.3 -156.     1
##  7  20.8 -157.     1
##  8  20.8 -156.     1
##  9  20.9 -156.     1
## 10  21.0 -157.     1
## # ℹ 1,448 more rows
# With no `by` supplied, the join uses the shared column `key` and reports
# that choice in a message.
left_join(x, y)
## Joining with `by = join_by(key)`
## # A tibble: 3 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1   
## 2     2 x2    y2   
## 3     3 x3    <NA>

Set Operations

# Example table `df1` with columns `x` and `y`: rows (1, 1) and (2, 1).
df1 <- tribble(
  ~x, ~y,
  1,  1,
  2,  1
)
# Example table `df2` with columns `x` and `y`: rows (1, 1) and (1, 2).
df2 <- tribble(
  ~x, ~y,
  1,  1,
  1,  2
)