## Mutating joins

### Inner join

# Example tables shared by the join demos below.
# `x` repeats each key (1 and 2 appear twice); `y` holds each key exactly once.
x <- tibble(
  key = c(1, 2, 2, 1),
  val_x = c("x1", "x2", "x3", "x4")
)
y <- tibble(
  key = c(1, 2),
  val_y = c("y1", "y2")
)

# Inner join: keep only the rows of `x` and `y` whose `key` values match.
# The key is named explicitly so dplyr does not have to guess it from the
# shared column names (which would also emit a "Joining with ..." message).
inner_join(x, y, by = "key")
## # A tibble: 4 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1
## 2     2 x2    y2
## 3     2 x3    y2
## 4     1 x4    y1

### Outer joins

# Outer joins on `key`, all with the key stated explicitly.
# Every key in `x` has a match in `y` and vice versa, so with this data the
# left, right and full joins all return the same four rows; they would differ
# only if one of the tables contained a key missing from the other.

# Left join: keeps every row of `x`.
left_join(x, y, by = "key")
## # A tibble: 4 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1
## 2     2 x2    y2
## 3     2 x3    y2
## 4     1 x4    y1

# Right join: keeps every row of `y`.
right_join(x, y, by = "key")
## # A tibble: 4 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1
## 2     2 x2    y2
## 3     2 x3    y2
## 4     1 x4    y1

# Full join: keeps every row of both tables.
full_join(x, y, by = "key")
## # A tibble: 4 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1
## 2     2 x2    y2
## 3     2 x3    y2
## 4     1 x4    y1

### Defining the key columns

# Natural join: no `by` is given, so dplyr joins on every column name the two
# tables share -- here `year` and `tailnum`, as the message below reports.
# NOTE(review): `year` presumably means something different in `flights` and
# `planes`, which would make it an unintended key -- confirm against the data.
left_join(flights, planes)
## Joining with `by = join_by(year, tailnum)`
## # A tibble: 336,776 × 26
##     year month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1  2013     1     1      517            515         2      830            819
##  2  2013     1     1      533            529         4      850            830
##  3  2013     1     1      542            540         2      923            850
##  4  2013     1     1      544            545        -1     1004           1022
##  5  2013     1     1      554            600        -6      812            837
##  6  2013     1     1      554            558        -4      740            728
##  7  2013     1     1      555            600        -5      913            854
##  8  2013     1     1      557            600        -3      709            723
##  9  2013     1     1      557            600        -3      838            846
## 10  2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 18 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, type <chr>, manufacturer <chr>,
## #   model <chr>, engines <int>, seats <int>, speed <int>, engine <chr>
# Join on `tailnum` only. The `year` column exists in both tables but is no
# longer a key, so both copies are kept and suffixed as `year.x` / `year.y`.
left_join(flights, planes, by = "tailnum")
## # A tibble: 336,776 × 27
##    year.x month   day dep_time sched_dep_time dep_delay arr_time sched_arr_time
##     <int> <int> <int>    <int>          <int>     <dbl>    <int>          <int>
##  1   2013     1     1      517            515         2      830            819
##  2   2013     1     1      533            529         4      850            830
##  3   2013     1     1      542            540         2      923            850
##  4   2013     1     1      544            545        -1     1004           1022
##  5   2013     1     1      554            600        -6      812            837
##  6   2013     1     1      554            558        -4      740            728
##  7   2013     1     1      555            600        -5      913            854
##  8   2013     1     1      557            600        -3      709            723
##  9   2013     1     1      557            600        -3      838            846
## 10   2013     1     1      558            600        -2      753            745
## # ℹ 336,766 more rows
## # ℹ 19 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## #   tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## #   hour <dbl>, minute <dbl>, time_hour <dttm>, year.y <int>, type <chr>,
## #   manufacturer <chr>, model <chr>, engines <int>, seats <int>, speed <int>,
## #   engine <chr>

## Filtering joins

# Filtering joins keep or drop rows of the first table depending on whether
# they have a match in the second; no columns from the second table are added.
# The key is named explicitly in every call instead of being inferred.

# Rows of `x` that have a match in `y` (all of them here).
semi_join(x, y, by = "key")
## # A tibble: 4 × 2
##     key val_x
##   <dbl> <chr>
## 1     1 x1
## 2     2 x2
## 3     2 x3
## 4     1 x4

# Rows of `y` that have a match in `x`; each appears once even though `x`
# contains every key twice.
semi_join(y, x, by = "key")
## # A tibble: 2 × 2
##     key val_y
##   <dbl> <chr>
## 1     1 y1
## 2     2 y2

# Rows of `x` without a match in `y` (none).
anti_join(x, y, by = "key")
## # A tibble: 0 × 2
## # ℹ 2 variables: key <dbl>, val_x <chr>

# Rows of `y` without a match in `x` (none).
anti_join(y, x, by = "key")
## # A tibble: 0 × 2
## # ℹ 2 variables: key <dbl>, val_y <chr>

## Join problems

# Tally how often each airport name occurs, most frequent first. Any count
# above 1 means `name` does not uniquely identify a row of `airports`.
airports %>% count(name, sort = TRUE)
## # A tibble: 1,440 × 2
##    name                             n
##    <chr>                        <int>
##  1 Municipal Airport                5
##  2 All Airports                     3
##  3 Capital City Airport             2
##  4 Dillingham                       2
##  5 Douglas Municipal Airport        2
##  6 Executive                        2
##  7 Grand Canyon West Airport        2
##  8 Jefferson County Intl            2
##  9 Marshfield Municipal Airport     2
## 10 Penn Station                     2
## # ℹ 1,430 more rows
# Left join with the key stated explicitly. `relationship = "many-to-one"`
# makes dplyr raise an error if a key ever occurs more than once in `y`,
# instead of silently duplicating rows of `x`. `y` has one row per key here,
# so the result keeps exactly the four rows of `x`.
left_join(x, y, by = "key", relationship = "many-to-one")
## # A tibble: 4 × 3
##     key val_x val_y
##   <dbl> <chr> <chr>
## 1     1 x1    y1
## 2     2 x2    y2
## 3     2 x3    y2
## 4     1 x4    y1

## Set operations

# Two small tables with identical columns; they have the row (1, 1) in common
# and one row each that the other lacks.
df1 <- tibble(
  x = c(1, 2),
  y = c(1, 1)
)
df2 <- tibble(
  x = c(1, 1),
  y = c(1, 2)
)
# Set operations on data frames compare whole rows across all columns.
# The calls are namespace-qualified to make clear that the dplyr versions are
# meant, not the same-named base R functions.

# Rows present in both tables.
dplyr::intersect(df1, df2)
## # A tibble: 1 × 2
##       x     y
##   <dbl> <dbl>
## 1     1     1

# Distinct rows present in either table.
dplyr::union(df1, df2)
## # A tibble: 3 × 2
##       x     y
##   <dbl> <dbl>
## 1     1     1
## 2     2     1
## 3     1     2

# Rows of `df1` that are not in `df2`.
dplyr::setdiff(df1, df2)
## # A tibble: 1 × 2
##       x     y
##   <dbl> <dbl>
## 1     2     1

# Rows of `df2` that are not in `df1`.
dplyr::setdiff(df2, df1)
## # A tibble: 1 × 2
##       x     y
##   <dbl> <dbl>
## 1     1     2