Employees Dataset

employees <- tibble(
  emp_id = c(1, 2, 3, 4, 5),
  name = c("John", "Jane", "Alice", "Bob", "Charlie"),
  department = c("HR", "IT", "Finance", "IT", "Marketing")
)
head(employees)
## # A tibble: 5 × 3
##   emp_id name    department
##    <dbl> <chr>   <chr>     
## 1      1 John    HR        
## 2      2 Jane    IT        
## 3      3 Alice   Finance   
## 4      4 Bob     IT        
## 5      5 Charlie Marketing

Salaries Dataset

salaries <- tibble(
  emp_id = c(1, 2, 3, 6, 7),
  salary = c(50000, 60000, 55000, 65000, 70000)
)
head(salaries)
## # A tibble: 5 × 2
##   emp_id salary
##    <dbl>  <dbl>
## 1      1  50000
## 2      2  60000
## 3      3  55000
## 4      6  65000
## 5      7  70000

Inner Join

Returns all rows from both tables where there is a match

innerjoin <- inner_join(employees, 
                        salaries, 
                        by = "emp_id")
print(innerjoin)
## # A tibble: 3 × 4
##   emp_id name  department salary
##    <dbl> <chr> <chr>       <dbl>
## 1      1 John  HR          50000
## 2      2 Jane  IT          60000
## 3      3 Alice Finance     55000

Note: Only employees with matching emp_id in both tables are included

Left Join

Returns all rows from the left table and matching rows from the right table

leftjoin <- left_join(employees,
                      salaries,
                      by = "emp_id")
print(leftjoin)
## # A tibble: 5 × 4
##   emp_id name    department salary
##    <dbl> <chr>   <chr>       <dbl>
## 1      1 John    HR          50000
## 2      2 Jane    IT          60000
## 3      3 Alice   Finance     55000
## 4      4 Bob     IT             NA
## 5      5 Charlie Marketing      NA

Note: All employees are included, even those without salary information

Right Join

Returns all rows from the right table and matching rows from the left table

rightjoin <- right_join(employees,
                       salaries,
                       by = "emp_id")
print(rightjoin)
## # A tibble: 5 × 4
##   emp_id name  department salary
##    <dbl> <chr> <chr>       <dbl>
## 1      1 John  HR          50000
## 2      2 Jane  IT          60000
## 3      3 Alice Finance     55000
## 4      6 <NA>  <NA>        65000
## 5      7 <NA>  <NA>        70000

Note: All salary entries are included, even for employees not in the employees table

Full Join

Returns all rows when there is a match in either left or right table

fulljoin <- full_join(employees,
                      salaries,
                      by = "emp_id")
print(fulljoin)
## # A tibble: 7 × 4
##   emp_id name    department salary
##    <dbl> <chr>   <chr>       <dbl>
## 1      1 John    HR          50000
## 2      2 Jane    IT          60000
## 3      3 Alice   Finance     55000
## 4      4 Bob     IT             NA
## 5      5 Charlie Marketing      NA
## 6      6 <NA>    <NA>        65000
## 7      7 <NA>    <NA>        70000

Semi Join

Returns all rows from the left table where there is a match in the right table

semijoin <- semi_join(employees,
                      salaries,
                      by ="emp_id")
print(semijoin)
## # A tibble: 3 × 3
##   emp_id name  department
##    <dbl> <chr> <chr>     
## 1      1 John  HR        
## 2      2 Jane  IT        
## 3      3 Alice Finance

Anti Join

Returns all rows from the left table where there is no match in the right table

antijoin <- anti_join(employees,
                      salaries,
                      by = "emp_id")
print(antijoin)
## # A tibble: 2 × 3
##   emp_id name    department
##    <dbl> <chr>   <chr>     
## 1      4 Bob     IT        
## 2      5 Charlie Marketing