employees <- tibble(
emp_id = c(1, 2, 3, 4, 5),
name = c("John", "Jane", "Alice", "Bob", "Charlie"),
department = c("HR", "IT", "Finance", "IT", "Marketing")
)
head(employees)
## # A tibble: 5 × 3
## emp_id name department
## <dbl> <chr> <chr>
## 1 1 John HR
## 2 2 Jane IT
## 3 3 Alice Finance
## 4 4 Bob IT
## 5 5 Charlie Marketing
salaries <- tibble(
emp_id = c(1, 2, 3, 6, 7),
salary = c(50000, 60000, 55000, 65000, 70000)
)
head(salaries)
## # A tibble: 5 × 2
## emp_id salary
## <dbl> <dbl>
## 1 1 50000
## 2 2 60000
## 3 3 55000
## 4 6 65000
## 5 7 70000
Returns all rows from both tables where there is a match
innerjoin <- inner_join(employees,
salaries,
by = "emp_id")
print(innerjoin)
## # A tibble: 3 × 4
## emp_id name department salary
## <dbl> <chr> <chr> <dbl>
## 1 1 John HR 50000
## 2 2 Jane IT 60000
## 3 3 Alice Finance 55000
Note: Only employees with matching emp_id in both tables are included
Returns all rows from the left table and matching rows from the right table
leftjoin <- left_join(employees,
salaries,
by = "emp_id")
print(leftjoin)
## # A tibble: 5 × 4
## emp_id name department salary
## <dbl> <chr> <chr> <dbl>
## 1 1 John HR 50000
## 2 2 Jane IT 60000
## 3 3 Alice Finance 55000
## 4 4 Bob IT NA
## 5 5 Charlie Marketing NA
Note: All employees are included, even those without salary information
Returns all rows from the right table and matching rows from the left table
rightjoin <- right_join(employees,
salaries,
by = "emp_id")
print(rightjoin)
## # A tibble: 5 × 4
## emp_id name department salary
## <dbl> <chr> <chr> <dbl>
## 1 1 John HR 50000
## 2 2 Jane IT 60000
## 3 3 Alice Finance 55000
## 4 6 <NA> <NA> 65000
## 5 7 <NA> <NA> 70000
Note: All salary entries are included, even for employees not in the employees table
Returns all rows when there is a match in either left or right table
fulljoin <- full_join(employees,
salaries,
by = "emp_id")
print(fulljoin)
## # A tibble: 7 × 4
## emp_id name department salary
## <dbl> <chr> <chr> <dbl>
## 1 1 John HR 50000
## 2 2 Jane IT 60000
## 3 3 Alice Finance 55000
## 4 4 Bob IT NA
## 5 5 Charlie Marketing NA
## 6 6 <NA> <NA> 65000
## 7 7 <NA> <NA> 70000
Returns all rows from the left table where there is a match in the right table
semijoin <- semi_join(employees,
salaries,
by ="emp_id")
print(semijoin)
## # A tibble: 3 × 3
## emp_id name department
## <dbl> <chr> <chr>
## 1 1 John HR
## 2 2 Jane IT
## 3 3 Alice Finance
Returns all rows from the left table where there is no match in the right table
antijoin <- anti_join(employees,
salaries,
by = "emp_id")
print(antijoin)
## # A tibble: 2 × 3
## emp_id name department
## <dbl> <chr> <chr>
## 1 4 Bob IT
## 2 5 Charlie Marketing