library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Dataset 1: Customers
customers <- tibble(
customer_id = c(1, 2, 3, 4, 5),
name = c("Alice", "Bob", "Charlie", "David", "Eve"),
city = c("New York", "Los Angeles", "Chicago", "Houston", "Phoenix")
)
# Dataset 2: Orders
orders <- tibble(
order_id = c(101, 102, 103, 104, 105, 106),
customer_id = c(1, 2, 3, 2, 6, 7),
product = c("Laptop", "Phone", "Tablet", "Desktop", "Camera", "Printer"),
amount = c(1200, 800, 300, 1500, 600, 150)
)
4 rows
Only customers with orders are included. Customers without orders or orders without matching customers are excluded.
result <- inner_join(customers, orders, by = "customer_id")
print(result)
## # A tibble: 4 × 6
## customer_id name city order_id product amount
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 Alice New York 101 Laptop 1200
## 2 2 Bob Los Angeles 102 Phone 800
## 3 2 Bob Los Angeles 104 Desktop 1500
## 4 3 Charlie Chicago 103 Tablet 300
n_rows <- nrow(result)
print(paste("Number of rows in the result:", n_rows))
## [1] "Number of rows in the result: 4"
5
A left join includes all rows from the left table (customers), even if there is no matching row in the right table (orders). If there’s no match, NA values will be added for the right table’s columns. In the inner join, only matching rows are kept. # Perform left join result <- left_join(customers, orders, by = “customer_id”)
print(result)
## # A tibble: 4 × 6
## customer_id name city order_id product amount
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 Alice New York 101 Laptop 1200
## 2 2 Bob Los Angeles 102 Phone 800
## 3 2 Bob Los Angeles 104 Desktop 1500
## 4 3 Charlie Chicago 103 Tablet 300
n_rows <- nrow(result)
print(paste("Number of rows in the result:", n_rows))
## [1] "Number of rows in the result: 4"
5
A right join includes all rows from the right table (orders) and only the matching rows from the left table (customers). Since customer_id = 6 has no matching entry in the customers table, the name and city fields will be NULL.
result <- right_join(customers, orders, by = "customer_id")
print(result)
## # A tibble: 6 × 6
## customer_id name city order_id product amount
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 Alice New York 101 Laptop 1200
## 2 2 Bob Los Angeles 102 Phone 800
## 3 2 Bob Los Angeles 104 Desktop 1500
## 4 3 Charlie Chicago 103 Tablet 300
## 5 6 <NA> <NA> 105 Camera 600
## 6 7 <NA> <NA> 106 Printer 150
n_rows <- nrow(result) print(paste(“Number of rows in the result:”, n_rows))
6
Rows with information from only one table include customer_id = 3, customer_id = 4, and customer_id = 6. For customer_id = 3 and customer_id = 4, there are no matching orders, so order_id and order_value are NA. For customer_id = 6, there is no matching customer, so name and city are NA. These results occur because a full join keeps all rows from both tables, filling in NA where no match exists.
result <- full_join(customers, orders, by = "customer_id")
print(result)
## # A tibble: 8 × 6
## customer_id name city order_id product amount
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 Alice New York 101 Laptop 1200
## 2 2 Bob Los Angeles 102 Phone 800
## 3 2 Bob Los Angeles 104 Desktop 1500
## 4 3 Charlie Chicago 103 Tablet 300
## 5 4 David Houston NA <NA> NA
## 6 5 Eve Phoenix NA <NA> NA
## 7 6 <NA> <NA> 105 Camera 600
## 8 7 <NA> <NA> 106 Printer 150
n_rows <- nrow(result) print(paste(“Number of rows in the result:”, n_rows))
3
A semi join returns only rows from the customers table that have matching customer_ids in the orders table, but it does not include columns from orders. This differs from an inner join, which includes columns from both tables.
result <- semi_join(customers, orders, by = "customer_id")
print(result)
## # A tibble: 3 × 3
## customer_id name city
## <dbl> <chr> <chr>
## 1 1 Alice New York
## 2 2 Bob Los Angeles
## 3 3 Charlie Chicago
The result includes customers with customer_id = 3 and customer_id = 4
These customers appear in the customers table but do not have any matching records in the orders table. This means they have not placed any orders.
result <- anti_join(customers, orders, by = "customer_id")
print(result)
## # A tibble: 2 × 3
## customer_id name city
## <dbl> <chr> <chr>
## 1 4 David Houston
## 2 5 Eve Phoenix
A left join keeps all customers from the customers table, adding order details where available. If a customer has not placed an order, the order columns will be NA.
To find only customers who have placed orders, use a semi join. A semi join returns only customers who have matching orders but does not include order details, ensuring that only customers who have placed orders appear in the result.
all_customers <- left_join(customers, orders, by = "customer_id")
print("All customers, including those without orders:")
## [1] "All customers, including those without orders:"
print(all_customers)
## # A tibble: 6 × 6
## customer_id name city order_id product amount
## <dbl> <chr> <chr> <dbl> <chr> <dbl>
## 1 1 Alice New York 101 Laptop 1200
## 2 2 Bob Los Angeles 102 Phone 800
## 3 2 Bob Los Angeles 104 Desktop 1500
## 4 3 Charlie Chicago 103 Tablet 300
## 5 4 David Houston NA <NA> NA
## 6 5 Eve Phoenix NA <NA> NA
customers_with_orders <- semi_join(customers, orders, by = "customer_id")
print("Customers who have placed orders:")
## [1] "Customers who have placed orders:"
print(customers_with_orders)
## # A tibble: 3 × 3
## customer_id name city
## <dbl> <chr> <chr>
## 1 1 Alice New York
## 2 2 Bob Los Angeles
## 3 3 Charlie Chicago
customer_summary <- customers %>%
left_join(orders, by = "customer_id") %>%
group_by(customer_id, name, city) %>%
summarize(
total_orders = n(),
total_spent = sum(amount, na.rm = TRUE),
.groups = "drop")
print(customer_summary)
## # A tibble: 5 × 5
## customer_id name city total_orders total_spent
## <dbl> <chr> <chr> <int> <dbl>
## 1 1 Alice New York 1 1200
## 2 2 Bob Los Angeles 2 2300
## 3 3 Charlie Chicago 1 300
## 4 4 David Houston 1 0
## 5 5 Eve Phoenix 1 0