library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Dataset 1: Customers

customers <- tibble(
  customer_id = c(1, 2, 3, 4, 5),
  name = c("Alice", "Bob", "Charlie", "David", "Eve"),
  city = c("New York", "Los Angeles", "Chicago", "Houston", "Phoenix")
)

Dataset 2: Orders

orders <- tibble(
  order_id = c(101, 102, 103, 104, 105, 106),
  customer_id = c(1, 2, 3, 2, 6, 7),
  product = c("Laptop", "Phone", "Tablet", "Desktop", "Camera", "Printer"),
  amount = c(1200, 800, 300, 1500, 600, 150)
)

q1

q1 <- inner_join(customers, orders, by = "customer_id")

a. How many rows are in the result?

####There are 4 rows of data in the result ### b. Why are some customers or orders not included in the result? ####They are not included because inner only joins table with a match ### c.Display the result

head(q1)
## # A tibble: 4 × 6
##   customer_id name    city        order_id product amount
##         <dbl> <chr>   <chr>          <dbl> <chr>    <dbl>
## 1           1 Alice   New York         101 Laptop    1200
## 2           2 Bob     Los Angeles      102 Phone      800
## 3           2 Bob     Los Angeles      104 Desktop   1500
## 4           3 Charlie Chicago          103 Tablet     300

q2

q2 <- left_join(customers , orders , by = "customer_id")

a. How many rows are in the result?

####There are 6 rows in the result ### b. Explain why this number differs from the inner join result. ####Left join returns all rows from the left table and only the matching from the right ### c. Display the result

head(q2)
## # A tibble: 6 × 6
##   customer_id name    city        order_id product amount
##         <dbl> <chr>   <chr>          <dbl> <chr>    <dbl>
## 1           1 Alice   New York         101 Laptop    1200
## 2           2 Bob     Los Angeles      102 Phone      800
## 3           2 Bob     Los Angeles      104 Desktop   1500
## 4           3 Charlie Chicago          103 Tablet     300
## 5           4 David   Houston           NA <NA>        NA
## 6           5 Eve     Phoenix           NA <NA>        NA

q3

q3 <- right_join(customers , orders , by = "customer_id")

a. How many rows are in the result?

####There are 6 rows in the result ### b. Which customer_ids in the result have NULL for customer name and city? Explain why. ####Similar to left join, right join pulls in everything from the right and only matches from the left. The NULL values were pulled but did not have any matches ### c. Display the result

head(q3)
## # A tibble: 6 × 6
##   customer_id name    city        order_id product amount
##         <dbl> <chr>   <chr>          <dbl> <chr>    <dbl>
## 1           1 Alice   New York         101 Laptop    1200
## 2           2 Bob     Los Angeles      102 Phone      800
## 3           2 Bob     Los Angeles      104 Desktop   1500
## 4           3 Charlie Chicago          103 Tablet     300
## 5           6 <NA>    <NA>             105 Camera     600
## 6           7 <NA>    <NA>             106 Printer    150

#q4

q4 <- full_join(customers , orders , by = "customer_id")

a. How many rows are in the result?

####There are 8 rows in the result ### b. Identify any rows where there’s information from only one table. Explain these results. ####4 of the rows only pull from one table. The reason is because there are NULL values which means there was not a match in customer_id in either table. ### c. Display the result

head(q4)
## # A tibble: 6 × 6
##   customer_id name    city        order_id product amount
##         <dbl> <chr>   <chr>          <dbl> <chr>    <dbl>
## 1           1 Alice   New York         101 Laptop    1200
## 2           2 Bob     Los Angeles      102 Phone      800
## 3           2 Bob     Los Angeles      104 Desktop   1500
## 4           3 Charlie Chicago          103 Tablet     300
## 5           4 David   Houston           NA <NA>        NA
## 6           5 Eve     Phoenix           NA <NA>        NA

q5

q5 <- semi_join(customers , orders , by = "customer_id")

a. How many rows are in the result?

####There are 3 rows in the result ### b. How does this result differ from the inner join result? ####This only pulls data from the left table if there is a match from the right. Does not pull from matches in the right. ### c. Display the result

head(q5)
## # A tibble: 3 × 3
##   customer_id name    city       
##         <dbl> <chr>   <chr>      
## 1           1 Alice   New York   
## 2           2 Bob     Los Angeles
## 3           3 Charlie Chicago

q6

q6 <- anti_join(customers , orders , by = "customer_id")

a. How many rows are in the result?

####There are 2 rows in the result ### b. Explain what this result tells you about these customers. ####This result tells us the customers did not make an order. ### c. Display the result

head(q6)
## # A tibble: 2 × 3
##   customer_id name  city   
##         <dbl> <chr> <chr>  
## 1           4 David Houston
## 2           5 Eve   Phoenix

a. Which join would you use to find all customers, including those who haven’t placed any orders? Why?

I would use left_join because it returns all rows from the left table (‘customers’) and their matching orders, using NA for customers without orders.

b. Which join would you use to find only the customers who have placed orders? Why?

I would use inner_join because it returns only records that have a matching customer_id in both the ‘customers’ and ‘orders’ tables.

c. Write the R code for both scenarios.

q7a <- left_join(customers , orders , by = "customer_id")
q7b <- inner_join(customers , orders , by = "customer_id")

d. Display the result

print("Q7a (All customers, including those with no orders):")
## [1] "Q7a (All customers, including those with no orders):"
head(q7a)
## # A tibble: 6 × 6
##   customer_id name    city        order_id product amount
##         <dbl> <chr>   <chr>          <dbl> <chr>    <dbl>
## 1           1 Alice   New York         101 Laptop    1200
## 2           2 Bob     Los Angeles      102 Phone      800
## 3           2 Bob     Los Angeles      104 Desktop   1500
## 4           3 Charlie Chicago          103 Tablet     300
## 5           4 David   Houston           NA <NA>        NA
## 6           5 Eve     Phoenix           NA <NA>        NA
print("Q7b (Only customers who have placed orders):")
## [1] "Q7b (Only customers who have placed orders):"
head(q7b)
## # A tibble: 4 × 6
##   customer_id name    city        order_id product amount
##         <dbl> <chr>   <chr>          <dbl> <chr>    <dbl>
## 1           1 Alice   New York         101 Laptop    1200
## 2           2 Bob     Los Angeles      102 Phone      800
## 3           2 Bob     Los Angeles      104 Desktop   1500
## 4           3 Charlie Chicago          103 Tablet     300

```