1. Import your data

Import two related datasets from the TidyTuesday project.

# Cleaned penguins data, read straight from the TidyTuesday repository
# (2020-07-28 folder).
penguins <- readr::read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv"
)
## Rows: 344 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): species, island, sex
## dbl (5): bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, year
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Raw penguins data from the same TidyTuesday folder, tidied on the way in:
# column names are cleaned with janitor, sex is lower-cased, and species is
# cut down to its first word.
penguins_raw <- readr::read_csv(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins_raw.csv"
) %>%
    janitor::clean_names() %>%
    mutate(
        sex     = stringr::str_to_lower(sex),
        species = word(species, start = 1)
    )
## Rows: 344 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (9): studyName, Species, Region, Island, Stage, Individual ID, Clutch C...
## dbl  (7): Sample Number, Culmen Length (mm), Culmen Depth (mm), Flipper Leng...
## date (1): Date Egg
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the three columns that the joins below rely on.
# penguins.raw_small is only created in section 2, so this preview works
# from penguins_raw, which already exists at this point.
# NOTE(review): the original intent of this chunk is inferred -- confirm.
penguins_raw %>%
    # select() already yields the names sex, species and island, so no
    # separate colnames() assignment is needed.
    select(sex, species, island) %>%
    # Match on column values with %in%. The row names of a tibble are just
    # "1", "2", ..., so comparing them with species or sex labels can never
    # select a row, and `[` on a vector accepts only one index argument.
    filter(
        species %in% c("Adelie", "Gentoo"),
        sex %in% c("female", "male")
    )

2. Make data small

Describe the two datasets:

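Data 1: penguins — the cleaned dataset, 344 rows and 8 columns (species, island, sex, bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, year).

Data 2: penguins_raw — the raw dataset, 344 rows and 17 columns. After import, its column names are cleaned with janitor::clean_names(), sex is converted to lower case, and species is shortened to its first word.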

# Fix the RNG seed so the two random samples below are reproducible.
set.seed(1234)

# Draw ten random rows from each dataset, keeping three columns apiece.
penguins_small <- penguins %>%
    select(sex, species, year) %>%
    sample_n(10)
penguins.raw_small <- penguins_raw %>%
    select(sex, species, island) %>%
    sample_n(10)

penguins_small
## # A tibble: 10 × 3
##    sex    species    year
##    <chr>  <chr>     <dbl>
##  1 male   Chinstrap  2007
##  2 female Chinstrap  2009
##  3 female Adelie     2009
##  4 female Adelie     2009
##  5 female Adelie     2009
##  6 male   Adelie     2008
##  7 female Adelie     2009
##  8 male   Gentoo     2008
##  9 female Adelie     2008
## 10 female Chinstrap  2009
# Print the sampled subset of the raw dataset (sex, species, island).
penguins.raw_small
## # A tibble: 10 × 3
##    sex    species island   
##    <chr>  <chr>   <chr>    
##  1 female Adelie  Torgersen
##  2 male   Gentoo  Biscoe   
##  3 female Gentoo  Biscoe   
##  4 male   Adelie  Biscoe   
##  5 <NA>   Adelie  Torgersen
##  6 female Adelie  Dream    
##  7 male   Adelie  Dream    
##  8 male   Gentoo  Biscoe   
##  9 female Gentoo  Biscoe   
## 10 female Adelie  Dream

3. inner_join

Describe the resulting data:

How is it different from the original two datasets?

# Both samples repeat each species, so joining on species alone pairs every
# matching row of one table with every matching row of the other. Declare
# that fan-out as intended so dplyr does not warn about an unexpected
# many-to-many relationship.
penguins_small %>%
    inner_join(
        penguins.raw_small,
        by = "species",
        relationship = "many-to-many"
    )
## # A tibble: 40 × 5
##    sex.x  species  year sex.y  island   
##    <chr>  <chr>   <dbl> <chr>  <chr>    
##  1 female Adelie   2009 female Torgersen
##  2 female Adelie   2009 male   Biscoe   
##  3 female Adelie   2009 <NA>   Torgersen
##  4 female Adelie   2009 female Dream    
##  5 female Adelie   2009 male   Dream    
##  6 female Adelie   2009 female Dream    
##  7 female Adelie   2009 female Torgersen
##  8 female Adelie   2009 male   Biscoe   
##  9 female Adelie   2009 <NA>   Torgersen
## 10 female Adelie   2009 female Dream    
## # ℹ 30 more rows

4. left_join

Describe the resulting data:

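How is it different from the original two datasets? The left join keeps every row of penguins_small and adds sex.y and island from penguins.raw_small. Chinstrap has no match in penguins.raw_small, so its rows show NA in those two columns.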

# Keep every row of penguins_small and attach matching rows from
# penguins.raw_small. Species repeats in both tables, so the fan-out is
# declared as intended rather than left to trigger dplyr's many-to-many
# warning.
left_joined_data <- left_join(
    penguins_small,
    penguins.raw_small,
    by = "species",
    relationship = "many-to-many"
)
left_joined_data
## # A tibble: 43 × 5
##    sex.x  species    year sex.y  island   
##    <chr>  <chr>     <dbl> <chr>  <chr>    
##  1 male   Chinstrap  2007 <NA>   <NA>     
##  2 female Chinstrap  2009 <NA>   <NA>     
##  3 female Adelie     2009 female Torgersen
##  4 female Adelie     2009 male   Biscoe   
##  5 female Adelie     2009 <NA>   Torgersen
##  6 female Adelie     2009 female Dream    
##  7 female Adelie     2009 male   Dream    
##  8 female Adelie     2009 female Dream    
##  9 female Adelie     2009 female Torgersen
## 10 female Adelie     2009 male   Biscoe   
## # ℹ 33 more rows

5. right_join

Describe the resulting data:

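How is it different from the original two datasets? The right join keeps every row of penguins.raw_small and adds the matching columns from penguins_small. A species found only in penguins.raw_small would get NA in sex.x and year; here both of its species (Adelie and Gentoo) also occur in penguins_small, so the result has the same 40 rows as the inner join.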

# Keep every row of penguins.raw_small and attach matching rows from
# penguins_small. Species repeats in both tables, so the fan-out is
# declared as intended rather than left to trigger dplyr's many-to-many
# warning.
right_joined_data <- right_join(
    penguins_small,
    penguins.raw_small,
    by = "species",
    relationship = "many-to-many"
)
right_joined_data
## # A tibble: 40 × 5
##    sex.x  species  year sex.y  island   
##    <chr>  <chr>   <dbl> <chr>  <chr>    
##  1 female Adelie   2009 female Torgersen
##  2 female Adelie   2009 male   Biscoe   
##  3 female Adelie   2009 <NA>   Torgersen
##  4 female Adelie   2009 female Dream    
##  5 female Adelie   2009 male   Dream    
##  6 female Adelie   2009 female Dream    
##  7 female Adelie   2009 female Torgersen
##  8 female Adelie   2009 male   Biscoe   
##  9 female Adelie   2009 <NA>   Torgersen
## 10 female Adelie   2009 female Dream    
## # ℹ 30 more rows

6. full_join

Describe the resulting data:

How is it different from the original two datasets? The full join keeps every row from both datasets. A species found in only one dataset gets NA in the columns that come from the other one; here Chinstrap, which is missing from penguins.raw_small, has NA in sex.y and island.

# Keep every row from both tables, matched on species where possible.
# Species repeats in both tables, so the fan-out is declared as intended
# rather than left to trigger dplyr's many-to-many warning.
full_joined_data <- full_join(
    penguins_small,
    penguins.raw_small,
    by = "species",
    relationship = "many-to-many"
)
full_joined_data
## # A tibble: 43 × 5
##    sex.x  species    year sex.y  island   
##    <chr>  <chr>     <dbl> <chr>  <chr>    
##  1 male   Chinstrap  2007 <NA>   <NA>     
##  2 female Chinstrap  2009 <NA>   <NA>     
##  3 female Adelie     2009 female Torgersen
##  4 female Adelie     2009 male   Biscoe   
##  5 female Adelie     2009 <NA>   Torgersen
##  6 female Adelie     2009 female Dream    
##  7 female Adelie     2009 male   Dream    
##  8 female Adelie     2009 female Dream    
##  9 female Adelie     2009 female Torgersen
## 10 female Adelie     2009 male   Biscoe   
## # ℹ 33 more rows

7. semi_join

Describe the resulting data:

How is it different from the original two datasets? Semi join returns only the rows from penguins_small that have matches in penguins.raw_small.

# Filtering join: keep the rows of penguins_small whose species also
# occurs in penguins.raw_small.
semi_joined_data <- penguins_small %>%
    semi_join(penguins.raw_small, by = "species")

semi_joined_data
## # A tibble: 7 × 3
##   sex    species  year
##   <chr>  <chr>   <dbl>
## 1 female Adelie   2009
## 2 female Adelie   2009
## 3 female Adelie   2009
## 4 male   Adelie   2008
## 5 female Adelie   2009
## 6 male   Gentoo   2008
## 7 female Adelie   2008

8. anti_join

Describe the resulting data:

How is it different from the original two datasets? Anti join returns rows from penguins_small that have no match in penguins.raw_small.

# Filtering join: keep the rows of penguins_small whose species does not
# occur in penguins.raw_small.
anti_joined_data <- penguins_small %>%
    anti_join(penguins.raw_small, by = "species")

anti_joined_data
## # A tibble: 3 × 3
##   sex    species    year
##   <chr>  <chr>     <dbl>
## 1 male   Chinstrap  2007
## 2 female Chinstrap  2009
## 3 female Chinstrap  2009