1. Import your data

Import two related datasets from TidyTuesday Project.

# NHL roster data from the TidyTuesday 2024-01-09 release.
nhl_rosters <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-01-09/nhl_rosters.csv"
)
## Rows: 54883 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): team_code, position_type, headshot, first_name, last_name, positi...
## dbl   (7): season, player_id, sweater_number, height_in_inches, weight_in_po...
## date  (1): birth_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Team code / full name lookup from the same TidyTuesday release.
nhl_teams <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2024/2024-01-09/nhl_teams.csv"
)
## Rows: 59 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): team_code, full_name
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2. Make data small

Describe the two datasets:

Data 1: nhl_rosters (54,883 rows, 18 columns)

Data 2: nhl_teams (59 rows, 2 columns: team_code and full_name)

# Fix the RNG seed so both 10-row samples are reproducible.
set.seed(1234)

# The two samples are drawn independently of each other, so the team_code
# values in one need not appear in the other.
nhl_rosters_small <- nhl_rosters %>%
  select(team_code, season, position_type) %>%
  sample_n(10)

nhl_teams_small <- nhl_teams %>%
  select(team_code, full_name) %>%
  sample_n(10)

# Display the 10 sampled roster rows.
nhl_rosters_small
## # A tibble: 10 × 3
##    team_code   season position_type
##    <chr>        <dbl> <chr>        
##  1 DAL       20122013 forwards     
##  2 DAL       20142015 forwards     
##  3 CHI       19351936 defensemen   
##  4 PIT       19981999 forwards     
##  5 VAN       19821983 goalies      
##  6 COL       20132014 forwards     
##  7 OTT       20232024 forwards     
##  8 PHI       19751976 forwards     
##  9 PIT       19971998 goalies      
## 10 TOR       19561957 forwards
# Display the 10 sampled team rows.
nhl_teams_small
## # A tibble: 10 × 2
##    team_code full_name            
##    <chr>     <chr>                
##  1 EDM       Edmonton Oilers      
##  2 QUE       Quebec Nordiques     
##  3 STL       St. Louis Blues      
##  4 DFL       Detroit Falcons      
##  5 MTL       Montréal Canadiens   
##  6 CLE       Cleveland Barons     
##  7 MNS       Minnesota North Stars
##  8 BUF       Buffalo Sabres       
##  9 SJS       San Jose Sharks      
## 10 BOS       Boston Bruins

3. inner_join

Describe the resulting data:

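How is it different from the original two datasets? The result has 0 rows, compared to 10 rows in each original dataset, because no team_code appears in both samples. It has 4 columns: the three roster columns plus full_name.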

# Keep only roster rows whose team_code also occurs in the teams sample,
# adding full_name from the matching team row.
inner_join(nhl_rosters_small, nhl_teams_small, by = "team_code")
## # A tibble: 0 × 4
## # ℹ 4 variables: team_code <chr>, season <dbl>, position_type <chr>,
## #   full_name <chr>

4. left_join

Describe the resulting data:

How is it different from the original two datasets?

# Keep every roster row; full_name comes from the teams sample where
# team_code matches and is NA otherwise.
left_join(nhl_rosters_small, nhl_teams_small, by = "team_code")
## # A tibble: 10 × 4
##    team_code   season position_type full_name
##    <chr>        <dbl> <chr>         <chr>    
##  1 DAL       20122013 forwards      <NA>     
##  2 DAL       20142015 forwards      <NA>     
##  3 CHI       19351936 defensemen    <NA>     
##  4 PIT       19981999 forwards      <NA>     
##  5 VAN       19821983 goalies       <NA>     
##  6 COL       20132014 forwards      <NA>     
##  7 OTT       20232024 forwards      <NA>     
##  8 PHI       19751976 forwards      <NA>     
##  9 PIT       19971998 goalies       <NA>     
## 10 TOR       19561957 forwards      <NA>

5. right_join

Describe the resulting data:

How is it different from the original two datasets?

# Keep every team row; season and position_type come from the roster sample
# where team_code matches and are NA otherwise.
right_join(nhl_rosters_small, nhl_teams_small, by = "team_code")
## # A tibble: 10 × 4
##    team_code season position_type full_name            
##    <chr>      <dbl> <chr>         <chr>                
##  1 EDM           NA <NA>          Edmonton Oilers      
##  2 QUE           NA <NA>          Quebec Nordiques     
##  3 STL           NA <NA>          St. Louis Blues      
##  4 DFL           NA <NA>          Detroit Falcons      
##  5 MTL           NA <NA>          Montréal Canadiens   
##  6 CLE           NA <NA>          Cleveland Barons     
##  7 MNS           NA <NA>          Minnesota North Stars
##  8 BUF           NA <NA>          Buffalo Sabres       
##  9 SJS           NA <NA>          San Jose Sharks      
## 10 BOS           NA <NA>          Boston Bruins

6. full_join

Describe the resulting data:

How is it different from the original two datasets?

# Keep all rows from both samples; columns from the other table are NA
# wherever a team_code has no match.
full_join(nhl_rosters_small, nhl_teams_small, by = "team_code")
## # A tibble: 20 × 4
##    team_code   season position_type full_name            
##    <chr>        <dbl> <chr>         <chr>                
##  1 DAL       20122013 forwards      <NA>                 
##  2 DAL       20142015 forwards      <NA>                 
##  3 CHI       19351936 defensemen    <NA>                 
##  4 PIT       19981999 forwards      <NA>                 
##  5 VAN       19821983 goalies       <NA>                 
##  6 COL       20132014 forwards      <NA>                 
##  7 OTT       20232024 forwards      <NA>                 
##  8 PHI       19751976 forwards      <NA>                 
##  9 PIT       19971998 goalies       <NA>                 
## 10 TOR       19561957 forwards      <NA>                 
## 11 EDM             NA <NA>          Edmonton Oilers      
## 12 QUE             NA <NA>          Quebec Nordiques     
## 13 STL             NA <NA>          St. Louis Blues      
## 14 DFL             NA <NA>          Detroit Falcons      
## 15 MTL             NA <NA>          Montréal Canadiens   
## 16 CLE             NA <NA>          Cleveland Barons     
## 17 MNS             NA <NA>          Minnesota North Stars
## 18 BUF             NA <NA>          Buffalo Sabres       
## 19 SJS             NA <NA>          San Jose Sharks      
## 20 BOS             NA <NA>          Boston Bruins

7. semi_join

Describe the resulting data:

How is it different from the original two datasets?

# Roster rows whose team_code also appears in the teams sample; no columns
# from the teams table are added.
# `by` is given explicitly, as in the other joins, so the key does not depend
# on whichever column names the two tables happen to share (and no
# "Joining with ..." message is emitted).
semi_join(nhl_rosters_small, nhl_teams_small, by = "team_code")
## # A tibble: 0 × 3
## # ℹ 3 variables: team_code <chr>, season <dbl>, position_type <chr>

8. anti_join

Describe the resulting data:

How is it different from the original two datasets?

 # Roster rows with no matching team_code in the teams sample, tallied per
 # team_code with the largest counts first.
 anti_join(nhl_rosters_small, nhl_teams_small, by = "team_code") %>%
   count(team_code, sort = TRUE)
## # A tibble: 8 × 2
##   team_code     n
##   <chr>     <int>
## 1 DAL           2
## 2 PIT           2
## 3 CHI           1
## 4 COL           1
## 5 OTT           1
## 6 PHI           1
## 7 TOR           1
## 8 VAN           1