1. Import your data

Import two related datasets from TidyTuesday Project.

attendance <- read_excel("../00_data/nfl_attendance.xlsx")

standings <- read_excel("../00_data/nfl_standings.xlsx")

2. Make data small

Describe the two datasets:

Data1: Attendance

Data 2: Standings

set.seed(1234)
attendance_small <- attendance %>% select(team_name, year, total) %>% sample_n(10)
standings_small <- standings %>% select(team_name, year, wins) %>% sample_n(10)

attendance_small
## # A tibble: 10 × 3
##    team_name   year   total
##    <chr>      <dbl>   <dbl>
##  1 Steelers    2013 1025772
##  2 Chargers    2014 1069529
##  3 Browns      2013 1122750
##  4 Buccaneers  2014 1023818
##  5 Colts       2013 1054693
##  6 Titans      2016 1016507
##  7 Bears       2001 1059454
##  8 Steelers    2001 1055127
##  9 Chiefs      2005 1180710
## 10 Cardinals   2004  838557
standings_small
## # A tibble: 10 × 3
##    team_name   year  wins
##    <chr>      <dbl> <dbl>
##  1 Colts       2003    12
##  2 Buccaneers  2018     5
##  3 Bengals     2010     4
##  4 Eagles      2002    12
##  5 Chiefs      2008     2
##  6 Rams        2011     2
##  7 Panthers    2005    11
##  8 49ers       2017     6
##  9 Bills       2000     8
## 10 Titans      2017     9

3. inner_join

Describe the resulting data:

How is it different from the original two datasets?

attendance_small %>% inner_join(standings_small, by = c("team_name"))
## # A tibble: 4 × 5
##   team_name  year.x   total year.y  wins
##   <chr>       <dbl>   <dbl>  <dbl> <dbl>
## 1 Buccaneers   2014 1023818   2018     5
## 2 Colts        2013 1054693   2003    12
## 3 Titans       2016 1016507   2017     9
## 4 Chiefs       2005 1180710   2008     2

4. left_join

Describe the resulting data:

How is it different from the original two datasets?

attendance_small %>% left_join(standings_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 10 × 4
##    team_name   year   total  wins
##    <chr>      <dbl>   <dbl> <dbl>
##  1 Steelers    2013 1025772    NA
##  2 Chargers    2014 1069529    NA
##  3 Browns      2013 1122750    NA
##  4 Buccaneers  2014 1023818    NA
##  5 Colts       2013 1054693    NA
##  6 Titans      2016 1016507    NA
##  7 Bears       2001 1059454    NA
##  8 Steelers    2001 1055127    NA
##  9 Chiefs      2005 1180710    NA
## 10 Cardinals   2004  838557    NA
standings_small %>% left_join(attendance_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 10 × 4
##    team_name   year  wins total
##    <chr>      <dbl> <dbl> <dbl>
##  1 Colts       2003    12    NA
##  2 Buccaneers  2018     5    NA
##  3 Bengals     2010     4    NA
##  4 Eagles      2002    12    NA
##  5 Chiefs      2008     2    NA
##  6 Rams        2011     2    NA
##  7 Panthers    2005    11    NA
##  8 49ers       2017     6    NA
##  9 Bills       2000     8    NA
## 10 Titans      2017     9    NA

5. right_join

Describe the resulting data:

How is it different from the original two datasets?

attendance_small %>% right_join(standings_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 10 × 4
##    team_name   year total  wins
##    <chr>      <dbl> <dbl> <dbl>
##  1 Colts       2003    NA    12
##  2 Buccaneers  2018    NA     5
##  3 Bengals     2010    NA     4
##  4 Eagles      2002    NA    12
##  5 Chiefs      2008    NA     2
##  6 Rams        2011    NA     2
##  7 Panthers    2005    NA    11
##  8 49ers       2017    NA     6
##  9 Bills       2000    NA     8
## 10 Titans      2017    NA     9
standings_small %>% right_join(attendance_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 10 × 4
##    team_name   year  wins   total
##    <chr>      <dbl> <dbl>   <dbl>
##  1 Steelers    2013    NA 1025772
##  2 Chargers    2014    NA 1069529
##  3 Browns      2013    NA 1122750
##  4 Buccaneers  2014    NA 1023818
##  5 Colts       2013    NA 1054693
##  6 Titans      2016    NA 1016507
##  7 Bears       2001    NA 1059454
##  8 Steelers    2001    NA 1055127
##  9 Chiefs      2005    NA 1180710
## 10 Cardinals   2004    NA  838557

6. full_join

Describe the resulting data:

How is it different from the original two datasets?

attendance_small %>% full_join(standings_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 20 × 4
##    team_name   year   total  wins
##    <chr>      <dbl>   <dbl> <dbl>
##  1 Steelers    2013 1025772    NA
##  2 Chargers    2014 1069529    NA
##  3 Browns      2013 1122750    NA
##  4 Buccaneers  2014 1023818    NA
##  5 Colts       2013 1054693    NA
##  6 Titans      2016 1016507    NA
##  7 Bears       2001 1059454    NA
##  8 Steelers    2001 1055127    NA
##  9 Chiefs      2005 1180710    NA
## 10 Cardinals   2004  838557    NA
## 11 Colts       2003      NA    12
## 12 Buccaneers  2018      NA     5
## 13 Bengals     2010      NA     4
## 14 Eagles      2002      NA    12
## 15 Chiefs      2008      NA     2
## 16 Rams        2011      NA     2
## 17 Panthers    2005      NA    11
## 18 49ers       2017      NA     6
## 19 Bills       2000      NA     8
## 20 Titans      2017      NA     9

7. semi_join

Describe the resulting data:

How is it different from the original two datasets?

attendance_small %>% semi_join(standings_small, by = c("team_name"))
## # A tibble: 4 × 3
##   team_name   year   total
##   <chr>      <dbl>   <dbl>
## 1 Buccaneers  2014 1023818
## 2 Colts       2013 1054693
## 3 Titans      2016 1016507
## 4 Chiefs      2005 1180710
attendance_small %>% semi_join(standings_small, by = c("year"))
## # A tibble: 1 × 3
##   team_name  year   total
##   <chr>     <dbl>   <dbl>
## 1 Chiefs     2005 1180710

8. anti_join

Describe the resulting data:

How is it different from the original two datasets?

attendance_small %>% anti_join(standings_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 10 × 3
##    team_name   year   total
##    <chr>      <dbl>   <dbl>
##  1 Steelers    2013 1025772
##  2 Chargers    2014 1069529
##  3 Browns      2013 1122750
##  4 Buccaneers  2014 1023818
##  5 Colts       2013 1054693
##  6 Titans      2016 1016507
##  7 Bears       2001 1059454
##  8 Steelers    2001 1055127
##  9 Chiefs      2005 1180710
## 10 Cardinals   2004  838557
standings_small %>% anti_join(attendance_small)
## Joining with `by = join_by(team_name, year)`
## # A tibble: 10 × 3
##    team_name   year  wins
##    <chr>      <dbl> <dbl>
##  1 Colts       2003    12
##  2 Buccaneers  2018     5
##  3 Bengals     2010     4
##  4 Eagles      2002    12
##  5 Chiefs      2008     2
##  6 Rams        2011     2
##  7 Panthers    2005    11
##  8 49ers       2017     6
##  9 Bills       2000     8
## 10 Titans      2017     9