1. Import your data

Import two related datasets from TidyTuesday Project.

data1 <- readr::read_csv('https://storage.googleapis.com/kagglesdsdata/datasets/8895632/14065905/premier_league_table_13.csv?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20251209%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20251209T053343Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1552228db435a2c48eb9b1c1da7e40d23078189f8c18012bd5f853f29725694f7c1453b4cea21720ee3246831af2daac9f4aa01e69ea29d0f8992ae3ab9a9814d67636e4ec8f8fcf9c5b95ab74dbb0c0db6bfc76921261eddb2245035cf41548e334d59718517e773a3b6d9b63ecbe066c6233e99b5e05adbd22539b45dfa5e2cf55b6b63dda91041ba7b624fd558a515a45bcb239c36fd26ede08a0d1c325e8c380ce74442d5725d77832192fc25c17ea6326197a1f926ff7191981ade0bc651d75e0f57dee13c5e8a29ae23dc7238cd4f58065f8f037b54035dc6448ed114ccd8f26f94247f2794172df50c581ee02c29206c16036900da4a201530dc8aca6')
## Rows: 20 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (8): Played, Won, Drawn, Lost, Goals For, Goals Against, Goal Difference...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data2 <- readr::read_csv('https://storage.googleapis.com/kagglesdsdata/datasets/8895632/14065905/premier_league_table_14.csv?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20251209%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20251209T053426Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=47ea1f80a894595fd85159f65d763bd2e8520fcad41a64095979014f838423a7b4b0d68e054f24ce70350fdbf4b8fe90b7e004250d9223209ecfbf931307a850099afe9b3b169711202a11c1fcb1330dac20193ebdbb98b3f347c19fb5dd6e86da3a58614464139cd93f52aca8bdeb2a29a79cb34fc19e11a2e453f42a2bc7469b9f7a5341a6010515d7b8b0c987dccacfb8b24f790bc995320fa2265dc034075f94aec5fafb4b3d4450359da69a11c4d67b4a6b1802d6d2e5d08f9a8175ad81db937e9787966294282e777334e1e6344f473dc6cf6a9ef463a8303a72eae00d6151014a956017755b7b5b476717f135663ff8e003b34d98d5ca5c159fc1c554')
## Rows: 20 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Team
## dbl (8): Played, Won, Drawn, Lost, Goals For, Goals Against, Goal Difference...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2. Make data small

Describe the two data sets:

Data1: Premier League Table 2025/26 table 13

Data 2: Premier League Table 2025/26 table 14

data1_small <- data1 %>% select(Team, Won, Lost) %>% sample_n(10)

data2_small <- data2 %>% select(Team, Won, Played) %>% sample_n(10)

data2_small
## # A tibble: 10 × 3
##    Team                     Won Played
##    <chr>                  <dbl>  <dbl>
##  1 Sunderland                 6     14
##  2 Newcastle United           5     14
##  3 Manchester City            9     14
##  4 Brentford                  6     14
##  5 Tottenham Hotspur          5     14
##  6 Brighton & Hove Albion     6     14
##  7 Everton                    6     14
##  8 Arsenal                   10     14
##  9 Leeds United               4     14
## 10 Chelsea                    7     14
data1_small
## # A tibble: 10 × 3
##    Team                      Won  Lost
##    <chr>                   <dbl> <dbl>
##  1 Sunderland                  6     3
##  2 Manchester City             8     4
##  3 Chelsea                     7     3
##  4 Fulham                      5     6
##  5 Wolverhampton Wanderers     0    11
##  6 AFC Bournemouth             5     4
##  7 Burnley                     3     9
##  8 Arsenal                     9     1
##  9 Nottingham Forest           3     7
## 10 Manchester United           6     4

3. inner_join

Describe the resulting data:

How is it different from the original two data sets? 2 compared to 10 from the original data set

data1_small %>% 
    
    inner_join(data2_small, by = c("Team","Won"))
## # A tibble: 2 × 4
##   Team         Won  Lost Played
##   <chr>      <dbl> <dbl>  <dbl>
## 1 Sunderland     6     3     14
## 2 Chelsea        7     3     14

4. left_join

Describe the resulting data:

How is it different from the original two data sets? it has 5 columns and NA on the right.

 joined_data <- data1_small %>%
  left_join(data2_small, by = "Team", suffix = c("_data1", "_data2"))

joined_data
## # A tibble: 10 × 5
##    Team                    Won_data1  Lost Won_data2 Played
##    <chr>                       <dbl> <dbl>     <dbl>  <dbl>
##  1 Sunderland                      6     3         6     14
##  2 Manchester City                 8     4         9     14
##  3 Chelsea                         7     3         7     14
##  4 Fulham                          5     6        NA     NA
##  5 Wolverhampton Wanderers         0    11        NA     NA
##  6 AFC Bournemouth                 5     4        NA     NA
##  7 Burnley                         3     9        NA     NA
##  8 Arsenal                         9     1        10     14
##  9 Nottingham Forest               3     7        NA     NA
## 10 Manchester United               6     4        NA     NA

5. right_join

Describe the resulting data:

How is it different from the original two data sets? 5 columns and it has NA on the left.

right_join_alt <- merge( x = data1_small, y = data2_small, by = "Team", all.y = TRUE)

right_join_alt
##                      Team Won.x Lost Won.y Played
## 1                 Arsenal     9    1    10     14
## 2               Brentford    NA   NA     6     14
## 3  Brighton & Hove Albion    NA   NA     6     14
## 4                 Chelsea     7    3     7     14
## 5                 Everton    NA   NA     6     14
## 6            Leeds United    NA   NA     4     14
## 7         Manchester City     8    4     9     14
## 8        Newcastle United    NA   NA     5     14
## 9              Sunderland     6    3     6     14
## 10      Tottenham Hotspur    NA   NA     5     14

6. full_join

Describe the resulting data:

How is it different from the original two data sets? it has more rows and all the teams on it.

full_data <- data1_small %>%
  full_join(data2_small, by = "Team", suffix = c("_data1", "_data2"))

full_data
## # A tibble: 16 × 5
##    Team                    Won_data1  Lost Won_data2 Played
##    <chr>                       <dbl> <dbl>     <dbl>  <dbl>
##  1 Sunderland                      6     3         6     14
##  2 Manchester City                 8     4         9     14
##  3 Chelsea                         7     3         7     14
##  4 Fulham                          5     6        NA     NA
##  5 Wolverhampton Wanderers         0    11        NA     NA
##  6 AFC Bournemouth                 5     4        NA     NA
##  7 Burnley                         3     9        NA     NA
##  8 Arsenal                         9     1        10     14
##  9 Nottingham Forest               3     7        NA     NA
## 10 Manchester United               6     4        NA     NA
## 11 Newcastle United               NA    NA         5     14
## 12 Brentford                      NA    NA         6     14
## 13 Tottenham Hotspur              NA    NA         5     14
## 14 Brighton & Hove Albion         NA    NA         6     14
## 15 Everton                        NA    NA         6     14
## 16 Leeds United                   NA    NA         4     14

7. semi_join

Describe the resulting data:

How is it different from the original two data sets? It has all 4 teams that are on both data sets

semi_data <- data1_small %>%
  semi_join(data2_small, by = "Team")

semi_data
## # A tibble: 4 × 3
##   Team              Won  Lost
##   <chr>           <dbl> <dbl>
## 1 Sunderland          6     3
## 2 Manchester City     8     4
## 3 Chelsea             7     3
## 4 Arsenal             9     1

8. anti_join

Describe the resulting data:

How is it different from the original two data sets? It has the teams that are not on both of the data sets.

anti_data <- data1_small %>%
  anti_join(data2_small, by = "Team")

anti_data
## # A tibble: 6 × 3
##   Team                      Won  Lost
##   <chr>                   <dbl> <dbl>
## 1 Fulham                      5     6
## 2 Wolverhampton Wanderers     0    11
## 3 AFC Bournemouth             5     4
## 4 Burnley                     3     9
## 5 Nottingham Forest           3     7
## 6 Manchester United           6     4