# Read the age-gap data (2023-02-14 release) from the TidyTuesday GitHub
# repository into a tibble.
age_gaps <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-02-14/age_gaps.csv"
)
## Rows: 1155 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): movie_name, director, actor_1_name, actor_2_name, character_1_gend...
## dbl (5): release_year, age_difference, couple_number, actor_1_age, actor_2_age
## date (2): actor_1_birthdate, actor_2_birthdate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# List every movie / year / director / age-difference combination that
# appears more than once in the data.
age_gaps %>%
  count(movie_name, release_year, director, age_difference) %>%
  filter(n > 1)
## # A tibble: 22 × 5
## movie_name release_year director age_difference n
## <chr> <dbl> <chr> <dbl> <int>
## 1 A Walk in the Clouds 1995 Alfonso Arau 4 2
## 2 American Pie 1999 Paul Weitz, Chris We… 5 2
## 3 Aquaman 2018 James Wan 7 2
## 4 Boogie Nights 1997 Paul Thomas Anderson 4 2
## 5 Bridget Jones's Diary 2001 Sharon Maguire 9 2
## 6 Brokeback Mountain 2005 Ang Lee 1 2
## 7 Clueless 1995 Amy Heckerling 7 2
## 8 Crazy, Stupid, Love 2011 Glenn Ficarra, John … 2 2
## 9 Dreamgirls 2006 Bill Condon 14 2
## 10 Fight Club 1999 David Fincher 3 2
## # ℹ 12 more rows
Divide the data into two tables using `dplyr::select()` so that they share a common variable, which you can then use to join them.
# Build two column-wise slices of the first 50 rows. Both keep
# `age_difference`, which is the column they have in common for joining.
age_gaps_1half <- age_gaps %>%
  select(movie_name:age_difference) %>%
  head(n = 50)
age_gaps_2half <- age_gaps %>%
  select(age_difference:actor_2_age) %>%
  head(n = 50)
Use `dplyr::left_join()` or another joining function.
# Join the two halves on their shared column. `age_difference` is not unique
# in either table, so matches fan out (many-to-many). Naming the key and the
# expected relationship makes that explicit and avoids the "Joining with"
# message and the many-to-many warning that the bare call emits.
left_join(
  age_gaps_1half,
  age_gaps_2half,
  by = join_by(age_difference),
  relationship = "many-to-many"
)
## Joining with `by = join_by(age_difference)`
## Warning in left_join(age_gaps_1half, age_gaps_2half): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 9 of `x` matches multiple rows in `y`.
## ℹ Row 9 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## # A tibble: 238 × 13
## movie_name release_year director age_difference couple_number actor_1_name
## <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Harold and M… 1971 Hal Ash… 52 1 Ruth Gordon
## 2 Venus 2006 Roger M… 50 1 Peter O'Too…
## 3 The Quiet Am… 2002 Phillip… 49 1 Michael Cai…
## 4 The Big Lebo… 1998 Joel Co… 45 1 David Huddl…
## 5 Beginners 2010 Mike Mi… 43 1 Christopher…
## 6 Poison Ivy 1992 Katt Sh… 42 1 Tom Skerritt
## 7 Whatever Wor… 2009 Woody A… 40 1 Larry David
## 8 Entrapment 1999 Jon Ami… 39 1 Sean Connery
## 9 Husbands and… 1992 Woody A… 38 1 Woody Allen
## 10 Husbands and… 1992 Woody A… 38 1 Jason Robar…
## # ℹ 228 more rows
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## # character_2_gender <chr>, actor_1_birthdate <date>,
## # actor_2_birthdate <date>, actor_1_age <dbl>, actor_2_age <dbl>
To silence this warning when a many-to-many match is expected, pass `relationship = "many-to-many"` to `left_join()`.