# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Import the Tidy Tuesday (2023-02-14) age-gap data set directly from GitHub.
# read_csv() guesses the column types and reports them in a message.
age_gaps <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-02-14/age_gaps.csv"
)
## Rows: 1155 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): movie_name, director, actor_1_name, actor_2_name, character_1_gend...
## dbl (5): release_year, age_difference, couple_number, actor_1_age, actor_2_age
## date (2): actor_1_birthdate, actor_2_birthdate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the random sample below is reproducible.
set.seed(123)

# Build a small demo tibble: keep four columns of interest, then draw
# 10 rows at random (without replacement).
data_small <- sample_n(
  select(age_gaps, movie_name, release_year, director, age_difference),
  size = 10
)

# Print the sampled rows.
data_small
## # A tibble: 10 × 4
## movie_name release_year director age_difference
## <chr> <dbl> <chr> <dbl>
## 1 A Star Is Born 2018 Bradley Cooper 11
## 2 You Only Live Twice 1967 Lewis Gilbert 11
## 3 Firewall 2006 Richard Loncraine 19
## 4 Don't Mess with the Zohan 2008 Dennis Dugan 9
## 5 A Single Man 2009 Tom Ford 18
## 6 Tag 2018 Jeff Tomsic 3
## 7 Red Riding Hood 2011 Catherine Hardwicke 0
## 8 The Vow 2012 Michael Sucsy 2
## 9 Love Actually 2003 Richard Curtis 7
## 10 Sorry to Bother You 2018 Boots Riley 8
# Reshape to wide form: each distinct movie_name becomes its own column
# holding that movie's age_difference. release_year and director stay as
# identifier columns, and cells with no matching value are filled with NA.
data_small_wide <- pivot_wider(
  data_small,
  names_from = movie_name,
  values_from = age_difference
)

# Print the wide table.
data_small_wide
## # A tibble: 10 × 12
## release_year director `A Star Is Born` `You Only Live Twice` Firewall
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2018 Bradley Cooper 11 NA NA
## 2 1967 Lewis Gilbert NA 11 NA
## 3 2006 Richard Loncrai… NA NA 19
## 4 2008 Dennis Dugan NA NA NA
## 5 2009 Tom Ford NA NA NA
## 6 2018 Jeff Tomsic NA NA NA
## 7 2011 Catherine Hardw… NA NA NA
## 8 2012 Michael Sucsy NA NA NA
## 9 2003 Richard Curtis NA NA NA
## 10 2018 Boots Riley NA NA NA
## # ℹ 7 more variables: `Don't Mess with the Zohan` <dbl>, `A Single Man` <dbl>,
## # Tag <dbl>, `Red Riding Hood` <dbl>, `The Vow` <dbl>, `Love Actually` <dbl>,
## # `Sorry to Bother You` <dbl>
# Reshape the wide table back to long form (the inverse of pivot_wider()).
#
# The columns to stack are selected as "everything except the identifier
# columns" instead of a range between two specific movie titles. Those titles
# only exist because of the particular random sample drawn earlier, so a
# title-based range would error as soon as the sample changed.
data_small_wide %>%
  pivot_longer(
    cols = !c(release_year, director),
    names_to = "movie_name",
    values_to = "age_difference",
    # Drop the NA cells introduced by widening so only real rows remain.
    values_drop_na = TRUE
  ) %>%
  # Move movie_name back to the first column to match the original layout.
  relocate(movie_name)
## # A tibble: 10 × 4
## movie_name release_year director age_difference
## <chr> <dbl> <chr> <dbl>
## 1 A Star Is Born 2018 Bradley Cooper 11
## 2 You Only Live Twice 1967 Lewis Gilbert 11
## 3 Firewall 2006 Richard Loncraine 19
## 4 Don't Mess with the Zohan 2008 Dennis Dugan 9
## 5 A Single Man 2009 Tom Ford 18
## 6 Tag 2018 Jeff Tomsic 3
## 7 Red Riding Hood 2011 Catherine Hardwicke 0
## 8 The Vow 2012 Michael Sucsy 2
## 9 Love Actually 2003 Richard Curtis 7
## 10 Sorry to Bother You 2018 Boots Riley 8
# Split the director's name into first-name and last-name columns.
#
# separate_wider_delim() replaces the superseded separate(). The two
# `too_*` arguments make the split safe for names that are not exactly
# two words:
#   - too_many = "merge": extra pieces stay in l_name instead of being
#     silently discarded (e.g. a three-part name keeps its last two parts).
#   - too_few = "align_start": a single-word name fills f_name and leaves
#     l_name as NA rather than raising an error.
data_small %>%
  separate_wider_delim(
    cols = director,
    delim = " ",
    names = c("f_name", "l_name"),
    too_few = "align_start",
    too_many = "merge"
  )
## # A tibble: 10 × 5
## movie_name release_year f_name l_name age_difference
## <chr> <dbl> <chr> <chr> <dbl>
## 1 A Star Is Born 2018 Bradley Cooper 11
## 2 You Only Live Twice 1967 Lewis Gilbert 11
## 3 Firewall 2006 Richard Loncraine 19
## 4 Don't Mess with the Zohan 2008 Dennis Dugan 9
## 5 A Single Man 2009 Tom Ford 18
## 6 Tag 2018 Jeff Tomsic 3
## 7 Red Riding Hood 2011 Catherine Hardwicke 0
## 8 The Vow 2012 Michael Sucsy 2
## 9 Love Actually 2003 Richard Curtis 7
## 10 Sorry to Bother You 2018 Boots Riley 8
# Paste movie_name and release_year together into one "movie_year" column,
# joined by "/". The two source columns are removed (unite()'s default).
data_unite <- unite(
  data_small,
  col = "movie_year",
  movie_name, release_year,
  sep = "/"
)

# Print the united table.
data_unite
## # A tibble: 10 × 3
## movie_year director age_difference
## <chr> <chr> <dbl>
## 1 A Star Is Born/2018 Bradley Cooper 11
## 2 You Only Live Twice/1967 Lewis Gilbert 11
## 3 Firewall/2006 Richard Loncraine 19
## 4 Don't Mess with the Zohan/2008 Dennis Dugan 9
## 5 A Single Man/2009 Tom Ford 18
## 6 Tag/2018 Jeff Tomsic 3
## 7 Red Riding Hood/2011 Catherine Hardwicke 0
## 8 The Vow/2012 Michael Sucsy 2
## 9 Love Actually/2003 Richard Curtis 7
## 10 Sorry to Bother You/2018 Boots Riley 8
# Expand the table to every combination of director and release_year.
# Combinations that are absent from data_small get NA in the other columns.
complete(data_small, director, release_year)
## # A tibble: 80 × 4
## director release_year movie_name age_difference
## <chr> <dbl> <chr> <dbl>
## 1 Boots Riley 1967 <NA> NA
## 2 Boots Riley 2003 <NA> NA
## 3 Boots Riley 2006 <NA> NA
## 4 Boots Riley 2008 <NA> NA
## 5 Boots Riley 2009 <NA> NA
## 6 Boots Riley 2011 <NA> NA
## 7 Boots Riley 2012 <NA> NA
## 8 Boots Riley 2018 Sorry to Bother You 8
## 9 Bradley Cooper 1967 <NA> NA
## 10 Bradley Cooper 2003 <NA> NA
## # ℹ 70 more rows