Import your data
# Download the TidyTuesday (2023-12-12) holiday movies CSV and parse it with
# readr; column types are guessed by read_csv().
holiday_movies <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-12-12/holiday_movies.csv"
)
## Rows: 2265 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): tconst, title_type, primary_title, original_title, genres, simple_t...
## dbl (4): year, runtime_minutes, average_rating, num_votes
## lgl (4): christmas, hanukkah, kwanzaa, holiday
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Pivoting
Wide to long form
Separating and Uniting
Unite two columns
# Paste every column from `year` through `runtime_minutes` into a single
# "/"-delimited column called `newName`. `remove = TRUE` drops the source
# columns from the result.
holiday_movies_united <- unite(
  holiday_movies,
  col = "newName",
  year:runtime_minutes,
  sep = "/",
  remove = TRUE
)
Separate a column
# Split `newName` back into `year` and `runtime_minutes` on the "/" delimiter.
# `convert = TRUE` re-parses the resulting pieces, so the columns come back as
# numbers instead of text and a literal "NA" piece becomes a real missing
# value. Without it both columns stay character.
holiday_movies_united %>%
  separate(
    col = newName,
    into = c("year", "runtime_minutes"),
    sep = "/",
    convert = TRUE
  )
## # A tibble: 2,265 × 14
## tconst title_type primary_title original_title year runtime_minutes genres
## <chr> <chr> <chr> <chr> <int> <int> <chr>
## 1 tt00203… movie Sailor's Hol… Sailor's Holi… 1929 58 Comedy
## 2 tt00208… movie The Devil's … The Devil's H… 1930 80 Drama…
## 3 tt00209… movie Holiday Holiday 1930 91 Comed…
## 4 tt00212… movie Holiday of S… Prazdnik svya… 1930 83 Comedy
## 5 tt00213… movie Sin Takes a … Sin Takes a H… 1930 81 Comed…
## 6 tt00213… movie Sinners' Hol… Sinners' Holi… 1930 60 Adven…
## 7 tt00230… movie Husband's Ho… Husband's Hol… 1931 70 Drama
## 8 tt00248… movie Beggar's Hol… Beggar's Holi… 1934 60 Crime…
## 9 tt00250… movie Cowboy Holid… Cowboy Holiday 1934 56 Weste…
## 10 tt00250… movie Death Takes … Death Takes a… 1934 79 Drama…
## # ℹ 2,255 more rows
## # ℹ 7 more variables: simple_title <chr>, average_rating <dbl>,
## # num_votes <dbl>, christmas <lgl>, hanukkah <lgl>, kwanzaa <lgl>,
## # holiday <lgl>
Missing Values
# Missing-values demo: spread `runtime_minutes` into one column per `year`,
# then gather only the `1949` and `2001` columns back into long form.
# `values_drop_na = TRUE` discards rows whose gathered runtime is NA; the
# remaining year columns are left wide.
pivot_longer(
  pivot_wider(
    holiday_movies,
    names_from = year,
    values_from = runtime_minutes
  ),
  cols = c(`1949`, `2001`),
  names_to = "year",
  values_to = "runtime_minutes",
  values_drop_na = TRUE
)
## # A tibble: 27 × 103
## tconst title_type primary_title original_title genres simple_title
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 tt0041473 movie Holiday Affair Holiday Affair Comed… holiday aff…
## 2 tt0041475 movie Holiday in Havana Holiday in Ha… Comed… holiday in …
## 3 tt0041528 movie Johnny Holiday Johnny Holiday Crime… johnny holi…
## 4 tt0217978 movie 'R Xmas 'R Xmas Crime… r xmas
## 5 tt0221074 movie Christmas in the Clo… Christmas in … Comed… christmas i…
## 6 tt0225394 tvMovie The Christmas Carol The Christmas… Drama… the christm…
## 7 tt0259929 movie Christmas Carol: The… Christmas Car… Anima… christmas c…
## 8 tt0273726 movie Christmas Nightmare Christmas Nig… Horro… christmas n…
## 9 tt0278150 tvMovie A Wind at My Back Ch… A Wind at My … Drama… a wind at m…
## 10 tt0285192 tvMovie Holiday Affair Holiday Affair Thril… holiday aff…
## # ℹ 17 more rows
## # ℹ 97 more variables: average_rating <dbl>, num_votes <dbl>, christmas <lgl>,
## # hanukkah <lgl>, kwanzaa <lgl>, holiday <lgl>, `1929` <dbl>, `1930` <dbl>,
## # `1931` <dbl>, `1934` <dbl>, `1936` <dbl>, `1937` <dbl>, `1938` <dbl>,
## # `1939` <dbl>, `1940` <dbl>, `1942` <dbl>, `1943` <dbl>, `1944` <dbl>,
## # `1945` <dbl>, `1946` <dbl>, `1947` <dbl>, `1948` <dbl>, `1950` <dbl>,
## # `1951` <dbl>, `1952` <dbl>, `1953` <dbl>, `1954` <dbl>, `1955` <dbl>, …