# Load the TidyTuesday 2024-07-30 summer movies CSV straight from GitHub.
# This needs network access every time the script is run.
Summer_Movies <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/master/data/2024/2024-07-30/summer_movies.csv"
)
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): tconst, title_type, primary_title, original_title, genres, simple_t...
## dbl (4): year, runtime_minutes, average_rating, num_votes
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Working subset: rows whose `genres` value is exactly "Drama" or exactly
# "Romance" (`%in%` is an exact match, so any other value is dropped),
# reduced to the three columns used by the reshaping examples below.
# Assumes dplyr and the `%>%` pipe are already attached — TODO confirm; no
# library() call is visible in this part of the file.
data_small <- Summer_Movies %>%
  filter(genres %in% c("Drama", "Romance")) %>%
  select(genres, year, average_rating)
# Reshape `genres` into name/value form: the former column name goes to
# `variable` and each film's genre string goes to `genre`. The result is only
# printed, not stored.
# `cols` is spelled out in full (`col =` only worked through partial argument
# matching), and the output columns are named for what they hold rather than
# "years"/"ratings", which mislabelled the column name and the genre strings.
data_small %>%
  pivot_longer(
    cols = genres,
    names_to = "variable",
    values_to = "genre"
  )
## # A tibble: 212 × 4
## year average_rating variable genre
## <dbl> <dbl> <chr> <chr>
## 1 1920 7.4 genres Drama
## 2 1955 5 genres Drama
## 3 1957 7.2 genres Drama
## 4 1956 6.5 genres Drama
## 5 1958 7.4 genres Drama
## 6 1958 7.3 genres Drama
## 7 1961 7.7 genres Drama
## 8 1961 6.9 genres Drama
## 9 1962 5.6 genres Drama
## 10 1963 7.8 genres Drama
## # ℹ 202 more rows
# Spread the ratings into one column per year, with one row per genre. The
# result is only printed, not stored.
# A genre/year pair can occur more than once in `data_small` (e.g. two Drama
# films from 1958), so a cell cannot hold a single number. `values_fn = list`
# requests list-columns explicitly (one vector of ratings per cell) instead of
# relying on tidyr's warn-and-fall-back behaviour. Use `values_fn = mean`
# instead if one summary value per cell is wanted.
data_small %>%
  pivot_wider(
    names_from = year,
    values_from = average_rating,
    values_fn = list
  )
## # A tibble: 2 × 66
## genres `1920` `1955` `1957` `1956` `1958` `1961` `1962` `1963` `1968` `1969`
## <chr> <list> <list> <list> <list> <list> <list> <list> <list> <list> <list>
## 1 Drama <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 2 Romance <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <NULL> <dbl> <dbl>
## # ℹ 55 more variables: `1971` <list>, `1972` <list>, `1973` <list>,
## # `1974` <list>, `1976` <list>, `1977` <list>, `1979` <list>, `1978` <list>,
## # `1981` <list>, `1980` <list>, `1982` <list>, `1984` <list>, `1983` <list>,
## # `1986` <list>, `1987` <list>, `1988` <list>, `1991` <list>, `1992` <list>,
## # `1994` <list>, `1985` <list>, `1997` <list>, `1989` <list>, `1975` <list>,
## # `1970` <list>, `1999` <list>, `1998` <list>, `2001` <list>, `2000` <list>,
## # `1967` <list>, `1990` <list>, `2002` <list>, `2004` <list>, …
# Print `data_small` again: neither pivot above was assigned, so it still has
# its original three columns.
data_small
## # A tibble: 212 × 3
## genres year average_rating
## <chr> <dbl> <dbl>
## 1 Drama 1920 7.4
## 2 Drama 1955 5
## 3 Drama 1957 7.2
## 4 Drama 1956 6.5
## 5 Drama 1958 7.4
## 6 Drama 1958 7.3
## 7 Drama 1961 7.7
## 8 Drama 1961 6.9
## 9 Drama 1962 5.6
## 10 Drama 1963 7.8
## # ℹ 202 more rows
# Split `genres` into two columns named "Drama" and "Romance".
# After the earlier filter every `genres` value is a single word, so there is
# only ever one piece: it lands in the first column (`Drama`, whatever the
# genre actually is) and `Romance` is NA in every row.
# `fill = "right"` declares that this right-hand NA padding is expected, which
# avoids the "Expected 2 pieces" warning without changing the result.
# NOTE(review): the column names come from `into` and do not describe the
# contents — confirm this split is really what the analysis needs.
data_table <- data_small %>%
  separate(
    col = genres,
    into = c("Drama", "Romance"),
    fill = "right"
  )
# Print the result of the split to inspect the new columns.
data_table
## # A tibble: 212 × 4
## Drama Romance year average_rating
## <chr> <chr> <dbl> <dbl>
## 1 Drama <NA> 1920 7.4
## 2 Drama <NA> 1955 5
## 3 Drama <NA> 1957 7.2
## 4 Drama <NA> 1956 6.5
## 5 Drama <NA> 1958 7.4
## 6 Drama <NA> 1958 7.3
## 7 Drama <NA> 1961 7.7
## 8 Drama <NA> 1961 6.9
## 9 Drama <NA> 1962 5.6
## 10 Drama <NA> 1963 7.8
## # ℹ 202 more rows
# Paste `year` and `average_rating` into one character column named
# "Year/rating", joined by "/". The two source columns are replaced by the
# combined one. The result is only printed, not stored.
# No trailing comma after `sep`, so no empty argument is passed to `unite()`.
data_table %>%
  unite(col = "Year/rating", c(year, average_rating), sep = "/")
## # A tibble: 212 × 3
## Drama Romance `Year/rating`
## <chr> <chr> <chr>
## 1 Drama <NA> 1920/7.4
## 2 Drama <NA> 1955/5
## 3 Drama <NA> 1957/7.2
## 4 Drama <NA> 1956/6.5
## 5 Drama <NA> 1958/7.4
## 6 Drama <NA> 1958/7.3
## 7 Drama <NA> 1961/7.7
## 8 Drama <NA> 1961/6.9
## 9 Drama <NA> 1962/5.6
## 10 Drama <NA> 1963/7.8
## # ℹ 202 more rows