Import your data

# Read the summer movies CSV from the TidyTuesday GitHub repository.
Summer_Movies <- readr::read_csv(
    file = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/refs/heads/master/data/2024/2024-07-30/summer_movies.csv'
)
## Rows: 905 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): tconst, title_type, primary_title, original_title, genres, simple_t...
## dbl (4): year, runtime_minutes, average_rating, num_votes
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

# Tabulate how many rows fall under each title_type value.
count(Summer_Movies, title_type)
## # A tibble: 3 × 2
##   title_type     n
##   <chr>      <int>
## 1 movie        711
## 2 tvMovie      136
## 3 video         58
# Explicit level order used when converting title_type to a factor.
rank_levels <- c("movie", "tvMovie", "video")

# Copy of the data with title_type stored as a factor with those levels.
data_rev <- Summer_Movies %>%
    mutate(title_type = factor(title_type, levels = rank_levels))

Modify factor order

Make two bar charts here: one before ordering and another after.

# Mean of runtime_minutes for each title_type; missing runtimes are dropped
# before averaging.
data_summary <- Summer_Movies %>%
    group_by(title_type) %>%
    summarise(
        runtime_minutes = mean(runtime_minutes, na.rm = TRUE),
        .groups = "drop"
    )

# Print the per-type summary table.
data_summary
## # A tibble: 3 × 2
##   title_type runtime_minutes
##   <chr>                <dbl>
## 1 movie                 94.7
## 2 tvMovie               82.7
## 3 video                 61
# Bar chart before ordering: title_type is a character column here, so the
# bars appear in ggplot2's default (alphabetical) order.
ggplot(data_summary, aes(runtime_minutes, title_type)) +
    geom_col()

# Bar chart after ordering: fct_reorder() arranges title_type by its mean
# runtime. The y-axis label is set explicitly so it matches the first chart
# instead of showing the fct_reorder() expression.
ggplot(
    data_summary,
    aes(runtime_minutes, fct_reorder(title_type, runtime_minutes))
) +
    geom_col() +
    labs(y = "title_type")

Modify factor levels

Show examples of three functions:

  • fct_recode
# fct_recode(): rename each title_type level (new name = old name), then
# count rows per renamed level.
Summer_Movies %>%
    mutate(
        title_type = fct_recode(
            title_type,
            Movie = "movie",
            `Television Movie` = "tvMovie",
            Video = "video"
        )
    ) %>%
    count(title_type)
## # A tibble: 3 × 2
##   title_type           n
##   <fct>            <int>
## 1 Movie              711
## 2 Television Movie   136
## 3 Video               58
  • fct_collapse
# fct_collapse(): merge tvMovie and video into a single "Other" level and
# relabel movie as "TheaterRelease", then count rows per level.
Summer_Movies %>%
    mutate(
        title_type = fct_collapse(
            title_type,
            "TheaterRelease" = "movie",
            "Other" = c("tvMovie", "video")
        )
    ) %>%
    count(title_type)
## # A tibble: 2 × 2
##   title_type         n
##   <fct>          <int>
## 1 TheaterRelease   711
## 2 Other            194
  • fct_lump
# fct_lump() is superseded in forcats; when called without `n` or `prop` it
# dispatches to fct_lump_lowfreq(), so that member of the fct_lump family is
# called directly. It lumps the least frequent levels into "Other".
Summer_Movies %>%
    mutate(title_type = fct_lump_lowfreq(title_type)) %>%
    count(title_type)
## # A tibble: 2 × 2
##   title_type     n
##   <fct>      <int>
## 1 movie        711
## 2 Other        194

Chapter 16

No need to do anything here.