Module 11: Apply it to your data 10

Import your data

# Load data
# Read the TidyTuesday birds CSV directly from GitHub.
birds <- read_csv(
    file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14/birds.csv"
)

## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

## Rows: 49019 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): species_common_name, species_scientific_name, species_abbreviation...
## dbl  (9): bird_observation_id, record_id, count, n_feeding, n_sitting_on_wat...
## lgl (11): sex, feeding, sitting_on_water, sitting_on_ice, sitting_on_ship, i...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Chapter 15

Create a factor

Modify factor order

Make two bar charts here: one before ordering and another after.

# Unordered Factor Levels
# Mean of `count` per species abbreviation (NAs dropped), keeping the 15
# species with the largest means.
count_by_species <- birds %>%
    group_by(species_abbreviation) %>%
    summarise(avg_count = mean(count, na.rm = TRUE)) %>%
    slice_max(order_by = avg_count, n = 15)

count_by_species

## # A tibble: 15 × 2
##    species_abbreviation avg_count
##    <chr>                    <dbl>
##  1 PACDESVIT               2500. 
##  2 PUFTEN                  1216. 
##  3 PACDESSAL               1000  
##  4 PROCER                   264. 
##  5 LARBUL                   242. 
##  6 PUFGAV                   241. 
##  7 PACSP                    159. 
##  8 PUFGRI                   130. 
##  9 PACTUR                   121. 
## 10 PACBELDESSAL              80.5
## 11 PUFASS                    66.5
## 12 PUFTENGRI                 62.1
## 13 THAMEL                    51.5
## 14 PUFBUL                    49.7
## 15 PUFSP                     45.9

# Plot (before ordering)
# species_abbreviation is mapped as-is, so ggplot2 lays the species out in its
# default order for a character column (alphabetical) rather than by avg_count.
# This is the "before" chart to compare against the reordered version.
count_by_species %>%
    ggplot(aes(x = avg_count, y = species_abbreviation)) +
    geom_point() +
    labs(
        x = "Average Count",
        y = "Species",
        title = "Top 15 Bird Species by Average Observation Count"
    )

# Ordered Factor Levels

# Turn the species code into a factor whose levels follow avg_count, then plot.
count_by_species %>%
    mutate(species_abbreviation = fct_reorder(species_abbreviation, avg_count)) %>%
    ggplot(aes(x = avg_count, y = species_abbreviation)) +
    geom_point() +
    # Labeling: drop the y-axis title, rename the x-axis
    labs(
        x = "Mean Observation Count",
        y = NULL,
        title = "Top 15 Bird Species by Average Observation Count"
    )

Modify factor levels

Show examples of three functions:

fct_recode
fct_collapse
fct_lump

# List every distinct species abbreviation present in the data
distinct(birds, species_abbreviation)

## # A tibble: 321 × 1
##    species_abbreviation
##    <chr>               
##  1 DIOEPOSANANTEXU     
##  2 DIOIMPMEL           
##  3 DAPCAP              
##  4 PACTUR              
##  5 PUFGRI              
##  6 DIOEPOSAN           
##  7 PACSP               
##  8 SEABUN              
##  9 MACSP               
## 10 PROPAR              
## # ℹ 311 more rows

# Recode
# Rename the level "DIOEPOSANANTEXU" to "ROYAL_WAND", then show only the
# affected rows with the original and recoded columns side by side.
birds %>%
    mutate(
        species_rev = fct_recode(
            species_abbreviation,
            ROYAL_WAND = "DIOEPOSANANTEXU"
        )
    ) %>%
    select(species_abbreviation, species_rev) %>%
    filter(species_abbreviation == "DIOEPOSANANTEXU")

## # A tibble: 3 × 2
##   species_abbreviation species_rev
##   <chr>                <fct>      
## 1 DIOEPOSANANTEXU      ROYAL_WAND 
## 2 DIOEPOSANANTEXU      ROYAL_WAND 
## 3 DIOEPOSANANTEXU      ROYAL_WAND

# Collapse multiple levels into one
# Both listed codes are merged into a single "ALBATROSS" level; the filter
# keeps only rows whose original code is one of those two.
birds %>%
    mutate(
        species_col = fct_collapse(
            species_abbreviation,
            ALBATROSS = c("DIOEPOSANANTEXU", "DIOIMPMEL")
        )
    ) %>%
    select(species_abbreviation, species_col) %>%
    filter(species_abbreviation %in% c("DIOEPOSANANTEXU", "DIOIMPMEL"))

## # A tibble: 580 × 2
##    species_abbreviation species_col
##    <chr>                <fct>      
##  1 DIOEPOSANANTEXU      ALBATROSS  
##  2 DIOIMPMEL            ALBATROSS  
##  3 DIOIMPMEL            ALBATROSS  
##  4 DIOIMPMEL            ALBATROSS  
##  5 DIOIMPMEL            ALBATROSS  
##  6 DIOIMPMEL            ALBATROSS  
##  7 DIOIMPMEL            ALBATROSS  
##  8 DIOIMPMEL            ALBATROSS  
##  9 DIOIMPMEL            ALBATROSS  
## 10 DIOIMPMEL            ALBATROSS  
## # ℹ 570 more rows

# Lump small levels into other
# First look at how many rows each species abbreviation has.
count(birds, species_abbreviation)

## # A tibble: 321 × 2
##    species_abbreviation     n
##    <chr>                <int>
##  1 ALBUNI                  10
##  2 ALBUNI AD                2
##  3 ANOMIN                   2
##  4 ANOSP                    4
##  5 ANOSTO                   6
##  6 APTFOR                   5
##  7 APTFOR AD                1
##  8 BULBUL                   1
##  9 CALLEU                   5
## 10 CATANT                 126
## # ℹ 311 more rows

# Keep the 5 most frequent species codes and lump every other level into "Other"
birds %>%
    mutate(species_lump = fct_lump(species_abbreviation, n = 5)) %>%
    count(species_lump, sort = TRUE)

# For comparison: called without n or prop, fct_lump() leaves this column with
# the same 321 distinct values as before, so nothing visible is lumped.
birds %>% mutate(species_lump = fct_lump(species_abbreviation)) %>% distinct(species_lump)

## # A tibble: 321 × 1
##    species_lump   
##    <fct>          
##  1 DIOEPOSANANTEXU
##  2 DIOIMPMEL      
##  3 DAPCAP         
##  4 PACTUR         
##  5 PUFGRI         
##  6 DIOEPOSAN      
##  7 PACSP          
##  8 SEABUN         
##  9 MACSP          
## 10 PROPAR         
## # ℹ 311 more rows

Module 11: Apply it to your data 10

Kyle Jasper

Import your data

Chapter 15

Create a factor

Modify factor order

Modify factor levels

Chapter 16