# Load data
# Read the TidyTuesday bird observations (2026-04-14 data set) from GitHub.
birds <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-04-14/birds.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 49019 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): species_common_name, species_scientific_name, species_abbreviation...
## dbl (9): bird_observation_id, record_id, count, n_feeding, n_sitting_on_wat...
## lgl (11): sex, feeding, sitting_on_water, sitting_on_ice, sitting_on_ship, i...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Make two bar charts here: one before ordering the factor levels and another after.
# Unordered Factor Levels
# Average the `count` column per species code (missing values ignored) and
# keep the species with the 15 highest averages.
count_by_species <- birds %>%
  group_by(species_abbreviation) %>%
  summarise(avg_count = mean(count, na.rm = TRUE), .groups = "drop") %>%
  slice_max(order_by = avg_count, n = 15)

# Show the summary table
count_by_species
## # A tibble: 15 × 2
## species_abbreviation avg_count
## <chr> <dbl>
## 1 PACDESVIT 2500.
## 2 PUFTEN 1216.
## 3 PACDESSAL 1000
## 4 PROCER 264.
## 5 LARBUL 242.
## 6 PUFGAV 241.
## 7 PACSP 159.
## 8 PUFGRI 130.
## 9 PACTUR 121.
## 10 PACBELDESSAL 80.5
## 11 PUFASS 66.5
## 12 PUFTENGRI 62.1
## 13 THAMEL 51.5
## 14 PUFBUL 49.7
## 15 PUFSP 45.9
# Plot
# "Before" chart: map the raw species codes straight to the y axis so that
# ggplot2 falls back to the default (alphabetical) ordering. No reorder() is
# applied here; sorting by avg_count is what the "Ordered Factor Levels" plot
# demonstrates, and the two charts are meant to be compared.
count_by_species %>%
  ggplot(aes(x = avg_count, y = species_abbreviation)) +
  geom_point() +
  labs(
    x = "Average Count",
    y = "Species",
    title = "Top 15 Bird Species by Average Observation Count"
  )
# Ordered Factor Levels
# Turn the species codes into a factor whose levels are sorted by avg_count,
# then plot; the lowest average sits at the bottom of the y axis.
count_by_species %>%
  mutate(
    species_abbreviation = fct_reorder(species_abbreviation, avg_count)
  ) %>%
  ggplot(aes(x = avg_count, y = species_abbreviation)) +
  geom_point() +
  # Labeling: the y-axis title is dropped because the codes speak for themselves
  labs(
    title = "Top 15 Bird Species by Average Observation Count",
    x = "Mean Observation Count",
    y = NULL
  )
Show examples of three functions:
# List every distinct species code that appears in the data
distinct(birds, species_abbreviation)
## # A tibble: 321 × 1
## species_abbreviation
## <chr>
## 1 DIOEPOSANANTEXU
## 2 DIOIMPMEL
## 3 DAPCAP
## 4 PACTUR
## 5 PUFGRI
## 6 DIOEPOSAN
## 7 PACSP
## 8 SEABUN
## 9 MACSP
## 10 PROPAR
## # ℹ 311 more rows
# Recode
# Rename one level: DIOEPOSANANTEXU becomes ROYAL_WAND and all other levels
# stay as they are. `.keep = "used"` retains only the source column and the
# new column; the filter then shows just the rows that were renamed.
birds %>%
  mutate(
    species_rev = fct_recode(
      species_abbreviation,
      "ROYAL_WAND" = "DIOEPOSANANTEXU"
    ),
    .keep = "used"
  ) %>%
  filter(species_abbreviation == "DIOEPOSANANTEXU")
## # A tibble: 3 × 2
## species_abbreviation species_rev
## <chr> <fct>
## 1 DIOEPOSANANTEXU ROYAL_WAND
## 2 DIOEPOSANANTEXU ROYAL_WAND
## 3 DIOEPOSANANTEXU ROYAL_WAND
# Collapse multiple levels into one
# Merge the two listed species codes into a single ALBATROSS level, then show
# only the affected rows next to their original codes.
birds %>%
  mutate(
    species_col = fct_collapse(
      species_abbreviation,
      "ALBATROSS" = c("DIOEPOSANANTEXU", "DIOIMPMEL")
    )
  ) %>%
  filter(species_abbreviation %in% c("DIOEPOSANANTEXU", "DIOIMPMEL")) %>%
  select(species_abbreviation, species_col)
## # A tibble: 580 × 2
## species_abbreviation species_col
## <chr> <fct>
## 1 DIOEPOSANANTEXU ALBATROSS
## 2 DIOIMPMEL ALBATROSS
## 3 DIOIMPMEL ALBATROSS
## 4 DIOIMPMEL ALBATROSS
## 5 DIOIMPMEL ALBATROSS
## 6 DIOIMPMEL ALBATROSS
## 7 DIOIMPMEL ALBATROSS
## 8 DIOIMPMEL ALBATROSS
## 9 DIOIMPMEL ALBATROSS
## 10 DIOIMPMEL ALBATROSS
## # ℹ 570 more rows
# Lump small levels into other
# Start by tallying how many observation rows each species code has.
count(birds, species_abbreviation)
## # A tibble: 321 × 2
## species_abbreviation n
## <chr> <int>
## 1 ALBUNI 10
## 2 ALBUNI AD 2
## 3 ANOMIN 2
## 4 ANOSP 4
## 5 ANOSTO 6
## 6 APTFOR 5
## 7 APTFOR AD 1
## 8 BULBUL 1
## 9 CALLEU 5
## 10 CATANT 126
## # ℹ 311 more rows
# fct_lump() called with neither `n` nor `prop` lumped nothing on this data:
# all 321 species codes survived. fct_lump_n() states the rule explicitly,
# keeping the 10 most frequent codes and folding the rest into "Other".
# NOTE: output printed for the earlier fct_lump() call still lists the
# unlumped levels and must be regenerated by re-running this step.
birds %>%
  mutate(species_lump = fct_lump_n(species_abbreviation, n = 10)) %>%
  distinct(species_lump)
## # A tibble: 321 × 1
## species_lump
## <fct>
## 1 DIOEPOSANANTEXU
## 2 DIOIMPMEL
## 3 DAPCAP
## 4 PACTUR
## 5 PUFGRI
## 6 DIOEPOSAN
## 7 PACSP
## 8 SEABUN
## 9 MACSP
## 10 PROPAR
## # ℹ 311 more rows
No need to do anything here.