if (!require(pacman)) install.packages("pacman")
pacman::p_load(
tidyverse, magrittr, viridis, stringr,
ggrepel, janitor, tidytuesdayR,
readr, ggtext, lubridate, scales,
forcats, showtext, viridisLite,
ggalluvial, gganimate, ggsankey, cowplot
)TidyTuesday
Long Beach Animal Shelter — PMAP 8551, Summer 2025
Load packages
Load dataset: Long Beach Animal Shelter
font_add_google("Barlow", "barlow")
showtext_auto()
longbeach_raw <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-03-04/longbeach.csv')
#saveRDS(longbeach_raw, file = "data/longbeach_raw.rds")
#longbeach <- readRDS("data/longbeach_raw.rds")
longbeach <-longbeach_raw %>%
clean_names()Clean the data
lb1 <- longbeach %>%
mutate(animal_name = str_remove(animal_name, "^\\*")) %>%
mutate(animal_name = str_to_title(animal_name),
intake_type = str_to_title(intake_type),
outcome_type = str_to_title(outcome_type),
animal_type = str_to_title(animal_type)) %>%
mutate(intake_date = as.Date(intake_date)) %>%
mutate(animal_type = case_when(
animal_type %in% c("amphibian","livestock") ~ "other",
TRUE ~ animal_type
)) Number of animals accepted at the shelter
intake_counts <- lb1 %>%
filter(!is.na(intake_type)) %>%
count(intake_type) %>%
mutate(intake_type = fct_reorder(intake_type, n))
# Plot
tot_intake<-ggplot(intake_counts, aes(x = intake_type, y = n)) +
geom_segment(aes(xend = intake_type, y = 0, yend = n),
color = "black", linewidth = 1.5) +
geom_point(aes(color = intake_type), size = 20) +
geom_text(aes(label = comma(n)), color = "white", size = 4.5, fontface = "bold") +
coord_flip() +
scale_color_viridis_d(option = "D", end = 0.8, guide = "none") +
labs(
title = "Total Animals accepted into animal shelter by intake type",
x = "Intake Type",
y = "Number of Intakes"
) +
theme_classic(base_family = "barlow", base_size = 16) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
axis.title = element_text(face = "bold"),
axis.text = element_text(face = "bold"),
axis.text.x = element_blank()
)
tot_intake#ggsave("output/tot_intake.pdf", tot_intake, width = 12, height = 10, dpi = 1300)Top Animal Names
# Count names by animal_type
top_names <- lb1 %>%
filter(!is.na(animal_name), !animal_name %in% c("", "Unknown")) %>%
filter(animal_type %in% c("Cat","Dog")) %>%
count(animal_type, animal_name, sort = TRUE) %>%
group_by(animal_type) %>%
slice_max(n, n = 10) %>%
ungroup() %>%
mutate(animal_name = fct_reorder2(animal_name, animal_type, -n))
# Plot
top_names<-ggplot(top_names, aes(x = animal_name, y = n, fill = animal_type)) +
geom_col(show.legend = FALSE) +
facet_wrap(~ animal_type, scales = "free_y") +
coord_flip() +
labs(
title = "Top 10 Most Popular Cat and Dog Names ",
x = "Animal Name",
y = "Count"
) +
scale_fill_viridis_d(option = "D", end = 0.8) +
theme_classic(base_family = "barlow", base_size = 16) +
theme(
plot.title = element_text(face = "bold", hjust = 0.5),
axis.title = element_text(face = "bold"),
axis.text = element_text(face = "bold"),
strip.text = element_text(face = "bold")
)
top_names#ggsave("output/top_names.pdf", top_names, width = 12, height = 10, dpi = 1300)Static Sankey Plot
# filter for healthy/normal animals
lb_healthy <- lb1 %>%
filter(
!is.na(intake_date),
!is.na(intake_type),
!is.na(outcome_type),
tolower(intake_condition) == "normal" ) %>%
mutate(month = floor_date(as.Date(intake_date), "month")) %>%
group_by(month, intake_type, outcome_type) %>%
summarise(count = n()) %>%
ungroup()
# select month
selected_month <- as.Date("2021-04-01")
df_plot <- lb_healthy %>% filter(month == selected_month)
# Sankey plot
sankey_plot <- ggplot(df_plot,
aes(axis1 = intake_type,
axis2 = outcome_type,
y = count)) +
geom_alluvium(
aes(fill = intake_type),
width = 0.08,
alpha = 0.5,
color = "gray30",
size = 0.25,
show.legend = FALSE ) +
geom_stratum(
width = 0.20,
fill = "gray95",
color = "gray30",
linewidth = 0.6 ) +
geom_text(
stat = "stratum",
infer.label = TRUE,
size = 4.2,
fontface = "bold",
color = "black" ) +
scale_x_discrete(
limits = c("Intake Type", "Outcome Type"),
expand = c(0.03, 0.03),
labels = c("Intake Type", "Outcome Type")) +
scale_fill_viridis_d(option = "D", direction = -1, begin=0.2) +
labs(
title = paste("Flow of Healthy Animals –", format(selected_month, "%B %Y")),
subtitle = "Only animals with 'Normal' intake condition included" ) +
theme_classic(base_family = "barlow", base_size = 16) +
theme(
panel.grid = element_blank(),
axis.title = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.text.x = element_text(face = "bold", size = 12),
legend.position = "none",
plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray30")
)
sankey_plot#ggsave("output/sankey_plot.pdf", sankey_plot, width = 12, height = 10, dpi = 1300)