TidyTuesday

Long Beach Animal Shelter — PMAP 8551, Summer 2025

Author

Tebitha Mawokomatanda

Published

July 25, 2025

Load packages

if (!require(pacman)) install.packages("pacman")
pacman::p_load(
   tidyverse,   magrittr,      viridis,      stringr, 
   ggrepel,     janitor,       tidytuesdayR,
   readr,       ggtext,        lubridate,    scales,
   forcats,     showtext,      viridisLite, 
   ggalluvial,  gganimate,     ggsankey,  cowplot
    )

Load dataset: Long Beach Animal Shelter

font_add_google("Barlow", "barlow")
showtext_auto()

longbeach_raw <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-03-04/longbeach.csv') 

#saveRDS(longbeach_raw, file = "data/longbeach_raw.rds")
#longbeach <- readRDS("data/longbeach_raw.rds") 

longbeach <-longbeach_raw %>% 
clean_names()

Clean the data

lb1 <- longbeach %>%
  mutate(animal_name = str_remove(animal_name, "^\\*")) %>% 
  mutate(animal_name = str_to_title(animal_name),
         intake_type = str_to_title(intake_type),
         outcome_type = str_to_title(outcome_type),
         animal_type = str_to_title(animal_type)) %>% 
  mutate(intake_date = as.Date(intake_date)) %>% 
   mutate(animal_type = case_when(
          animal_type %in% c("amphibian","livestock") ~ "other",
          TRUE ~ animal_type
  )) 

Number of animals accepted at the shelter

intake_counts <- lb1 %>%
  filter(!is.na(intake_type)) %>%
  count(intake_type) %>%
  mutate(intake_type = fct_reorder(intake_type, n))

# Plot
tot_intake<-ggplot(intake_counts, aes(x = intake_type, y = n)) +
  geom_segment(aes(xend = intake_type, y = 0, yend = n),
               color = "black", linewidth = 1.5) +
  geom_point(aes(color = intake_type), size = 20) +
 geom_text(aes(label = comma(n)), color = "white", size = 4.5, fontface = "bold") +
  coord_flip() +
  scale_color_viridis_d(option = "D", end = 0.8, guide = "none") +
  labs(
    title = "Total Animals accepted into animal shelter by intake type",
    x = "Intake Type",
    y = "Number of Intakes"
  ) +
  theme_classic(base_family = "barlow", base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5), 
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    axis.text.x = element_blank()  
  )

tot_intake

#ggsave("output/tot_intake.pdf", tot_intake, width = 12, height = 10, dpi = 1300)

Top Animal Names

# Count names by animal_type
top_names <- lb1 %>%
  filter(!is.na(animal_name), !animal_name %in% c("", "Unknown")) %>%
  filter(animal_type %in% c("Cat","Dog")) %>% 
  count(animal_type, animal_name, sort = TRUE) %>%
  group_by(animal_type) %>%
  slice_max(n, n = 10) %>%  
  ungroup() %>%
  mutate(animal_name = fct_reorder2(animal_name, animal_type, -n))

# Plot
top_names<-ggplot(top_names, aes(x = animal_name, y = n, fill = animal_type)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ animal_type, scales = "free_y") +
  coord_flip() +
  labs(
    title = "Top 10 Most Popular Cat and Dog Names ",
    x = "Animal Name",
    y = "Count"
  ) +
  scale_fill_viridis_d(option = "D", end = 0.8) +
  theme_classic(base_family = "barlow", base_size = 16) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),  
    axis.title = element_text(face = "bold"),
    axis.text = element_text(face = "bold"),
    strip.text = element_text(face = "bold")  
  )
top_names

#ggsave("output/top_names.pdf", top_names, width = 12, height = 10, dpi = 1300)

Static Sankey Plot

# filter for healthy/normal animals
lb_healthy <- lb1 %>%
  filter(
    !is.na(intake_date),
    !is.na(intake_type),
    !is.na(outcome_type),
    tolower(intake_condition) == "normal" ) %>%
  mutate(month = floor_date(as.Date(intake_date), "month")) %>%
  group_by(month, intake_type, outcome_type) %>%
  summarise(count = n()) %>% 
  ungroup()
  
#  select month 
selected_month <- as.Date("2021-04-01")
df_plot <- lb_healthy %>% filter(month == selected_month)

#  Sankey plot
sankey_plot <- ggplot(df_plot,
                      aes(axis1 = intake_type,
                          axis2 = outcome_type,
                          y = count)) +
  geom_alluvium(
    aes(fill = intake_type),
    width = 0.08,                  
    alpha = 0.5,
    color = "gray30",              
    size = 0.25,
    show.legend = FALSE ) +
  geom_stratum(
    width = 0.20,
    fill = "gray95",
    color = "gray30",
    linewidth = 0.6 ) +
  geom_text(
    stat = "stratum",
    infer.label = TRUE,
    size = 4.2,
    fontface = "bold",
    color = "black" ) +
  scale_x_discrete(
    limits = c("Intake Type", "Outcome Type"),
    expand = c(0.03, 0.03),
    labels = c("Intake Type", "Outcome Type")) +
  scale_fill_viridis_d(option = "D", direction = -1, begin=0.2) +
  labs(
    title = paste("Flow of Healthy Animals –", format(selected_month, "%B %Y")),
    subtitle = "Only animals with 'Normal' intake condition included" ) +
  theme_classic(base_family = "barlow", base_size = 16) +
  theme(
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(face = "bold", size = 12),
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray30")
  )

sankey_plot

#ggsave("output/sankey_plot.pdf", sankey_plot, width = 12, height = 10, dpi = 1300)

Final arranged plots

Final plot