library(readxl)
library(psych)
library(lattice)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(scales)
##
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
##
## alpha, rescale
library(tidyr)
#install.packages("forcats")
#install.packages("showtext")
#install.packages("sysfonts")
#install.packages("curl")
#install.packages("scales")
#install.packages("tidyr")
library(showtext)
library(sysfonts)
font_add_google("Noto Sans KR", "Noto Sans KR")
showtext_auto()
df <- read_excel("Austin_Animal_Center_Outcomes_(2020~2025).xlsx")
#View(df)
프로젝트 주제 유기동물 입양 결정 요인에 대한 통합 분석: 건강, 연령, 보호 상태, 시계열 구조 중심으로
데이터 분석의 목적
유기동물 보호소에 유입되는 개체는 종, 성별, 색상, 나이, 건강 상태 등 다양한 조건을 갖고 있으며, 이들 요소는 입양 가능성과 직결되는 핵심 변수로 작용함 본 분석은 ‘입양 결정 요인 분석’이라는 최종 목표에 앞서, 먼저 보호소에 유입되는 유기동물의 기초적인 특성과 분포 현황을 파악하는 것으로 시작함 특히, ‘어떤 동물이 보호소에 가장 많이 들어오는가’, ‘어떤 특징이 빈번한가’에 대한 탐색을 통해 분석의 출발점을 설정하며, 이는 이후의 건강·연령·입양률 분석과의 연결고리를 형성함
보호소에 유입되는 동물의 종 분포를 분석하여, 입양 결정의 기본 단위가 되는 개체 유형을 파악함
df$species <- as.factor(df$species_8)
ggplot(df, aes(x = species)) +
geom_bar(fill = "skyblue", color = "black") +
labs(title = "유기동물의 종류", x = "종", y = "개체수") +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold", margin = margin(b = 10)),
axis.text = element_text(size = 10),
axis.text.x = element_text(size = 10, margin = margin(b = 12, t = 6)),
axis.text.y = element_text(margin = margin(r = 10)),
axis.title.x = element_text(margin = margin(t = 20)), # 방향 수정
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
ggplot(df, aes(x = factor(sex_9))) +
geom_bar(fill = "tomato", color = "black") +
labs(title = "유기동물의 성별", x = "성별", y = "개채수") +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold", margin = margin(b = 10)),
axis.text = element_text(size = 10),
axis.text.x = element_text(size = 10, margin = margin(b = 12, t = 6)),
axis.text.y = element_text(margin = margin(r = 10)),
axis.title.x = element_text(margin = margin(r = 20)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
df$color <- as.factor(df$color_12)
color_counts <- table(df$color)
top_10_colors <- head(sort(color_counts, decreasing = TRUE), 10)
color_df <- as.data.frame(top_10_colors)
colnames(color_df) <- c("color", "count")
color_df$color <- factor(color_df$color, levels = color_df$color)
ggplot(color_df, aes(x = color, y = count)) +
geom_col(fill = "#1957FF", color = "black") +
labs(title = "유기동물 색상 상위 10개", x = "색상", y = "개체수") +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 8),
axis.text.y = element_text(size = 10, margin = margin(r = 10)),
axis.title.x = element_text(margin = margin(t = 15)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
df$date <- as.Date(substr(df$outcome_4, 1, 10))
df$month <- as.Date(paste0(format(df$date, "%Y-%m"), "-01"))
monthly_counts <- aggregate(x = list(n = rep(1, nrow(df))),
by = list(month = df$month),
FUN = sum)
mean_count <- mean(monthly_counts$n, na.rm = TRUE)
ggplot(monthly_counts, aes(x = month, y = n)) +
geom_line(color = "skyblue", linewidth = 0.8) +
geom_point(color = "blue", size = 1) +
geom_hline(yintercept = mean_count, linetype = "dashed", color = "pink", linewidth = 0.6) +
labs(
title = "월별 유기동물 발생 건수", subtitle = "(2020-01 ~ 2025-04)",
x = "월",
y = "유기동물 수"
) +
scale_x_date(
date_breaks = "3 months", date_labels = "%Y-%m", limits = c(as.Date("2020-01-01"), as.Date("2025-05-01"))
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7, margin = margin(b = 10, t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
subtype_counts <- table(df$subtype_7)
top_10_subtypes <- head(sort(subtype_counts, decreasing = TRUE), 10)
top_10_df <- as.data.frame(top_10_subtypes)
colnames(top_10_df) <- c("subtype", "count")
top_10_df$subtype <- factor(top_10_df$subtype, levels = top_10_df$subtype)
ggplot(top_10_df, aes(x = subtype, y = count)) +
geom_col(fill = "skyblue", color = "black") +
labs(
title = "유기동물 상태", subtitle = "상위 10개 기준", x = "상태", y = "유기동물 수"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7, margin = margin(b = 10, t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
임시 보호 이외의 전반적인 유기동물 상태는 Ravies Risk, Snr, Suffering등 과 같이 아픈 유기 동물이 상당 수를 차지하고 있음
-> 해당 유기동물은 어떤 연령대일까? 앞서 전반적인 나이에 대하여 분석해 봄
df$age_years <- df$age_10
df$age_years <- ifelse(grepl("year", df$age_10), as.numeric(gsub(" ?years?|\\s?year", "", df$age_10)),
ifelse(grepl("month", df$age_10), as.numeric(gsub(" ?months?|\\s?month", "", df$age_10)) / 12,
ifelse(grepl("week", df$age_10), as.numeric(gsub(" ?weeks?|\\s?week", "", df$age_10)) / 52,
ifelse(grepl("day", df$age_10), as.numeric(gsub(" ?days?|\\s?day", "", df$age_10)) / 365,
NA))))
## Warning in ifelse(grepl("year", df$age_10), as.numeric(gsub("
## ?years?|\\s?year", : NAs introduced by coercion
## Warning in ifelse(grepl("month", df$age_10), as.numeric(gsub("
## ?months?|\\s?month", : NAs introduced by coercion
## Warning in ifelse(grepl("week", df$age_10), as.numeric(gsub("
## ?weeks?|\\s?week", : NAs introduced by coercion
## Warning in ifelse(grepl("day", df$age_10), as.numeric(gsub(" ?days?|\\s?day", :
## NAs introduced by coercion
df$age_group <- cut(df$age_years,
breaks = c(-Inf, 1, 3, 6, 10, Inf),
labels = c("1살 미만", "1~3살", "3~6살", "6~10살", "10살 이상"),
right = FALSE)
ggplot(data = df %>% filter(!is.na(age_group))) +
geom_bar(aes(x = age_group), fill = "skyblue", color = "black") +
labs(
title = "나이대별 유기동물 분포", x = "나이대", y = "빈도수"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold", margin = margin(b = 10)),
axis.text.x = element_text(size = 10, margin = margin(b = 6, t = 8)),
axis.text.y = element_text(size = 10, margin = margin(r = 6)),
axis.title.x = element_text(margin = margin(t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
target_subtypes <- c("Suffering", "Rabies Risk", "Snr", "At Vet")
df_focus <- df %>%
filter(!is.na(age_10), subtype_7 %in% target_subtypes) %>%
count(age_10, subtype_7)
top_ages <- df_focus %>%
group_by(age_10) %>%
summarise(total = sum(n)) %>%
slice_max(total, n = 10) %>%
pull(age_10)
df_filtered <- df_focus %>%
filter(age_10 %in% top_ages)
ggplot(df_filtered, aes(x = age_10, y = subtype_7, size = n)) +
geom_point(alpha = 0.75, color = "darkred") +
scale_size_continuous(name = "빈도 수") +
labs(
title = "주요 건강 상태와 나이 분포",
subtitle = "상위 10개 나이 기준 버블 차트",
x = "나이", y = "건강 상태"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11),
axis.text.x = element_text(angle = 45, hjust = 1),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
입양이 되어야 할 연령대에서 건강 문제가 많이 보임 그렇다면 실제 입양은 어떠한가?
df_age <- df %>%
filter(!is.na(age_10), !is.na(type_6)) %>%
mutate(adopted = ifelse(type_6 == "Adoption", "입양", "비입양"))
age_count <- df_age %>%
count(age_10, sort = TRUE)
top_ages <- head(age_count$age_10, 10)
df_age_top10 <- df_age %>%
filter(age_10 %in% top_ages) %>%
group_by(adopted, age_10) %>%
summarise(count = n(), .groups = "drop")
ggplot(df_age_top10, aes(x = age_10, y = adopted, fill = count)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightblue", high = "darkblue") +
labs(
title = "나이별 입양 여부",
x = "나이", y = "입양 여부"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(size = 14, face = "bold", margin = margin(b = 10)),
text = element_text(family = "Noto Sans KR"),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
=> 즉 건강상태 및 노령이 입양 여부에 대해서 중요하게 작용을 함(특히 2년령은 건강상의 이유로 유기 동물이 많고, 입양률이 적음)
df_neuter <- df %>%
mutate(neuter_status = case_when(
grepl("Spayed|Neutered", sex_9) ~ "중성화",
grepl("Intact", sex_9) ~ "비중성화",
TRUE ~ "미상"
)) %>%
filter(neuter_status != "미상")
top_ages <- df_neuter %>%
count(age_10, sort = TRUE) %>%
slice_head(n = 10) %>%
pull(age_10)
df_top_ages <- df_neuter %>%
filter(age_10 %in% top_ages) %>%
mutate(age_10 = fct_infreq(age_10))
ggplot(df_top_ages, aes(x = age_10, fill = neuter_status)) +
geom_bar(position = "fill", color = "black") +
scale_y_continuous(labels = scales::percent) +
labs(title = "상위 10개 나이별 유기동물의 중성화 여부 비율",
x = "나이", y = "비율", fill = "중성화 여부") +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold", margin = margin(b = 10)),
axis.text = element_text(size = 10),
axis.text.x = element_text(size = 9, angle = 45, hjust = 1),
axis.title.x = element_text(margin = margin(t = 12)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 1)
)
-> 중성화 여부는 입양 선호도에 영향을 미치는 것으로 나타남 -> 한편, 건강 이상 상태 역시 입양을 어렵게 만드는 요인이며, 보호소 체류 기간과 비용 증가의 원인이 됨 -> 중성화 여부가 입양에 미치는 영향은 건강 상태와도 관련이 있을 수 있기에, 이에 따라 2개월령 개체를 중심으로 중성화율과 입양률의 동시 분포를 확인함
df_2mo <- df %>%
filter(age_10 == "2 months") %>%
filter(!is.na(breed_11), !is.na(type_6)) %>%
mutate(
adopted = ifelse(type_6 == "Adoption", "입양", "비입양")
)
top_breeds <- df_2mo %>%
count(breed_11, sort = TRUE) %>%
top_n(5, n) %>%
pull(breed_11)
df_top_breeds <- df_2mo %>%
filter(breed_11 %in% top_breeds) %>%
mutate(
breed_11 = fct_infreq(breed_11),
adopted = factor(adopted, levels = c("입양", "비입양"))
)
ggplot(data = df_top_breeds) +
geom_bar(aes(x = breed_11, fill = adopted), position = "dodge", color = "black") +
labs(
title = "2개월령 유기동물의 상위 5개 품종별 입양 여부 분포",
x = "품종", y = "개체 수", fill = "입양 여부"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold", margin = margin(b = 10)),
axis.text.x = element_text(size = 7, angle = 45, hjust = 1, margin = margin(b = 6, t = 8)),
axis.text.y = element_text(size = 10, margin = margin(r = 6)),
axis.title.x = element_text(margin = margin(t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
어떤 보호 상태에 있는 유기동물이 실제로 입양되는가?
df_subtype_adopt <- df %>%
filter(!is.na(subtype_7), !is.na(type_6)) %>%
mutate(adopted = ifelse(type_6 == "Adoption", "입양", "비입양"))
top_subtypes <- df_subtype_adopt %>%
count(subtype_7) %>%
top_n(10, n) %>%
pull(subtype_7)
df_top <- df_subtype_adopt %>%
filter(subtype_7 %in% top_subtypes) %>%
group_by(subtype_7) %>%
summarise(
total = n(),
adopted = sum(adopted == "입양"),
adoption_rate = adopted / total
) %>%
arrange(desc(adoption_rate)) %>%
mutate(subtype_7 = fct_inorder(subtype_7))
ggplot(df_top, aes(x = adoption_rate, y = subtype_7)) +
geom_segment(aes(x = 0, xend = adoption_rate, y = subtype_7, yend = subtype_7),
color = "#87CEEB", linewidth = 1.2) +
geom_point(color = "#003366", size = 4) +
scale_x_continuous(labels = scales::percent_format(accuracy = 1)) +
labs(
title = "유기동물 상태",
subtitle = "상위 10개 기준",
x = "입양률", y = "유기동물 수"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(size = 7, margin = margin(b = 10, t = 6)),
axis.text.y = element_text(size = 10, margin = margin(r = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
axis.title.x = element_text(margin = margin(t = 6)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
입양률이 낮은 상태는 어떤 연령대에 집중되어 있는가?
target_subtypes <- c("Suffering", "Snr", "At Vet", "Rabies Risk")
df_sub_age <- df %>%
filter(!is.na(age_group), subtype_7 %in% target_subtypes)
df_grouped <- df_sub_age %>%
group_by(subtype_7, age_group) %>%
summarise(count = n(), .groups = "drop")
df_grouped$age_group <- factor(df_grouped$age_group,
levels = c("1살 미만", "1~3살", "3~6살", "6~10살", "10살 이상"))
ggplot(df_grouped, aes(x = age_group, y = count, fill = age_group)) +
geom_col(color = "black") +
facet_wrap(~ subtype_7, scales = "free_y") +
labs(
title = "보호 상태별 연령대 구성",
subtitle = "입양률 낮은 주요 상태 기준",
x = "연령대", y = "유기동물 수"
) +
scale_fill_brewer(palette = "Blues") +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4),
legend.position = "none"
)
그렇다면 이 연령대 개체들이 실제로 입양되었는가?
df_3way <- df %>%
filter(!is.na(age_group), !is.na(subtype_7), !is.na(type_6)) %>%
mutate(
adopted = ifelse(type_6 == "Adoption", "입양", "비입양")
) %>%
filter(subtype_7 %in% c("Suffering", "Snr", "At Vet", "Rabies Risk")) %>%
group_by(subtype_7, age_group, adopted) %>%
summarise(count = n(), .groups = "drop")
df_3way$age_group <- factor(df_3way$age_group,
levels = c("1살 미만", "1~3살", "3~6살", "6~10살", "10살 이상"))
ggplot(df_3way, aes(x = age_group, y = adopted, fill = count)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightblue", high = "darkblue") +
facet_wrap(~ subtype_7) +
labs(
title = "보호 상태별 연령대-입양 여부 교차",
subtitle = "입양률이 낮은 상태 중심 3자 비교",
x = "연령대", y = "입양 여부", fill = "개체 수"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 9),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
df$date <- as.Date(substr(df$outcome_4, 1, 10))
df$month <- as.Date(paste0(format(df$date, "%Y-%m"), "-01"))
df_adopt <- df %>%
filter(!is.na(month), !is.na(type_6)) %>%
mutate(adopted = ifelse(type_6 == "Adoption", "입양", "비입양")) %>%
group_by(month, adopted) %>%
summarise(count = n(), .groups = "drop")
ggplot(df_adopt, aes(x = month, y = count, color = adopted)) +
geom_line(linewidth = 1) + # 수정됨
geom_point(size = 2) +
labs(
title = "월별 유기동물 입양 및 비입양 추이", subtitle = "(2020-01 ~ 2025-04)",
x = "월", y = "유기동물 수",
color = "입양 여부"
) +
scale_x_date(
date_breaks = "3 months", date_labels = "%Y-%m",
limits = c(as.Date("2020-01-01"), as.Date("2025-05-01"))
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7, margin = margin(b = 10, t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
df$date <- as.Date(substr(df$outcome_4, 1, 10))
df$month <- as.Date(paste0(format(df$date, "%Y-%m"), "-01"))
df_age <- df %>%
filter(!is.na(month), !is.na(type_6)) %>%
mutate(adopted = ifelse(type_6 == "Adoption", "입양", "비입양")) %>%
group_by(month, adopted) %>%
summarise(count = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = adopted, values_from = count, values_fill = 0) %>%
mutate(
total = 입양 + 비입양,
입양률 = 입양 / total
)
ggplot(df_age, aes(x = month, y = 입양률)) +
geom_line(color = "darkgreen", linewidth = 1.2) +
geom_point(color = "darkgreen", size = 2) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_x_date(
date_breaks = "3 months", date_labels = "%Y-%m",
limits = c(as.Date("2020-01-01"), as.Date("2025-05-01"))
) +
labs(
title = "월별 입양률 변화 추이", subtitle = "(2020-01 ~ 2025-04)",
x = "월", y = "입양률"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7, margin = margin(b = 10, t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
df$date <- as.Date(substr(df$outcome_4, 1, 10))
df$month <- as.Date(paste0(format(df$date, "%Y-%m"), "-01"))
df_total <- df %>%
filter(!is.na(month)) %>%
group_by(month) %>%
summarise(total_count = n(), .groups = "drop")
target_subtypes <- c("Suffering", "Snr", "At Vet", "Rabies Risk")
df_sub <- df %>%
filter(!is.na(month), subtype_7 %in% target_subtypes) %>%
group_by(month, subtype_7) %>%
summarise(sub_count = n(), .groups = "drop")
df_ratio <- merge(df_sub, df_total, by = "month")
df_ratio$ratio <- df_ratio$sub_count / df_ratio$total_count
ggplot(df_ratio, aes(x = month, y = ratio, color = subtype_7)) +
geom_line(linewidth = 0.8) +
geom_point(size = 1.8) +
scale_y_continuous(labels = scales::percent_format()) +
labs(
title = "건강 이상 상태의 전체 구조 대비 비중 추이",
subtitle = "(월별 %)",
x = "월", y = "비중",
color = "건강 이상 상태"
) +
scale_x_date(
date_breaks = "3 months", date_labels = "%Y-%m",
limits = c(as.Date("2020-01-01"), as.Date("2025-05-01"))
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7, margin = margin(b = 10, t = 6)),
axis.title.y = element_text(margin = margin(r = 12)),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4),
legend.position = "bottom"
)
Snr 상태의 유기동물 비중이 높았던 시기에, 입양률은 실제로 하락했는가? 입양률이 낟아진 원인을 구조 동물의 특성인 Snr 비중이 높은 지 확인이 필요함
df$date <- as.Date(substr(df$outcome_4, 1, 10))
df$month <- as.Date(paste0(format(df$date, "%Y-%m"), "-01"))
df_total <- df %>%
filter(!is.na(month)) %>%
group_by(month) %>%
summarise(total = n(), .groups = "drop")
df_snr <- df %>%
filter(!is.na(month), subtype_7 == "Snr") %>%
group_by(month) %>%
summarise(snr_count = n(), .groups = "drop")
df_snr <- merge(df_snr, df_total, by = "month")
df_snr$snr_ratio <- df_snr$snr_count / df_snr$total
df_adopt <- df %>%
filter(!is.na(month), !is.na(type_6)) %>%
mutate(adopted = ifelse(type_6 == "Adoption", "입양", "비입양")) %>%
group_by(month, adopted) %>%
summarise(count = n(), .groups = "drop") %>%
tidyr::pivot_wider(names_from = adopted, values_from = count, values_fill = 0) %>%
mutate(total = 입양 + 비입양, 입양률 = 입양 / total)
df_join <- merge(df_snr[, c("month", "snr_ratio")],
df_adopt[, c("month", "입양률")],
by = "month")
ggplot(df_join, aes(x = month)) +
geom_line(aes(y = snr_ratio, color = "Snr 비중"), linewidth = 1.2) +
geom_line(aes(y = 입양률, color = "입양률"), linewidth = 1.2) +
scale_color_manual(values = c("Snr 비중" = "steelblue", "입양률" = "forestgreen")) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_x_date(date_breaks = "3 months", date_labels = "%Y-%m") +
labs(
title = "월별 Snr 비중과 입양률 변화 비교",
subtitle = "(노령 개체 구조 비중 vs 입양률)",
x = "월", y = "비율(%)", color = ""
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1, size = 7),
legend.position = "bottom",
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)
get_season <- function(month) {
if (month %in% 3:5) {
return("봄")
} else if (month %in% 6:8) {
return("여름")
} else if (month %in% 9:11) {
return("가을")
} else {
return("겨울")
}
}
df$date <- as.Date(substr(df$outcome_4, 1, 10))
df$month <- as.integer(format(df$date, "%m"))
df$season <- sapply(df$month, get_season)
df_season <- df %>%
filter(!is.na(season), !is.na(type_6)) %>%
mutate(adopted = ifelse(type_6 == "Adoption", "입양", "비입양")) %>%
group_by(season, adopted) %>%
summarise(count = n(), .groups = "drop")
df_season$season <- factor(df_season$season, levels = c("봄", "여름", "가을", "겨울"))
ggplot(df_season, aes(x = season, y = count, fill = adopted)) +
geom_bar(stat = "identity", position = "dodge", color = "black") +
labs(
title = "계절별 유기동물 구조 및 입양 수",
subtitle = "(전체 누적 기준)",
x = "계절", y = "유기동물 수", fill = "입양 여부"
) +
theme_minimal() +
theme(
text = element_text(family = "Noto Sans KR"),
plot.title = element_text(size = 14, face = "bold"),
plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
axis.title.y = element_text(margin = margin(r = 12)),
axis.title.x = element_text(margin = margin(t = 8)),
axis.text.x = element_text(size = 11),
panel.border = element_rect(color = "#505050", fill = NA, linewidth = 0.4)
)