We define unisex names as those given to at least 1,000 male babies and
at least 1,000 female babies. This graph shows the 10 most common unisex
names, along with the proportion of male and female babies given each
name.
# Load libraries
library(ggplot2)
library(tidyverse) # This includes dplyr and scales
library(dplyr)
# Load the 2024 baby-name counts. The code below uses the columns `Name`,
# `Sex` (values "M" and "F") and `Frequency` from this table.
# NOTE(review): this is an absolute, machine-specific path -- confirm the file
# location before running the script on another computer.
names <- read.csv(
  file = "/Users/tylerdavis-kean/Desktop/Environ 427/r/yob2024.csv"
)
# Build a table of the 20 most common unisex names: one row per name, ranked
# by the combined count across both sexes.
unisex_names <- names %>%
  group_by(Name) %>%
  # Keep a name only if it is recorded for both sexes. This is a separate step
  # so the min() calls below never receive an empty vector.
  filter(all(c("M", "F") %in% Sex)) %>%
  # Require at least 1,000 babies of each sex, so the name is genuinely common
  # for both.
  filter(
    min(Frequency[Sex == "M"]) >= 1000,
    min(Frequency[Sex == "F"]) >= 1000
  ) %>%
  # Combined count for the name, repeated on each of its rows.
  mutate(total_frequency = sum(Frequency)) %>%
  ungroup() %>%
  arrange(desc(total_frequency)) %>%
  # Collapse to one row per name. `Sex` and `Frequency` are carried over from
  # whichever row of that name comes first after sorting.
  distinct(Name, .keep_all = TRUE) %>%
  slice_head(n = 20)
# Data preparation for the stacked bar plot.
#
# Produces one row per (name, sex) for the 10 most common unisex names, with:
#   total_frequency - combined count of the name across both sexes
#   proportion      - this sex's share of the name's combined count
#   name_label      - "Name (total)" axis label, stored as a factor whose
#                     levels are in reverse rank order so that the most common
#                     name ends up at the top once the axes are flipped
plot_data_with_labels <- names %>%
  group_by(Name) %>%
  # Keep a name only if it is recorded for both sexes. This is a separate step
  # so the min() calls below never receive an empty vector.
  filter(all(c("M", "F") %in% Sex)) %>%
  # Require at least 1,000 babies of each sex.
  filter(
    min(Frequency[Sex == "M"]) >= 1000,
    min(Frequency[Sex == "F"]) >= 1000
  ) %>%
  mutate(
    total_frequency = sum(Frequency),
    proportion = Frequency / total_frequency
  ) %>%
  ungroup() %>%
  # Rank by combined count. `Name` is a tie-breaker so that the rows of one
  # name always stay next to each other, even when two names have the same
  # total.
  arrange(desc(total_frequency), Name) %>%
  # Select the top 10 *names* rather than the first 20 *rows*: a row-based cut
  # can split a name in half when totals tie or a name has an unexpected
  # number of rows, which would leave a bar that does not add up to 100%.
  filter(Name %in% head(unique(Name), 10)) %>%
  mutate(
    # trim = TRUE stops format() from left-padding shorter totals to a common
    # width, which would otherwise give labels such as "Name ( 9,876)".
    name_label = paste0(
      Name,
      " (",
      format(total_frequency, big.mark = ",", trim = TRUE),
      ")"
    ),
    # Reverse the rank order of the levels; see the header comment.
    name_label = factor(name_label, levels = rev(unique(name_label)))
  )
# Draw one horizontal stacked bar per name, split by sex, with each segment's
# percentage printed at its centre.
ggplot(
  plot_data_with_labels,
  aes(x = name_label, y = proportion, fill = Sex)
) +
  # The proportions are already computed, so plot the values as they are.
  geom_col() +
  # Bold white percentage labels, centred within each stacked segment.
  geom_text(
    aes(label = scales::percent(proportion, accuracy = 1)),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4,
    fontface = "bold"
  ) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Top 10 Unisex Names in 2024 with Male vs. Female Ratio",
    x = "Name (Total Frequency)",
    y = "Proportion",
    fill = "Sex"
  ) +
  theme_minimal() +
  # Flip the axes so the name labels read horizontally.
  coord_flip()
