Names of 2024

Word Cloud

A word cloud of the top baby names of 2024.

library(ggplot2)
library(tidyverse) # This includes dplyr and scales
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(wordcloud2)
# Load the 2024 baby-name counts. The rest of this chunk relies on the file
# providing Name, Sex and Frequency columns.
names_data <- read.csv("/Users/tylerdavis-kean/Desktop/Environ 427/r/yob2024.csv")

# Keep only the 200 highest-frequency rows so the cloud stays legible, and tag
# each row with a display colour keyed on its Sex value.
top_names <- names_data %>%
  arrange(desc(Frequency)) %>%
  slice_head(n = 200) %>%
  mutate(
    color = case_when(
      Sex == "M" ~ "forestgreen",
      Sex == "F" ~ "purple",
      .default = "gray" # any other (or missing) Sex value
    )
  )

# wordcloud2 wants the word in a 'word' column and its weight in 'freq';
# the colour column rides along so it can be passed explicitly below.
word_cloud_data <- top_names %>%
  select(Name, Frequency, color) %>%
  rename(word = Name, freq = Frequency)

# Render the interactive word cloud, one colour per word
wordcloud2(word_cloud_data, color = word_cloud_data$color)

The Top Names

Here we show the top 10 most frequent baby names chosen in 2024.

library(tidyverse)
library(ggplot2)

# Load the 2024 name counts (Name and Frequency columns are used below)
names_data <- read.csv("/Users/tylerdavis-kean/Desktop/Environ 427/r/yob2024.csv")

# Add up the counts across all rows sharing a name, then keep the ten largest
# totals. summarise() already drops the single grouping level, so the result
# is ungrouped when it reaches slice_max().
name_totals <- summarise(
  group_by(names_data, Name),
  Total_Frequency = sum(Frequency)
)
top_overall_names <- slice_max(name_totals, Total_Frequency, n = 10)

# Horizontal bar chart of the top names, with the largest total at the top
ggplot(
  top_overall_names,
  aes(x = reorder(Name, Total_Frequency), y = Total_Frequency)
) +
  geom_col(fill = "dodgerblue") +
  coord_flip() +
  ggtitle("Top 10 Most Frequent Names of 2024") +
  xlab("Name") +
  ylab("Frequency") +
  theme_minimal()

Unisex Names

We define unisex names as those given to at least 1,000 male babies and at least 1,000 female babies in 2024. This graph shows the top 10 unisex names by total frequency, along with the proportion of male and female babies given each name.

# Load libraries
library(ggplot2)
library(tidyverse) # This includes dplyr and scales
library(dplyr)

# Read the data (Name, Sex and Frequency columns are used below).
# The object keeps its original name `names` so that any later code referring
# to it still works; R still finds base::names() when it is called as a
# function.
names <- read.csv("/Users/tylerdavis-kean/Desktop/Environ 427/r/yob2024.csv")

# How many unisex names the chart displays (matches the "Top 10" plot title)
n_plot_names <- 10

# Every row belonging to a qualifying unisex name, with the per-name total and
# each row's share of that total attached. This is computed once and reused
# for both the ranked name list and the plot data.
unisex_rows <- names %>%
  group_by(Name) %>%
  # Keep names that have both a Male and a Female entry. This runs first so
  # the min() calls below never see an empty vector.
  filter(all(c("M", "F") %in% Sex)) %>%
  # Require at least 1,000 counts for each sex so the name is common for both
  filter(
    min(Frequency[Sex == "M"]) >= 1000 &
      min(Frequency[Sex == "F"]) >= 1000
  ) %>%
  mutate(
    total_frequency = sum(Frequency),
    proportion = Frequency / total_frequency
  ) %>%
  ungroup()

# One row per unisex name, ranked by total frequency (top 20 retained)
unisex_names <- unisex_rows %>%
  select(-proportion) %>%
  arrange(desc(total_frequency)) %>%
  distinct(Name, .keep_all = TRUE) %>%
  head(20)

# Names to plot. Choosing whole names (rather than slicing a fixed number of
# rows) guarantees every plotted name keeps both its M and F rows, even when
# two names tie on total frequency.
top_unisex <- head(unisex_names$Name, n_plot_names)

# Plot data: both sex rows for each selected name plus the axis label
plot_data_with_labels <- unisex_rows %>%
  filter(Name %in% top_unisex) %>%
  # Name is a tie-breaker so rows of the same name always stay adjacent
  arrange(desc(total_frequency), Name) %>%
  mutate(
    # trim = TRUE stops format() from left-padding shorter totals to a common
    # width, which would otherwise produce labels such as "Name ( 9,876)"
    name_label = paste0(
      Name,
      " (",
      format(total_frequency, big.mark = ",", trim = TRUE),
      ")"
    )
  )

# Reverse the label order so that, after coord_flip(), the most frequent name
# is drawn at the top of the chart
plot_data_with_labels$name_label <- factor(
  plot_data_with_labels$name_label,
  levels = rev(unique(plot_data_with_labels$name_label))
)

# Create the final stacked bar plot: one bar per name, split by sex
ggplot(
  data = plot_data_with_labels,
  aes(x = name_label, y = proportion, fill = Sex)
) +
  geom_col() +
  # Percentage labels centred inside each bar segment
  geom_text(
    aes(label = scales::percent(proportion, accuracy = 1)),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4,
    fontface = "bold"
  ) +
  labs(
    title = "Top 10 Unisex Names in 2024 with Male vs. Female Ratio",
    x = "Name (Total Frequency)",
    y = "Proportion",
    fill = "Sex"
  ) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_minimal()