Occupational Segregation

# Point the R session at the project folder. Every file read visible in this
# document passes a full absolute path, so none of them depends on this call.
setwd("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project")

Median weekly earnings by race-ethnicity and gender

library(ggplot2)
library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)

# Load the race/gender earnings table. The import summary printed below
# reports a character `group` column and numeric `total` / `weekly_wage`.
data <- read_csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\race1.csv")

## Rows: 10 Columns: 3

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): group
## dbl (2): total, weekly_wage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Display order of the race/ethnicity categories (factor level order).
race_levels <- c("Hispanic", "Asian", "African American", "White", "All Races/Ethnicities")

# Tidy the raw group labels, derive gender and race/ethnicity from them, and
# drop every row whose label matches neither a gender nor a race pattern.
data_clean <- data %>%
  mutate(
    group = trimws(group),
    # Repair the truncated label present in the raw file.
    group = if_else(group == "hispanic_wome", "hispanic_women", group),
    # Strip "$" and "," before coercing the wage to a number.
    weekly_wage = as.numeric(gsub("[$,]", "", weekly_wage))
  ) %>%
  mutate(
    gender = case_when(
      grepl("_men|^men$", group) ~ "Men",
      grepl("_women|^women$", group) ~ "Women",
      TRUE ~ NA_character_
    ),
    race_category = case_when(
      # The bare "men"/"women" rows are the all-groups totals.
      group %in% c("men", "women") ~ "All Races/Ethnicities",
      grepl("white", group) ~ "White",
      grepl("black", group) ~ "African American",
      grepl("asian", group) ~ "Asian",
      grepl("hispanic", group) ~ "Hispanic",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(gender), !is.na(race_category)) %>%
  mutate(race_category = factor(race_category, levels = race_levels))

# Horizontal dodged bars of median weekly earnings: one men/women pair per
# race/ethnicity category, with the dollar amount printed beside each bar.
bar_dodge <- position_dodge(width = 0.8)
# Extra room on the right so the value labels are not cut off by the limit.
x_upper <- max(data_clean$weekly_wage) + 400

ggplot(data_clean, aes(x = weekly_wage, y = race_category, fill = gender)) +
  geom_col(position = bar_dodge, width = 0.6) +
  geom_text(
    aes(label = paste0("$", weekly_wage)),
    position = bar_dodge,
    hjust = -0.1,
    size = 4,
    color = "black"
  ) +
  scale_fill_manual(values = c("Men" = "deepskyblue3", "Women" = "tomato")) +
  ggtitle("Median Weekly Earnings 2023") +
  labs(x = NULL, y = NULL, fill = NULL) +
  xlim(0, x_upper) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text.y = element_text(margin = margin(r = -8), size = 10, color = "black"),
    plot.title = element_text(face = "bold", size = 13.5, hjust = .2),
    legend.position = "bottom",
    panel.grid.major.y = element_blank()
  )

Median annual earnings by education level and gender

library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)

# Read the education/earnings table and reshape it for plotting.
# NOTE(review): assumes the file has exactly three columns (education level,
# men's earnings, women's earnings) and that its first row is a total - confirm.
data <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\educ..csv")
data <- data[-1, ]  # remove total row
colnames(data) <- c("Education", "Men", "Women")
data_clean <- data %>%
  mutate(
    Education = trimws(Education),
    Men = as.numeric(Men),
    Women = as.numeric(Women),
    Gap = Men - Women
  )

# Multi-line axis labels, keyed by the full category text.
label_map <- c(
  "Less than high school graduate" = "Less than\nhigh school\ngraduate",
  "High school graduate (includes equivalency)" = "High school\ngraduate",
  "Some college or associate's degree" = "Some college or\nassociate's degree",
  "Bachelor's degree" = "Bachelor's\ndegree",
  "Graduate or professional degree" = "Graduate or\nprofessional degree"
)

# Look the wrapped labels up by *text*. Indexing `label_map` with a factor
# would use the factor's integer codes (i.e. position in `label_map`), not its
# labels, and would return a plain character vector that ggplot2 then sorts
# alphabetically, discarding the earnings-based ordering.
education_text <- as.character(data_clean$Education)
wrapped_label <- unname(label_map[education_text])
# Keep the original text for any category that has no wrapped version.
unmapped <- is.na(wrapped_label)
wrapped_label[unmapped] <- education_text[unmapped]

# Order the categories by men's earnings (ascending) so the bars follow that
# order on the x axis; the factor survives pivot_longer() below.
data_clean$Education <- fct_reorder(wrapped_label, data_clean$Men)

data_long <- pivot_longer(data_clean, cols = c("Men", "Women"),
                          names_to = "Gender", values_to = "Income")
# Dodged vertical bars of median annual earnings by education level, one bar
# per gender, with the dollar amount printed above each bar.
ggplot(data_long, aes(x = Education, y = Income, fill = Gender)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  # trim = TRUE: format() otherwise pads every value to a common width, which
  # puts a gap after the dollar sign on the shorter numbers ("$ 35,000").
  geom_text(aes(label = paste0("$", format(Income, big.mark = ",", trim = TRUE))),
            position = position_dodge(width = 0.8),
            vjust = -0.3, size = 3) +
  scale_fill_manual(values = c("Men" = "steelblue", "Women" = "salmon")) +
  scale_y_continuous(
    name = NULL,
    breaks = seq(0, 130000, by = 25000),
    limits = c(0, 130000),
    expand = c(0, 0)
  ) +
  labs(
    subtitle = "Median Annual Earnings by Educational Level and Gender, 2023",
    x = NULL,
    fill = NULL  # removes 'Gender' from legend title
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = .25),
    legend.position = "bottom",
    legend.direction = "horizontal",
    panel.grid.major.x = element_blank()
  )

Highest paying jobs

# Load libraries
library(ggplot2)
library(dplyr)
library(tidyr)


# Load the highest-paying occupations. `percent_women` is taken from the file
# as-is; the men's share is its complement. Rows are sorted by women's share.
data <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o1.csv") %>%
  mutate(percent_men = 100 - percent_women) %>%
  arrange(percent_women)

# One copy of the table per gender, each carrying the [start, end] span of
# that gender's segment on a 0-100 axis: women cover 0..percent_women and
# men cover percent_women..100.
female <- mutate(data, Gender = "Female", start = 0, end = percent_women)
male <- mutate(data, Gender = "Male", start = percent_women, end = 100)

# Stack both copies and fix the factor orders: Female before Male, and
# occupations in the sorted row order established above.
occupation_order <- unique(data$occupation)
data_long <- bind_rows(female, male) %>%
  mutate(
    Gender = factor(Gender, levels = c("Female", "Male")),
    occupation = factor(occupation, levels = occupation_order)
  )

# Stacked 0-100% bar per occupation, drawn with geom_rect() on a continuous
# y axis: each row sits at the integer code of its occupation factor level,
# and the axis breaks/labels map those codes back to occupation names.
occupation_levels <- levels(data_long$occupation)

# Only the top row (the last factor level) carries the "Female"/"Male" text,
# which stands in for the hidden legend.
top_row <- subset(
  data_long,
  occupation == occupation_levels[length(occupation_levels)]
)

ggplot(data_long) +
  geom_rect(aes(xmin = start, xmax = end,
                ymin = as.numeric(occupation) - 0.35,
                ymax = as.numeric(occupation) + 0.35,
                fill = Gender)) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  geom_vline(xintercept = c(25, 50, 75),
             color = "gray40", linetype = "dashed", linewidth = 0.4) +
  geom_text(data = subset(top_row, Gender == "Female"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Female"),
            color = "black", size = 3) +
  geom_text(data = subset(top_row, Gender == "Male"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Male"),
            color = "black", size = 3) +
  # seq_along() stays empty when there are no levels, unlike 1:length().
  scale_y_continuous(breaks = seq_along(occupation_levels),
                     labels = occupation_levels,
                     expand = c(0, 0)) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(
    title = "Gender composition of the Highest-paying U.S. Occupations, 2023",
    x = NULL,
    y = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = .15, size = 13, margin = margin(b = 15)),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_text(size = 10, color = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(t = 30, r = 30, b = 20, l = 20)
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Lowest paying jobs

library(ggplot2)
library(dplyr)
library(tidyr)

# Load the lowest-paying occupations and compute women's share of each
# occupation (rounded to one decimal) from the raw counts; the men's share is
# its complement. Rows are sorted by women's share.
data <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o2.csv") %>%
  mutate(
    percent_women = round((total_women / total) * 100, 1),
    percent_men = 100 - percent_women
  ) %>%
  arrange(percent_women)

# One copy of the table per gender, each carrying the [start, end] span of
# that gender's segment on a 0-100 axis: women cover 0..percent_women and
# men cover percent_women..100.
female <- mutate(data, Gender = "Female", start = 0, end = percent_women)
male <- mutate(data, Gender = "Male", start = percent_women, end = 100)

# Stack both copies and fix the factor orders: Female before Male, and
# occupations in the sorted row order established above.
occupation_order <- unique(data$occupation)
data_long <- bind_rows(female, male) %>%
  mutate(
    Gender = factor(Gender, levels = c("Female", "Male")),
    occupation = factor(occupation, levels = occupation_order)
  )
# Stacked 0-100% bar per occupation, drawn with geom_rect() on a continuous
# y axis: each row sits at the integer code of its occupation factor level,
# and the axis breaks/labels map those codes back to occupation names.
occupation_levels <- levels(data_long$occupation)

# Only the top row (the last factor level) carries the "Female"/"Male" text,
# which stands in for the hidden legend.
top_row <- subset(
  data_long,
  occupation == occupation_levels[length(occupation_levels)]
)

ggplot(data_long) +
  geom_rect(aes(xmin = start, xmax = end,
                ymin = as.numeric(occupation) - 0.35,
                ymax = as.numeric(occupation) + 0.35,
                fill = Gender)) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  geom_vline(xintercept = c(25, 50, 75),
             color = "gray40", linetype = "dashed", linewidth = 0.4) +
  geom_text(data = subset(top_row, Gender == "Female"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Female"),
            color = "black", size = 3) +
  geom_text(data = subset(top_row, Gender == "Male"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Male"),
            color = "black", size = 3) +
  # seq_along() stays empty when there are no levels, unlike 1:length().
  scale_y_continuous(breaks = seq_along(occupation_levels),
                     labels = occupation_levels,
                     expand = c(0, 0)) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(
    title = "Gender composition of the Lowest-paying U.S. Occupations,2023",
    x = NULL,
    y = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = .55, size = 13, margin = margin(b = 15)),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_text(size = 10, color = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(t = 30, r = 30, b = 20, l = 20)
  )

# Load libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(patchwork)


# Raw occupation tables for the combined figure: o1.csv is the file used for
# the highest-paying chart above and o2.csv the one used for the lowest-paying
# chart. Both are passed through prepare_plot_data(), which reads their
# `total_women`, `total` and `occupation` columns.
high <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o1.csv")
low <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o2.csv")
#' Build the long, plot-ready table for a gender-composition chart.
#'
#' Computes women's share of each occupation (rounded to one decimal) from
#' `total_women` and `total`, sorts the rows by that share, and returns two
#' stacked copies of the table - one per gender - each carrying the
#' `start`/`end` span of that gender's segment on a 0-100 axis.
#'
#' @param data A data frame with `occupation`, `total_women` and `total`
#'   columns.
#' @return The stacked table with added `percent_women`, `percent_men`,
#'   `Gender` (factor: Female, Male), `start` and `end` columns; `occupation`
#'   is a factor whose levels follow the sorted row order.
prepare_plot_data <- function(data) {
  shares <- data %>%
    mutate(
      percent_women = round((total_women / total) * 100, 1),
      percent_men = 100 - percent_women
    ) %>%
    arrange(percent_women)

  # Women cover 0..percent_women, men cover percent_women..100.
  segments <- bind_rows(
    mutate(shares, Gender = "Female", start = 0, end = percent_women),
    mutate(shares, Gender = "Male", start = percent_women, end = 100)
  )

  segments$Gender <- factor(segments$Gender, levels = c("Female", "Male"))
  # unique() keeps first-appearance order, i.e. the sorted order from above.
  segments$occupation <- factor(segments$occupation,
                                levels = unique(segments$occupation))
  segments
}

# Plot-ready tables for both panels of the combined figure.
df_high <- prepare_plot_data(high)
df_low <- prepare_plot_data(low)

# Upper panel: stacked 0-100% bar per highest-paying occupation. Each row is
# drawn with geom_rect() at the integer code of its occupation factor level.
high_levels <- levels(df_high$occupation)

# Only the top row (the last factor level) carries the "Female"/"Male" text,
# which stands in for the hidden legend.
high_top_row <- subset(
  df_high,
  occupation == high_levels[length(high_levels)]
)

plot_high <- ggplot(df_high) +
  geom_rect(aes(xmin = start, xmax = end,
                ymin = as.numeric(occupation) - 0.35,
                ymax = as.numeric(occupation) + 0.35,
                fill = Gender)) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  geom_vline(xintercept = c(25, 50, 75), color = "gray40", linetype = "dashed", linewidth = 0.3) +
  geom_text(data = subset(high_top_row, Gender == "Female"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Female"),
            color = "black", size = 2.5) +
  geom_text(data = subset(high_top_row, Gender == "Male"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Male"),
            color = "black", size = 2.5) +
  # seq_along() stays empty when there are no levels, unlike 1:length().
  scale_y_continuous(breaks = seq_along(high_levels),
                     labels = high_levels,
                     expand = c(0, 0)) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(subtitle = "Gender composition of highest-paying occupations") +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(size = 10, hjust = 0),
    plot.margin = margin(t = 15, r = 20, b = 5, l = 10)
  )
# Lower panel: stacked 0-100% bar per lowest-paying occupation. Each row is
# drawn with geom_rect() at the integer code of its occupation factor level.
low_levels <- levels(df_low$occupation)

# Only the top row (the last factor level) carries the "Female"/"Male" text,
# which stands in for the hidden legend.
low_top_row <- subset(
  df_low,
  occupation == low_levels[length(low_levels)]
)

plot_low <- ggplot(df_low) +
  geom_rect(aes(xmin = start, xmax = end,
                ymin = as.numeric(occupation) - 0.35,
                ymax = as.numeric(occupation) + 0.35,
                fill = Gender)) +
  # `linewidth` replaces `size` for line geoms (deprecated in ggplot2 3.4.0).
  geom_vline(xintercept = c(25, 50, 75), color = "gray40", linetype = "dashed", linewidth = 0.3) +
  geom_text(data = subset(low_top_row, Gender == "Female"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Female"),
            color = "black", size = 2.5) +
  geom_text(data = subset(low_top_row, Gender == "Male"),
            aes(x = (start + end)/2, y = as.numeric(occupation), label = "Male"),
            color = "black", size = 2.5) +
  # seq_along() stays empty when there are no levels, unlike 1:length().
  scale_y_continuous(breaks = seq_along(low_levels),
                     labels = low_levels,
                     expand = c(0, 0)) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(subtitle = "Gender composition of lowest-paying occupations") +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.x = element_text(size = 7),
    axis.text.y = element_text(size = 7),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(size = 10, hjust = 0),
    plot.margin = margin(t = 10, r = 20, b = 20, l = 10)
  )
# Text styling for the title and subtitle shared by the whole figure.
combined_theme <- theme(
  plot.title = element_text(size = 12, face = "bold", hjust = .75),
  plot.subtitle = element_text(size = 10, hjust = .9, margin = margin(b = 10))
)

# Stack the two panels vertically (patchwork `/`) and add the shared heading.
final_plot <- (plot_high / plot_low) +
  plot_annotation(
    title = "Enduring U.S. divide between men and women at work",
    subtitle = "Gender composition of the highest- and the lowest-paying U.S. occupations, 2023",
    theme = combined_theme
  )
final_plot

Women’s weekly median income by race-ethnicity

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ lubridate 1.9.3     ✔ stringr   1.5.1
## ✔ purrr     1.0.2     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)

# Load women's weekly earnings by occupational group. Per the import summary
# printed below, all five columns (including the four earnings columns) arrive
# as character, so the earnings are parsed to numbers further down.
data <- read_csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\race-weeklyearning.csv")

## Rows: 7 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Occupations, Asian, white, African American, Hispanics
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Occupational group name -> rank code (kept as a string).
occupation_rank <- c(
  "Management, business, and financial operations occupations" = "1",
  "Professional and related occupations" = "2",
  "All Occupations" = "3",
  "Natural resources, construction, and maintenance occupations" = "4",
  "Sales and office occupations" = "5",
  "Production, transportation, and material moving occupations" = "6",
  "Service occupations" = "7"
)

# Rank code -> shortened, line-wrapped axis label.
rank_label <- c(
  "1" = "Management, business,\nand financial operations",
  "2" = "Professional and related\noccupations",
  "3" = "All Occupations",
  "4" = "Natural resources,\nconstruction and maintenance",
  "5" = "Sales and office\noccupations",
  "6" = "Production, transportation,\nand material moving",
  "7" = "Service occupations"
)

# Attach both columns by name lookup; a group that is not listed gets NA in
# both, and unname() keeps the new columns free of a names attribute.
data <- data %>%
  mutate(
    Occupation_Num = unname(occupation_rank[Occupations]),
    Occupation_Label = unname(rank_label[Occupation_Num])
  )

# Reshape to one row per occupational group and race, then type the columns.
race_columns <- c("Asian", "white", "African American", "Hispanics")
data_long <- pivot_longer(
  data,
  cols = all_of(race_columns),
  names_to = "Race",
  values_to = "Earnings"
)

# Earnings arrive as text; parse_number() extracts the numeric value.
data_long$Earnings <- parse_number(data_long$Earnings)

# Rename the raw column names to display names and fix the race order.
data_long$Race <- factor(
  data_long$Race,
  levels = c("Hispanics", "African American", "white", "Asian"),
  labels = c("Hispanic", "African American", "White", "Asian")
)

# Reverse first-appearance order so the first group in the table becomes the
# last factor level.
data_long$Occupation_Label <- factor(
  data_long$Occupation_Label,
  levels = rev(unique(data_long$Occupation_Label))
)
# Horizontal dodged bars of women's median weekly earnings: one bar per race
# within each occupational group, with dollar labels on the Hispanic and Asian
# bars only.
#
# x is the actual earnings value. Scaling it (e.g. `Earnings * 0.8`) does not
# change how the bars look, because the axis rescales with the data; it only
# makes the axis ticks disagree with the dollar labels printed on the bars.
ggplot(data_long, aes(x = Earnings, y = Occupation_Label, fill = Race)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  # Keep every race in the text layer and blank out the unwanted labels.
  # Filtering the layer's data down to two races would make position_dodge()
  # split each row into two slots instead of four, so the labels would no
  # longer line up with their bars.
  geom_text(aes(label = ifelse(Race %in% c("Hispanic", "Asian"),
                               paste0("$", Earnings), "")),
            position = position_dodge(width = 0.7),
            hjust = -0.25,   # a little farther from bar
            size = 2.5) +
  scale_fill_manual(values = c("Asian" = "#7B3294",
                               "White" = "#0571B0",
                               "African American" = "#4D4D4D",
                               "Hispanic" = "#CA0020")) +
  # clip = "off" lets labels extend past the panel edge into the plot margin.
  coord_cartesian(clip = "off") +
  theme_minimal(base_size = 12) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 10, lineheight = 0.95, hjust = 1, margin = margin(r = 3)),  # closer to bars
    panel.grid.major.y = element_blank(),
    # Horizontal legend bottom-left. A numeric `legend.position` is deprecated
    # since ggplot2 3.5.0; the coordinates go in `legend.position.inside`.
    legend.position = "inside",
    legend.position.inside = c(0.05, -0.05),
    legend.justification = c("left", "top"),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.text = element_text(size = 9),
    plot.title = element_text(size = 12, hjust = 1),
    plot.margin = margin(t = 20, r = 50, b = 20, l = 30)
  ) +
  ggtitle("Median Weekly Earnings for Women by  Race/Ethnicity for Occupational Groups")

## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Occupational Segregation

Anamika Kumar

2025-04-29

Median weekly earnings by race-ethnicity and gender

Median annual earnings by education level and gender

Highest paying jobs

Lowest paying jobs

Women’s weekly median income by race-ethnicity