# NOTE(review): hard-coded, machine-specific working directory. The file reads
# visible in this script all use absolute paths, so this call looks redundant;
# confirm nothing else relies on it before removing.
setwd("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project")
Median weekly earnings by race-ethnicity and gender
library(ggplot2)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Load the wage table: one row per `group` key with `total` and `weekly_wage`.
data <- read_csv(
  file = "C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\race1.csv"
)
## Rows: 10 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): group
## dbl (2): total, weekly_wage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tidy the `group` key, then split it into a gender and a race/ethnicity
# column. Rows that match neither classification are dropped.
data_clean <- data %>%
  # Normalise the key: strip whitespace and repair the truncated
  # "hispanic_wome" spelling.
  mutate(group = trimws(group)) %>%
  mutate(
    group = ifelse(group == "hispanic_wome", "hispanic_women", group)
  ) %>%
  # Drop any "$" or "," so the wage is a plain number.
  mutate(weekly_wage = as.numeric(gsub("[$,]", "", weekly_wage))) %>%
  mutate(
    gender = case_when(
      grepl("_men|^men$", group) ~ "Men",
      grepl("_women|^women$", group) ~ "Women",
      TRUE ~ NA_character_
    ),
    # The bare "men"/"women" keys are the all-races totals.
    race_category = case_when(
      group %in% c("men", "women") ~ "All Races/Ethnicities",
      grepl("white", group) ~ "White",
      grepl("black", group) ~ "African American",
      grepl("asian", group) ~ "Asian",
      grepl("hispanic", group) ~ "Hispanic",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!(is.na(gender) | is.na(race_category))) %>%
  # Fixed level order controls the bottom-to-top order of the bars.
  mutate(
    race_category = factor(
      race_category,
      levels = c(
        "Hispanic", "Asian", "African American", "White",
        "All Races/Ethnicities"
      )
    )
  )
# Horizontal dodged bars: median weekly earnings per race/ethnicity, one bar
# per gender, with the dollar value printed just past the end of each bar.
ggplot(
  data = data_clean,
  mapping = aes(x = weekly_wage, y = race_category, fill = gender)
) +
  geom_col(width = 0.6, position = position_dodge(width = 0.8)) +
  geom_text(
    mapping = aes(label = paste0("$", weekly_wage)),
    position = position_dodge(width = 0.8),
    hjust = -0.1,
    size = 4,
    color = "black"
  ) +
  # Axis runs 400 units past the largest wage, which is where the value
  # labels (negative hjust) are drawn.
  scale_x_continuous(limits = c(0, max(data_clean$weekly_wage) + 400)) +
  scale_fill_manual(values = c("Men" = "deepskyblue3", "Women" = "tomato")) +
  labs(title = "Median Weekly Earnings 2023", x = NULL, y = NULL, fill = NULL) +
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(face = "bold", size = 13.5, hjust = .2),
    axis.text.y = element_text(
      margin = margin(r = -8),
      size = 10,
      color = "black"
    )
  )

Median annual earnings by education level and gender
library(ggplot2)
library(dplyr)
library(tidyr)
library(forcats)
# Median annual earnings by education level: after the first (total) row is
# dropped, the three columns are treated as education level, men, women.
data <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\educ..csv")
data <- data[-1, ] # remove total row
colnames(data) <- c("Education", "Men", "Women")

# Long category names wrapped onto several lines for use as axis labels.
label_map <- c(
  "Less than high school graduate" = "Less than\nhigh school\ngraduate",
  "High school graduate (includes equivalency)" = "High school\ngraduate",
  "Some college or associate's degree" = "Some college or\nassociate's degree",
  "Bachelor's degree" = "Bachelor's\ndegree",
  "Graduate or professional degree" = "Graduate or\nprofessional degree"
)

data_clean <- data %>%
  mutate(
    Education = trimws(Education),
    Men = as.numeric(Men),
    Women = as.numeric(Women),
    Gap = Men - Women
  )

# Relabel by *name* while the column is still character. Indexing `label_map`
# with a factor would use the factor's integer codes instead of its labels and
# return a plain character vector, silently discarding the ordering below.
# Names missing from `label_map` keep their original text instead of becoming NA.
wrapped_labels <- unname(label_map[data_clean$Education])
data_clean$Education <- coalesce(wrapped_labels, data_clean$Education)

# Order the categories by men's earnings; this must come after relabelling so
# the factor (and therefore the axis order) survives into the plot data.
data_clean$Education <- fct_reorder(data_clean$Education, data_clean$Men)

# One row per education level and gender for plotting.
data_long <- pivot_longer(
  data_clean,
  cols = c("Men", "Women"),
  names_to = "Gender",
  values_to = "Income"
)
# Dodged vertical bars: median annual earnings by education level and gender,
# with the dollar amount printed above each bar.
ggplot(data_long, aes(x = Education, y = Income, fill = Gender)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7) +
  geom_text(
    # trim = TRUE: format() otherwise pads every value to a common width,
    # which yields labels such as "$ 35,000" next to "$100,000".
    aes(label = paste0("$", format(Income, big.mark = ",", trim = TRUE))),
    position = position_dodge(width = 0.8),
    vjust = -0.3,
    size = 3
  ) +
  scale_fill_manual(values = c("Men" = "steelblue", "Women" = "salmon")) +
  # Fixed 0-130,000 axis with no padding, ticks every 25,000.
  scale_y_continuous(
    name = NULL,
    breaks = seq(0, 130000, by = 25000),
    limits = c(0, 130000),
    expand = c(0, 0)
  ) +
  labs(
    subtitle = "Median Annual Earnings by Educational Level and Gender, 2023",
    x = NULL,
    fill = NULL # removes 'Gender' from legend title
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.subtitle = element_text(size = 14, hjust = .25),
    legend.position = "bottom",
    legend.direction = "horizontal",
    panel.grid.major.x = element_blank()
  )

Highest paying jobs
# Load libraries
library(ggplot2)
library(dplyr)
library(tidyr)
# Gender composition of the highest-paying occupations: one 0-100% bar per
# occupation, split into a female segment and a male segment.
data <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o1.csv")
data <- data %>%
  mutate(percent_men = 100 - percent_women) %>%
  arrange(percent_women) # bars are ordered by the share of women

# Each occupation contributes two rectangles: [0, % women] and [% women, 100].
female <- data %>%
  mutate(Gender = "Female", start = 0, end = percent_women)
male <- data %>%
  mutate(Gender = "Male", start = percent_women, end = 100)
data_long <- bind_rows(female, male)
data_long$Gender <- factor(data_long$Gender, levels = c("Female", "Male"))
data_long$occupation <- factor(data_long$occupation, levels = unique(data$occupation))

occupation_levels <- levels(data_long$occupation)
# Only the top bar (last factor level) carries the in-bar "Female"/"Male" text.
top_bar <- subset(
  data_long,
  occupation == occupation_levels[length(occupation_levels)]
)

ggplot(data_long) +
  geom_rect(aes(
    xmin = start, xmax = end,
    ymin = as.numeric(occupation) - 0.35,
    ymax = as.numeric(occupation) + 0.35,
    fill = Gender
  )) +
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_vline(
    xintercept = c(25, 50, 75),
    color = "gray40", linetype = "dashed", linewidth = 0.4
  ) +
  # One layer labels both segments of the top bar at their midpoints.
  geom_text(
    data = top_bar,
    aes(
      x = (start + end) / 2,
      y = as.numeric(occupation),
      label = as.character(Gender)
    ),
    color = "black", size = 3
  ) +
  # The y axis is numeric (factor codes); relabel the ticks with the names.
  scale_y_continuous(
    breaks = seq_along(occupation_levels),
    labels = occupation_levels,
    expand = c(0, 0)
  ) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(
    title = "Gender composition of the Highest-paying U.S. Occupations, 2023",
    x = NULL,
    y = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = .15, size = 13, margin = margin(b = 15)),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_text(size = 10, color = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(t = 30, r = 30, b = 20, l = 20)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Lowest paying jobs
library(ggplot2)
library(dplyr)
library(tidyr)
# Gender composition of the lowest-paying occupations: one 0-100% bar per
# occupation, split into a female segment and a male segment.
data <- read.csv("C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o2.csv")
data <- data %>%
  mutate(
    # Share of women from the raw counts, rounded to one decimal place.
    percent_women = round((total_women / total) * 100, 1),
    percent_men = 100 - percent_women
  ) %>%
  arrange(percent_women) # bars are ordered by the share of women

# Each occupation contributes two rectangles: [0, % women] and [% women, 100].
female <- data %>%
  mutate(Gender = "Female", start = 0, end = percent_women)
male <- data %>%
  mutate(Gender = "Male", start = percent_women, end = 100)
data_long <- bind_rows(female, male)
data_long$Gender <- factor(data_long$Gender, levels = c("Female", "Male"))
data_long$occupation <- factor(data_long$occupation, levels = unique(data$occupation))

occupation_levels <- levels(data_long$occupation)
# Only the top bar (last factor level) carries the in-bar "Female"/"Male" text.
top_bar <- subset(
  data_long,
  occupation == occupation_levels[length(occupation_levels)]
)

ggplot(data_long) +
  geom_rect(aes(
    xmin = start, xmax = end,
    ymin = as.numeric(occupation) - 0.35,
    ymax = as.numeric(occupation) + 0.35,
    fill = Gender
  )) +
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_vline(
    xintercept = c(25, 50, 75),
    color = "gray40", linetype = "dashed", linewidth = 0.4
  ) +
  # One layer labels both segments of the top bar at their midpoints.
  geom_text(
    data = top_bar,
    aes(
      x = (start + end) / 2,
      y = as.numeric(occupation),
      label = as.character(Gender)
    ),
    color = "black", size = 3
  ) +
  # The y axis is numeric (factor codes); relabel the ticks with the names.
  scale_y_continuous(
    breaks = seq_along(occupation_levels),
    labels = occupation_levels,
    expand = c(0, 0)
  ) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(
    # Space added after the comma to match the highest-paying chart's title.
    title = "Gender composition of the Lowest-paying U.S. Occupations, 2023",
    x = NULL,
    y = NULL
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = .55, size = 13, margin = margin(b = 15)),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_text(size = 10, color = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = margin(t = 30, r = 30, b = 20, l = 20)
  )

# Load libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(patchwork)
# Raw occupation tables feeding the combined figure: `high` is used for the
# highest-paying panel, `low` for the lowest-paying panel.
high <- read.csv(
  file = "C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o1.csv"
)
low <- read.csv(
  file = "C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\o2.csv"
)
#' Build the bar segments for a gender-composition chart.
#'
#' @param data Data frame with columns `occupation`, `total_women` and `total`.
#' @return `data` sorted by `percent_women` and stacked twice: a "Female" copy
#'   spanning `start = 0` to `end = percent_women`, then a "Male" copy spanning
#'   `percent_women` to 100. `Gender` and `occupation` are factors; occupation
#'   levels follow the sorted row order.
prepare_plot_data <- function(data) {
  ranked <- arrange(
    mutate(
      data,
      percent_women = round((total_women / total) * 100, 1),
      percent_men = 100 - percent_women
    ),
    percent_women
  )

  segments <- bind_rows(
    mutate(ranked, Gender = "Female", start = 0, end = percent_women),
    mutate(ranked, Gender = "Male", start = percent_women, end = 100)
  )

  segments$Gender <- factor(segments$Gender, levels = c("Female", "Male"))
  segments$occupation <- factor(
    segments$occupation,
    levels = unique(segments$occupation)
  )
  segments
}
# Segment tables (two rows per occupation) for each panel of the figure.
df_high <- prepare_plot_data(data = high)
df_low <- prepare_plot_data(data = low)
# Top panel: gender split of each highest-paying occupation as a 0-100% bar.
high_levels <- levels(df_high$occupation)
# Only the top bar (last factor level) carries the in-bar "Female"/"Male" text.
high_top_bar <- subset(
  df_high,
  occupation == high_levels[length(high_levels)]
)

plot_high <- ggplot(df_high) +
  geom_rect(aes(
    xmin = start, xmax = end,
    ymin = as.numeric(occupation) - 0.35,
    ymax = as.numeric(occupation) + 0.35,
    fill = Gender
  )) +
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_vline(
    xintercept = c(25, 50, 75),
    color = "gray40", linetype = "dashed", linewidth = 0.3
  ) +
  # One layer labels both segments of the top bar at their midpoints.
  geom_text(
    data = high_top_bar,
    aes(
      x = (start + end) / 2,
      y = as.numeric(occupation),
      label = as.character(Gender)
    ),
    color = "black", size = 2.5
  ) +
  # The y axis is numeric (factor codes); relabel the ticks with the names.
  scale_y_continuous(
    breaks = seq_along(high_levels),
    labels = high_levels,
    expand = c(0, 0)
  ) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(subtitle = "Gender composition of highest-paying occupations") +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(size = 10, hjust = 0),
    plot.margin = margin(t = 15, r = 20, b = 5, l = 10)
  )
# Bottom panel: gender split of each lowest-paying occupation as a 0-100% bar.
low_levels <- levels(df_low$occupation)
# Only the top bar (last factor level) carries the in-bar "Female"/"Male" text.
low_top_bar <- subset(
  df_low,
  occupation == low_levels[length(low_levels)]
)

plot_low <- ggplot(df_low) +
  geom_rect(aes(
    xmin = start, xmax = end,
    ymin = as.numeric(occupation) - 0.35,
    ymax = as.numeric(occupation) + 0.35,
    fill = Gender
  )) +
  # `linewidth` replaces the `size` argument deprecated for lines in
  # ggplot2 3.4.0.
  geom_vline(
    xintercept = c(25, 50, 75),
    color = "gray40", linetype = "dashed", linewidth = 0.3
  ) +
  # One layer labels both segments of the top bar at their midpoints.
  geom_text(
    data = low_top_bar,
    aes(
      x = (start + end) / 2,
      y = as.numeric(occupation),
      label = as.character(Gender)
    ),
    color = "black", size = 2.5
  ) +
  # The y axis is numeric (factor codes); relabel the ticks with the names.
  scale_y_continuous(
    breaks = seq_along(low_levels),
    labels = low_levels,
    expand = c(0, 0)
  ) +
  scale_x_continuous(labels = scales::percent_format(scale = 1), expand = c(0, 2)) +
  scale_fill_manual(values = c("Female" = "#29AB87", "Male" = "#E1573A")) +
  labs(subtitle = "Gender composition of lowest-paying occupations") +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text.x = element_text(size = 7),
    axis.text.y = element_text(size = 7),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.subtitle = element_text(size = 10, hjust = 0),
    plot.margin = margin(t = 10, r = 20, b = 20, l = 10)
  )
# Stack the two panels vertically (patchwork `/`), then attach the shared
# title and subtitle for the whole figure.
final_plot <- (plot_high / plot_low) +
  plot_annotation(
    title = "Enduring U.S. divide between men and women at work",
    subtitle = "Gender composition of the highest- and the lowest-paying U.S. occupations, 2023",
    theme = theme(
      plot.title = element_text(size = 12, face = "bold", hjust = .75),
      plot.subtitle = element_text(
        size = 10,
        hjust = .9,
        margin = margin(b = 10)
      )
    )
  )
# Render the combined figure.
final_plot

Women’s weekly median income by race-ethnicity
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Women's median weekly earnings: one row per occupational group and one
# column per race/ethnicity (every column is read as text).
data <- read_csv(
  file = "C:\\Users\\anami\\OneDrive\\Documents\\Poverty&Inequality\\Project\\race-weeklyearning.csv"
)
## Rows: 7 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Occupations, Asian, white, African American, Hispanics
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Code ("1"-"7") assigned to each occupational group; any name not listed
# here maps to NA.
occupation_codes <- c(
  "Management, business, and financial operations occupations" = "1",
  "Professional and related occupations" = "2",
  "All Occupations" = "3",
  "Natural resources, construction, and maintenance occupations" = "4",
  "Sales and office occupations" = "5",
  "Production, transportation, and material moving occupations" = "6",
  "Service occupations" = "7"
)
# Line-wrapped axis label for each code.
occupation_labels <- c(
  "1" = "Management, business,\nand financial operations",
  "2" = "Professional and related\noccupations",
  "3" = "All Occupations",
  "4" = "Natural resources,\nconstruction and maintenance",
  "5" = "Sales and office\noccupations",
  "6" = "Production, transportation,\nand material moving",
  "7" = "Service occupations"
)
data <- data %>%
  mutate(
    Occupation_Num = unname(occupation_codes[Occupations]),
    Occupation_Label = unname(occupation_labels[Occupation_Num])
  )

# Long format: one row per occupational group and race/ethnicity.
race_columns <- c("Asian", "white", "African American", "Hispanics")
data_long <- data %>%
  pivot_longer(
    cols = all_of(race_columns),
    names_to = "Race",
    values_to = "Earnings"
  ) %>%
  mutate(
    # Earnings arrive as text; keep only the numeric part.
    Earnings = parse_number(Earnings),
    # Harmonise the race labels and fix their order.
    Race = factor(
      Race,
      levels = c("Hispanics", "African American", "white", "Asian"),
      labels = c("Hispanic", "African American", "White", "Asian")
    ),
    # Reverse the order of first appearance so the first row in the file
    # gets the highest factor level.
    Occupation_Label = factor(
      Occupation_Label,
      levels = rev(unique(Occupation_Label))
    )
  )
# Dodged horizontal bars: women's median weekly earnings by race/ethnicity
# within each occupational group.
# Bars are drawn at the actual earnings. The previous `Earnings * 0.8` did not
# change the look of the bars (the axis auto-fits the data); it only made the
# x-axis ticks disagree with the printed dollar labels.
ggplot(data_long, aes(x = Earnings, y = Occupation_Label, fill = Race)) +
  geom_col(position = position_dodge(width = 0.7), width = 0.6) +
  # Only the Hispanic and Asian bars get a value label. All rows stay in the
  # layer (blank label for the other races) with an explicit `group = Race`,
  # so the text is dodged into the same four slots as the bars; dodging a
  # two-race subset would use two slots and shift the labels off their bars.
  geom_text(
    aes(
      label = ifelse(
        Race %in% c("Hispanic", "Asian"),
        paste0("$", Earnings),
        ""
      ),
      group = Race
    ),
    position = position_dodge(width = 0.7),
    hjust = -0.25, # a little farther from bar
    size = 2.5
  ) +
  scale_fill_manual(values = c(
    "Asian" = "#7B3294",
    "White" = "#0571B0",
    "African American" = "#4D4D4D",
    "Hispanic" = "#CA0020"
  )) +
  # Let the value labels extend past the panel edge into the right margin.
  coord_cartesian(clip = "off") +
  theme_minimal(base_size = 12) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 10, lineheight = 0.95, hjust = 1, margin = margin(r = 3)), # closer to bars
    panel.grid.major.y = element_blank(),
    # Horizontal legend bottom-left. Numeric `legend.position` was deprecated
    # in ggplot2 3.5.0; the coordinates now go in `legend.position.inside`.
    legend.position = "inside",
    legend.position.inside = c(0.05, -0.05),
    legend.justification = c("left", "top"),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.text = element_text(size = 9),
    plot.title = element_text(size = 12, hjust = 1),
    plot.margin = margin(t = 20, r = 50, b = 20, l = 30)
  ) +
  ggtitle("Median Weekly Earnings for Women by Race/Ethnicity for Occupational Groups")
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
