library(ipumsr)
library(dplyr)
library(writexl)
library(ggplot2)
library(survey)

# Immigrant Top Occupations
# I filtered for Texas before downloading the extract. Unlike Coda’s code, I do not restrict the analysis to selected occupations; instead, I take the top 15 across all occupations.
# Data: 2023 5-year ACS
# ---- Load the IPUMS extract ----
# The DDI codebook (.xml) describes the layout of the accompanying .dat file.
ddi <- read_ipums_ddi("usa_00054.xml")
dat1 <- read_ipums_micro(ddi, data_file = "usa_00054.dat", verbose = FALSE)
# dat1 <- zap_labels(dat1)

# ---- Classify every person by immigration status ----
# IPUMS USA CITIZEN codes: 0 = N/A, 1 = born abroad of American parents,
# 2 = naturalized citizen, 3 = not a citizen, 4 = not a citizen but has
# received first papers, 5 = foreign born, status not reported.
# People born in the U.S. are coded 0 (N/A), NOT 1, so code 0 must be mapped
# explicitly; otherwise it falls through to NA and is dropped by the filter.
# NOTE(review): CITIZEN == 3 covers all non-citizens, so "Unauthorized
# Immigrant" is a proxy label here -- confirm that this is intended.
tex_dat <- dat1 |>
  mutate(immigration_status = case_when(
    CITIZEN == 0 ~ "U.S. Born", # N/A, i.e. not foreign-born
    CITIZEN == 1 ~ "U.S. Born", # Born abroad of American parents
    CITIZEN == 2 ~ "Authorized Immigrant", # Naturalized citizen
    CITIZEN == 4 ~ "Authorized Immigrant", # First papers
    CITIZEN == 3 ~ "Unauthorized Immigrant", # Not a citizen
    TRUE ~ NA_character_
  )) |>
  # Keep classified people who report an occupation code (OCC > 0).
  filter(!is.na(immigration_status) & OCC > 0) |>
  mutate(occupation = as.character(OCC)) # for flexibility in plotting and filtering

library(scales)
#> Warning: package 'scales' was built under R version 4.2.3
# Weighted head count for every occupation x immigration-status cell.
# count(wt = ) sums the person weights (PERWT) and skips missing weights.
occupation_summary <- tex_dat |>
  count(occupation, immigration_status, wt = PERWT, name = "worker_count")

# Weighted size of each immigration-status group (denominator for the shares).
group_totals <- occupation_summary |>
  count(immigration_status, wt = worker_count, name = "total_group_workers")

# Attach each group's total, then express every cell as a share of its group.
occupation_summary <- left_join(
  occupation_summary,
  group_totals,
  by = "immigration_status"
) |>
  mutate(percent_within_group = worker_count / total_group_workers)

# Get top 15 occupations for unauthorized immigrants
# Rank occupations by their share of the "Unauthorized Immigrant" group and
# keep the occupation codes of the 15 largest, in descending order of share.
top_occupations <- occupation_summary |>
  filter(immigration_status == "Unauthorized Immigrant") |>
  arrange(desc(percent_within_group)) |>
  slice_head(n = 15) |>
  pull(occupation)

print(top_occupations)
#>  [1] "6260" "4230" "4020" "4220" "6230" "9130" "4251" "1021" "6410" "440"
#> [11] "9620" "4720" "4110" "4760" "8140"
# ---- Occupation labels ----
# Human-readable labels for the 15 occupation codes in `top_occupations`,
# listed in the same rank order as that vector. The labels are hard-coded, so
# they must be revisited whenever the ranking changes.
occ_lookup <- tibble::tibble(
  OCC = c(6260, 4230, 4020, 4220, 6230, 9130, 4251, 1021, 6410, 440, 9620, 4720, 4110, 4760, 8140),
  occupation_label = c(
    "Construction laborers",
    "Maids and housekeeping cleaners",
    "Cooks",
    "Janitors and building cleaners",
    "Carpenters",
    "Driver/sales workers and truck drivers",
    "Landscaping and groundskeeping workers",
    "Software developers",
    "Painters and paperhangers",
    # Code 440 (0440) is "Other managers" in the 2018-onward occupation
    # scheme; "Gambling services workers" is code 4400.
    "Other managers",
    "Laborers and freight, stock, and material movers, hand",
    "Cashiers",
    "Waiters and waitresses",
    "Retail salespersons",
    "Welding, soldering, and brazing workers"
  )
)

# Fail loudly if the computed ranking contains a code without a label; an
# unmatched code would otherwise become an NA factor level in the plot and
# table without any warning.
stopifnot(
  "Every code in top_occupations needs a label in occ_lookup" =
    all(top_occupations %in% as.character(occ_lookup$OCC))
)

# ---- Plot data ----
# Keep the top occupations for all three groups, attach labels, and order the
# label factor in reverse rank so the top-ranked occupation is drawn at the
# top of the y axis.
brookings_style_data <- occupation_summary |>
  filter(occupation %in% top_occupations) |>
  mutate(occupation_code = as.numeric(occupation)) |>
  left_join(occ_lookup, by = c("occupation_code" = "OCC")) |>
  mutate(occupation_label = factor(occupation_label, levels = rev(occ_lookup$occupation_label)))

# ---- Plot ----
# Horizontal stacked bars: each segment is the share of one immigration group
# working in that occupation.
ggplot(
  brookings_style_data,
  aes(x = percent_within_group, y = occupation_label, fill = immigration_status)
) +
  geom_col(position = "stack") +
  scale_x_continuous(labels = percent_format()) +
  scale_fill_manual(values = c(
    "U.S. Born" = "#003f5c",
    "Authorized Immigrant" = "#7a5195",
    "Unauthorized Immigrant" = "#ef5675"
  )) +
  labs(
    # The extract is described as the 2023 5-year ACS, so the title names that
    # sample rather than "2022".
    title = "Top 15 Occupations for Unauthorized Immigrant Workers in Texas (2023 ACS 5-Year)",
    subtitle = "Each bar shows the share of each immigration group working in these occupations",
    x = "Percent of Group Working in This Occupation",
    y = "Occupation",
    fill = "Immigration Status"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

library(tidyr)
#> Warning: package 'tidyr' was built under R version 4.2.3
#> Attaching package: 'tidyr'
#> The following objects are masked from 'package:Matrix':
#>     expand, pack, unpack
# Build a presentation-ready wide table: one row per occupation, with a
# rounded weighted count column and a formatted percent column for each
# immigration-status group.
final_table <- brookings_style_data |>
  mutate(
    worker_count = round(worker_count),
    # Percent becomes display text here (e.g. "12.3%"), not a number.
    percent_within_group = scales::percent(percent_within_group, accuracy = 0.1)
  ) |>
  select(
    occupation_label,
    immigration_status,
    worker_count,
    percent_within_group
  ) |>
  # Sorting the factor in descending order restores the rank order of
  # `occ_lookup`, because the factor levels were stored reversed.
  arrange(desc(occupation_label)) |>
  pivot_wider(
    names_from = immigration_status,
    values_from = c(worker_count, percent_within_group),
    names_glue = "{immigration_status}_{.value}"
  )
# View the table in the document
final_table
#> # A tibble: 15 × 7
#> occupation_label Authorized Immigrant…¹ U.S. Born_worker_cou…²
#> <fct> <dbl> <dbl>
#> 1 Construction laborers 20606 2989
#> 2 Maids and housekeeping cleaners 21996 1544
#> 3 Cooks 22666 3367
#> 4 Janitors and building cleaners 34396 2665
#> 5 Carpenters 11953 1621
#> 6 Driver/sales workers and truck… 54957 5047
#> 7 Landscaping and groundskeeping… 10392 1171
#> 8 Software developers 31067 2969
#> 9 Painters and paperhangers 5875 820
#> 10 Gambling services workers 46777 7350
#> 11 Laborers and freight, stock, a… 17641 3704
#> 12 Cashiers 26082 5179
#> 13 Waiters and waitresses 14812 3777
#> 14 Retail salespersons 28411 5240
#> 15 Welding, soldering, and brazin… 10932 1234
#> # ℹ abbreviated names: ¹`Authorized Immigrant_worker_count`,
#> # ²`U.S. Born_worker_count`
#> # ℹ 4 more variables: `Unauthorized Immigrant_worker_count` <dbl>,
#> # `Authorized Immigrant_percent_within_group` <chr>,
#> # `U.S. Born_percent_within_group` <chr>,
#> # `Unauthorized Immigrant_percent_within_group` <chr>
# Export the wide summary table as an Excel workbook in the working directory.
write_xlsx(
  x = final_table,
  path = "5y_unauthorized_top_occupations_table.xlsx"
)