Immigrant Top Occupations

Author

Kaitlan (based on CRG)

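The IPUMS extract was restricted to Texas before downloading. Unlike Coda’s code, this analysis does not restrict to a pre-selected set of occupations. Instead, it ranks all occupations and keeps the 15 that employ the largest share of workers classified here as unauthorized immigrants.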

Data: 2023 5-year ACS (2019–2023)

library(ipumsr)
library(dplyr)
library(writexl)
library(ggplot2)
library(survey)

# Load the IPUMS extract: the DDI codebook (.xml) describes the layout of the
# fixed-width microdata file (.dat), which is then read using that metadata.
ddi <- read_ipums_ddi("usa_00054.xml")
dat1 <- read_ipums_micro(
  ddi,
  data_file = "usa_00054.dat",
  verbose = FALSE
)

# Label stripping is intentionally left disabled; re-enable if plain
# numeric/character columns are needed downstream.
# dat1 <- zap_labels(dat1)

# Build the analysis sample: assign each person an immigration-status group
# from the IPUMS CITIZEN variable and keep only records that carry an
# occupation code (OCC > 0).
#
# IPUMS USA CITIZEN codes:
#   0 = N/A (persons who are not foreign-born, i.e. the native-born population)
#   1 = Born abroad of American parents
#   2 = Naturalized citizen
#   3 = Not a citizen
#   4 = Not a citizen, but has received first papers
#   5 = Foreign born, citizenship status not reported (left as NA and dropped)
#
# Code 0 has to be mapped to "U.S. Born". Without it, every native-born person
# falls through to NA and is filtered out, leaving the "U.S. Born" group with
# only those born abroad to American parents.
#
# NOTE(review): "Unauthorized Immigrant" is assigned to ALL non-citizens
# (CITIZEN == 3). The ACS does not record legal status, so this group also
# contains lawful permanent residents and visa holders -- confirm that this
# label/methodology is intended.
tex_dat <- dat1 |>
  mutate(immigration_status = case_when(
    CITIZEN == 0 ~ "U.S. Born",                        # N/A: native-born
    CITIZEN == 1 ~ "U.S. Born",                        # Born abroad of American parents
    CITIZEN == 2 ~ "Authorized Immigrant",             # Naturalized citizen
    CITIZEN == 4 ~ "Authorized Immigrant",             # First papers
    CITIZEN == 3 ~ "Unauthorized Immigrant",           # Not a citizen
    TRUE ~ NA_character_
  )) |>
  filter(!is.na(immigration_status) & OCC > 0) |>
  mutate(occupation = as.character(OCC))  # for flexibility in plotting and filtering

library(scales)

Warning: package 'scales' was built under R version 4.2.3

# Weighted worker count for every occupation x immigration-status cell
# (person weights in PERWT are summed, so counts are population estimates).
occupation_summary <- summarise(
  group_by(tex_dat, occupation, immigration_status),
  worker_count = sum(PERWT, na.rm = TRUE),
  .groups = "drop"
)

# Denominator: weighted workers in each immigration-status group,
# summed over all occupations.
group_totals <- summarise(
  group_by(occupation_summary, immigration_status),
  total_group_workers = sum(worker_count, na.rm = TRUE),
  .groups = "drop"
)

# Attach the group denominator to each cell and express the cell as a
# share (0-1) of its own immigration-status group.
occupation_summary <- left_join(
  occupation_summary,
  group_totals,
  by = "immigration_status"
) |>
  mutate(percent_within_group = worker_count / total_group_workers)

# Occupation codes of the 15 occupations holding the largest share of the
# "Unauthorized Immigrant" group, ordered from largest to smallest share.
top_occupations <- occupation_summary |>
  filter(immigration_status == "Unauthorized Immigrant") |>
  arrange(desc(percent_within_group)) |>
  slice_head(n = 15) |>
  pull(occupation)

# Echo the codes so the hand-written label lookup can be checked against them.
print(top_occupations)

 [1] "6260" "4230" "4020" "4220" "6230" "9130" "4251" "1021" "6410" "440" 
[11] "9620" "4720" "4110" "4760" "8140"

# Human-readable labels for the top-15 occupation codes printed above.
# The codes are listed in rank order; that order is reused later as the
# plotting order of the occupation axis.
#
# Code 440 is "Other managers" in the 2018 Census occupation scheme.
# "Gambling services workers" is code 4400 and is not in the top-15 list.
# NOTE(review): labels are hand-entered -- verify against the IPUMS OCC
# codebook for the extract's sample years.
occ_lookup <- tibble::tibble(
  OCC = c(6260, 4230, 4020, 4220, 6230, 9130, 4251, 1021, 6410, 440, 9620, 4720, 4110, 4760, 8140),
  occupation_label = c(
    "Construction laborers",
    "Maids and housekeeping cleaners",
    "Cooks",
    "Janitors and building cleaners",
    "Carpenters",
    "Driver/sales workers and truck drivers",
    "Landscaping and groundskeeping workers",
    "Software developers",
    "Painters and paperhangers",
    "Other managers",
    "Laborers and freight, stock, and material movers, hand",
    "Cashiers",
    "Waiters and waitresses",
    "Retail salespersons",
    "Welding, soldering, and brazing workers"
  )
)

# The lookup is copied by hand from printed output, so it silently goes stale
# if the data or the ranking changes. Warn when a top occupation has no label
# (it would otherwise show up as NA in the plot and table).
unlabelled_occ <- setdiff(as.numeric(top_occupations), occ_lookup$OCC)
if (length(unlabelled_occ) > 0) {
  warning(
    "No label in occ_lookup for OCC code(s): ",
    paste(unlabelled_occ, collapse = ", "),
    call. = FALSE
  )
}

# Plot-ready data: the top-15 occupations for all three groups, with a
# readable label attached to each occupation code.
brookings_style_data <- occupation_summary |>
  # Numeric copy of the code so it can be matched to the numeric lookup key.
  mutate(occupation_code = as.numeric(occupation)) |>
  filter(occupation %in% top_occupations) |>
  left_join(occ_lookup, by = c("occupation_code" = "OCC")) |>
  mutate(
    # Reversed levels so the first lookup entry is drawn at the top of the
    # y axis (ggplot places the first factor level at the bottom).
    occupation_label = factor(
      occupation_label,
      levels = rev(occ_lookup$occupation_label)
    )
  )

# Horizontal bar chart: for each top-15 occupation, the share of every
# immigration-status group that works in that occupation.
#
# The title carries the period covered by the 2023 5-year ACS (2019-2023)
# rather than a single year.
#
# NOTE(review): the bars are stacked, so total bar length is the SUM of three
# shares that have different denominators; only the individual segments are
# directly interpretable. Confirm stacking (vs. position = "dodge") is wanted.
ggplot(
  brookings_style_data,
  aes(x = percent_within_group, y = occupation_label, fill = immigration_status)
) +
  geom_col(position = "stack") +
  scale_x_continuous(labels = percent_format()) +  # shares are 0-1 fractions
  scale_fill_manual(values = c(
    "U.S. Born" = "#003f5c",
    "Authorized Immigrant" = "#7a5195",
    "Unauthorized Immigrant" = "#ef5675"
  )) +
  labs(
    title = "Top 15 Occupations for Unauthorized Immigrant Workers in Texas (2019-2023)",
    subtitle = "Each bar shows the share of each immigration group working in these occupations",
    x = "Percent of Group Working in This Occupation",
    y = "Occupation",
    fill = "Immigration Status"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

library(tidyr)

Warning: package 'tidyr' was built under R version 4.2.3


Attaching package: 'tidyr'

The following objects are masked from 'package:Matrix':

    expand, pack, unpack

# Wide, presentation-ready table: one row per occupation, with a rounded
# worker count and a formatted percentage column for each immigration group.
final_table <- brookings_style_data |>
  # Levels were reversed for plotting, so descending order restores the
  # original ranking from the lookup table.
  arrange(desc(occupation_label)) |>
  mutate(
    worker_count = round(worker_count),
    percent_within_group = scales::percent(percent_within_group, accuracy = 0.1)
  ) |>
  select(occupation_label, immigration_status, worker_count, percent_within_group) |>
  pivot_wider(
    names_from = immigration_status,
    values_from = c(worker_count, percent_within_group),
    names_glue = "{immigration_status}_{.value}"
  )

# Print the table so it appears in the rendered document
final_table

# A tibble: 15 × 7
   occupation_label                Authorized Immigrant…¹ U.S. Born_worker_cou…²
   <fct>                                            <dbl>                  <dbl>
 1 Construction laborers                            20606                   2989
 2 Maids and housekeeping cleaners                  21996                   1544
 3 Cooks                                            22666                   3367
 4 Janitors and building cleaners                   34396                   2665
 5 Carpenters                                       11953                   1621
 6 Driver/sales workers and truck…                  54957                   5047
 7 Landscaping and groundskeeping…                  10392                   1171
 8 Software developers                              31067                   2969
 9 Painters and paperhangers                         5875                    820
10 Gambling services workers                        46777                   7350
11 Laborers and freight, stock, a…                  17641                   3704
12 Cashiers                                         26082                   5179
13 Waiters and waitresses                           14812                   3777
14 Retail salespersons                              28411                   5240
15 Welding, soldering, and brazin…                  10932                   1234
# ℹ abbreviated names: ¹`Authorized Immigrant_worker_count`,
#   ²`U.S. Born_worker_count`
# ℹ 4 more variables: `Unauthorized Immigrant_worker_count` <dbl>,
#   `Authorized Immigrant_percent_within_group` <chr>,
#   `U.S. Born_percent_within_group` <chr>,
#   `Unauthorized Immigrant_percent_within_group` <chr>

# Export the wide summary table as an Excel workbook in the working directory
write_xlsx(
  x = final_table,
  path = "5y_unauthorized_top_occupations_table.xlsx"
)