Setup

suppressPackageStartupMessages({
  library(haven); library(dplyr); library(tidyr); library(purrr); library(stringr)
  library(ggplot2); library(metafor); library(Hmisc); library(weights); library(gt)
})
proj_dir <- "/media/emil/8tb_ssd_3/projects/LAPOP"
gm_file <- file.path(proj_dir,
  "data/Grand_Merge_2004-2023_LAPOP_AmericasBarometer_v1.0_FREEen.sav")
country_files <- list.files(file.path(proj_dir, "data/LAPOP"),
  pattern = "^2004-2023 .*\\.sav$", full.names = TRUE)
country_files <- country_files[!grepl("Grand Merge|Nicaragua", country_files)]

Knowledge items and pass rates

items_tbl <- tibble::tribble(
  ~var,   ~question,                                              ~correct,       ~pct,
  "gi1",  "Name of the current US President",                     "Obama",        86.8,
  "gi2",  "Name of the country's parliament/senate leader",       "varies",       51.9,
  "gi3",  "How many states does the US have?",                    "50",           71.4,
  "gi4",  "Presidential/PM/government's term length in [country]","varies",       86.6,
  "gi5",  "Name of the President of Brazil",                      "varies",       35.2,
  "gi7",  "Number of seats in country's lower house",             "varies",       17.5,
  "gix4", "On which continent is Nigeria?",                       "Africa",       57.8
)
items_tbl %>% gt() %>%
  tab_header(title = "LAPOP knowledge items, with pass rate across all waves") %>%
  cols_label(var = "Var", question = "Question", correct = "Correct", pct = "% correct")
LAPOP knowledge items, with pass rate across all waves
Var Question Correct % correct
gi1 Name of the current US President Obama 86.8
gi2 Name of the country's parliament/senate leader varies 51.9
gi3 How many states does the US have? 50 71.4
gi4 Presidential/PM/government's term length in [country] varies 86.6
gi5 Name of the President of Brazil varies 35.2
gi7 Number of seats in country's lower house varies 17.5
gix4 On which continent is Nigeria? Africa 57.8

Skin color measurement

LAPOP colorr is interviewer-rated using a printed 11-tone palette (“Paleta de Colores”), at the end of the interview, without asking the respondent. 1 = very light, 11 = very dark, 97 = could not classify.

Load data and score per country × wave

needed_gm <- c("pais","year","wave","wt","colorr","gi1","gi4")
d_gm <- read_spss(gm_file, col_select = all_of(needed_gm))
needed_cf <- c("pais","year","wave","wt","colorr",
               "gi1","gi2","gi3","gi4","gi5","gi7","gix4")
country_data <- map(country_files, function(f) {
  cols_in <- read_spss(f, n_max = 0) %>% names()
  read_spss(f, col_select = all_of(intersect(needed_cf, cols_in)))
})
gi7_correct <- c(`1`=500,`2`=158,`3`=84,`4`=128,`5`=92,`6`=57,`7`=71,`8`=166,
                 `9`=137,`10`=130,`11`=130,`12`=80,`13`=120,`14`=99,`15`=513,
                 `16`=164,`17`=257,`21`=178,`22`=99,`23`=63,`24`=65,`25`=41,
                 `26`=31,`27`=51,`28`=38,`40`=435)
country_name <- c(`1`="Mexico",`2`="Guatemala",`3`="El Salvador",`4`="Honduras",
                  `5`="Nicaragua",`6`="Costa Rica",`7`="Panama",`8`="Colombia",
                  `9`="Ecuador",`10`="Bolivia",`11`="Peru",`12`="Paraguay",
                  `13`="Chile",`14`="Uruguay",`15`="Brazil",`16`="Venezuela",
                  `17`="Argentina",`21`="Dominican Republic",`22`="Haiti",
                  `23`="Jamaica",`24`="Guyana",`25`="Trinidad and Tobago",
                  `26`="Belize",`27`="Suriname",`28`="Bahamas",`30`="Grenada",
                  `40`="USA",`41`="Canada")

clean_cf <- bind_rows(country_data) %>%
  transmute(
    pais   = as.integer(zap_labels(pais)),
    year   = as.integer(zap_labels(year)),
    wt     = as.numeric(wt),
    colorr = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1    = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi2    = if_else(gi2 == 1, 1, if_else(gi2 == 2, 0, NA_real_)),
    gi3    = if_else(gi3 == 1, 1, if_else(gi3 == 2, 0, NA_real_)),
    gi4    = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    gi5    = if_else(gi5 == 1, 1, if_else(gi5 == 2, 0, NA_real_)),
    gix4   = if_else(gix4 == 1, 1, if_else(gix4 == 2, 0, NA_real_)),
    gi7    = if_else(is.na(gi7), NA_real_,
                     as.numeric(gi7 == gi7_correct[as.character(pais)]))
  )

country_pais <- unique(clean_cf$pais)
clean_gm <- d_gm %>%
  filter(!(as.integer(zap_labels(pais)) %in% country_pais)) %>%
  transmute(
    pais   = as.integer(zap_labels(pais)),
    year   = as.integer(zap_labels(year)),
    wt     = as.numeric(wt),
    colorr = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1    = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi4    = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_))
  )
score_cell <- function(df, items) {
  keep <- items[map_lgl(items, function(i) {
    x <- df[[i]]
    sum(!is.na(x)) >= 30 && length(unique(na.omit(x))) >= 2
  })]
  if (length(keep) == 0) return(NULL)
  m <- as.matrix(df[keep])
  n_row <- rowSums(!is.na(m))
  score <- ifelse(n_row == 0, NA, rowMeans(m, na.rm = TRUE))
  ok <- !is.na(score) & !is.na(df$colorr)
  if (sum(ok) < 50) return(NULL)
  s <- score[ok]; c <- df$colorr[ok]; w <- df$wt[ok]
  if (sd(s) == 0 || sd(c) == 0) return(NULL)
  r <- weights::wtd.cors(s, c, weight = w)[1, 1]
  tibble(
    n = sum(ok),
    items_used = paste(keep, collapse = "+"),
    k_items = length(keep),
    r = r,
    z = atanh(pmin(pmax(r, -0.999), 0.999)),
    se_z = 1 / sqrt(sum(ok) - 3),
    mean_score = weighted.mean(s, w),
    mean_color = weighted.mean(c, w)
  )
}

cf_results <- clean_cf %>% group_by(pais, year) %>%
  group_modify(~ score_cell(.x, c("gi1","gi2","gi3","gi4","gi5","gi7","gix4")) %||% tibble()) %>%
  ungroup()
gm_results <- clean_gm %>% group_by(pais, year) %>%
  group_modify(~ score_cell(.x, c("gi1","gi4")) %||% tibble()) %>%
  ungroup()
results <- bind_rows(cf_results, gm_results) %>%
  mutate(country = country_name[as.character(pais)]) %>%
  relocate(country, pais, year)
meta_country <- results %>%
  group_by(country) %>%
  group_modify(function(df, key) {
    if (nrow(df) == 1) {
      return(tibble(k = 1, total_n = df$n, z_mean = df$z, z_se = df$se_z,
                    tau2 = 0, items_summary = df$items_used))
    }
    fit <- metafor::rma(yi = df$z, sei = df$se_z, method = "REML")
    tibble(k = nrow(df), total_n = sum(df$n),
           z_mean = as.numeric(fit$b), z_se = fit$se, tau2 = fit$tau2,
           items_summary = paste(unique(df$items_used), collapse = " | "))
  }) %>% ungroup() %>%
  mutate(
    r_mean = tanh(z_mean),
    r_lo = tanh(z_mean - 1.96 * z_se),
    r_hi = tanh(z_mean + 1.96 * z_se)
  ) %>% arrange(r_mean)

overall <- metafor::rma(yi = results$z, sei = results$se_z, method = "REML")
overall_summary <- tibble(
  k = overall$k, total_n = sum(results$n),
  r = tanh(as.numeric(overall$b)),
  r_lo = tanh(as.numeric(overall$b) - 1.96 * overall$se),
  r_hi = tanh(as.numeric(overall$b) + 1.96 * overall$se),
  tau2 = overall$tau2, I2 = overall$I2, Q_p = overall$QEp
)
overall_summary

Forest plot

meta_plot <- meta_country %>%
  mutate(
    items_short = items_summary %>% str_replace_all("\\+", "·") %>% str_replace_all(" \\| ", "/"),
    label = sprintf("%s  (k=%d, N=%s, items=%s)",
                    country, k, formatC(total_n, big.mark = ","), items_short),
    label = factor(label, levels = rev(label))
  )

ggplot(meta_plot, aes(r_mean, label)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  geom_vline(xintercept = overall_summary$r, linetype = "dotted", color = "red") +
  geom_errorbar(aes(xmin = r_lo, xmax = r_hi), width = 0.3, orientation = "y") +
  geom_point(aes(size = sqrt(total_n))) +
  scale_size_continuous(guide = "none") +
  labs(
    x = "r(knowledge score, skin color), REML meta within country (95% CI)",
    y = NULL,
    title = "LAPOP: knowledge score × skin color, by country",
    subtitle = sprintf("Overall r = %.3f (95%% CI %.3f, %.3f), τ² = %.4f, I² = %.0f%%, k = %d cells, N = %s",
                       overall_summary$r, overall_summary$r_lo, overall_summary$r_hi,
                       overall_summary$tau2, overall_summary$I2,
                       overall_summary$k, formatC(overall_summary$total_n, big.mark = ","))
  ) +
  theme_bw(base_size = 10)

Guyana ethnic table

d_etid <- read_spss(gm_file,
  col_select = c("pais","year","wt","colorr","etid","gi1","gi4"))
guy <- d_etid %>%
  filter(as.integer(zap_labels(pais)) == 24) %>%
  mutate(
    etid_n = as.integer(zap_labels(etid)),
    colorr_clean = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    score = rowMeans(cbind(gi1c, gi4c), na.rm = TRUE),
    score = if_else(is.nan(score), NA_real_, score),
    wt = as.numeric(wt)
  )
guyana_tbl <- guy %>%
  group_by(etid_n) %>%
  summarise(n = n(),
            mean_colorr = mean(colorr_clean, na.rm = TRUE),
            mean_score = weighted.mean(score, wt, na.rm = TRUE),
            pct_gi1 = weighted.mean(gi1c, wt, na.rm = TRUE),
            pct_gi4 = weighted.mean(gi4c, wt, na.rm = TRUE),
            .groups = "drop") %>%
  filter(!is.nan(mean_colorr), n >= 30) %>%
  mutate(Group = case_when(
    etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo / Mixed",
    etid_n == 3 ~ "Indigenous (Amerindian)", etid_n == 4 ~ "Black (Afro-Guyanese)",
    etid_n == 7 ~ "Other", etid_n == 2406 ~ "East Indian (Indo-Guyanese)",
    etid_n == 2409 ~ "Chinese", TRUE ~ as.character(etid_n))) %>%
  arrange(mean_colorr) %>%
  transmute(Group, n,
            `Mean colorr` = sprintf("%.2f", mean_colorr),
            `Mean score` = sprintf("%.2f", mean_score),
            `gi1 % correct` = sprintf("%.0f%%", 100*pct_gi1),
            `gi4 % correct` = sprintf("%.0f%%", 100*pct_gi4))
guyana_tbl %>% gt() %>%
  tab_header(title = "Guyana: skin color vs knowledge by self-id ethnic group")
Guyana: skin color vs knowledge by self-id ethnic group
Group n Mean colorr Mean score gi1 % correct gi4 % correct
2408 39 3.29 0.93 100% 82%
Indigenous (Amerindian) 983 3.96 0.82 90% 75%
Chinese 50 4.20 0.91 98% 83%
Other 40 5.00 0.84 100% 55%
Mestizo / Mixed 2994 5.25 0.89 98% 79%
East Indian (Indo-Guyanese) 3100 5.33 0.87 96% 78%
NA 36 6.00 0.80 86% 60%
Black (Afro-Guyanese) 3601 7.16 0.90 99% 80%

Belize ethnic table

extra <- read_spss(gm_file,
  col_select = c("pais","q1","q2","ed","ur","idiomaq"))
bel_all <- d_etid %>%
  filter(as.integer(zap_labels(pais)) == 26) %>%
  bind_cols(extra %>% filter(as.integer(zap_labels(pais)) == 26) %>%
              select(q1, q2, ed, ur, idiomaq)) %>%
  mutate(
    etid_n = as.integer(zap_labels(etid)),
    colorr_clean = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    score = rowMeans(cbind(gi1c, gi4c), na.rm = TRUE),
    score = if_else(is.nan(score), NA_real_, score),
    lang = as_factor(idiomaq) %>% as.character(),
    lang_group = case_when(
      lang %in% c("Spanish", "Spanish (last language used)") ~ "Spanish",
      lang %in% c("English", "English (last language used)") ~ "English",
      TRUE ~ "Other/NA"),
    wt = as.numeric(wt), ed = as.numeric(ed)
  )

belize_tbl <- bel_all %>%
  group_by(etid_n) %>%
  summarise(
    n = n(),
    pct_spanish = mean(lang_group == "Spanish", na.rm = TRUE),
    pct_english = mean(lang_group == "English", na.rm = TRUE),
    mean_colorr = mean(colorr_clean, na.rm = TRUE),
    mean_ed = weighted.mean(ed, wt, na.rm = TRUE),
    mean_score = weighted.mean(score, wt, na.rm = TRUE),
    pct_gi1 = weighted.mean(gi1c, wt, na.rm = TRUE),
    pct_gi4 = weighted.mean(gi4c, wt, na.rm = TRUE),
    .groups = "drop") %>%
  filter(!is.nan(mean_colorr), n >= 20) %>%
  mutate(Group = case_when(
    etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
    etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black", etid_n == 5 ~ "Mulatto",
    etid_n == 7 ~ "Other", etid_n == 2608 ~ "Creole", etid_n == 2609 ~ "East Indian",
    etid_n == 2610 ~ "Garifuna", etid_n == 2611 ~ "Maya Ketchi",
    etid_n == 2612 ~ "Maya Mopan", etid_n == 2613 ~ "Maya Yucatec",
    etid_n == 2614 ~ "Mennonite", etid_n == 2615 ~ "Spanish",
    TRUE ~ as.character(etid_n))) %>%
  arrange(mean_colorr) %>%
  filter(Group != "NA") %>%
  transmute(Group, n,
            `% Spanish` = sprintf("%.0f%%", 100*pct_spanish),
            `% English` = sprintf("%.0f%%", 100*pct_english),
            `Mean colorr` = sprintf("%.2f", mean_colorr),
            `Mean ed (yrs)` = sprintf("%.1f", mean_ed),
            `Mean score` = sprintf("%.2f", mean_score),
            `gi1 % correct` = sprintf("%.0f%%", 100*pct_gi1),
            `gi4 % correct` = sprintf("%.0f%%", 100*pct_gi4))
belize_tbl %>% gt() %>%
  tab_header(title = "Belize: skin color, language, education and knowledge",
             subtitle = "Survey delivered bilingually; idiomaq = language of interview")
Belize: skin color, language, education and knowledge
Survey delivered bilingually; idiomaq = language of interview
Group n % Spanish % English Mean colorr Mean ed (yrs) Mean score gi1 % correct gi4 % correct
White 107 69% 31% 3.61 7.2 0.78 75% 80%
Spanish 678 52% 48% 4.54 7.0 0.80 79% 81%
Indigenous 53 38% 62% 5.02 6.3 0.86 71% 100%
Other 104 39% 61% 5.07 10.0 0.94 92% 94%
Mestizo 3463 66% 34% 5.15 7.5 0.88 90% 88%
Maya Yucatec 77 64% 36% 5.43 7.1 0.89 88% 88%
Mennonite 35 66% 34% 5.56 8.1 0.89 80% 96%
Maya Ketchi 321 22% 78% 5.58 6.5 0.77 72% 84%
Maya Mopan 212 24% 76% 5.70 7.7 0.86 83% 92%
East Indian 126 21% 79% 7.07 8.3 0.88 84% 90%
Creole 1911 19% 81% 7.14 8.8 0.90 91% 91%
Garifuna 391 16% 84% 8.05 9.3 0.93 92% 95%
Black 110 59% 41% 8.46 9.8 0.83 87% 81%
bel_all <- bel_all %>%
  mutate(
    broad = case_when(
      etid_n %in% c(1, 2615) ~ "White/Spanish", etid_n == 2 ~ "Mestizo",
      etid_n %in% c(3, 2611, 2612, 2613) ~ "Indigenous/Maya",
      etid_n %in% c(4, 2608) ~ "Black/Creole", etid_n == 2610 ~ "Garifuna",
      etid_n == 2614 ~ "Mennonite", etid_n == 2609 ~ "East Indian",
      etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other", TRUE ~ NA_character_),
    age = as.numeric(q2),
    sex = as_factor(q1) %>% as.character(),
    lang2 = factor(lang_group, levels = c("English","Spanish","Other/NA"))
  )
mdat <- bel_all %>%
  filter(!is.na(score), !is.na(broad), !is.na(age), !is.na(sex),
         !is.na(lang2), !is.na(ed)) %>%
  mutate(broad = factor(broad, levels = c("Mestizo","White/Spanish","Indigenous/Maya",
                                          "Black/Creole","Garifuna","Mennonite",
                                          "East Indian","Mulatto","Other")))
fit <- lm(score ~ lang2 + broad + age + sex + ed, data = mdat, weights = wt)
broom::tidy(fit, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), \(x) round(x, 3)))

Honduras: item-level logistic regression by wave

hon_file <- file.path(proj_dir, "data/LAPOP/2004-2023 Honduras.sav")
hon_cols <- read_spss(hon_file, n_max = 0) %>% names()
hon_keep <- intersect(
  c("pais","year","wt","colorr","etid","ur","ed","q1","q2",
    "gi1","gi3","gi4","gi7","gix4"), hon_cols)
d_hon <- read_spss(hon_file, col_select = all_of(hon_keep)) %>%
  mutate(
    year = as.integer(zap_labels(year)),
    colorr_n = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    wt = as.numeric(wt))
items_hon <- c("gi1","gi3","gi4","gi7","gix4")
hon_long <- d_hon %>%
  filter(year %in% c(2010, 2012, 2014), !is.na(colorr_n)) %>%
  mutate(
    gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi3c = if_else(gi3 == 1, 1, if_else(gi3 == 2, 0, NA_real_)),
    gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    gi7c = if_else(is.na(gi7), NA_real_, as.numeric(gi7 == 128)),
    gix4c = if_else(gix4 == 1, 1, if_else(gix4 == 2, 0, NA_real_))
  )
fit_grid <- expand.grid(year = c(2010,2012,2014), item = items_hon, stringsAsFactors = FALSE)
res_logit <- pmap_df(fit_grid, function(year, item) {
  d <- hon_long %>% filter(year == !!year) %>%
    transmute(y = .data[[paste0(item, "c")]], colorr_n, wt) %>%
    filter(!is.na(y))
  if (nrow(d) < 50 || length(unique(d$y)) < 2) return(tibble(year=year, item=item))
  fit <- glm(y ~ colorr_n, data = d, family = binomial, weights = wt)
  s <- summary(fit)
  tibble(year=year, item=item, n=nrow(d),
         pct_correct = round(100*weighted.mean(d$y, d$wt), 1),
         beta = round(coef(fit)[2], 3),
         SE = round(s$coefficients[2, 2], 3),
         z = round(s$coefficients[2, 3], 2),
         p = signif(s$coefficients[2, 4], 2),
         OR = round(exp(coef(fit)[2]), 3))
}) %>% filter(!is.na(beta)) %>% arrange(item, year)

res_logit %>% gt() %>%
  tab_header(title = "Honduras: logistic regression of (item correct) on colorr, per wave",
             subtitle = "Positive β = darker → more likely correct")
Honduras: logistic regression of (item correct) on colorr, per wave
Positive β = darker → more likely correct
year item n pct_correct beta SE z p OR
2010 gi1 1327 91.3 -0.012 0.053 -0.24 8.1e-01 0.988
2012 gi1 1468 82.6 -0.141 0.034 -4.10 4.1e-05 0.869
2014 gi1 1255 90.7 0.261 0.057 4.60 4.3e-06 1.298
2010 gi3 1478 93.0 -0.129 0.052 -2.51 1.2e-02 0.879
2010 gi4 1547 97.9 0.148 0.113 1.32 1.9e-01 1.160
2012 gi4 1644 94.8 -0.097 0.055 -1.77 7.7e-02 0.908
2014 gi4 1498 95.3 0.367 0.077 4.79 1.7e-06 1.443
2014 gi7 748 38.6 0.094 0.041 2.30 2.1e-02 1.099
2014 gix4 698 56.3 0.207 0.042 4.94 7.6e-07 1.230

Honduras: skin color by ethnicity × wave

hon_colorr <- d_hon %>%
  filter(year %in% c(2010,2012,2014), !is.na(colorr_n)) %>%
  mutate(etid_n = as.integer(zap_labels(etid)),
         broad = case_when(
           etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
           etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
           etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
           TRUE ~ NA_character_)) %>%
  filter(!is.na(broad)) %>%
  group_by(broad, year) %>%
  summarise(n = n(), mean_colorr = mean(colorr_n),
            sd_colorr = sd(colorr_n), .groups = "drop") %>%
  pivot_wider(names_from = year,
              values_from = c(n, mean_colorr, sd_colorr)) %>%
  arrange(mean_colorr_2010) %>%
  transmute(Group = broad,
            `n10` = n_2010, `n12` = n_2012, `n14` = n_2014,
            `Mean 2010` = sprintf("%.2f", mean_colorr_2010),
            `Mean 2012` = sprintf("%.2f", mean_colorr_2012),
            `Mean 2014` = sprintf("%.2f", mean_colorr_2014),
            `Δ 2014−2012` = sprintf("%+.2f",
                                    as.numeric(mean_colorr_2014) - as.numeric(mean_colorr_2012)))
hon_colorr %>% gt() %>%
  tab_header(title = "Honduras: mean colorr by self-id ethnicity × wave",
             subtitle = "Ordering preserved; spread compresses in 2014 (Black − White: 6.09 → 4.73)")
Honduras: mean colorr by self-id ethnicity × wave
Ordering preserved; spread compresses in 2014 (Black − White: 6.09 → 4.73)
Group n10 n12 n14 Mean 2010 Mean 2012 Mean 2014 Δ 2014−2012
White 450 612 354 3.20 3.04 3.59 +0.55
Mestizo 1024 957 967 4.88 5.51 5.16 -0.35
Indigenous 21 71 99 6.24 5.86 5.45 -0.40
Mulatto 11 12 46 7.64 7.33 6.54 -0.79
Black 88 63 44 8.77 9.13 8.32 -0.81
Other NA 2 34 NA 5.50 6.62 +1.12

Honduras: education by ethnicity × wave

hon_ed <- d_hon %>%
  filter(year %in% c(2010,2012,2014), !is.na(colorr_n)) %>%
  mutate(etid_n = as.integer(zap_labels(etid)),
         broad = case_when(
           etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
           etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
           etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
           TRUE ~ NA_character_),
         ed_n = as.numeric(ed)) %>%
  filter(!is.na(broad)) %>%
  group_by(broad, year) %>%
  summarise(n = n(),
            mean_ed = round(weighted.mean(ed_n, wt, na.rm = TRUE), 2),
            .groups = "drop") %>%
  pivot_wider(names_from = year, values_from = c(n, mean_ed)) %>%
  arrange(mean_ed_2010) %>%
  transmute(Group = broad, `n10` = n_2010, `n12` = n_2012, `n14` = n_2014,
            `Ed 2010` = mean_ed_2010, `Ed 2012` = mean_ed_2012, `Ed 2014` = mean_ed_2014,
            `Δ ed 14−12` = sprintf("%+.2f", mean_ed_2014 - mean_ed_2012))
hon_ed %>% gt() %>%
  tab_header(title = "Honduras: mean years of education by ethnicity × wave",
             subtitle = "Indigenous & Mestizo gain ~1.3 yrs in 2014; Black & Mulatto lose ~1 yr")
Honduras: mean years of education by ethnicity × wave
Indigenous & Mestizo gain ~1.3 yrs in 2014; Black & Mulatto lose ~1 yr
Group n10 n12 n14 Ed 2010 Ed 2012 Ed 2014 Δ ed 14−12
Indigenous 21 71 99 5.81 4.74 6.23 +1.49
Mestizo 1024 957 967 6.84 6.28 7.53 +1.25
Black 88 63 44 7.38 7.87 6.75 -1.12
White 450 612 354 7.85 7.73 7.50 -0.23
Mulatto 11 12 46 8.73 8.66 7.63 -1.03
Other NA 2 34 NA 8.00 7.12 -0.88

Honduras: self-id ethnic shares over time

d_hon %>%
  mutate(etid_n = as.integer(zap_labels(etid)),
         broad = case_when(
           etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
           etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
           etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
           TRUE ~ NA_character_)) %>%
  filter(!is.na(broad)) %>%
  count(year, broad) %>%
  group_by(year) %>%
  mutate(pct = round(100*n/sum(n), 1)) %>% ungroup() %>%
  select(year, broad, pct) %>%
  pivot_wider(names_from = broad, values_from = pct, values_fill = 0) %>%
  gt() %>%
  tab_header(title = "Honduras: self-id ethnic share (%) by wave")
Honduras: self-id ethnic share (%) by wave
year Black Indigenous Mestizo Other White Mulatto
2004 4.8 7.9 57.8 0.1 29.4 0.0
2006 1.9 3.7 61.9 0.1 31.4 1.0
2008 2.4 4.4 59.9 0.5 31.0 1.9
2010 5.5 1.3 64.2 0.0 28.2 0.7
2012 3.7 4.1 55.7 0.1 35.7 0.7
2014 2.8 6.4 62.7 2.2 22.9 3.0
2016 4.1 3.8 67.5 0.5 22.0 2.1
2018 5.2 3.2 59.5 1.8 27.0 3.3
2021 2.4 4.2 57.5 3.3 29.0 3.7
2023 9.5 5.4 42.9 7.1 29.2 5.8

Session info

sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gt_1.3.0            stringr_1.6.0       metafor_4.8-0      
##  [4] numDeriv_2016.8-1.1 metadat_1.4-0       Matrix_1.7-4       
##  [7] ggplot2_4.0.1.9000  Hmisc_5.2-4         weights_1.1.2      
## [10] purrr_1.2.0         tidyr_1.3.1         dplyr_1.1.4        
## [13] haven_2.5.5        
## 
## loaded via a namespace (and not attached):
##   [1] mathjaxr_2.0-0     RColorBrewer_1.1-3 sys_3.4.3         
##   [4] rstudioapi_0.17.1  jsonlite_2.0.0     shape_1.4.6.1     
##   [7] magrittr_2.0.4     jomo_2.7-6         farver_2.1.2      
##  [10] nloptr_2.2.1       rmarkdown_2.30     fs_1.6.6          
##  [13] ragg_1.5.0         vctrs_0.6.5        memoise_2.0.1     
##  [16] minqa_1.2.8        ellmer_0.4.0       askpass_1.2.1     
##  [19] base64enc_0.1-3    gh_1.5.0           htmltools_0.5.8.1 
##  [22] forcats_1.0.1      usethis_3.2.1      broom_1.0.11      
##  [25] Formula_1.2-5      mitml_0.4-5        sass_0.4.10       
##  [28] bslib_0.9.0        htmlwidgets_1.6.4  httr2_1.2.1       
##  [31] cachem_1.1.0       lifecycle_1.0.4    iterators_1.0.14  
##  [34] pkgconfig_2.0.3    webshot2_0.1.2     R6_2.6.1          
##  [37] fastmap_1.2.0      rbibutils_2.3      digest_0.6.39     
##  [40] colorspace_2.1-2   ps_1.9.1           pkgload_1.4.1     
##  [43] textshaping_1.0.4  labeling_0.4.3     fansi_1.0.6       
##  [46] gdata_3.0.1        mgcv_1.9-3         compiler_4.5.3    
##  [49] remotes_2.5.0      withr_3.0.2        htmlTable_2.4.3   
##  [52] S7_0.2.1           backports_1.5.0    DBI_1.2.3         
##  [55] pkgbuild_1.4.8     duckdb_1.4.4       pan_1.9           
##  [58] MASS_7.3-65        mcptools_0.2.0     openssl_2.3.4     
##  [61] rappdirs_0.3.3     sessioninfo_1.2.3  gtools_3.9.5      
##  [64] tools_4.5.3        chromote_0.5.1     foreign_0.8-91    
##  [67] nnet_7.3-20        glue_1.8.0         nlme_3.1-168      
##  [70] promises_1.3.3     grid_4.5.3         checkmate_2.3.3   
##  [73] cluster_2.1.8.2    generics_0.1.4     gtable_0.3.6      
##  [76] tzdb_0.5.0         websocket_1.4.4    data.table_1.17.8 
##  [79] hms_1.1.3          utf8_1.2.6         xml2_1.4.0        
##  [82] foreach_1.5.2      pillar_1.11.1      later_1.4.4       
##  [85] splines_4.5.3      lattice_0.22-7     survival_3.8-6    
##  [88] tidyselect_1.2.1   coro_1.1.0         knitr_1.50        
##  [91] reformulas_0.4.1   gitcreds_0.1.2     gridExtra_2.3     
##  [94] xfun_0.57          devtools_2.4.6     credentials_2.0.3 
##  [97] brio_1.1.5         stringi_1.8.7      yaml_2.3.10       
## [100] boot_1.3-32       
##  [ reached 'max' / getOption("max.print") -- omitted 21 entries ]