Setup

suppressPackageStartupMessages({
  library(haven); library(dplyr); library(tidyr); library(purrr); library(stringr)
  library(ggplot2); library(metafor); library(Hmisc); library(weights); library(gt)
})
proj_dir <- "/media/emil/8tb_ssd_3/projects/LAPOP"
gm_file <- file.path(proj_dir,
  "data/Grand_Merge_2004-2023_LAPOP_AmericasBarometer_v1.0_FREEen.sav")
country_files <- list.files(file.path(proj_dir, "data/LAPOP"),
  pattern = "^2004-2023 .*\\.sav$", full.names = TRUE)
country_files <- country_files[!grepl("Grand Merge|Nicaragua", country_files)]

Knowledge items and pass rates

items_tbl <- tibble::tribble(
  ~var,   ~question,                                              ~correct,       ~pct,
  "gi1",  "Name of the current US President",                     "Obama",        86.8,
  "gi2",  "Name of the country's parliament/senate leader",       "varies",       51.9,
  "gi3",  "How many states does the US have?",                    "50",           71.4,
  "gi4",  "Presidential/PM/government's term length in [country]","varies",       86.6,
  "gi5",  "Name of the President of Brazil",                      "varies",       35.2,
  "gi7",  "Number of seats in country's lower house",             "varies",       17.5,
  "gix4", "On which continent is Nigeria?",                       "Africa",       57.8
)
items_tbl %>% gt() %>%
  tab_header(title = "LAPOP knowledge items, with pass rate across all waves") %>%
  cols_label(var = "Var", question = "Question", correct = "Correct", pct = "% correct")

Var	Question	Correct	% correct
LAPOP knowledge items, with pass rate across all waves
gi1	Name of the current US President	Obama	86.8
gi2	Name of the country's parliament/senate leader	varies	51.9
gi3	How many states does the US have?	50	71.4
gi4	Presidential/PM/government's term length in [country]	varies	86.6
gi5	Name of the President of Brazil	varies	35.2
gi7	Number of seats in country's lower house	varies	17.5
gix4	On which continent is Nigeria?	Africa	57.8

Skin color measurement

LAPOP colorr is interviewer-rated using a printed 11-tone palette (“Paleta de Colores”), at the end of the interview, without asking the respondent. 1 = very light, 11 = very dark, 97 = could not classify.

Load data and score per country × wave

needed_gm <- c("pais","year","wave","wt","colorr","gi1","gi4")
d_gm <- read_spss(gm_file, col_select = all_of(needed_gm))

needed_cf <- c("pais","year","wave","wt","colorr",
               "gi1","gi2","gi3","gi4","gi5","gi7","gix4")
country_data <- map(country_files, function(f) {
  cols_in <- read_spss(f, n_max = 0) %>% names()
  read_spss(f, col_select = all_of(intersect(needed_cf, cols_in)))
})

gi7_correct <- c(`1`=500,`2`=158,`3`=84,`4`=128,`5`=92,`6`=57,`7`=71,`8`=166,
                 `9`=137,`10`=130,`11`=130,`12`=80,`13`=120,`14`=99,`15`=513,
                 `16`=164,`17`=257,`21`=178,`22`=99,`23`=63,`24`=65,`25`=41,
                 `26`=31,`27`=51,`28`=38,`40`=435)
country_name <- c(`1`="Mexico",`2`="Guatemala",`3`="El Salvador",`4`="Honduras",
                  `5`="Nicaragua",`6`="Costa Rica",`7`="Panama",`8`="Colombia",
                  `9`="Ecuador",`10`="Bolivia",`11`="Peru",`12`="Paraguay",
                  `13`="Chile",`14`="Uruguay",`15`="Brazil",`16`="Venezuela",
                  `17`="Argentina",`21`="Dominican Republic",`22`="Haiti",
                  `23`="Jamaica",`24`="Guyana",`25`="Trinidad and Tobago",
                  `26`="Belize",`27`="Suriname",`28`="Bahamas",`30`="Grenada",
                  `40`="USA",`41`="Canada")

clean_cf <- bind_rows(country_data) %>%
  transmute(
    pais   = as.integer(zap_labels(pais)),
    year   = as.integer(zap_labels(year)),
    wt     = as.numeric(wt),
    colorr = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1    = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi2    = if_else(gi2 == 1, 1, if_else(gi2 == 2, 0, NA_real_)),
    gi3    = if_else(gi3 == 1, 1, if_else(gi3 == 2, 0, NA_real_)),
    gi4    = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    gi5    = if_else(gi5 == 1, 1, if_else(gi5 == 2, 0, NA_real_)),
    gix4   = if_else(gix4 == 1, 1, if_else(gix4 == 2, 0, NA_real_)),
    gi7    = if_else(is.na(gi7), NA_real_,
                     as.numeric(gi7 == gi7_correct[as.character(pais)]))
  )

country_pais <- unique(clean_cf$pais)
clean_gm <- d_gm %>%
  filter(!(as.integer(zap_labels(pais)) %in% country_pais)) %>%
  transmute(
    pais   = as.integer(zap_labels(pais)),
    year   = as.integer(zap_labels(year)),
    wt     = as.numeric(wt),
    colorr = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1    = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi4    = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_))
  )

score_cell <- function(df, items) {
  keep <- items[map_lgl(items, function(i) {
    x <- df[[i]]
    sum(!is.na(x)) >= 30 && length(unique(na.omit(x))) >= 2
  })]
  if (length(keep) == 0) return(NULL)
  m <- as.matrix(df[keep])
  n_row <- rowSums(!is.na(m))
  score <- ifelse(n_row == 0, NA, rowMeans(m, na.rm = TRUE))
  ok <- !is.na(score) & !is.na(df$colorr)
  if (sum(ok) < 50) return(NULL)
  s <- score[ok]; c <- df$colorr[ok]; w <- df$wt[ok]
  if (sd(s) == 0 || sd(c) == 0) return(NULL)
  r <- weights::wtd.cors(s, c, weight = w)[1, 1]
  tibble(
    n = sum(ok),
    items_used = paste(keep, collapse = "+"),
    k_items = length(keep),
    r = r,
    z = atanh(pmin(pmax(r, -0.999), 0.999)),
    se_z = 1 / sqrt(sum(ok) - 3),
    mean_score = weighted.mean(s, w),
    mean_color = weighted.mean(c, w)
  )
}

cf_results <- clean_cf %>% group_by(pais, year) %>%
  group_modify(~ score_cell(.x, c("gi1","gi2","gi3","gi4","gi5","gi7","gix4")) %||% tibble()) %>%
  ungroup()
gm_results <- clean_gm %>% group_by(pais, year) %>%
  group_modify(~ score_cell(.x, c("gi1","gi4")) %||% tibble()) %>%
  ungroup()
results <- bind_rows(cf_results, gm_results) %>%
  mutate(country = country_name[as.character(pais)]) %>%
  relocate(country, pais, year)

meta_country <- results %>%
  group_by(country) %>%
  group_modify(function(df, key) {
    if (nrow(df) == 1) {
      return(tibble(k = 1, total_n = df$n, z_mean = df$z, z_se = df$se_z,
                    tau2 = 0, items_summary = df$items_used))
    }
    fit <- metafor::rma(yi = df$z, sei = df$se_z, method = "REML")
    tibble(k = nrow(df), total_n = sum(df$n),
           z_mean = as.numeric(fit$b), z_se = fit$se, tau2 = fit$tau2,
           items_summary = paste(unique(df$items_used), collapse = " | "))
  }) %>% ungroup() %>%
  mutate(
    r_mean = tanh(z_mean),
    r_lo = tanh(z_mean - 1.96 * z_se),
    r_hi = tanh(z_mean + 1.96 * z_se)
  ) %>% arrange(r_mean)

overall <- metafor::rma(yi = results$z, sei = results$se_z, method = "REML")
overall_summary <- tibble(
  k = overall$k, total_n = sum(results$n),
  r = tanh(as.numeric(overall$b)),
  r_lo = tanh(as.numeric(overall$b) - 1.96 * overall$se),
  r_hi = tanh(as.numeric(overall$b) + 1.96 * overall$se),
  tau2 = overall$tau2, I2 = overall$I2, Q_p = overall$QEp
)
overall_summary

Forest plot

meta_plot <- meta_country %>%
  mutate(
    items_short = items_summary %>% str_replace_all("\\+", "·") %>% str_replace_all(" \\| ", "/"),
    label = sprintf("%s  (k=%d, N=%s, items=%s)",
                    country, k, formatC(total_n, big.mark = ","), items_short),
    label = factor(label, levels = rev(label))
  )

ggplot(meta_plot, aes(r_mean, label)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  geom_vline(xintercept = overall_summary$r, linetype = "dotted", color = "red") +
  geom_errorbar(aes(xmin = r_lo, xmax = r_hi), width = 0.3, orientation = "y") +
  geom_point(aes(size = sqrt(total_n))) +
  scale_size_continuous(guide = "none") +
  labs(
    x = "r(knowledge score, skin color), REML meta within country (95% CI)",
    y = NULL,
    title = "LAPOP: knowledge score × skin color, by country",
    subtitle = sprintf("Overall r = %.3f (95%% CI %.3f, %.3f), τ² = %.4f, I² = %.0f%%, k = %d cells, N = %s",
                       overall_summary$r, overall_summary$r_lo, overall_summary$r_hi,
                       overall_summary$tau2, overall_summary$I2,
                       overall_summary$k, formatC(overall_summary$total_n, big.mark = ","))
  ) +
  theme_bw(base_size = 10)

Guyana ethnic table

d_etid <- read_spss(gm_file,
  col_select = c("pais","year","wt","colorr","etid","gi1","gi4"))
guy <- d_etid %>%
  filter(as.integer(zap_labels(pais)) == 24) %>%
  mutate(
    etid_n = as.integer(zap_labels(etid)),
    colorr_clean = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    score = rowMeans(cbind(gi1c, gi4c), na.rm = TRUE),
    score = if_else(is.nan(score), NA_real_, score),
    wt = as.numeric(wt)
  )
guyana_tbl <- guy %>%
  group_by(etid_n) %>%
  summarise(n = n(),
            mean_colorr = mean(colorr_clean, na.rm = TRUE),
            mean_score = weighted.mean(score, wt, na.rm = TRUE),
            pct_gi1 = weighted.mean(gi1c, wt, na.rm = TRUE),
            pct_gi4 = weighted.mean(gi4c, wt, na.rm = TRUE),
            .groups = "drop") %>%
  filter(!is.nan(mean_colorr), n >= 30) %>%
  mutate(Group = case_when(
    etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo / Mixed",
    etid_n == 3 ~ "Indigenous (Amerindian)", etid_n == 4 ~ "Black (Afro-Guyanese)",
    etid_n == 7 ~ "Other", etid_n == 2406 ~ "East Indian (Indo-Guyanese)",
    etid_n == 2409 ~ "Chinese", TRUE ~ as.character(etid_n))) %>%
  arrange(mean_colorr) %>%
  transmute(Group, n,
            `Mean colorr` = sprintf("%.2f", mean_colorr),
            `Mean score` = sprintf("%.2f", mean_score),
            `gi1 % correct` = sprintf("%.0f%%", 100*pct_gi1),
            `gi4 % correct` = sprintf("%.0f%%", 100*pct_gi4))
guyana_tbl %>% gt() %>%
  tab_header(title = "Guyana: skin color vs knowledge by self-id ethnic group")

Group	n	Mean colorr	Mean score	gi1 % correct	gi4 % correct
Guyana: skin color vs knowledge by self-id ethnic group
2408	39	3.29	0.93	100%	82%
Indigenous (Amerindian)	983	3.96	0.82	90%	75%
Chinese	50	4.20	0.91	98%	83%
Other	40	5.00	0.84	100%	55%
Mestizo / Mixed	2994	5.25	0.89	98%	79%
East Indian (Indo-Guyanese)	3100	5.33	0.87	96%	78%
NA	36	6.00	0.80	86%	60%
Black (Afro-Guyanese)	3601	7.16	0.90	99%	80%

Belize ethnic table

extra <- read_spss(gm_file,
  col_select = c("pais","q1","q2","ed","ur","idiomaq"))
bel_all <- d_etid %>%
  filter(as.integer(zap_labels(pais)) == 26) %>%
  bind_cols(extra %>% filter(as.integer(zap_labels(pais)) == 26) %>%
              select(q1, q2, ed, ur, idiomaq)) %>%
  mutate(
    etid_n = as.integer(zap_labels(etid)),
    colorr_clean = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    score = rowMeans(cbind(gi1c, gi4c), na.rm = TRUE),
    score = if_else(is.nan(score), NA_real_, score),
    lang = as_factor(idiomaq) %>% as.character(),
    lang_group = case_when(
      lang %in% c("Spanish", "Spanish (last language used)") ~ "Spanish",
      lang %in% c("English", "English (last language used)") ~ "English",
      TRUE ~ "Other/NA"),
    wt = as.numeric(wt), ed = as.numeric(ed)
  )

belize_tbl <- bel_all %>%
  group_by(etid_n) %>%
  summarise(
    n = n(),
    pct_spanish = mean(lang_group == "Spanish", na.rm = TRUE),
    pct_english = mean(lang_group == "English", na.rm = TRUE),
    mean_colorr = mean(colorr_clean, na.rm = TRUE),
    mean_ed = weighted.mean(ed, wt, na.rm = TRUE),
    mean_score = weighted.mean(score, wt, na.rm = TRUE),
    pct_gi1 = weighted.mean(gi1c, wt, na.rm = TRUE),
    pct_gi4 = weighted.mean(gi4c, wt, na.rm = TRUE),
    .groups = "drop") %>%
  filter(!is.nan(mean_colorr), n >= 20) %>%
  mutate(Group = case_when(
    etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
    etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black", etid_n == 5 ~ "Mulatto",
    etid_n == 7 ~ "Other", etid_n == 2608 ~ "Creole", etid_n == 2609 ~ "East Indian",
    etid_n == 2610 ~ "Garifuna", etid_n == 2611 ~ "Maya Ketchi",
    etid_n == 2612 ~ "Maya Mopan", etid_n == 2613 ~ "Maya Yucatec",
    etid_n == 2614 ~ "Mennonite", etid_n == 2615 ~ "Spanish",
    TRUE ~ as.character(etid_n))) %>%
  arrange(mean_colorr) %>%
  filter(Group != "NA") %>%
  transmute(Group, n,
            `% Spanish` = sprintf("%.0f%%", 100*pct_spanish),
            `% English` = sprintf("%.0f%%", 100*pct_english),
            `Mean colorr` = sprintf("%.2f", mean_colorr),
            `Mean ed (yrs)` = sprintf("%.1f", mean_ed),
            `Mean score` = sprintf("%.2f", mean_score),
            `gi1 % correct` = sprintf("%.0f%%", 100*pct_gi1),
            `gi4 % correct` = sprintf("%.0f%%", 100*pct_gi4))
belize_tbl %>% gt() %>%
  tab_header(title = "Belize: skin color, language, education and knowledge",
             subtitle = "Survey delivered bilingually; idiomaq = language of interview")

Group	n	% Spanish	% English	Mean colorr	Mean ed (yrs)	Mean score	gi1 % correct	gi4 % correct
Belize: skin color, language, education and knowledge
Survey delivered bilingually; idiomaq = language of interview
White	107	69%	31%	3.61	7.2	0.78	75%	80%
Spanish	678	52%	48%	4.54	7.0	0.80	79%	81%
Indigenous	53	38%	62%	5.02	6.3	0.86	71%	100%
Other	104	39%	61%	5.07	10.0	0.94	92%	94%
Mestizo	3463	66%	34%	5.15	7.5	0.88	90%	88%
Maya Yucatec	77	64%	36%	5.43	7.1	0.89	88%	88%
Mennonite	35	66%	34%	5.56	8.1	0.89	80%	96%
Maya Ketchi	321	22%	78%	5.58	6.5	0.77	72%	84%
Maya Mopan	212	24%	76%	5.70	7.7	0.86	83%	92%
East Indian	126	21%	79%	7.07	8.3	0.88	84%	90%
Creole	1911	19%	81%	7.14	8.8	0.90	91%	91%
Garifuna	391	16%	84%	8.05	9.3	0.93	92%	95%
Black	110	59%	41%	8.46	9.8	0.83	87%	81%

bel_all <- bel_all %>%
  mutate(
    broad = case_when(
      etid_n %in% c(1, 2615) ~ "White/Spanish", etid_n == 2 ~ "Mestizo",
      etid_n %in% c(3, 2611, 2612, 2613) ~ "Indigenous/Maya",
      etid_n %in% c(4, 2608) ~ "Black/Creole", etid_n == 2610 ~ "Garifuna",
      etid_n == 2614 ~ "Mennonite", etid_n == 2609 ~ "East Indian",
      etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other", TRUE ~ NA_character_),
    age = as.numeric(q2),
    sex = as_factor(q1) %>% as.character(),
    lang2 = factor(lang_group, levels = c("English","Spanish","Other/NA"))
  )
mdat <- bel_all %>%
  filter(!is.na(score), !is.na(broad), !is.na(age), !is.na(sex),
         !is.na(lang2), !is.na(ed)) %>%
  mutate(broad = factor(broad, levels = c("Mestizo","White/Spanish","Indigenous/Maya",
                                          "Black/Creole","Garifuna","Mennonite",
                                          "East Indian","Mulatto","Other")))
fit <- lm(score ~ lang2 + broad + age + sex + ed, data = mdat, weights = wt)
broom::tidy(fit, conf.int = TRUE) %>%
  mutate(across(where(is.numeric), \(x) round(x, 3)))

Honduras: item-level logistic regression by wave

hon_file <- file.path(proj_dir, "data/LAPOP/2004-2023 Honduras.sav")
hon_cols <- read_spss(hon_file, n_max = 0) %>% names()
hon_keep <- intersect(
  c("pais","year","wt","colorr","etid","ur","ed","q1","q2",
    "gi1","gi3","gi4","gi7","gix4"), hon_cols)
d_hon <- read_spss(hon_file, col_select = all_of(hon_keep)) %>%
  mutate(
    year = as.integer(zap_labels(year)),
    colorr_n = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
    wt = as.numeric(wt))

items_hon <- c("gi1","gi3","gi4","gi7","gix4")
hon_long <- d_hon %>%
  filter(year %in% c(2010, 2012, 2014), !is.na(colorr_n)) %>%
  mutate(
    gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
    gi3c = if_else(gi3 == 1, 1, if_else(gi3 == 2, 0, NA_real_)),
    gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
    gi7c = if_else(is.na(gi7), NA_real_, as.numeric(gi7 == 128)),
    gix4c = if_else(gix4 == 1, 1, if_else(gix4 == 2, 0, NA_real_))
  )
fit_grid <- expand.grid(year = c(2010,2012,2014), item = items_hon, stringsAsFactors = FALSE)
res_logit <- pmap_df(fit_grid, function(year, item) {
  d <- hon_long %>% filter(year == !!year) %>%
    transmute(y = .data[[paste0(item, "c")]], colorr_n, wt) %>%
    filter(!is.na(y))
  if (nrow(d) < 50 || length(unique(d$y)) < 2) return(tibble(year=year, item=item))
  fit <- glm(y ~ colorr_n, data = d, family = binomial, weights = wt)
  s <- summary(fit)
  tibble(year=year, item=item, n=nrow(d),
         pct_correct = round(100*weighted.mean(d$y, d$wt), 1),
         beta = round(coef(fit)[2], 3),
         SE = round(s$coefficients[2, 2], 3),
         z = round(s$coefficients[2, 3], 2),
         p = signif(s$coefficients[2, 4], 2),
         OR = round(exp(coef(fit)[2]), 3))
}) %>% filter(!is.na(beta)) %>% arrange(item, year)

res_logit %>% gt() %>%
  tab_header(title = "Honduras: logistic regression of (item correct) on colorr, per wave",
             subtitle = "Positive β = darker → more likely correct")

year	item	n	pct_correct	beta	SE	z	p	OR
Honduras: logistic regression of (item correct) on colorr, per wave
Positive β = darker → more likely correct
2010	gi1	1327	91.3	-0.012	0.053	-0.24	8.1e-01	0.988
2012	gi1	1468	82.6	-0.141	0.034	-4.10	4.1e-05	0.869
2014	gi1	1255	90.7	0.261	0.057	4.60	4.3e-06	1.298
2010	gi3	1478	93.0	-0.129	0.052	-2.51	1.2e-02	0.879
2010	gi4	1547	97.9	0.148	0.113	1.32	1.9e-01	1.160
2012	gi4	1644	94.8	-0.097	0.055	-1.77	7.7e-02	0.908
2014	gi4	1498	95.3	0.367	0.077	4.79	1.7e-06	1.443
2014	gi7	748	38.6	0.094	0.041	2.30	2.1e-02	1.099
2014	gix4	698	56.3	0.207	0.042	4.94	7.6e-07	1.230

Honduras: skin color by ethnicity × wave

hon_colorr <- d_hon %>%
  filter(year %in% c(2010,2012,2014), !is.na(colorr_n)) %>%
  mutate(etid_n = as.integer(zap_labels(etid)),
         broad = case_when(
           etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
           etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
           etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
           TRUE ~ NA_character_)) %>%
  filter(!is.na(broad)) %>%
  group_by(broad, year) %>%
  summarise(n = n(), mean_colorr = mean(colorr_n),
            sd_colorr = sd(colorr_n), .groups = "drop") %>%
  pivot_wider(names_from = year,
              values_from = c(n, mean_colorr, sd_colorr)) %>%
  arrange(mean_colorr_2010) %>%
  transmute(Group = broad,
            `n10` = n_2010, `n12` = n_2012, `n14` = n_2014,
            `Mean 2010` = sprintf("%.2f", mean_colorr_2010),
            `Mean 2012` = sprintf("%.2f", mean_colorr_2012),
            `Mean 2014` = sprintf("%.2f", mean_colorr_2014),
            `Δ 2014−2012` = sprintf("%+.2f",
                                    as.numeric(mean_colorr_2014) - as.numeric(mean_colorr_2012)))
hon_colorr %>% gt() %>%
  tab_header(title = "Honduras: mean colorr by self-id ethnicity × wave",
             subtitle = "Ordering preserved; spread compresses in 2014 (Black − White: 6.09 → 4.73)")

Group	n10	n12	n14	Mean 2010	Mean 2012	Mean 2014	Δ 2014−2012
Honduras: mean colorr by self-id ethnicity × wave
Ordering preserved; spread compresses in 2014 (Black − White: 6.09 → 4.73)
White	450	612	354	3.20	3.04	3.59	+0.55
Mestizo	1024	957	967	4.88	5.51	5.16	-0.35
Indigenous	21	71	99	6.24	5.86	5.45	-0.40
Mulatto	11	12	46	7.64	7.33	6.54	-0.79
Black	88	63	44	8.77	9.13	8.32	-0.81
Other	NA	2	34	NA	5.50	6.62	+1.12

Honduras: education by ethnicity × wave

hon_ed <- d_hon %>%
  filter(year %in% c(2010,2012,2014), !is.na(colorr_n)) %>%
  mutate(etid_n = as.integer(zap_labels(etid)),
         broad = case_when(
           etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
           etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
           etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
           TRUE ~ NA_character_),
         ed_n = as.numeric(ed)) %>%
  filter(!is.na(broad)) %>%
  group_by(broad, year) %>%
  summarise(n = n(),
            mean_ed = round(weighted.mean(ed_n, wt, na.rm = TRUE), 2),
            .groups = "drop") %>%
  pivot_wider(names_from = year, values_from = c(n, mean_ed)) %>%
  arrange(mean_ed_2010) %>%
  transmute(Group = broad, `n10` = n_2010, `n12` = n_2012, `n14` = n_2014,
            `Ed 2010` = mean_ed_2010, `Ed 2012` = mean_ed_2012, `Ed 2014` = mean_ed_2014,
            `Δ ed 14−12` = sprintf("%+.2f", mean_ed_2014 - mean_ed_2012))
hon_ed %>% gt() %>%
  tab_header(title = "Honduras: mean years of education by ethnicity × wave",
             subtitle = "Indigenous & Mestizo gain ~1.3 yrs in 2014; Black & Mulatto lose ~1 yr")

Group	n10	n12	n14	Ed 2010	Ed 2012	Ed 2014	Δ ed 14−12
Honduras: mean years of education by ethnicity × wave
Indigenous & Mestizo gain ~1.3 yrs in 2014; Black & Mulatto lose ~1 yr
Indigenous	21	71	99	5.81	4.74	6.23	+1.49
Mestizo	1024	957	967	6.84	6.28	7.53	+1.25
Black	88	63	44	7.38	7.87	6.75	-1.12
White	450	612	354	7.85	7.73	7.50	-0.23
Mulatto	11	12	46	8.73	8.66	7.63	-1.03
Other	NA	2	34	NA	8.00	7.12	-0.88

Honduras: self-id ethnic shares over time

d_hon %>%
  mutate(etid_n = as.integer(zap_labels(etid)),
         broad = case_when(
           etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
           etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
           etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
           TRUE ~ NA_character_)) %>%
  filter(!is.na(broad)) %>%
  count(year, broad) %>%
  group_by(year) %>%
  mutate(pct = round(100*n/sum(n), 1)) %>% ungroup() %>%
  select(year, broad, pct) %>%
  pivot_wider(names_from = broad, values_from = pct, values_fill = 0) %>%
  gt() %>%
  tab_header(title = "Honduras: self-id ethnic share (%) by wave")

year	Black	Indigenous	Mestizo	Other	White	Mulatto
Honduras: self-id ethnic share (%) by wave
2004	4.8	7.9	57.8	0.1	29.4	0.0
2006	1.9	3.7	61.9	0.1	31.4	1.0
2008	2.4	4.4	59.9	0.5	31.0	1.9
2010	5.5	1.3	64.2	0.0	28.2	0.7
2012	3.7	4.1	55.7	0.1	35.7	0.7
2014	2.8	6.4	62.7	2.2	22.9	3.0
2016	4.1	3.8	67.5	0.5	22.0	2.1
2018	5.2	3.2	59.5	1.8	27.0	3.3
2021	2.4	4.2	57.5	3.3	29.0	3.7
2023	9.5	5.4	42.9	7.1	29.2	5.8

Session info

sessionInfo()

## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Brussels
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gt_1.3.0            stringr_1.6.0       metafor_4.8-0      
##  [4] numDeriv_2016.8-1.1 metadat_1.4-0       Matrix_1.7-4       
##  [7] ggplot2_4.0.1.9000  Hmisc_5.2-4         weights_1.1.2      
## [10] purrr_1.2.0         tidyr_1.3.1         dplyr_1.1.4        
## [13] haven_2.5.5        
## 
## loaded via a namespace (and not attached):
##   [1] mathjaxr_2.0-0     RColorBrewer_1.1-3 sys_3.4.3         
##   [4] rstudioapi_0.17.1  jsonlite_2.0.0     shape_1.4.6.1     
##   [7] magrittr_2.0.4     jomo_2.7-6         farver_2.1.2      
##  [10] nloptr_2.2.1       rmarkdown_2.30     fs_1.6.6          
##  [13] ragg_1.5.0         vctrs_0.6.5        memoise_2.0.1     
##  [16] minqa_1.2.8        ellmer_0.4.0       askpass_1.2.1     
##  [19] base64enc_0.1-3    gh_1.5.0           htmltools_0.5.8.1 
##  [22] forcats_1.0.1      usethis_3.2.1      broom_1.0.11      
##  [25] Formula_1.2-5      mitml_0.4-5        sass_0.4.10       
##  [28] bslib_0.9.0        htmlwidgets_1.6.4  httr2_1.2.1       
##  [31] cachem_1.1.0       lifecycle_1.0.4    iterators_1.0.14  
##  [34] pkgconfig_2.0.3    webshot2_0.1.2     R6_2.6.1          
##  [37] fastmap_1.2.0      rbibutils_2.3      digest_0.6.39     
##  [40] colorspace_2.1-2   ps_1.9.1           pkgload_1.4.1     
##  [43] textshaping_1.0.4  labeling_0.4.3     fansi_1.0.6       
##  [46] gdata_3.0.1        mgcv_1.9-3         compiler_4.5.3    
##  [49] remotes_2.5.0      withr_3.0.2        htmlTable_2.4.3   
##  [52] S7_0.2.1           backports_1.5.0    DBI_1.2.3         
##  [55] pkgbuild_1.4.8     duckdb_1.4.4       pan_1.9           
##  [58] MASS_7.3-65        mcptools_0.2.0     openssl_2.3.4     
##  [61] rappdirs_0.3.3     sessioninfo_1.2.3  gtools_3.9.5      
##  [64] tools_4.5.3        chromote_0.5.1     foreign_0.8-91    
##  [67] nnet_7.3-20        glue_1.8.0         nlme_3.1-168      
##  [70] promises_1.3.3     grid_4.5.3         checkmate_2.3.3   
##  [73] cluster_2.1.8.2    generics_0.1.4     gtable_0.3.6      
##  [76] tzdb_0.5.0         websocket_1.4.4    data.table_1.17.8 
##  [79] hms_1.1.3          utf8_1.2.6         xml2_1.4.0        
##  [82] foreach_1.5.2      pillar_1.11.1      later_1.4.4       
##  [85] splines_4.5.3      lattice_0.22-7     survival_3.8-6    
##  [88] tidyselect_1.2.1   coro_1.1.0         knitr_1.50        
##  [91] reformulas_0.4.1   gitcreds_0.1.2     gridExtra_2.3     
##  [94] xfun_0.57          devtools_2.4.6     credentials_2.0.3 
##  [97] brio_1.1.5         stringi_1.8.7      yaml_2.3.10       
## [100] boot_1.3-32       
##  [ reached 'max' / getOption("max.print") -- omitted 21 entries ]

Skin color and political knowledge in Latin America (LAPOP)

Emil O. W. Kirkegaard

11 May 2026