Setup
suppressPackageStartupMessages({
library(haven); library(dplyr); library(tidyr); library(purrr); library(stringr)
library(ggplot2); library(metafor); library(Hmisc); library(weights); library(gt)
})
proj_dir <- "/media/emil/8tb_ssd_3/projects/LAPOP"
gm_file <- file.path(proj_dir,
"data/Grand_Merge_2004-2023_LAPOP_AmericasBarometer_v1.0_FREEen.sav")
country_files <- list.files(file.path(proj_dir, "data/LAPOP"),
pattern = "^2004-2023 .*\\.sav$", full.names = TRUE)
country_files <- country_files[!grepl("Grand Merge|Nicaragua", country_files)]
Knowledge items and pass rates
items_tbl <- tibble::tribble(
~var, ~question, ~correct, ~pct,
"gi1", "Name of the current US President", "Obama", 86.8,
"gi2", "Name of the country's parliament/senate leader", "varies", 51.9,
"gi3", "How many states does the US have?", "50", 71.4,
"gi4", "Presidential/PM/government's term length in [country]","varies", 86.6,
"gi5", "Name of the President of Brazil", "varies", 35.2,
"gi7", "Number of seats in country's lower house", "varies", 17.5,
"gix4", "On which continent is Nigeria?", "Africa", 57.8
)
items_tbl %>% gt() %>%
tab_header(title = "LAPOP knowledge items, with pass rate across all waves") %>%
cols_label(var = "Var", question = "Question", correct = "Correct", pct = "% correct")
| LAPOP knowledge items, with pass rate across all waves |
| Var |
Question |
Correct |
% correct |
| gi1 |
Name of the current US President |
Obama |
86.8 |
| gi2 |
Name of the country's parliament/senate leader |
varies |
51.9 |
| gi3 |
How many states does the US have? |
50 |
71.4 |
| gi4 |
Presidential/PM/government's term length in [country] |
varies |
86.6 |
| gi5 |
Name of the President of Brazil |
varies |
35.2 |
| gi7 |
Number of seats in country's lower house |
varies |
17.5 |
| gix4 |
On which continent is Nigeria? |
Africa |
57.8 |
Skin color measurement
LAPOP colorr is interviewer-rated using a printed 11-tone palette (“Paleta de Colores”), at the end of the interview, without asking the respondent. 1 = very light, 11 = very dark, 97 = could not classify.
Load data and score per country × wave
needed_gm <- c("pais","year","wave","wt","colorr","gi1","gi4")
d_gm <- read_spss(gm_file, col_select = all_of(needed_gm))
needed_cf <- c("pais","year","wave","wt","colorr",
"gi1","gi2","gi3","gi4","gi5","gi7","gix4")
country_data <- map(country_files, function(f) {
cols_in <- read_spss(f, n_max = 0) %>% names()
read_spss(f, col_select = all_of(intersect(needed_cf, cols_in)))
})
gi7_correct <- c(`1`=500,`2`=158,`3`=84,`4`=128,`5`=92,`6`=57,`7`=71,`8`=166,
`9`=137,`10`=130,`11`=130,`12`=80,`13`=120,`14`=99,`15`=513,
`16`=164,`17`=257,`21`=178,`22`=99,`23`=63,`24`=65,`25`=41,
`26`=31,`27`=51,`28`=38,`40`=435)
country_name <- c(`1`="Mexico",`2`="Guatemala",`3`="El Salvador",`4`="Honduras",
`5`="Nicaragua",`6`="Costa Rica",`7`="Panama",`8`="Colombia",
`9`="Ecuador",`10`="Bolivia",`11`="Peru",`12`="Paraguay",
`13`="Chile",`14`="Uruguay",`15`="Brazil",`16`="Venezuela",
`17`="Argentina",`21`="Dominican Republic",`22`="Haiti",
`23`="Jamaica",`24`="Guyana",`25`="Trinidad and Tobago",
`26`="Belize",`27`="Suriname",`28`="Bahamas",`30`="Grenada",
`40`="USA",`41`="Canada")
clean_cf <- bind_rows(country_data) %>%
transmute(
pais = as.integer(zap_labels(pais)),
year = as.integer(zap_labels(year)),
wt = as.numeric(wt),
colorr = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
gi1 = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
gi2 = if_else(gi2 == 1, 1, if_else(gi2 == 2, 0, NA_real_)),
gi3 = if_else(gi3 == 1, 1, if_else(gi3 == 2, 0, NA_real_)),
gi4 = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
gi5 = if_else(gi5 == 1, 1, if_else(gi5 == 2, 0, NA_real_)),
gix4 = if_else(gix4 == 1, 1, if_else(gix4 == 2, 0, NA_real_)),
gi7 = if_else(is.na(gi7), NA_real_,
as.numeric(gi7 == gi7_correct[as.character(pais)]))
)
country_pais <- unique(clean_cf$pais)
clean_gm <- d_gm %>%
filter(!(as.integer(zap_labels(pais)) %in% country_pais)) %>%
transmute(
pais = as.integer(zap_labels(pais)),
year = as.integer(zap_labels(year)),
wt = as.numeric(wt),
colorr = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
gi1 = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
gi4 = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_))
)
score_cell <- function(df, items) {
keep <- items[map_lgl(items, function(i) {
x <- df[[i]]
sum(!is.na(x)) >= 30 && length(unique(na.omit(x))) >= 2
})]
if (length(keep) == 0) return(NULL)
m <- as.matrix(df[keep])
n_row <- rowSums(!is.na(m))
score <- ifelse(n_row == 0, NA, rowMeans(m, na.rm = TRUE))
ok <- !is.na(score) & !is.na(df$colorr)
if (sum(ok) < 50) return(NULL)
s <- score[ok]; c <- df$colorr[ok]; w <- df$wt[ok]
if (sd(s) == 0 || sd(c) == 0) return(NULL)
r <- weights::wtd.cors(s, c, weight = w)[1, 1]
tibble(
n = sum(ok),
items_used = paste(keep, collapse = "+"),
k_items = length(keep),
r = r,
z = atanh(pmin(pmax(r, -0.999), 0.999)),
se_z = 1 / sqrt(sum(ok) - 3),
mean_score = weighted.mean(s, w),
mean_color = weighted.mean(c, w)
)
}
cf_results <- clean_cf %>% group_by(pais, year) %>%
group_modify(~ score_cell(.x, c("gi1","gi2","gi3","gi4","gi5","gi7","gix4")) %||% tibble()) %>%
ungroup()
gm_results <- clean_gm %>% group_by(pais, year) %>%
group_modify(~ score_cell(.x, c("gi1","gi4")) %||% tibble()) %>%
ungroup()
results <- bind_rows(cf_results, gm_results) %>%
mutate(country = country_name[as.character(pais)]) %>%
relocate(country, pais, year)
meta_country <- results %>%
group_by(country) %>%
group_modify(function(df, key) {
if (nrow(df) == 1) {
return(tibble(k = 1, total_n = df$n, z_mean = df$z, z_se = df$se_z,
tau2 = 0, items_summary = df$items_used))
}
fit <- metafor::rma(yi = df$z, sei = df$se_z, method = "REML")
tibble(k = nrow(df), total_n = sum(df$n),
z_mean = as.numeric(fit$b), z_se = fit$se, tau2 = fit$tau2,
items_summary = paste(unique(df$items_used), collapse = " | "))
}) %>% ungroup() %>%
mutate(
r_mean = tanh(z_mean),
r_lo = tanh(z_mean - 1.96 * z_se),
r_hi = tanh(z_mean + 1.96 * z_se)
) %>% arrange(r_mean)
overall <- metafor::rma(yi = results$z, sei = results$se_z, method = "REML")
overall_summary <- tibble(
k = overall$k, total_n = sum(results$n),
r = tanh(as.numeric(overall$b)),
r_lo = tanh(as.numeric(overall$b) - 1.96 * overall$se),
r_hi = tanh(as.numeric(overall$b) + 1.96 * overall$se),
tau2 = overall$tau2, I2 = overall$I2, Q_p = overall$QEp
)
overall_summary
Forest plot
meta_plot <- meta_country %>%
mutate(
items_short = items_summary %>% str_replace_all("\\+", "·") %>% str_replace_all(" \\| ", "/"),
label = sprintf("%s (k=%d, N=%s, items=%s)",
country, k, formatC(total_n, big.mark = ","), items_short),
label = factor(label, levels = rev(label))
)
ggplot(meta_plot, aes(r_mean, label)) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
geom_vline(xintercept = overall_summary$r, linetype = "dotted", color = "red") +
geom_errorbar(aes(xmin = r_lo, xmax = r_hi), width = 0.3, orientation = "y") +
geom_point(aes(size = sqrt(total_n))) +
scale_size_continuous(guide = "none") +
labs(
x = "r(knowledge score, skin color), REML meta within country (95% CI)",
y = NULL,
title = "LAPOP: knowledge score × skin color, by country",
subtitle = sprintf("Overall r = %.3f (95%% CI %.3f, %.3f), τ² = %.4f, I² = %.0f%%, k = %d cells, N = %s",
overall_summary$r, overall_summary$r_lo, overall_summary$r_hi,
overall_summary$tau2, overall_summary$I2,
overall_summary$k, formatC(overall_summary$total_n, big.mark = ","))
) +
theme_bw(base_size = 10)

Guyana ethnic table
d_etid <- read_spss(gm_file,
col_select = c("pais","year","wt","colorr","etid","gi1","gi4"))
guy <- d_etid %>%
filter(as.integer(zap_labels(pais)) == 24) %>%
mutate(
etid_n = as.integer(zap_labels(etid)),
colorr_clean = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
score = rowMeans(cbind(gi1c, gi4c), na.rm = TRUE),
score = if_else(is.nan(score), NA_real_, score),
wt = as.numeric(wt)
)
guyana_tbl <- guy %>%
group_by(etid_n) %>%
summarise(n = n(),
mean_colorr = mean(colorr_clean, na.rm = TRUE),
mean_score = weighted.mean(score, wt, na.rm = TRUE),
pct_gi1 = weighted.mean(gi1c, wt, na.rm = TRUE),
pct_gi4 = weighted.mean(gi4c, wt, na.rm = TRUE),
.groups = "drop") %>%
filter(!is.nan(mean_colorr), n >= 30) %>%
mutate(Group = case_when(
etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo / Mixed",
etid_n == 3 ~ "Indigenous (Amerindian)", etid_n == 4 ~ "Black (Afro-Guyanese)",
etid_n == 7 ~ "Other", etid_n == 2406 ~ "East Indian (Indo-Guyanese)",
etid_n == 2409 ~ "Chinese", TRUE ~ as.character(etid_n))) %>%
arrange(mean_colorr) %>%
transmute(Group, n,
`Mean colorr` = sprintf("%.2f", mean_colorr),
`Mean score` = sprintf("%.2f", mean_score),
`gi1 % correct` = sprintf("%.0f%%", 100*pct_gi1),
`gi4 % correct` = sprintf("%.0f%%", 100*pct_gi4))
guyana_tbl %>% gt() %>%
tab_header(title = "Guyana: skin color vs knowledge by self-id ethnic group")
| Guyana: skin color vs knowledge by self-id ethnic group |
| Group |
n |
Mean colorr |
Mean score |
gi1 % correct |
gi4 % correct |
| 2408 |
39 |
3.29 |
0.93 |
100% |
82% |
| Indigenous (Amerindian) |
983 |
3.96 |
0.82 |
90% |
75% |
| Chinese |
50 |
4.20 |
0.91 |
98% |
83% |
| Other |
40 |
5.00 |
0.84 |
100% |
55% |
| Mestizo / Mixed |
2994 |
5.25 |
0.89 |
98% |
79% |
| East Indian (Indo-Guyanese) |
3100 |
5.33 |
0.87 |
96% |
78% |
| NA |
36 |
6.00 |
0.80 |
86% |
60% |
| Black (Afro-Guyanese) |
3601 |
7.16 |
0.90 |
99% |
80% |
Belize ethnic table
extra <- read_spss(gm_file,
col_select = c("pais","q1","q2","ed","ur","idiomaq"))
bel_all <- d_etid %>%
filter(as.integer(zap_labels(pais)) == 26) %>%
bind_cols(extra %>% filter(as.integer(zap_labels(pais)) == 26) %>%
select(q1, q2, ed, ur, idiomaq)) %>%
mutate(
etid_n = as.integer(zap_labels(etid)),
colorr_clean = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
score = rowMeans(cbind(gi1c, gi4c), na.rm = TRUE),
score = if_else(is.nan(score), NA_real_, score),
lang = as_factor(idiomaq) %>% as.character(),
lang_group = case_when(
lang %in% c("Spanish", "Spanish (last language used)") ~ "Spanish",
lang %in% c("English", "English (last language used)") ~ "English",
TRUE ~ "Other/NA"),
wt = as.numeric(wt), ed = as.numeric(ed)
)
belize_tbl <- bel_all %>%
group_by(etid_n) %>%
summarise(
n = n(),
pct_spanish = mean(lang_group == "Spanish", na.rm = TRUE),
pct_english = mean(lang_group == "English", na.rm = TRUE),
mean_colorr = mean(colorr_clean, na.rm = TRUE),
mean_ed = weighted.mean(ed, wt, na.rm = TRUE),
mean_score = weighted.mean(score, wt, na.rm = TRUE),
pct_gi1 = weighted.mean(gi1c, wt, na.rm = TRUE),
pct_gi4 = weighted.mean(gi4c, wt, na.rm = TRUE),
.groups = "drop") %>%
filter(!is.nan(mean_colorr), n >= 20) %>%
mutate(Group = case_when(
etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black", etid_n == 5 ~ "Mulatto",
etid_n == 7 ~ "Other", etid_n == 2608 ~ "Creole", etid_n == 2609 ~ "East Indian",
etid_n == 2610 ~ "Garifuna", etid_n == 2611 ~ "Maya Ketchi",
etid_n == 2612 ~ "Maya Mopan", etid_n == 2613 ~ "Maya Yucatec",
etid_n == 2614 ~ "Mennonite", etid_n == 2615 ~ "Spanish",
TRUE ~ as.character(etid_n))) %>%
arrange(mean_colorr) %>%
filter(Group != "NA") %>%
transmute(Group, n,
`% Spanish` = sprintf("%.0f%%", 100*pct_spanish),
`% English` = sprintf("%.0f%%", 100*pct_english),
`Mean colorr` = sprintf("%.2f", mean_colorr),
`Mean ed (yrs)` = sprintf("%.1f", mean_ed),
`Mean score` = sprintf("%.2f", mean_score),
`gi1 % correct` = sprintf("%.0f%%", 100*pct_gi1),
`gi4 % correct` = sprintf("%.0f%%", 100*pct_gi4))
belize_tbl %>% gt() %>%
tab_header(title = "Belize: skin color, language, education and knowledge",
subtitle = "Survey delivered bilingually; idiomaq = language of interview")
| Belize: skin color, language, education and knowledge |
| Survey delivered bilingually; idiomaq = language of interview |
| Group |
n |
% Spanish |
% English |
Mean colorr |
Mean ed (yrs) |
Mean score |
gi1 % correct |
gi4 % correct |
| White |
107 |
69% |
31% |
3.61 |
7.2 |
0.78 |
75% |
80% |
| Spanish |
678 |
52% |
48% |
4.54 |
7.0 |
0.80 |
79% |
81% |
| Indigenous |
53 |
38% |
62% |
5.02 |
6.3 |
0.86 |
71% |
100% |
| Other |
104 |
39% |
61% |
5.07 |
10.0 |
0.94 |
92% |
94% |
| Mestizo |
3463 |
66% |
34% |
5.15 |
7.5 |
0.88 |
90% |
88% |
| Maya Yucatec |
77 |
64% |
36% |
5.43 |
7.1 |
0.89 |
88% |
88% |
| Mennonite |
35 |
66% |
34% |
5.56 |
8.1 |
0.89 |
80% |
96% |
| Maya Ketchi |
321 |
22% |
78% |
5.58 |
6.5 |
0.77 |
72% |
84% |
| Maya Mopan |
212 |
24% |
76% |
5.70 |
7.7 |
0.86 |
83% |
92% |
| East Indian |
126 |
21% |
79% |
7.07 |
8.3 |
0.88 |
84% |
90% |
| Creole |
1911 |
19% |
81% |
7.14 |
8.8 |
0.90 |
91% |
91% |
| Garifuna |
391 |
16% |
84% |
8.05 |
9.3 |
0.93 |
92% |
95% |
| Black |
110 |
59% |
41% |
8.46 |
9.8 |
0.83 |
87% |
81% |
bel_all <- bel_all %>%
mutate(
broad = case_when(
etid_n %in% c(1, 2615) ~ "White/Spanish", etid_n == 2 ~ "Mestizo",
etid_n %in% c(3, 2611, 2612, 2613) ~ "Indigenous/Maya",
etid_n %in% c(4, 2608) ~ "Black/Creole", etid_n == 2610 ~ "Garifuna",
etid_n == 2614 ~ "Mennonite", etid_n == 2609 ~ "East Indian",
etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other", TRUE ~ NA_character_),
age = as.numeric(q2),
sex = as_factor(q1) %>% as.character(),
lang2 = factor(lang_group, levels = c("English","Spanish","Other/NA"))
)
mdat <- bel_all %>%
filter(!is.na(score), !is.na(broad), !is.na(age), !is.na(sex),
!is.na(lang2), !is.na(ed)) %>%
mutate(broad = factor(broad, levels = c("Mestizo","White/Spanish","Indigenous/Maya",
"Black/Creole","Garifuna","Mennonite",
"East Indian","Mulatto","Other")))
fit <- lm(score ~ lang2 + broad + age + sex + ed, data = mdat, weights = wt)
broom::tidy(fit, conf.int = TRUE) %>%
mutate(across(where(is.numeric), \(x) round(x, 3)))
Honduras: item-level logistic regression by wave
hon_file <- file.path(proj_dir, "data/LAPOP/2004-2023 Honduras.sav")
hon_cols <- read_spss(hon_file, n_max = 0) %>% names()
hon_keep <- intersect(
c("pais","year","wt","colorr","etid","ur","ed","q1","q2",
"gi1","gi3","gi4","gi7","gix4"), hon_cols)
d_hon <- read_spss(hon_file, col_select = all_of(hon_keep)) %>%
mutate(
year = as.integer(zap_labels(year)),
colorr_n = if_else(colorr >= 1 & colorr <= 11, as.numeric(colorr), NA_real_),
wt = as.numeric(wt))
items_hon <- c("gi1","gi3","gi4","gi7","gix4")
hon_long <- d_hon %>%
filter(year %in% c(2010, 2012, 2014), !is.na(colorr_n)) %>%
mutate(
gi1c = if_else(gi1 == 1, 1, if_else(gi1 == 2, 0, NA_real_)),
gi3c = if_else(gi3 == 1, 1, if_else(gi3 == 2, 0, NA_real_)),
gi4c = if_else(gi4 == 1, 1, if_else(gi4 == 2, 0, NA_real_)),
gi7c = if_else(is.na(gi7), NA_real_, as.numeric(gi7 == 128)),
gix4c = if_else(gix4 == 1, 1, if_else(gix4 == 2, 0, NA_real_))
)
fit_grid <- expand.grid(year = c(2010,2012,2014), item = items_hon, stringsAsFactors = FALSE)
res_logit <- pmap_df(fit_grid, function(year, item) {
d <- hon_long %>% filter(year == !!year) %>%
transmute(y = .data[[paste0(item, "c")]], colorr_n, wt) %>%
filter(!is.na(y))
if (nrow(d) < 50 || length(unique(d$y)) < 2) return(tibble(year=year, item=item))
fit <- glm(y ~ colorr_n, data = d, family = binomial, weights = wt)
s <- summary(fit)
tibble(year=year, item=item, n=nrow(d),
pct_correct = round(100*weighted.mean(d$y, d$wt), 1),
beta = round(coef(fit)[2], 3),
SE = round(s$coefficients[2, 2], 3),
z = round(s$coefficients[2, 3], 2),
p = signif(s$coefficients[2, 4], 2),
OR = round(exp(coef(fit)[2]), 3))
}) %>% filter(!is.na(beta)) %>% arrange(item, year)
res_logit %>% gt() %>%
tab_header(title = "Honduras: logistic regression of (item correct) on colorr, per wave",
subtitle = "Positive β = darker → more likely correct")
| Honduras: logistic regression of (item correct) on colorr, per wave |
| Positive β = darker → more likely correct |
| year |
item |
n |
pct_correct |
beta |
SE |
z |
p |
OR |
| 2010 |
gi1 |
1327 |
91.3 |
-0.012 |
0.053 |
-0.24 |
8.1e-01 |
0.988 |
| 2012 |
gi1 |
1468 |
82.6 |
-0.141 |
0.034 |
-4.10 |
4.1e-05 |
0.869 |
| 2014 |
gi1 |
1255 |
90.7 |
0.261 |
0.057 |
4.60 |
4.3e-06 |
1.298 |
| 2010 |
gi3 |
1478 |
93.0 |
-0.129 |
0.052 |
-2.51 |
1.2e-02 |
0.879 |
| 2010 |
gi4 |
1547 |
97.9 |
0.148 |
0.113 |
1.32 |
1.9e-01 |
1.160 |
| 2012 |
gi4 |
1644 |
94.8 |
-0.097 |
0.055 |
-1.77 |
7.7e-02 |
0.908 |
| 2014 |
gi4 |
1498 |
95.3 |
0.367 |
0.077 |
4.79 |
1.7e-06 |
1.443 |
| 2014 |
gi7 |
748 |
38.6 |
0.094 |
0.041 |
2.30 |
2.1e-02 |
1.099 |
| 2014 |
gix4 |
698 |
56.3 |
0.207 |
0.042 |
4.94 |
7.6e-07 |
1.230 |
Honduras: skin color by ethnicity × wave
hon_colorr <- d_hon %>%
filter(year %in% c(2010,2012,2014), !is.na(colorr_n)) %>%
mutate(etid_n = as.integer(zap_labels(etid)),
broad = case_when(
etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
TRUE ~ NA_character_)) %>%
filter(!is.na(broad)) %>%
group_by(broad, year) %>%
summarise(n = n(), mean_colorr = mean(colorr_n),
sd_colorr = sd(colorr_n), .groups = "drop") %>%
pivot_wider(names_from = year,
values_from = c(n, mean_colorr, sd_colorr)) %>%
arrange(mean_colorr_2010) %>%
transmute(Group = broad,
`n10` = n_2010, `n12` = n_2012, `n14` = n_2014,
`Mean 2010` = sprintf("%.2f", mean_colorr_2010),
`Mean 2012` = sprintf("%.2f", mean_colorr_2012),
`Mean 2014` = sprintf("%.2f", mean_colorr_2014),
`Δ 2014−2012` = sprintf("%+.2f",
as.numeric(mean_colorr_2014) - as.numeric(mean_colorr_2012)))
hon_colorr %>% gt() %>%
tab_header(title = "Honduras: mean colorr by self-id ethnicity × wave",
subtitle = "Ordering preserved; spread compresses in 2014 (Black − White: 6.09 → 4.73)")
| Honduras: mean colorr by self-id ethnicity × wave |
| Ordering preserved; spread compresses in 2014 (Black − White: 6.09 → 4.73) |
| Group |
n10 |
n12 |
n14 |
Mean 2010 |
Mean 2012 |
Mean 2014 |
Δ 2014−2012 |
| White |
450 |
612 |
354 |
3.20 |
3.04 |
3.59 |
+0.55 |
| Mestizo |
1024 |
957 |
967 |
4.88 |
5.51 |
5.16 |
-0.35 |
| Indigenous |
21 |
71 |
99 |
6.24 |
5.86 |
5.45 |
-0.40 |
| Mulatto |
11 |
12 |
46 |
7.64 |
7.33 |
6.54 |
-0.79 |
| Black |
88 |
63 |
44 |
8.77 |
9.13 |
8.32 |
-0.81 |
| Other |
NA |
2 |
34 |
NA |
5.50 |
6.62 |
+1.12 |
Honduras: education by ethnicity × wave
hon_ed <- d_hon %>%
filter(year %in% c(2010,2012,2014), !is.na(colorr_n)) %>%
mutate(etid_n = as.integer(zap_labels(etid)),
broad = case_when(
etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
TRUE ~ NA_character_),
ed_n = as.numeric(ed)) %>%
filter(!is.na(broad)) %>%
group_by(broad, year) %>%
summarise(n = n(),
mean_ed = round(weighted.mean(ed_n, wt, na.rm = TRUE), 2),
.groups = "drop") %>%
pivot_wider(names_from = year, values_from = c(n, mean_ed)) %>%
arrange(mean_ed_2010) %>%
transmute(Group = broad, `n10` = n_2010, `n12` = n_2012, `n14` = n_2014,
`Ed 2010` = mean_ed_2010, `Ed 2012` = mean_ed_2012, `Ed 2014` = mean_ed_2014,
`Δ ed 14−12` = sprintf("%+.2f", mean_ed_2014 - mean_ed_2012))
hon_ed %>% gt() %>%
tab_header(title = "Honduras: mean years of education by ethnicity × wave",
subtitle = "Indigenous & Mestizo gain ~1.3 yrs in 2014; Black & Mulatto lose ~1 yr")
| Honduras: mean years of education by ethnicity × wave |
| Indigenous & Mestizo gain ~1.3 yrs in 2014; Black & Mulatto lose ~1 yr |
| Group |
n10 |
n12 |
n14 |
Ed 2010 |
Ed 2012 |
Ed 2014 |
Δ ed 14−12 |
| Indigenous |
21 |
71 |
99 |
5.81 |
4.74 |
6.23 |
+1.49 |
| Mestizo |
1024 |
957 |
967 |
6.84 |
6.28 |
7.53 |
+1.25 |
| Black |
88 |
63 |
44 |
7.38 |
7.87 |
6.75 |
-1.12 |
| White |
450 |
612 |
354 |
7.85 |
7.73 |
7.50 |
-0.23 |
| Mulatto |
11 |
12 |
46 |
8.73 |
8.66 |
7.63 |
-1.03 |
| Other |
NA |
2 |
34 |
NA |
8.00 |
7.12 |
-0.88 |
Honduras: self-id ethnic shares over time
d_hon %>%
mutate(etid_n = as.integer(zap_labels(etid)),
broad = case_when(
etid_n == 1 ~ "White", etid_n == 2 ~ "Mestizo",
etid_n == 3 ~ "Indigenous", etid_n == 4 ~ "Black",
etid_n == 5 ~ "Mulatto", etid_n == 7 ~ "Other",
TRUE ~ NA_character_)) %>%
filter(!is.na(broad)) %>%
count(year, broad) %>%
group_by(year) %>%
mutate(pct = round(100*n/sum(n), 1)) %>% ungroup() %>%
select(year, broad, pct) %>%
pivot_wider(names_from = broad, values_from = pct, values_fill = 0) %>%
gt() %>%
tab_header(title = "Honduras: self-id ethnic share (%) by wave")
| Honduras: self-id ethnic share (%) by wave |
| year |
Black |
Indigenous |
Mestizo |
Other |
White |
Mulatto |
| 2004 |
4.8 |
7.9 |
57.8 |
0.1 |
29.4 |
0.0 |
| 2006 |
1.9 |
3.7 |
61.9 |
0.1 |
31.4 |
1.0 |
| 2008 |
2.4 |
4.4 |
59.9 |
0.5 |
31.0 |
1.9 |
| 2010 |
5.5 |
1.3 |
64.2 |
0.0 |
28.2 |
0.7 |
| 2012 |
3.7 |
4.1 |
55.7 |
0.1 |
35.7 |
0.7 |
| 2014 |
2.8 |
6.4 |
62.7 |
2.2 |
22.9 |
3.0 |
| 2016 |
4.1 |
3.8 |
67.5 |
0.5 |
22.0 |
2.1 |
| 2018 |
5.2 |
3.2 |
59.5 |
1.8 |
27.0 |
3.3 |
| 2021 |
2.4 |
4.2 |
57.5 |
3.3 |
29.0 |
3.7 |
| 2023 |
9.5 |
5.4 |
42.9 |
7.1 |
29.2 |
5.8 |
Session info
sessionInfo()
## R version 4.5.3 (2026-03-11)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0 LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_DK.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_DK.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Brussels
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gt_1.3.0 stringr_1.6.0 metafor_4.8-0
## [4] numDeriv_2016.8-1.1 metadat_1.4-0 Matrix_1.7-4
## [7] ggplot2_4.0.1.9000 Hmisc_5.2-4 weights_1.1.2
## [10] purrr_1.2.0 tidyr_1.3.1 dplyr_1.1.4
## [13] haven_2.5.5
##
## loaded via a namespace (and not attached):
## [1] mathjaxr_2.0-0 RColorBrewer_1.1-3 sys_3.4.3
## [4] rstudioapi_0.17.1 jsonlite_2.0.0 shape_1.4.6.1
## [7] magrittr_2.0.4 jomo_2.7-6 farver_2.1.2
## [10] nloptr_2.2.1 rmarkdown_2.30 fs_1.6.6
## [13] ragg_1.5.0 vctrs_0.6.5 memoise_2.0.1
## [16] minqa_1.2.8 ellmer_0.4.0 askpass_1.2.1
## [19] base64enc_0.1-3 gh_1.5.0 htmltools_0.5.8.1
## [22] forcats_1.0.1 usethis_3.2.1 broom_1.0.11
## [25] Formula_1.2-5 mitml_0.4-5 sass_0.4.10
## [28] bslib_0.9.0 htmlwidgets_1.6.4 httr2_1.2.1
## [31] cachem_1.1.0 lifecycle_1.0.4 iterators_1.0.14
## [34] pkgconfig_2.0.3 webshot2_0.1.2 R6_2.6.1
## [37] fastmap_1.2.0 rbibutils_2.3 digest_0.6.39
## [40] colorspace_2.1-2 ps_1.9.1 pkgload_1.4.1
## [43] textshaping_1.0.4 labeling_0.4.3 fansi_1.0.6
## [46] gdata_3.0.1 mgcv_1.9-3 compiler_4.5.3
## [49] remotes_2.5.0 withr_3.0.2 htmlTable_2.4.3
## [52] S7_0.2.1 backports_1.5.0 DBI_1.2.3
## [55] pkgbuild_1.4.8 duckdb_1.4.4 pan_1.9
## [58] MASS_7.3-65 mcptools_0.2.0 openssl_2.3.4
## [61] rappdirs_0.3.3 sessioninfo_1.2.3 gtools_3.9.5
## [64] tools_4.5.3 chromote_0.5.1 foreign_0.8-91
## [67] nnet_7.3-20 glue_1.8.0 nlme_3.1-168
## [70] promises_1.3.3 grid_4.5.3 checkmate_2.3.3
## [73] cluster_2.1.8.2 generics_0.1.4 gtable_0.3.6
## [76] tzdb_0.5.0 websocket_1.4.4 data.table_1.17.8
## [79] hms_1.1.3 utf8_1.2.6 xml2_1.4.0
## [82] foreach_1.5.2 pillar_1.11.1 later_1.4.4
## [85] splines_4.5.3 lattice_0.22-7 survival_3.8-6
## [88] tidyselect_1.2.1 coro_1.1.0 knitr_1.50
## [91] reformulas_0.4.1 gitcreds_0.1.2 gridExtra_2.3
## [94] xfun_0.57 devtools_2.4.6 credentials_2.0.3
## [97] brio_1.1.5 stringi_1.8.7 yaml_2.3.10
## [100] boot_1.3-32
## [ reached 'max' / getOption("max.print") -- omitted 21 entries ]