library(tidyverse)
library(haven)
library(labelled)
library(survey)
library(knitr)
library(kableExtra)

# ── Focal countries ──────────────────────────────────────────────────────────
# One row per focal country: the integer country code matched against the
# survey files, paired with the display name used in tables and plots.
focal_lookup <- tibble(
  country      = c(356L, 156L, 840L, 276L, 710L, 458L),
  country_name = c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
)
# Plain vectors derived from the lookup so the three objects cannot drift apart.
focal_codes  <- focal_lookup$country
focal_labels <- focal_lookup$country_name

# ── W3 (1996) ─────────────────────────────────────────────────────────────────
# Absolute path to the W3 file (.rds, a single serialized object).
datafile_w3 <- "D:/Populism and Democrary/World value survey/WVS 1996/F00008205-WV3_Data_R_v20180912.rds"
# Fail immediately if the file is not at the expected location.
stopifnot(file.exists(datafile_w3))
wvs3 <- readRDS(datafile_w3)

# ── W5 (2006) ─────────────────────────────────────────────────────────────────
# Same pattern as W3: check the path, then read the single .rds object.
datafile_w5 <- "D:/Populism and Democrary/World value survey/WVS 2006/F00007944-WV5_Data_R_v20180912.rds"
stopifnot(file.exists(datafile_w5))
wvs5 <- readRDS(datafile_w5)

# ── W6 (2012) ─────────────────────────────────────────────────────────────────
# This wave ships as .rdata: load() restores the saved object(s) into the
# current environment under the names they were saved with. The assignment
# below relies on the file containing an object named WV6_Data_R_v20201117.
datafile_w6 <- "D:/Populism and Democrary/World value survey/WVS 2012/WV6_Data_R_v20201117.rdata"
stopifnot(file.exists(datafile_w6))
load(datafile_w6)
wvs6 <- WV6_Data_R_v20201117

# ── W7 (2022) ─────────────────────────────────────────────────────────────────
# Also .rdata; the restored object's name contains a hyphen, so it must be
# referenced with backticks.
# NOTE(review): the folder is named "WVS 2017" while this script labels the
# wave as 2022 — confirm which year label is intended for W7.
datafile_w7 <- "D:/Populism and Democrary/World value survey/WVS 2017/WVS_Cross-National_Wave_7_Rdata_v6_0.rdata"
stopifnot(file.exists(datafile_w7))
load(datafile_w7)
wvs7 <- `WVS_Cross-National_Wave_7_v6_0`

# ── Sanity check ─────────────────────────────────────────────────────────────
# One row per wave with the dimensions of the raw data frame, so that a
# truncated or wrong file is visible at a glance.
dims <- tibble(
  wave  = c("W3", "W5", "W6", "W7"),
  year  = c(1996, 2006, 2012, 2022),
  nrows = vapply(list(wvs3, wvs5, wvs6, wvs7), nrow, integer(1)),
  ncols = vapply(list(wvs3, wvs5, wvs6, wvs7), ncol, integer(1))
)

dims |>
  kable(caption = "Sanity check: datasets loaded (rows × cols)") |>
  kable_styling(full_width = FALSE)
Sanity check: datasets loaded (rows × cols)
wave year nrows ncols
W3 1996 77818 332
W5 2006 83975 414
W6 2012 89565 442
W7 2022 97220 613

Cross-country comparison

India, China, USA, Germany, South Africa (2022 missing), Malaysia (1996 missing)

# Focal-country definitions for this section: integer country codes paired
# with display names, plus the two plain vectors derived from that table.
focal_lookup <- tibble(
  country      = c(356L, 156L, 840L, 276L, 710L, 458L),
  country_name = c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
)
focal_codes  <- focal_lookup$country
focal_labels <- focal_lookup$country_name

#' Build the per-wave working file restricted to the focal countries
#'
#' Adds a plain integer `country` code and a numeric weight `wt` (value labels
#' stripped from both), keeps only rows whose country is listed in `lookup`
#' and whose weight is not missing, then attaches the display name.
#'
#' @param df Raw wave data frame.
#' @param country_var Name (string) of the country-code column in `df`.
#' @param weight_var Name (string) of the weight column in `df`.
#' @param lookup Data frame with an integer `country` column and a
#'   `country_name` column. Defaults to the script-level `focal_lookup`, so
#'   existing three-argument calls behave as before. The country filter is
#'   derived from this table, so filter and join can no longer disagree.
#' @return `df` with `country`, `wt` and the columns of `lookup` added,
#'   restricted to the countries in `lookup`.
make_workfile <- function(df, country_var, weight_var, lookup = focal_lookup) {
  # Resolve the codes once, outside the data mask, so a same-named column in
  # `df` cannot shadow them.
  keep_codes <- lookup$country

  df |>
    mutate(
      country = as.integer(zap_labels(.data[[country_var]])),
      wt      = as.numeric(zap_labels(.data[[weight_var]]))
    ) |>
    filter(country %in% .env$keep_codes, !is.na(wt)) |>
    left_join(lookup, by = "country")
}

# Country-code and weight columns are named differently in each wave.
w3 <- make_workfile(wvs3, "V2",       "V236")
w5 <- make_workfile(wvs5, "V2",       "V259")
w6 <- make_workfile(wvs6, "V2",       "V258")
w7 <- make_workfile(wvs7, "B_COUNTRY", "W_WEIGHT")

# Coverage check: for every focal country, flag whether it has at least one
# row in each wave's working file.
coverage <- mutate(
  focal_lookup,
  W3_1996 = is.element(country, w3$country),
  W5_2006 = is.element(country, w5$country),
  W6_2012 = is.element(country, w6$country),
  W7_2022 = is.element(country, w7$country)
)

coverage |>
  kable(caption = "Focal country coverage across waves") |>
  kable_styling(full_width = FALSE)
Focal country coverage across waves
country country_name W3_1996 W5_2006 W6_2012 W7_2022
356 India TRUE TRUE TRUE TRUE
156 China TRUE TRUE TRUE TRUE
840 USA TRUE TRUE TRUE TRUE
276 Germany TRUE TRUE TRUE TRUE
710 South Africa TRUE TRUE TRUE FALSE
458 Malaysia FALSE TRUE TRUE TRUE
# ── 1. Raw N per country per wave ────────────────────────────────────────────
# Unweighted row counts per focal country. Each wave is counted separately,
# tagged with its wave/year, stacked, and then spread so that every wave
# becomes one column named "<Wave>_<Year>".
n_table <- pmap(
  list(
    df   = list(w3, w5, w6, w7),
    wave = c("W3", "W5", "W6", "W7"),
    year = c(1996, 2006, 2012, 2022)
  ),
  function(df, wave, year) {
    df |>
      count(country_name) |>
      mutate(Wave = wave, Year = year)
  }
) |>
  bind_rows() |>
  pivot_wider(names_from = c(Wave, Year), values_from = n) |>
  rename(Country = country_name)

n_table |>
  kable(format = "html", caption = "Raw N per country per wave (unweighted)") |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
Raw N per country per wave (unweighted)
Country W3_1996 W5_2006 W6_2012 W7_2022
China 1500 1991 2300 3036
Germany 2026 2064 2046 1528
India 2040 2001 4078 1692
South Africa 2935 2988 3531 NA
USA 1542 1249 2232 2596
Malaysia NA 1201 1300 1313

Demographics across countries

#' Add a cleaned numeric age and a three-level age group
#'
#' @param df Working data frame for one wave.
#' @param age_var Name (string) of the age column in `df`.
#' @return `df` plus `age_raw` (numeric, negative codes set to NA) and
#'   `age_cat` (factor: Young / Middle / Elderly). Ages outside the three
#'   bands, including anything under 18, become NA in `age_cat`.
recode_age <- function(df, age_var) {
  # Step 1: strip value labels and blank out negative codes.
  with_age <- mutate(
    df,
    age_raw = as.numeric(zap_labels(.data[[age_var]])),
    age_raw = replace(age_raw, which(age_raw < 0), NA_real_)
  )

  # Step 2: band the cleaned age and fix the level order for plotting.
  mutate(
    with_age,
    age_cat = factor(
      case_when(
        between(age_raw, 18, 34) ~ "Young (18–34)",
        between(age_raw, 35, 54) ~ "Middle (35–54)",
        age_raw >= 55            ~ "Elderly (55+)",
        TRUE                     ~ NA_character_
      ),
      levels = c("Young (18–34)", "Middle (35–54)", "Elderly (55+)")
    )
  )
}

#' Add a cleaned numeric education code and a three-level education group
#'
#' @param df Working data frame for one wave.
#' @param educ_var Name (string) of the education column in `df`.
#' @param wave Wave tag (single string). "W7" selects the 0–8 banding; any
#'   other value selects the 1–9 banding.
#' @return `df` plus `educ_raw` (numeric, negative codes set to NA) and
#'   `educ_cat` (factor: Below Primary / Secondary / Higher Education).
#'   Codes outside the bands become NA in `educ_cat`.
recode_educ <- function(df, educ_var, wave) {
  # Pick the code bands once, up front, instead of branching inside mutate().
  bands <- if (wave == "W7") {
    list(low = 0:2, mid = 3:5, high = 6:8)
  } else {
    list(low = 1:3, mid = 4:6, high = 7:9)
  }

  cleaned <- mutate(
    df,
    educ_raw = as.numeric(zap_labels(.data[[educ_var]])),
    educ_raw = replace(educ_raw, which(educ_raw < 0), NA_real_)
  )

  mutate(
    cleaned,
    educ_cat = factor(
      case_when(
        educ_raw %in% .env$bands$low  ~ "Below Primary",
        educ_raw %in% .env$bands$mid  ~ "Secondary",
        educ_raw %in% .env$bands$high ~ "Higher Education",
        TRUE                          ~ NA_character_
      ),
      levels = c("Below Primary", "Secondary", "Higher Education")
    )
  )
}

# Apply the age recode first, then the education recode, to every wave.
# Column names differ per wave. Author's note: in W5 the age variable is
# V237, not V242 — V242 is the interview year in W5.
w3 <- recode_educ(recode_age(w3, "V216"), "V217", "W3")
w5 <- recode_educ(recode_age(w5, "V237"), "V238", "W5")
w6 <- recode_educ(recode_age(w6, "V242"), "V248", "W6")
w7 <- recode_educ(recode_age(w7, "Q262"), "Q275", "W7")

# ── Verification: % computed WITHIN each variable separately ─────────────────
#' Tabulate the age and education categories of one wave
#'
#' @param df Working data frame containing `age_cat` and `educ_cat`.
#' @param wave,year Tags appended to every output row.
#' @return Long tibble with `category`, `n`, `var` ("Age" or "Educ"), `pct`
#'   (share within that variable, NA category included, one decimal),
#'   `wave` and `year`.
verify_cats <- function(df, wave, year) {
  # Unweighted frequency table for a single categorical column.
  share_of <- function(col, tag) {
    df |>
      count(category = .data[[col]]) |>
      mutate(var = tag, pct = round(n / sum(n) * 100, 1))
  }

  list(share_of("age_cat", "Age"), share_of("educ_cat", "Educ")) |>
    bind_rows() |>
    mutate(wave = wave, year = year)
}

# Stack the per-wave category checks into one long table.
verify_all <- list(
  verify_cats(w3, "W3", 1996),
  verify_cats(w5, "W5", 2006),
  verify_cats(w6, "W6", 2012),
  verify_cats(w7, "W7", 2022)
) |>
  bind_rows()

# Presentation: friendlier headers, fixed column order, and merged cells for
# the repeated wave / year / variable values.
verify_all |>
  rename(
    Wave = wave, Year = year, Variable = var,
    Category = category, N = n, `%` = pct
  ) |>
  select(Wave, Year, Variable, Category, N, `%`) |>
  kable(format = "html", caption = "Age and education category distributions across waves") |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) |>
  collapse_rows(columns = 1:3, valign = "top")
Age and education category distributions across waves
Wave Year Variable Category N %
W3 1996 Age Young (18–34) 3978 39.6
Middle (35–54) 3838 38.2
Elderly (55+) 2037 20.3
NA 190 1.9
Educ Below Primary 3154 31.4
Secondary 3378 33.6
Higher Education 3460 34.5
NA 51 0.5
W5 2006 Age Young (18–34) 3730 32.5
Middle (35–54) 4588 39.9
Elderly (55+) 2872 25.0
NA 304 2.6
Educ Below Primary 3735 32.5
Secondary 3884 33.8
Higher Education 3825 33.3
NA 50 0.4
W6 2012 Age Young (18–34) 5502 35.5
Middle (35–54) 6068 39.2
Elderly (55+) 3800 24.5
NA 117 0.8
Educ Below Primary 3884 25.1
Secondary 5070 32.7
Higher Education 6479 41.8
NA 54 0.3
W7 2022 Age Young (18–34) 3349 32.9
Middle (35–54) 3819 37.6
Elderly (55+) 2676 26.3
NA 321 3.2
Educ Below Primary 3090 30.4
Secondary 4266 42.0
Higher Education 2739 26.9
NA 70 0.7
library(ggplot2)
library(scales)

# ── Build long demographic table ─────────────────────────────────────────────
#' Unweighted category shares within each country for one wave
#'
#' @param df Working data frame for one wave.
#' @param wave,year Tags appended to every output row.
#' @param cat_var Name (string) of the categorical column to tabulate.
#' @param label Value stored in the `variable` column (e.g. "Age").
#' @return Tibble with `country_name`, `category`, `n`, `pct` (percent of the
#'   country's non-missing rows), `wave`, `year`, `variable`.
build_demo_long <- function(df, wave, year, cat_var, label) {
  counts <- df |>
    filter(!is.na(.data[[cat_var]])) |>
    count(country_name, category = .data[[cat_var]])

  counts |>
    # Per-country total of the category counts, used as the denominator.
    add_count(country_name, wt = n, name = "country_total") |>
    mutate(pct = n / country_total * 100) |>
    select(-country_total) |>
    mutate(wave = wave, year = year, variable = label)
}

# Age shares for all four waves, followed by education shares for all four
# waves, stacked into one long table. `wave_label` is a two-line axis label
# ("W3" over "(1996)") with levels fixed in chronological order.
demo_long <- map2(
  c("age_cat", "educ_cat"),
  c("Age", "Education"),
  function(cat_var, label) {
    bind_rows(
      build_demo_long(w3, "W3", 1996, cat_var, label),
      build_demo_long(w5, "W5", 2006, cat_var, label),
      build_demo_long(w6, "W6", 2012, cat_var, label),
      build_demo_long(w7, "W7", 2022, cat_var, label)
    )
  }
) |>
  bind_rows() |>
  mutate(
    wave_label = factor(
      paste0(wave, "\n(", year, ")"),
      levels = c("W3\n(1996)", "W5\n(2006)", "W6\n(2012)", "W7\n(2022)")
    )
  )

# ── Colour palettes ───────────────────────────────────────────────────────────
# Named so that each fill colour is matched to its category by name.
age_colors <- c(
  "Young (18–34)"  = "#4E79A7",
  "Middle (35–54)" = "#F28E2B",
  "Elderly (55+)"  = "#E15759"
)

educ_colors <- c(
  "Below Primary"    = "#FABFD2",
  "Secondary"        = "#8CD17D",
  "Higher Education" = "#499894"
)

# ── Shared recipe for the stacked-share charts ───────────────────────────────
# One stacked bar per wave, one facet per country. Segments under 8% are left
# unlabelled (their label text is the empty string).
stacked_share_plot <- function(data, palette, title) {
  ggplot(data, aes(x = wave_label, y = pct, fill = category)) +
    geom_col(position = "stack", width = 0.7) +
    geom_text(
      aes(label = ifelse(pct >= 8, paste0(round(pct, 0), "%"), "")),
      position = position_stack(vjust = 0.5),
      size = 3, colour = "white", fontface = "bold"
    ) +
    facet_wrap(~country_name, nrow = 1) +
    scale_fill_manual(values = palette, name = NULL) +
    scale_y_continuous(labels = label_percent(scale = 1)) +
    labs(
      title    = title,
      subtitle = "World Values Survey — W3 (1996) to W7 (2022)",
      x = NULL, y = "% of respondents"
    ) +
    theme_minimal(base_size = 11) +
    theme(
      legend.position    = "bottom",
      strip.text         = element_text(face = "bold", size = 10),
      panel.grid.major.x = element_blank(),
      plot.title         = element_text(face = "bold", size = 13),
      plot.subtitle      = element_text(colour = "grey40", size = 10)
    )
}

# ── Plot 1: Age distribution ──────────────────────────────────────────────────
p_age <- demo_long |>
  filter(variable == "Age") |>
  stacked_share_plot(age_colors, "Age Distribution by Country and Wave")

# ── Plot 2: Education distribution ───────────────────────────────────────────
p_educ <- demo_long |>
  filter(variable == "Education") |>
  stacked_share_plot(educ_colors, "Education Distribution by Country and Wave")

# ── Recode sex variable ───────────────────────────────────────────────────────
# Coding is 1 = Male, 2 = Female in every wave; only the column name differs:
#   W3: V214 | W5: V235 | W6: V240 | W7: Q260
# In W6 the sex item is V240 (it sits next to age V242 and education V248,
# which this script already uses). V235 in W6 is a different 1/2 item
# ("chief wage earner" in the W6 questionnaire); using it here produced
# implausible gender splits for W6.

#' Add a labelled sex factor
#'
#' @param df Working data frame for one wave.
#' @param sex_var Name (string) of the sex column in `df`.
#' @return `df` plus `sex_raw` (numeric, labels stripped) and `sex_cat`
#'   (factor with levels Male, Female). Any code other than 1 or 2,
#'   including negative missing codes, becomes NA in `sex_cat`.
recode_sex <- function(df, sex_var) {
  df |>
    mutate(
      sex_raw = as.numeric(zap_labels(.data[[sex_var]])),
      sex_cat = case_when(
        sex_raw == 1 ~ "Male",
        sex_raw == 2 ~ "Female",
        TRUE         ~ NA_character_
      ),
      sex_cat = factor(sex_cat, levels = c("Male", "Female"))
    )
}

w3 <- w3 |> recode_sex("V214")
w5 <- w5 |> recode_sex("V235")
w6 <- w6 |> recode_sex("V240")  # was V235, which is not the sex item in W6
w7 <- w7 |> recode_sex("Q260")

# ── Build long table ──────────────────────────────────────────────────────────
# Male/female shares per country for each wave, stacked, with the same
# two-line chronological wave label used by the other demographic charts.
gender_long <- list(
  build_demo_long(w3, "W3", 1996, "sex_cat", "Gender"),
  build_demo_long(w5, "W5", 2006, "sex_cat", "Gender"),
  build_demo_long(w6, "W6", 2012, "sex_cat", "Gender"),
  build_demo_long(w7, "W7", 2022, "sex_cat", "Gender")
) |>
  bind_rows() |>
  mutate(
    wave_label = factor(
      paste0(wave, "\n(", year, ")"),
      levels = c("W3\n(1996)", "W5\n(2006)", "W6\n(2012)", "W7\n(2022)")
    )
  )

# ── Plot ──────────────────────────────────────────────────────────────────────
gender_colors <- c("Male" = "#4E79A7", "Female" = "#E15759")

# Stacked male/female share per wave, one facet per country. Every segment is
# labelled with its rounded percentage.
p_gender <- ggplot(gender_long, aes(x = wave_label, y = pct, fill = category)) +
  geom_col(width = 0.7, position = "stack") +
  geom_text(
    aes(label = paste0(round(pct, 0), "%")),
    colour = "white", fontface = "bold", size = 3,
    position = position_stack(vjust = 0.5)
  ) +
  facet_wrap(~country_name, nrow = 1) +
  scale_y_continuous(labels = label_percent(scale = 1)) +
  scale_fill_manual(name = NULL, values = gender_colors) +
  labs(
    x = NULL,
    y = "% of respondents",
    title    = "Gender Distribution by Country and Wave",
    subtitle = "World Values Survey — W3 (1996) to W7 (2022)"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    plot.title         = element_text(face = "bold", size = 13),
    plot.subtitle      = element_text(colour = "grey40", size = 10),
    strip.text         = element_text(face = "bold", size = 10),
    panel.grid.major.x = element_blank(),
    legend.position    = "bottom"
  )

# Render the three demographic charts in report order.
print(p_gender)

print(p_age)

print(p_educ)

# Raw unrecoded sex variable check per country per wave
#' Tabulate the raw codes of a sex column within each country
#'
#' @param df Working data frame for one wave.
#' @param wave,year Tags appended to every output row.
#' @param sex_var Name (string) of the column to tabulate.
#' @return Tibble with `country_name`, `sex_raw`, `n`, `pct` (share within
#'   country, one decimal; NA and negative codes are kept as their own
#'   rows), `wave`, `year`.
check_sex <- function(df, wave, year, sex_var) {
  df |>
    mutate(sex_raw = as.numeric(zap_labels(.data[[sex_var]]))) |>
    count(country_name, sex_raw) |>
    group_by(country_name) |>
    mutate(pct = round(n / sum(n) * 100, 1)) |>
    ungroup() |>
    mutate(wave = wave, year = year)
}

# W6 must be checked on V240: that is the sex item in W6. V235 in W6 is a
# different 1/2 item, which is why the earlier W6 rows for Germany and India
# looked implausible.
bind_rows(
  check_sex(w3, "W3", 1996, "V214"),
  check_sex(w5, "W5", 2006, "V235"),
  check_sex(w6, "W6", 2012, "V240"),
  check_sex(w7, "W7", 2022, "Q260")
) |>
  filter(country_name %in% c("Germany", "India")) |>  # focus on odd ones
  arrange(wave, country_name, sex_raw) |>
  kable(format = "html", caption = "Raw sex variable counts — Germany & India") |>
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed"))
Raw sex variable counts — Germany & India
country_name sex_raw n pct wave year
Germany 1 928 45.8 W3 1996
Germany 2 1098 54.2 W3 1996
India 1 1120 54.9 W3 1996
India 2 920 45.1 W3 1996
Germany 1 911 44.1 W5 2006
Germany 2 1153 55.9 W5 2006
India -2 3 0.1 W5 2006
India 1 1137 56.8 W5 2006
India 2 861 43.0 W5 2006
Germany 1 1297 63.4 W6 2012
Germany 2 710 34.7 W6 2012
Germany NA 39 1.9 W6 2012
India 1 1590 39.0 W6 2012
India 2 2488 61.0 W6 2012
Germany 1 743 48.6 W7 2022
Germany 2 785 51.4 W7 2022
India 1 958 56.6 W7 2022
India 2 734 43.4 W7 2022

Economic values

Scale reference (1–10 scale)

  • Income equality — 1 = Incomes should be more equal; 10 = We need larger income differences as incentives
  • Govt vs private ownership — 1 = Private ownership of business should be increased; 10 = Government ownership should be increased
  • State vs individual responsibility — 1 = The state should take more responsibility; 10 = Individuals should take more responsibility
  • Competition good/harmful — 1 = Competition is good, it stimulates people to work hard; 10 = Competition is harmful, it brings out the worst in people
  • Hard work vs luck — 1 = Hard work brings a better life; 10 = Hard work doesn’t bring success — it’s more a matter of luck and connections
  • Wealth accumulation (W3/W5/W6 only) — 1 = Wealth grows so there’s enough for everyone; 10 = Accumulation of wealth harms poor countries
# ── Variable mapping ──────────────────────────────────────────────────────────
# For every wave: item label -> column name. All items are 10-point scales
# and negative codes mean missing.
# W3: V125–V130 | W5: V116–V121 | W6: V96–V101 | W7: Q106–Q110
# The columns are consecutive and always in the same item order, so each
# wave is built by pairing a run of column names with the shared labels.
econ_vars <- local({
  items <- c(
    "Income equality",
    "Govt vs private ownership",
    "State vs individual resp.",
    "Competition good/harmful",
    "Hard work vs luck",
    "Wealth accumulation"          # W3/W5/W6 only
  )

  list(
    W3 = setNames(paste0("V", 125:130), items),
    W5 = setNames(paste0("V", 116:121), items),
    W6 = setNames(paste0("V", 96:101), items),
    W7 = setNames(paste0("Q", 106:110), items[1:5])
  )
})
# ── Clean helper: 10-pt scale, negatives = NA ────────────────────────────────
#' Strip value labels, coerce to numeric and blank out negative codes
#'
#' @param x A survey column (possibly labelled).
#' @return Vector of the same length with negative values replaced by NA.
#'   Coercion warnings are suppressed.
clean_10pt <- function(x) {
  suppressWarnings({
    scores <- as.numeric(haven::zap_labels(x))
  })
  is_missing_code <- scores < 0
  ifelse(is_missing_code, NA_real_, scores)
}

# ── Compute weighted mean per country per wave ────────────────────────────────
#' Weighted country means for a set of 10-point items in one wave
#'
#' @param df Working data frame for one wave (needs `wt` and `country_name`).
#' @param wave,year Tags appended to every output row.
#' @param vars Named character vector: item label -> column name in `df`.
#' @return Tibble with one row per country and item: `country`, `question`,
#'   `mean` (rounded to 2 decimals), `wave`, `year`.
compute_econ <- function(df, wave, year, vars) {
  # Handles a single item; `v` is the column name and `label` its display name.
  summarise_item <- function(v, label) {
    valid <- df |>
      mutate(
        item = clean_10pt(.data[[v]]),
        wt2  = as.numeric(wt)
      ) |>
      filter(!is.na(item), !is.na(wt2))

    # No usable responses for this item in this wave: contribute nothing.
    if (nrow(valid) == 0) {
      return(tibble())
    }

    design <- svydesign(ids = ~1, weights = ~wt2, data = valid)
    by_country <- svyby(
      ~item, by = ~country_name, design = design,
      FUN = svymean, na.rm = TRUE
    )

    by_country |>
      as_tibble() |>
      transmute(
        country  = country_name,
        question = label,
        mean     = round(item, 2),
        wave     = wave,
        year     = year
      )
  }

  imap(vars, summarise_item) |>
    bind_rows()
}

# Weighted means for every wave, stacked. The Malaysia/1996 combination is
# excluded explicitly, and both grouping columns become factors with a fixed
# display order.
econ_long <- list(
  compute_econ(w3, "W3", 1996, econ_vars$W3),
  compute_econ(w5, "W5", 2006, econ_vars$W5),
  compute_econ(w6, "W6", 2012, econ_vars$W6),
  compute_econ(w7, "W7", 2022, econ_vars$W7)
) |>
  bind_rows() |>
  filter(country != "Malaysia" | year != 1996) |>
  mutate(
    question = factor(
      question,
      levels = c(
        "Income equality",
        "Govt vs private ownership",
        "State vs individual resp.",
        "Competition good/harmful",
        "Hard work vs luck",
        "Wealth accumulation"
      )
    ),
    country = factor(
      country,
      levels = c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
    )
  )

# ── Plot ──────────────────────────────────────────────────────────────────────
# One colour per country, matched by name.
country_colors <- c(
  "India"        = "#E15759",
  "China"        = "#F28E2B",
  "USA"          = "#4E79A7",
  "Germany"      = "#76B7B2",
  "South Africa" = "#59A14F",
  "Malaysia"     = "#B07AA1"
)

# One line per country across survey years, one facet per item. The y axis is
# pinned to the full 1–10 range in every facet.
p_econ <- ggplot(
  econ_long,
  aes(x = year, y = mean, colour = country, group = country)
) +
  geom_line(linewidth = 0.9) +
  geom_point(size = 2.5) +
  facet_wrap(~question, nrow = 2, scales = "free_y") +
  scale_x_continuous(breaks = c(1996, 2006, 2012, 2022)) +
  scale_y_continuous(limits = c(1, 10), breaks = c(1, 5, 10)) +
  scale_colour_manual(name = NULL, values = country_colors) +
  guides(colour = guide_legend(nrow = 1)) +
  labs(
    x = NULL,
    y = "Mean (scale varies by item)",
    title    = "Economic Values: Country Means Across Waves (1–10 scale)",
    subtitle = "World Values Survey W3–W7 | Weighted means"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    plot.title       = element_text(face = "bold", size = 13),
    plot.subtitle    = element_text(colour = "grey40", size = 10),
    strip.text       = element_text(face = "bold", size = 10),
    axis.text.x      = element_text(size = 9),
    panel.grid.minor = element_blank(),
    legend.position  = "bottom"
  )

print(p_econ)

Regime Preferences

Scale reference (1–4 scale)

  • All four items (Strong leader, Expert rule, Army rule, Democracy) — 1 = Very good; 2 = Fairly good; 3 = Fairly bad; 4 = Very bad
  • We code 1–2 as acceptable and 3–4 as not acceptable and report the % acceptable
# Standard 4-point: 1=Very good, 2=Fairly good, 3=Fairly bad, 4=Very bad
#' Collapse a 4-point regime item to an "acceptable" indicator
#'
#' @param x A survey column (possibly labelled).
#' @return Integer vector: 1 for codes 1–2, 0 for codes 3–4, NA for anything
#'   else (zero, negative codes, NA, unexpected values).
clean_regime_4pt <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))

  # Start from all-missing and fill in only the recognised codes.
  acceptable <- rep(NA_integer_, length(codes))
  acceptable[codes %in% c(1, 2)] <- 1L
  acceptable[codes %in% c(3, 4)] <- 0L
  acceptable
}

# W3 Democracy (V157): 3-point: 1=Fairly good, 2=Fairly bad, 3=Very bad
# NOTE(review): the 3-point coding is taken from the original author's
# comment. Confirm it against the W3 codebook — if V157 is actually a 4-point
# item like the others, this recode misclassifies codes 2 and 4.
#' Collapse a 3-point regime item to an "acceptable" indicator
#'
#' @param x A survey column (possibly labelled).
#' @return Integer vector: 1 for code 1, 0 for codes 2–3, NA for anything
#'   else (zero, negative codes, NA, unexpected values).
clean_regime_3pt <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))

  # Position i holds the indicator for code i; unmatched codes index as NA.
  c(1L, 0L, 0L)[match(codes, 1:3)]
}

# ── 1. Variable mapping ───────────────────────────────────────────────────────
# For every wave: regime item label -> column name. The four items occupy
# consecutive columns in the same order in each wave, so the labels are
# attached to each run of column names in one pass.
regime_vars <- lapply(
  list(
    W3 = paste0("V", 154:157),
    W5 = paste0("V", 148:151),
    W6 = paste0("V", 127:130),
    W7 = paste0("Q", 235:238)
  ),
  setNames,
  nm = c("Strong leader", "Expert rule", "Army rule", "Democracy")
)

# ── 2. Compute per country per wave ──────────────────────────────────────────
#' Weighted % rating each regime type "acceptable", by country, for one wave
#'
#' @param df Working data frame for one wave (needs `wt` and `country_name`).
#' @param wave Wave tag (single string). Also selects the recode: the W3
#'   "Democracy" item goes through `clean_regime_3pt`, everything else
#'   through `clean_regime_4pt`.
#' @param year Survey year, appended to every output row.
#' @param vars Named character vector: item label -> column name in `df`.
#' @return Tibble with one row per country and item: `country`, `question`,
#'   `pct` (weighted share coded acceptable, one decimal), `wave`, `year`.
compute_regime <- function(df, wave, year, vars) {
  imap_dfr(vars, function(v, label) {

    # Use 3pt cleaner only for W3 Democracy.
    # `&&` is the scalar, short-circuiting operator that belongs in `if`;
    # the elementwise `&` would pass a vector condition through to `if`.
    cleaner <- if (wave == "W3" && label == "Democracy") {
      clean_regime_3pt
    } else {
      clean_regime_4pt
    }

    df2 <- df |>
      mutate(
        item = cleaner(.data[[v]]),
        wt2  = as.numeric(wt)
      ) |>
      filter(!is.na(item), !is.na(wt2))

    # Nothing usable for this item in this wave: contribute no rows.
    if (nrow(df2) == 0) return(tibble())

    des <- svydesign(ids = ~1, weights = ~wt2, data = df2)

    # The weighted mean of a 0/1 indicator is the weighted proportion.
    svyby(~item, by = ~country_name, design = des,
          FUN = svymean, na.rm = TRUE) |>
      as_tibble() |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(item * 100, 1),
        wave     = wave,
        year     = year
      )
  })
}

# Regime-preference percentages for every wave, stacked, with both grouping
# columns turned into factors in a fixed display order.
regime_long <- list(
  compute_regime(w3, "W3", 1996, regime_vars$W3),
  compute_regime(w5, "W5", 2006, regime_vars$W5),
  compute_regime(w6, "W6", 2012, regime_vars$W6),
  compute_regime(w7, "W7", 2022, regime_vars$W7)
) |>
  bind_rows() |>
  mutate(
    country = factor(
      country,
      levels = c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
    ),
    question = factor(
      question,
      levels = c("Democracy", "Strong leader", "Expert rule", "Army rule")
    )
  )

# ── 3. Line chart: all countries across waves, faceted by question ────────────
# One colour per country, matched by name.
country_colors <- c(
  "India"        = "#E15759",
  "China"        = "#F28E2B",
  "USA"          = "#4E79A7",
  "Germany"      = "#76B7B2",
  "South Africa" = "#59A14F",
  "Malaysia"     = "#B07AA1"
)

# One line per country across survey years, one facet per regime item.
# The Malaysia/1996 combination is excluded explicitly and the y axis is
# pinned to 0–100% in every facet.
regime_plot_data <- filter(regime_long, country != "Malaysia" | year != 1996)

p_regime <- ggplot(
  regime_plot_data,
  aes(x = year, y = pct, colour = country, group = country)
) +
  geom_line(linewidth = 0.9) +
  geom_point(size = 2.5) +
  facet_wrap(~question, nrow = 2, scales = "free_y") +
  scale_x_continuous(breaks = c(1996, 2006, 2012, 2022)) +
  scale_y_continuous(limits = c(0, 100), labels = label_percent(scale = 1)) +
  scale_colour_manual(name = NULL, values = country_colors) +
  guides(colour = guide_legend(nrow = 1)) +
  labs(
    x = NULL,
    y = "% acceptable",
    title    = "Regime Preferences: % Finding Each System 'Acceptable'",
    subtitle = "World Values Survey W3–W7 | Weighted country means"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    plot.title       = element_text(face = "bold", size = 13),
    plot.subtitle    = element_text(colour = "grey40", size = 10),
    strip.text       = element_text(face = "bold", size = 11),
    axis.text.x      = element_text(size = 9),
    panel.grid.minor = element_blank(),
    legend.position  = "bottom"
  )

print(p_regime)

Institutional Trust

(1–4 scale)

  • 1 = A great deal of confidence; 2 = Quite a lot; 3 = Not very much; 4 = None at all
  • We collapse 1–2 as high confidence and report the % with high confidence
# ── Variable mapping ──────────────────────────────────────────────────────────
# For every wave: institution label -> column name.
# All waves: 1=Great deal, 2=Quite a lot, 3=Not very much, 4=None at all
# Negatives = missing
# W3: V135–V145 | W5: V131–V141 | W6: V108–V118 | W7: Q64–Q74
# Each range skips one column that is not mapped here (V140, V135, V112, Q68),
# and W3 lists "Legal system/Courts" in a different position from later waves.

trust_vars <- list()

trust_vars$W3 <- c(
  "Religious institutions" = "V135",
  "Armed forces"           = "V136",
  "Legal system/Courts"    = "V137",
  "Press"                  = "V138",
  "Television"             = "V139",
  "Police"                 = "V141",
  "Government"             = "V142",
  "Political parties"      = "V143",
  "Parliament"             = "V144",
  "Civil service"          = "V145"
)

trust_vars$W5 <- c(
  "Religious institutions" = "V131",
  "Armed forces"           = "V132",
  "Press"                  = "V133",
  "Television"             = "V134",
  "Police"                 = "V136",
  "Legal system/Courts"    = "V137",
  "Government"             = "V138",
  "Political parties"      = "V139",
  "Parliament"             = "V140",
  "Civil service"          = "V141"
)

trust_vars$W6 <- c(
  "Religious institutions" = "V108",
  "Armed forces"           = "V109",
  "Press"                  = "V110",
  "Television"             = "V111",
  "Police"                 = "V113",
  "Legal system/Courts"    = "V114",
  "Government"             = "V115",
  "Political parties"      = "V116",
  "Parliament"             = "V117",
  "Civil service"          = "V118"
)

trust_vars$W7 <- c(
  "Religious institutions" = "Q64",
  "Armed forces"           = "Q65",
  "Press"                  = "Q66",
  "Television"             = "Q67",
  "Police"                 = "Q69",
  "Legal system/Courts"    = "Q70",
  "Government"             = "Q71",
  "Political parties"      = "Q72",
  "Parliament"             = "Q73",
  "Civil service"          = "Q74"
)

# ── Clean helper: 1/2 = high confidence, 3/4 = low, else NA ──────────────────
#' Collapse a 4-point confidence item to a high-confidence indicator
#'
#' @param x A survey column (possibly labelled).
#' @return Integer vector: 1 for codes 1–2, 0 for codes 3–4, NA for anything
#'   else (zero, negative codes, NA, unexpected values).
clean_trust <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))

  # Position i holds the indicator for code i; unmatched codes index as NA.
  c(1L, 1L, 0L, 0L)[match(codes, 1:4)]
}

# ── Compute weighted % high confidence per country per wave ───────────────────
#' Weighted % with high confidence in each institution, by country
#'
#' @param df Working data frame for one wave (needs `wt` and `country_name`).
#' @param wave,year Tags appended to every output row.
#' @param vars Named character vector: institution label -> column name.
#' @return Tibble with one row per country and institution: `country`,
#'   `question`, `pct` (one decimal), `wave`, `year`.
compute_trust <- function(df, wave, year, vars) {
  per_institution <- imap(vars, function(v, label) {
    respondents <- mutate(
      df,
      item = clean_trust(.data[[v]]),
      wt2  = as.numeric(wt)
    )
    respondents <- filter(respondents, !is.na(item), !is.na(wt2))

    # Nothing usable for this institution in this wave: contribute no rows.
    if (nrow(respondents) == 0) {
      return(tibble())
    }

    design <- svydesign(ids = ~1, weights = ~wt2, data = respondents)

    # The weighted mean of a 0/1 indicator is the weighted proportion.
    shares <- svyby(
      ~item, by = ~country_name, design = design,
      FUN = svymean, na.rm = TRUE
    )

    transmute(
      as_tibble(shares),
      country  = country_name,
      question = label,
      pct      = round(item * 100, 1),
      wave     = wave,
      year     = year
    )
  })

  bind_rows(per_institution)
}

# Trust percentages for every wave, stacked. Rows are kept only when the
# percentage lies within 0–100 (which also drops NA), the Malaysia/1996
# combination is excluded explicitly, and both grouping columns become
# factors in a fixed display order.
trust_long <- list(
  compute_trust(w3, "W3", 1996, trust_vars$W3),
  compute_trust(w5, "W5", 2006, trust_vars$W5),
  compute_trust(w6, "W6", 2012, trust_vars$W6),
  compute_trust(w7, "W7", 2022, trust_vars$W7)
) |>
  bind_rows() |>
  filter(
    country != "Malaysia" | year != 1996,
    between(pct, 0, 100)
  ) |>
  mutate(
    question = factor(
      question,
      levels = c(
        "Parliament", "Government", "Political parties", "Civil service",
        "Armed forces", "Police", "Legal system/Courts", "Press",
        "Television", "Religious institutions"
      )
    ),
    country = factor(
      country,
      levels = c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
    )
  )

# ── Plot ──────────────────────────────────────────────────────────────────────
# ── Colour helper ─────────────────────────────────────────────────────────────
#' Background colour for a single percentage cell
#'
#' @param val One numeric value (a percentage) or NA.
#' @return Hex colour string: white for NA, then four bands with lower
#'   bounds at 25, 50 and 75 (bounds inclusive on the upper band).
colour_cell <- function(val) {
  if (is.na(val)) {
    return("white")
  }

  # Band 0: below 25, 1: [25, 50), 2: [50, 75), 3: 75 and above.
  band <- findInterval(val, c(25, 50, 75))
  c("#fc8d59", "#fdd0a2", "#ffffb2", "#c7e9c0")[band + 1L]
}

# ── Reshape wide ──────────────────────────────────────────────────────────────
# Display order of the country columns in the final table.
country_cols <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")

# One row per institution and wave, one column per country. Factors are
# converted to plain strings first, so the final sort is alphabetical.
trust_wide <- trust_long |>
  transmute(
    question = as.character(question),
    wave_yr  = paste0(wave, " (", year, ")"),
    country  = as.character(country),
    pct
  ) |>
  pivot_wider(
    names_from  = country,
    values_from = pct,
    values_fill = NA,
    names_sort  = FALSE          # keep first-appearance order, not alphabetical
  ) |>
  # Impose the exact column order; countries with no data are simply absent.
  select(Institution = question, Wave = wave_yr, any_of(country_cols)) |>
  arrange(Institution, Wave)

# Verify no NAs for USA W7
trust_wide |>
  filter(grepl("2022", Wave)) |>
  select(Institution, Wave, USA)
## # A tibble: 10 × 3
##    Institution            Wave        USA
##    <chr>                  <chr>     <dbl>
##  1 Armed forces           W7 (2022)  81.3
##  2 Civil service          W7 (2022)  42  
##  3 Government             W7 (2022)  33.7
##  4 Legal system/Courts    W7 (2022)  57.8
##  5 Parliament             W7 (2022)  15.1
##  6 Police                 W7 (2022)  68.8
##  7 Political parties      W7 (2022)  11.3
##  8 Press                  W7 (2022)  29.7
##  9 Religious institutions W7 (2022)  53.7
## 10 Television             W7 (2022)  22.6
# ── Apply cell_spec colouring BEFORE kable ────────────────────────────────────
# Every country column is replaced by HTML cell markup: the value formatted as
# a percentage ("—" when missing) on a background chosen by colour_cell().
trust_coloured <- trust_wide |>
  mutate(across(all_of(country_cols), function(x) {
    cell_spec(
      ifelse(is.na(x), "—", paste0(round(x, 1), "%")),
      # vapply() always returns a character vector, even for zero rows,
      # whereas sapply() would fall back to an empty list.
      background = vapply(x, colour_cell, character(1), USE.NAMES = FALSE),
      color      = "black",
      bold       = FALSE
    )
  }))

# ── Render table ──────────────────────────────────────────────────────────────
# escape = FALSE lets the cell markup render as HTML instead of literal text.
# Alignment is derived from country_cols so it stays in step with col.names.
trust_coloured |>
  kable(format  = "html", escape = FALSE,
        caption = "Institutional Trust: % High Confidence by Country and Wave",
        col.names = c("Institution", "Wave", country_cols),
        align     = c("l", "l", rep("c", length(country_cols)))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em")
Institutional Trust: % High Confidence by Country and Wave
Institution Wave India China USA Germany South Africa Malaysia
Armed forces W3 (1996) 84.5% 86.4% 45.3% 56.8%
W5 (2006) 83.3% 91.5% 82.3% 50.1% 63.8% 84.4%
W6 (2012) 87.1% 92.6% 83.2% 65.9% 50.1% 80.1%
W7 (2022) 86.9% 95.4% 81.3% 58.1% 74.5%
Civil service W3 (1996) 69.7% 51.8% 44.6% 58.6%
W5 (2006) 54.3% 85.6% 41.5% 31% 55.9% 69.8%
W6 (2012) 61% 77.9% 46.2% 55.3% 44.8% 77.7%
W7 (2022) 80.6% 87% 42% 64.1% 60%
Government W3 (1996) 58.2% 30.6% 20.6% 68.9%
W5 (2006) 54.9% 92.4% 38.2% 23.5% 70.7% 75.4%
W6 (2012) 50.4% 92.2% 33.3% 45.1% 47.9% 75.2%
W7 (2022) 65.1% 94.9% 33.7% 45.5% 50.2%
Legal system/Courts W3 (1996) 78.3% 36.3% 43.8% 63.6%
W5 (2006) 68.9% 81.8% 57.4% 57.5% 66.4% 77.7%
W6 (2012) 64.5% 80.2% 54.9% 72.9% 51.8% 80.3%
W7 (2022) 74.6% 86.3% 57.8% 74.9% 62.7%
Parliament W3 (1996) 65.3% 30.3% 23% 68.8%
W5 (2006) 62.4% 91.7% 20.6% 21.9% 65.6% 67.6%
W6 (2012) 58.4% 87% 20.8% 44.7% 45% 69.1%
W7 (2022) 73.8% 92.7% 15.1% 44.2% 42.3%
Police W3 (1996) 41.1% 71.2% 61.3% 74.8%
W5 (2006) 64.1% 78.4% 70.3% 73.9% 61.9% 74.6%
W6 (2012) 51.2% 73.3% 69.4% 82.6% 47% 73.7%
W7 (2022) 66.8% 84.8% 68.8% 86.8% 59.5%
Political parties W3 (1996) 47% 21.2% 12.1% 46.9%
W5 (2006) 46.4% 86.4% 15.3% 12.8% 43.8% 58.3%
W6 (2012) 37.4% 85% 12.8% 24.6% 38.1% 62%
W7 (2022) 42.3% 91% 11.3% 24.2% 31.5%
Press W3 (1996) 63.1% 27.4% 17.5% 54%
W5 (2006) 75.8% 70.8% 23.8% 29.3% 60.9% 63.7%
W6 (2012) 71.8% 69.9% 23.1% 44.8% 55.3% 68.6%
W7 (2022) 65.8% 68.5% 29.7% 37% 38.4%
Religious institutions W3 (1996) 72.7% 75.9% 27.8% 84.6%
W5 (2006) 83.4% 39.9% 66.3% 37.6% 84.9% 90.8%
W6 (2012) 96.1% 24.3% 58.7% 38.2% 77.9% 91.4%
W7 (2022) 89.7% 20.1% 53.7% 36.2% 85%
Television W3 (1996) 56.7% 29.3% 21.8% 66.8%
W5 (2006) 74.9% 75% 25.3% 34.2% 74.3% 71.9%
W6 (2012) 74.5% 72.1% 24.3% 47.8% 65.5% 70.7%
W7 (2022) 63.4% 73.2% 22.6% 32.3% 39.9%

Neighbour

Neighbour Rejection (binary)

  • 1 = Mentioned (would not want as neighbour); 2 = Not mentioned
  • We report the % who mentioned each group
# ── Variable mapping ──────────────────────────────────────────────────────────
# Named list keyed by wave. Each element is a named character vector:
# name = display label, value = column holding that item in the wave's data.
# Coding: 1 = Mentioned, 2 = Not mentioned, negatives = missing
# W3: V51–V60 | W5: V34–V39 | W6: V36–V41 | W7: Q18–Q23
#
# W7 uses Q18–Q23: the neighbour battery is Q18–Q26 in the Wave 7
# questionnaire. Q27–Q32 are 4-point agree/disagree statements, so passing
# them through the 1/2 "mentioned" recode produces meaningless percentages.

neighbour_vars <- list(
  W3 = c("Drug addicts"               = "V59",
          "Different race"             = "V52",
          "People with AIDS"           = "V58",
          "Immigrants/Foreign workers" = "V57",
          "Homosexuals"                = "V60"),
  # ↑ "Different religion" intentionally omitted from W3 — no equivalent variable
  W5 = c("Drug addicts"               = "V34",
          "Different race"             = "V35",
          "People with AIDS"           = "V36",
          "Immigrants/Foreign workers" = "V37",
          "Homosexuals"                = "V38",
          "Different religion"         = "V39"),
  W6 = c("Drug addicts"               = "V36",
          "Different race"             = "V37",
          "People with AIDS"           = "V38",
          "Immigrants/Foreign workers" = "V39",
          "Homosexuals"                = "V40",
          "Different religion"         = "V41"),
  W7 = c("Drug addicts"               = "Q18",
          "Different race"             = "Q19",
          "People with AIDS"           = "Q20",
          "Immigrants/Foreign workers" = "Q21",
          "Homosexuals"                = "Q22",
          "Different religion"         = "Q23")
)

# ── Clean helper: 1 = mentioned (intolerant), 2 = not mentioned ──────────────
# Recodes a raw neighbour item to a 0/1 integer indicator.
#   x : raw survey column (value labels, if any, are dropped first)
# Returns an integer vector as long as `x`: 1L where the code is 1,
# 0L where it is 2, NA for every other value (including missing codes).
clean_neighbour <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  flag  <- rep(NA_integer_, length(codes))
  flag[codes %in% 1] <- 1L
  flag[codes %in% 2] <- 0L
  flag
}

# ── Compute weighted % mentioning per country per wave ────────────────────────
# For each labelled item in `vars`, estimates the weighted share of
# respondents coded "mentioned" within every `country_name` of `df`.
#   df   : respondent-level data containing `country_name`, `wt` and the
#          item columns named in `vars`
#   wave : wave label copied into the `wave` column of the result
#   year : survey year copied into the `year` column of the result
#   vars : named character vector (names = display labels, values = columns)
# Returns one row per country × item with columns
# country, question, pct (0–100, one decimal), wave, year.
compute_neighbour <- function(df, wave, year, vars) {
  estimate_item <- function(column, label) {
    usable <- df |>
      mutate(
        item = clean_neighbour(.data[[column]]),
        wt2  = as.numeric(wt)
      ) |>
      filter(!is.na(item), !is.na(wt2))

    # No valid answers for this item: contribute no rows
    if (nrow(usable) == 0) {
      return(tibble())
    }

    design     <- svydesign(ids = ~1, weights = ~wt2, data = usable)
    by_country <- svyby(~item, by = ~country_name, design = design,
                        FUN = svymean, na.rm = TRUE)

    # The mean of a 0/1 indicator is a proportion; report it as a percentage
    as_tibble(by_country) |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(item * 100, 1),
        wave     = wave,
        year     = year
      )
  }

  imap_dfr(vars, estimate_item)
}

# ── Combine waves ─────────────────────────────────────────────────────────────
# Stack the per-wave estimates into one long table and keep only rows whose
# percentage lies inside [0, 100] (rows with a missing pct are dropped too).
neighbour_long <- list(
  compute_neighbour(w3, "W3", 1996, neighbour_vars$W3),
  compute_neighbour(w5, "W5", 2006, neighbour_vars$W5),
  compute_neighbour(w6, "W6", 2012, neighbour_vars$W6),
  compute_neighbour(w7, "W7", 2022, neighbour_vars$W7)
) |>
  bind_rows() |>
  filter(pct >= 0 & pct <= 100)

# ── Colour helper — HIGH % = MORE intolerant = RED ───────────────────────────
# Maps percentages to background colours.
#   val : numeric percentage(s); may be a single value, a vector, or empty
# Returns a character vector of colours, one per element of `val`:
#   NA -> "white", <25 -> green, 25–49 -> yellow, 50–74 -> orange, >=75 -> red.
# Vectorised, so it no longer fails on vector or zero-length input; results
# for a single value are unchanged.
colour_intol <- function(val) {
  palette <- c("#91cf60", "#ffffb2", "#fc8d59", "#d73027")
  # findInterval() counts how many lower bounds each value has reached (0–3)
  band <- findInterval(val, c(25, 50, 75)) + 1L
  fill <- palette[band]
  fill[is.na(val)] <- "white"
  fill
}

# ── Reshape wide ──────────────────────────────────────────────────────────────
# Column order of the countries in the rendered table
country_cols <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")

# Row order of the groups in the rendered table
question_order <- c(
  "Drug addicts", "Different race", "People with AIDS",
  "Immigrants/Foreign workers", "Homosexuals", "Different religion"
)

# One row per group × wave, one column per country (those present in the
# data, in `country_cols` order); rows sorted by group, then wave label.
neighbour_wide <- neighbour_long |>
  transmute(
    Group   = factor(question, levels = question_order),
    Wave    = paste0(wave, " (", year, ")"),
    country = as.character(country),
    pct
  ) |>
  pivot_wider(names_from = country, values_from = pct, values_fill = NA) |>
  select(Group, Wave, any_of(country_cols)) |>
  arrange(Group, Wave)

# ── Apply cell_spec colouring ─────────────────────────────────────────────────
# Country columns that actually exist in the wide table are converted to cell
# markup: rounded percentage text (em dash when missing) on the background
# colour_intol() assigns to the value.
neighbour_coloured <- mutate(
  neighbour_wide,
  across(
    all_of(intersect(country_cols, names(neighbour_wide))),
    \(pct) {
      shown <- ifelse(is.na(pct), "—", paste0(round(pct, 1), "%"))
      fill  <- sapply(pct, colour_intol)
      cell_spec(shown, background = fill, color = "black", bold = FALSE)
    }
  )
)

# ── Render table ──────────────────────────────────────────────────────────────
# Prints the colour-coded intolerance table as HTML. `escape = FALSE` keeps
# the markup produced by cell_spec() intact instead of escaping it.
neighbour_coloured |>
  kable(format    = "html", escape = FALSE,
        caption   = "Neighbour Intolerance: % Who Would NOT Want as Neighbour",
        col.names = c("Group", "Wave",
                      intersect(country_cols, names(neighbour_coloured))),
        # Centre exactly as many columns as there are country headers, so the
        # alignment vector stays in step when a country is absent
        align     = c("l", "l",
                      rep("c", length(intersect(country_cols,
                                                names(neighbour_coloured)))))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  # Column 1 (Group) repeats across waves; collapse the repeats
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em") |>
  # The table has no W3 row for "Different religion" (the item is not mapped
  # for W3), so the note says that rather than promising a "—" cell
  footnote(general = paste0(
    "Higher % = more intolerant. ",
    "Green <25%, Yellow 25–49%, Orange 50–74%, Red ≥75%. ",
    "W3 (1996) does not include a 'Different religion' item, so that group has no W3 row."
  ))
Neighbour Intolerance: % Who Would NOT Want as Neighbour
Group Wave India China USA Germany South Africa Malaysia
Drug addicts W3 (1996) 65.9% 70.8% 82.8% 51.8% 74.8%
W5 (2006) 53.8% 92.9% 90.7% 62.6% 89.5% 87.3%
W6 (2012) 93.6% 96.5% 88.3% 66.4% 85.7% 73.8%
W7 (2022) 83.4% 30.7% 38.6% 36.5% 63.6%
Different race W3 (1996) 36% 22.9% 6.6% 2.8% 11.2%
W5 (2006) 43.5% 12.6% 4% 7.2% 8.1% 20.6%
W6 (2012) 25.6% 10.5% 5.6% 14.9% 19.2% 31.3%
W7 (2022) 48% 22.7% 15.9% 18.9% 30%
People with AIDS W3 (1996) 60.3% 62.3% 18.6% 6.5% 44%
W5 (2006) 43.7% 74.5% 15.4% 15.4% 7.3% 69.8%
W6 (2012) 61.5% 73.2% 13.4% 24% 17.7% 59.5%
W7 (2022) 44.5% 19.6% 21.4% 15.4% 35.6%
Immigrants/Foreign workers W3 (1996) 33.1% 20.3% 9.7% 7.2% 21.3%
W5 (2006) 35% 17.2% 12.7% 13.3% 24.9% 56.7%
W6 (2012) 47.1% 12.2% 13.6% 21.5% 40.9% 59.7%
W7 (2022) 48.1% 24.4% 21% 15.6% 34.2%
Homosexuals W3 (1996) 61.4% 60.7% 29.5% 13.3% 50.5%
W5 (2006) 40.3% 69.6% 25.1% 14.6% 45.9% 70.6%
W6 (2012) 65% 52.7% 20.4% 22.4% 37.9% 58.7%
W7 (2022) 42.2% 14.9% 17.3% 10% 28.4%
Different religion W5 (2006) 44% 15.7% 2.5% 4.3% 4.5% 22.4%
W6 (2012) 28.4% 9.2% 3.4% 14.1% 16% 30.3%
W7 (2022) 47.2% 15.6% 32% 32.9% 25.5%
Note:
Higher % = more intolerant. Green <25%, Yellow 25–49%, Orange 50–74%, Red ≥75%. W3 (1996) does not include a ‘Different religion’ item — shown as — for that wave.

Child qualities

  • 1 = Mentioned; 2 = Not mentioned
  • We report the % who mentioned each quality
# ── Variable mapping ──────────────────────────────────────────────────────────
# Named list keyed by wave. Each element is a named character vector:
# name = display label of the child quality, value = column name of that item
# in the corresponding wave's data. The item sets differ by wave: W3 lists
# 7 qualities, W5 lists 10, W7 lists 11 ("Good manners" appears under W7 only).
# There is no W6 entry.
# NOTE(review): codes are taken as given here — confirm against each wave's
# codebook before extending the lists.
child_vars <- list(
  W3 = c("Independence"        = "V15",
          "Hard work"           = "V16",
          "Responsibility"      = "V17",
          "Imagination"         = "V18",
          "Tolerance & respect" = "V19",
          "Thrift"              = "V20",
          "Determination"       = "V21"),
  W5 = c("Independence"        = "V12",
          "Hard work"           = "V13",
          "Responsibility"      = "V14",
          "Imagination"         = "V15",
          "Tolerance & respect" = "V16",
          "Thrift"              = "V17",
          "Determination"       = "V18",
          "Religious faith"     = "V19",
          "Unselfishness"       = "V20",
          "Obedience"           = "V21"),
  W7 = c("Good manners"        = "Q7",
          "Independence"        = "Q8",
          "Hard work"           = "Q9",
          "Responsibility"      = "Q10",
          "Imagination"         = "Q11",
          "Tolerance & respect" = "Q12",
          "Thrift"              = "Q13",
          "Determination"       = "Q14",
          "Religious faith"     = "Q15",
          "Unselfishness"       = "Q16",
          "Obedience"           = "Q17")
)

# ── Clean helper ──────────────────────────────────────────────────────────────
# Recodes a raw child-quality item to a 0/1 integer indicator.
#   x : raw survey column (value labels, if any, are dropped first)
# Returns an integer vector as long as `x`: 1L for code 1 (mentioned),
# 0L for code 2 (not mentioned), NA for anything else.
clean_child <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  # Position in c(1, 2) selects the recoded value; no match gives NA
  c(1L, 0L)[match(codes, c(1, 2))]
}

# ── Compute weighted % per country per wave ───────────────────────────────────
# For each labelled quality in `vars`, estimates the weighted share of
# respondents coded "mentioned" within every `country_name` of `df`.
#   df   : respondent-level data containing `country_name`, `wt` and the
#          item columns named in `vars`
#   wave : wave label copied into the `wave` column of the result
#   year : survey year copied into the `year` column of the result
#   vars : named character vector (names = display labels, values = columns)
# Returns one row per country × quality with columns
# country, question, pct (0–100, one decimal), wave, year.
compute_child <- function(df, wave, year, vars) {
  estimate_item <- function(column, label) {
    usable <- df |>
      mutate(
        item = clean_child(.data[[column]]),
        wt2  = as.numeric(wt)
      ) |>
      filter(!is.na(item), !is.na(wt2))

    # No valid answers for this item: contribute no rows
    if (nrow(usable) == 0) {
      return(tibble())
    }

    design     <- svydesign(ids = ~1, weights = ~wt2, data = usable)
    by_country <- svyby(~item, by = ~country_name, design = design,
                        FUN = svymean, na.rm = TRUE)

    # The mean of a 0/1 indicator is a proportion; report it as a percentage
    as_tibble(by_country) |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(item * 100, 1),
        wave     = wave,
        year     = year
      )
  }

  imap_dfr(vars, estimate_item)
}

# ── Combine waves (no W6) ─────────────────────────────────────────────────────
# Stack the W3, W5 and W7 estimates into one long table and keep only rows
# whose percentage lies inside [0, 100] (rows with a missing pct drop out).
child_long <- list(
  compute_child(w3, "W3", 1996, child_vars$W3),
  compute_child(w5, "W5", 2006, child_vars$W5),
  compute_child(w7, "W7", 2022, child_vars$W7)
) |>
  bind_rows() |>
  filter(pct >= 0 & pct <= 100)

# ── Colour helper — white (low) → green (high) ───────────────────────────────
# Maps percentages to background colours.
#   val : numeric percentage(s); may be a single value, a vector, or empty
# Returns a character vector of colours, one per element of `val`:
#   NA -> light grey, <25 -> white, 25–49 -> yellow-green,
#   50–74 -> medium green, >=75 -> dark green.
# Vectorised, so it no longer fails on vector or zero-length input; results
# for a single value are unchanged.
colour_child <- function(val) {
  palette <- c("#ffffff", "#d9ef8b", "#a6d96a", "#1a9850")
  # findInterval() counts how many lower bounds each value has reached (0–3)
  band <- findInterval(val, c(25, 50, 75)) + 1L
  fill <- palette[band]
  fill[is.na(val)] <- "#f5f5f5"
  fill
}

# ── Row order ─────────────────────────────────────────────────────────────────
# Row order of the qualities in the rendered table
question_order <- c(
  "Good manners", "Independence", "Hard work", "Responsibility",
  "Imagination", "Tolerance & respect", "Thrift", "Determination",
  "Religious faith", "Unselfishness", "Obedience"
)

# Column order of the countries in the rendered table
country_cols <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")

# ── Reshape wide ──────────────────────────────────────────────────────────────
# One row per quality × wave, one column per country (those present in the
# data, in `country_cols` order); rows sorted by quality, then wave label.
child_wide <- child_long |>
  transmute(
    Quality = factor(question, levels = question_order),
    Wave    = paste0(wave, " (", year, ")"),
    country = as.character(country),
    pct
  ) |>
  pivot_wider(names_from = country, values_from = pct, values_fill = NA) |>
  select(Quality, Wave, any_of(country_cols)) |>
  arrange(Quality, Wave)

# ── Apply cell_spec colouring ─────────────────────────────────────────────────
# Country columns that actually exist in the wide table are converted to cell
# markup: rounded percentage text (em dash when missing) on the background
# colour_child() assigns to the value. Text turns white on the darkest band
# (values of 75 or more) and stays black otherwise.
child_coloured <- mutate(
  child_wide,
  across(
    all_of(intersect(country_cols, names(child_wide))),
    \(pct) {
      shown <- ifelse(is.na(pct), "—", paste0(round(pct, 1), "%"))
      fill  <- sapply(pct, colour_child)
      ink   <- ifelse(!is.na(pct) & pct >= 75, "white", "black")
      cell_spec(shown, background = fill, color = ink, bold = FALSE)
    }
  )
)

# ── Render table ──────────────────────────────────────────────────────────────
# Prints the colour-coded child-qualities table as HTML. `escape = FALSE`
# keeps the markup produced by cell_spec() intact instead of escaping it.
child_coloured |>
  kable(format    = "html", escape = FALSE,
        caption   = "Child Qualities: % Mentioning as Important (survey-weighted)",
        col.names = c("Quality", "Wave",
                      intersect(country_cols, names(child_coloured))),
        # Centre exactly as many columns as there are country headers, so the
        # alignment vector stays in step when a country is absent
        align     = c("l", "l",
                      rep("c", length(intersect(country_cols,
                                                names(child_coloured)))))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  # Column 1 (Quality) repeats across waves; collapse the repeats
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em") |>
  footnote(general = paste0(
    "Higher % = more frequently cited as important. ",
    "W3 (1996) includes 7 items only; 'Good manners' available in W7 only. ",
    "Green scale: Dark green ≥75%, Medium green 50–74%, Yellow-green 25–49%, White <25%. ",
    "W6 (2012) excluded due to unavailability of comparable items."
  ))
Child Qualities: % Mentioning as Important (survey-weighted)
Quality Wave India China USA Germany South Africa Malaysia
Good manners W7 (2022) 80.7% 84.1% 51.8% 83.8% 81.6%
Independence W3 (1996) 37.4% 50.1% 45.2% 51.5% 31.2%
W5 (2006) 66.9% 69.8% 53.8% 77.6% 58.8% 78.7%
W7 (2022) 57.6% 78.6% 55.5% 69.8% 55.1%
Hard work W3 (1996) 71.6% 72.7% 52.5% 9.9% 58.5%
W5 (2006) 81.3% 83.4% 61.7% 26% 70.7% 49.1%
W7 (2022) 74.2% 70.7% 68% 39.7% 32.9%
Responsibility W3 (1996) 48.2% 34.5% 69% 92.3% 48.8%
W5 (2006) 68% 67.2% 72.2% 85.1% 54.8% 78.6%
W7 (2022) 65.3% 78.8% 59.4% 79.7% 74.9%
Imagination W3 (1996) 10.8% 22.1% 26.5% 33.5% 10.6%
W5 (2006) 25.4% 25.3% 31.7% 39.6% 15.6% 21.6%
W7 (2022) 22.3% 21.7% 29.8% 23.1% 9.3%
Tolerance & respect W3 (1996) 51.6% 43% 75% 88.3% 70%
W5 (2006) 55.8% 64.7% 78.3% 74.6% 77.9% 73.7%
W7 (2022) 44.1% 60.8% 70.9% 84.1% 69%
Thrift W3 (1996) 41.9% 62.2% 26.7% 52.8% 28.7%
W5 (2006) 55.4% 61.6% 29.9% 48.2% 36.9% 50.7%
W7 (2022) 32% 40.4% 27.3% 36.9% 38.7%
Determination W3 (1996) 28.8% 36.3% 41.5% 41% 29.7%
W5 (2006) 40.7% 24% 40.2% 64.2% 32.9% 33.3%
W7 (2022) 30.4% 20.6% 38.7% 33.5% 23.5%
Religious faith W5 (2006) 41.3% 2.4% 50.6% 9.3% 56.2% 59.6%
W7 (2022) 26.8% 1.1% 32.2% 9.5% 59.7%
Unselfishness W5 (2006) 34.2% 30.8% 37.6% 6.8% 30.6% 30.1%
W7 (2022) 21.3% 28.8% 28.3% 5.6% 18.5%
Obedience W5 (2006) 55.9% 13.8% 28.3% 15.8% 46.8% 25.9%
W7 (2022) 22.1% 5.8% 20.5% 11.8% 13%
Note:
Higher % = more frequently cited as important. W3 (1996) includes 7 items only; ‘Good manners’ available in W7 only. Green scale: Dark green ≥75%, Medium green 50–74%, Yellow-green 25–49%, White <25%. W6 (2012) excluded due to unavailability of comparable items.

Moral permissiveness

(1–10 scale)

  • 1 = Never justifiable; 10 = Always justifiable
  • We report the % scoring 1–3 (at or near "never justifiable") as the conservative/strict moral stance.
# ── Variable mapping ──────────────────────────────────────────────────────────
# Named list keyed by wave. Each element is a named character vector:
# name = display label of the behaviour, value = column name of that item in
# the corresponding wave's data.
# Scale: 1 = Never justifiable → 10 = Always justifiable
# We report % scoring 1, 2, or 3 (strongly opposed)
# W3: 5 items only | W5/W6/W7: 11 items
#
# The same column name can mean different items in different waves
# (e.g. "V198" is Prostitution under W3 but Government benefits under W5/W6),
# so codes must always be read together with their wave.
# NOTE(review): the W6 and W7 codes are not contiguous (V200/V206 and
# Q179/Q186 are skipped, and Euthanasia/Suicide are listed out of numeric
# order) — presumably items not reported here; confirm against the codebooks.

moral_vars <- list(
  W3 = c("Prostitution"          = "V198",
          "Abortion"              = "V199",
          "Divorce"               = "V200",
          "Euthanasia"            = "V201",
          "Suicide"               = "V202"),
  W5 = c("Government benefits"   = "V198",
          "Public transport fare" = "V199",
          "Tax evasion"           = "V200",
          "Bribing"               = "V201",
          "Homosexuality"         = "V202",
          "Prostitution"          = "V203",
          "Abortion"              = "V204",
          "Divorce"               = "V205",
          "Euthanasia"            = "V206",
          "Suicide"               = "V207",
          "Domestic violence"     = "V208"),
  W6 = c("Government benefits"   = "V198",
          "Public transport fare" = "V199",
          "Tax evasion"           = "V201",
          "Bribing"               = "V202",
          "Homosexuality"         = "V203",
          "Prostitution"          = "V203A",
          "Abortion"              = "V204",
          "Divorce"               = "V205",
          "Euthanasia"            = "V207A",
          "Suicide"               = "V207",
          "Domestic violence"     = "V208"),
  W7 = c("Government benefits"   = "Q177",
          "Public transport fare" = "Q178",
          "Tax evasion"           = "Q180",
          "Bribing"               = "Q181",
          "Homosexuality"         = "Q182",
          "Prostitution"          = "Q183",
          "Abortion"              = "Q184",
          "Divorce"               = "Q185",
          "Euthanasia"            = "Q188",
          "Suicide"               = "Q187",
          "Domestic violence"     = "Q189")
)

# ── Clean helper: keep 1–10, everything else NA ───────────────────────────────
# Restricts a raw justifiability item to its valid scale.
#   x : raw survey column (value labels, if any, are dropped first)
# Returns the numeric scores, with any value below 1 or above 10 (and any
# value that was already missing) set to NA.
clean_moral <- function(x) {
  scores       <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  out_of_scale <- scores < 1 | scores > 10
  ifelse(out_of_scale, NA_real_, scores)
}

# ── Compute weighted % scoring 1–3 per country per wave ──────────────────────
# For each labelled behaviour in `vars`, estimates the weighted share of
# respondents whose valid score is 3 or lower within every `country_name`.
#   df   : respondent-level data containing `country_name`, `wt` and the
#          item columns named in `vars`
#   wave : wave label copied into the `wave` column of the result
#   year : survey year copied into the `year` column of the result
#   vars : named character vector (names = display labels, values = columns)
# Returns one row per country × behaviour with columns
# country, question, pct (0–100, one decimal), wave, year.
compute_moral <- function(df, wave, year, vars) {
  estimate_item <- function(column, label) {
    usable <- df |>
      mutate(
        item    = clean_moral(.data[[column]]),
        # 1 when the score is 1–3, 0 when 4–10, NA when the score is missing
        opposed = as.integer(item <= 3),
        wt2     = as.numeric(wt)
      ) |>
      filter(!is.na(opposed), !is.na(wt2))

    # No valid answers for this item: contribute no rows
    if (nrow(usable) == 0) {
      return(tibble())
    }

    design     <- svydesign(ids = ~1, weights = ~wt2, data = usable)
    by_country <- svyby(~opposed, by = ~country_name, design = design,
                        FUN = svymean, na.rm = TRUE)

    # The mean of a 0/1 indicator is a proportion; report it as a percentage
    as_tibble(by_country) |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(opposed * 100, 1),
        wave     = wave,
        year     = year
      )
  }

  imap_dfr(vars, estimate_item)
}

# ── Combine all four waves ────────────────────────────────────────────────────
# Stack the per-wave estimates into one long table and keep only rows whose
# percentage lies inside [0, 100] (rows with a missing pct are dropped too).
moral_long <- list(
  compute_moral(w3, "W3", 1996, moral_vars$W3),
  compute_moral(w5, "W5", 2006, moral_vars$W5),
  compute_moral(w6, "W6", 2012, moral_vars$W6),
  compute_moral(w7, "W7", 2022, moral_vars$W7)
) |>
  bind_rows() |>
  filter(pct >= 0 & pct <= 100)

# ── Colour helper — HIGH % = MORE conservative = RED ─────────────────────────
# Maps percentages to background colours.
#   val : numeric percentage(s); may be a single value, a vector, or empty
# Returns a character vector of colours, one per element of `val`:
#   NA -> light grey, <25 -> green, 25–49 -> yellow, 50–74 -> orange,
#   >=75 -> red.
# Vectorised, so it no longer fails on vector or zero-length input; results
# for a single value are unchanged.
colour_moral <- function(val) {
  palette <- c("#91cf60", "#ffffb2", "#fc8d59", "#d73027")
  # findInterval() counts how many lower bounds each value has reached (0–3)
  band <- findInterval(val, c(25, 50, 75)) + 1L
  fill <- palette[band]
  fill[is.na(val)] <- "#f5f5f5"
  fill
}

# ── Row order ─────────────────────────────────────────────────────────────────
# Row order of the behaviours in the rendered table
question_order <- c(
  "Government benefits", "Public transport fare", "Tax evasion", "Bribing",
  "Homosexuality", "Prostitution", "Abortion", "Divorce",
  "Euthanasia", "Suicide", "Domestic violence"
)

# Column order of the countries in the rendered table
country_cols <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")

# ── Reshape wide ──────────────────────────────────────────────────────────────
# One row per behaviour × wave, one column per country (those present in the
# data, in `country_cols` order); rows sorted by behaviour, then wave label.
moral_wide <- moral_long |>
  transmute(
    Behaviour = factor(question, levels = question_order),
    Wave      = paste0(wave, " (", year, ")"),
    country   = as.character(country),
    pct
  ) |>
  pivot_wider(names_from = country, values_from = pct, values_fill = NA) |>
  select(Behaviour, Wave, any_of(country_cols)) |>
  arrange(Behaviour, Wave)

# ── Apply cell_spec colouring ─────────────────────────────────────────────────
# Country columns that actually exist in the wide table are converted to cell
# markup: rounded percentage text (em dash when missing) on the background
# colour_moral() assigns to the value. Text turns white on the top band
# (values of 75 or more) and stays black otherwise.
moral_coloured <- mutate(
  moral_wide,
  across(
    all_of(intersect(country_cols, names(moral_wide))),
    \(pct) {
      shown <- ifelse(is.na(pct), "—", paste0(round(pct, 1), "%"))
      fill  <- sapply(pct, colour_moral)
      ink   <- ifelse(!is.na(pct) & pct >= 75, "white", "black")
      cell_spec(shown, background = fill, color = ink, bold = FALSE)
    }
  )
)

# ── Render table ──────────────────────────────────────────────────────────────
# Prints the colour-coded moral-stance table as HTML. `escape = FALSE` keeps
# the markup produced by cell_spec() intact instead of escaping it.
moral_coloured |>
  kable(format    = "html", escape = FALSE,
        caption   = "Moral Permissiveness: % Rating Behaviour as Never Justifiable (scores 1–3 on 1–10 scale)",
        col.names = c("Behaviour", "Wave",
                      intersect(country_cols, names(moral_coloured))),
        # Centre exactly as many columns as there are country headers, so the
        # alignment vector stays in step when a country is absent
        align     = c("l", "l",
                      rep("c", length(intersect(country_cols,
                                                names(moral_coloured)))))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  # Column 1 (Behaviour) repeats across waves; collapse the repeats
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em") |>
  footnote(general = paste0(
    "Higher % = more conservative (never justifiable). ",
    "Red ≥75%, Orange 50–74%, Yellow 25–49%, Green <25%. ",
    "W3 (1996) includes 5 items only: Prostitution, Abortion, Divorce, Euthanasia, Suicide. ",
    "Survey-weighted estimates."
  ))
Moral Permissiveness: % Rating Behaviour as Never Justifiable (scores 1–3 on 1–10 scale)
Behaviour Wave India China USA Germany South Africa Malaysia
Government benefits W5 (2006) 60% 64.7% 79% 81% 75.6% 43.2%
W6 (2012) 78.5% 58.8% 78.7% 86.8% 48.1% 68.5%
W7 (2022) 75.7% 60% 73.4% 93.9% 39.5%
Public transport fare W5 (2006) 58.1% 87.1% 70.9% 79.8% 72.2% 50.8%
W6 (2012) 89.3% 76.6% 72.7% 85.9% 47.3% 72.3%
W7 (2022) 80.5% 92.5% 65.8% 87.7% 49.5%
Tax evasion W5 (2006) 63.2% 87% 82.9% 80.2% 77% 52.7%
W6 (2012) 90.9% 83.7% 85.3% 51.8% 73.6%
W7 (2022) 85.4% 93.9% 83.7% 93.5% 59.3%
Bribing W5 (2006) 64.2% 90.6% 88% 89.2% 79.7% 59.6%
W6 (2012) 91.7% 86.8% 87.1% 90.1% 51.4% 76.9%
W7 (2022) 87.4% 91.9% 88.2% 96.7% 66.8%
Homosexuality W5 (2006) 63.7% 91.6% 41.3% 19.7% 66.6% 62.9%
W6 (2012) 91% 83.7% 32.1% 27.7% 46.2% 75.5%
W7 (2022) 72.7% 80.6% 25.6% 12.9% 56.1%
Prostitution W3 (1996) 86.8% 97.8% 74.4% 33.4% 80.7%
W5 (2006) 63% 94.6% 58.4% 41.3% 76.6% 64.1%
W6 (2012) 91.3% 89.4% 44.1% 52% 82.5%
W7 (2022) 82.6% 93.3% 54.7% 35.4% 60.5%
Abortion W3 (1996) 74.7% 47.4% 47.6% 29% 73.6%
W5 (2006) 58.9% 83.3% 38.9% 31.1% 73.5% 65.1%
W6 (2012) 90.4% 69.7% 35.5% 40% 51.1% 83%
W7 (2022) 68.9% 76.9% 37.5% 27.8% 57.6%
Divorce W3 (1996) 64.5% 25.3% 21.6% 14% 50.5%
W5 (2006) 47.2% 73.6% 15.8% 13.4% 50.6% 54.4%
W6 (2012) 85.9% 58% 12.9% 15.8% 40% 67.6%
W7 (2022) 63.8% 54.5% 10.9% 10% 32.7%
Euthanasia W3 (1996) 71.7% 32.5% 41.3% 65%
W5 (2006) 48.5% 68% 35% 36.3% 63.5% 59.8%
W6 (2012) 62% 42.3%
W7 (2022) 70.7% 55.8% 32.3% 14.7% 47.6%
Suicide W3 (1996) 87.9% 69.4% 77.6% 42.8% 81.8%
W5 (2006) 57.4% 88.1% 68.3% 55.1% 77.5% 65.2%
W6 (2012) 91.3% 77.6% 64.2% 62.8% 50.9% 83.8%
W7 (2022) 87.4% 85.4% 61.8% 49.8% 65%
Domestic violence W5 (2006) 61.2% 91.3% 91.8% 88.5% 79.4% 63.9%
W6 (2012) 88.1% 78.8% 90.9% 90.8% 53.5% 80.5%
W7 (2022) 83.6% 92.8% 93.3% 98.9% 66.2%
Note:
Higher % = more conservative (never justifiable). Red ≥75%, Orange 50–74%, Yellow 25–49%, Green <25%. W3 (1996) includes 5 items only: Prostitution, Abortion, Divorce, Euthanasia, Suicide. Survey-weighted estimates.