library(tidyverse)
library(haven)
library(labelled)
library(survey)
library(knitr)
library(kableExtra)

# ── Focal countries ──────────────────────────────────────────────────────────
# Numeric country codes of the six focal countries (they match the
# ISO 3166-1 numeric codes). `focal_codes` and `focal_labels` are paired by
# position, so both vectors must stay in the same order.
focal_codes  <- c(356L, 156L, 840L, 276L, 710L, 458L)
focal_labels <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
# Lookup table joined onto each wave to attach a readable country name.
focal_lookup <- tibble(country = focal_codes, country_name = focal_labels)

# Each wave is read from an absolute, machine-specific path. stopifnot() aborts
# the script immediately when a file is missing instead of failing later.

# ── W3 (1996) ─────────────────────────────────────────────────────────────────
datafile_w3 <- "D:/Populism and Democrary/World value survey/WVS 1996/F00008205-WV3_Data_R_v20180912.rds"
stopifnot(file.exists(datafile_w3))
wvs3 <- readRDS(datafile_w3)

# ── W5 (2006) ─────────────────────────────────────────────────────────────────
datafile_w5 <- "D:/Populism and Democrary/World value survey/WVS 2006/F00007944-WV5_Data_R_v20180912.rds"
stopifnot(file.exists(datafile_w5))
wvs5 <- readRDS(datafile_w5)

# ── W6 (2012) ─────────────────────────────────────────────────────────────────
# .rdata files are restored with load(), which creates the object under the
# name it was saved with; it is then aliased to the short name used below.
datafile_w6 <- "D:/Populism and Democrary/World value survey/WVS 2012/WV6_Data_R_v20201117.rdata"
stopifnot(file.exists(datafile_w6))
load(datafile_w6)
wvs6 <- WV6_Data_R_v20201117

# ── W7 (2022) ─────────────────────────────────────────────────────────────────
# The restored object name contains a hyphen, hence the backticks.
datafile_w7 <- "D:/Populism and Democrary/World value survey/WVS 2017/WVS_Cross-National_Wave_7_Rdata_v6_0.rdata"
stopifnot(file.exists(datafile_w7))
load(datafile_w7)
wvs7 <- `WVS_Cross-National_Wave_7_v6_0`

# ── Sanity check ─────────────────────────────────────────────────────────────
# One row per wave with the dimensions of the loaded data frame, so an
# unexpectedly truncated or wrong file is visible at a glance.
dims <- tibble(
  wave  = c("W3", "W5", "W6", "W7"),
  year  = c(1996, 2006, 2012, 2022),
  nrows = c(nrow(wvs3), nrow(wvs5), nrow(wvs6), nrow(wvs7)),
  ncols = c(ncol(wvs3), ncol(wvs5), ncol(wvs6), ncol(wvs7))
)
kable(dims, caption = "Sanity check: datasets loaded (rows × cols)") |>
  kable_styling(full_width = FALSE)

Sanity check: datasets loaded (rows × cols)
wave	year	nrows	ncols
W3	1996	77818	332
W5	2006	83975	414
W6	2012	89565	442
W7	2022	97220	613

Cross country comparision

India, China, USA, Germany, South Africa (2022 missing), Malaysia (1996 missing)

# Focal-country constants, repeated here so this section can be run on its
# own; the values are the same as the definitions at the top of the script and
# the two copies must be kept in sync.
focal_codes  <- c(356L, 156L, 840L, 276L, 710L, 458L)
focal_labels <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")
focal_lookup <- tibble(country = focal_codes, country_name = focal_labels)

# Build the per-wave working file restricted to the focal countries.
#
# Args:
#   df          Wave-level data frame (columns may be haven-labelled).
#   country_var Name (string) of the country-code column in `df`.
#   weight_var  Name (string) of the survey-weight column in `df`.
#   codes       Country codes to keep. Defaults to the script-level
#               `focal_codes`, so existing three-argument calls are unchanged.
#   lookup      Table with columns `country` and `country_name` used to attach
#               names. Defaults to the script-level `focal_lookup`.
#
# Returns:
#   `df` plus three columns: `country` (integer code), `wt` (numeric weight)
#   and `country_name`, keeping only rows whose country is in `codes` and
#   whose weight is not missing.
make_workfile <- function(df, country_var, weight_var,
                          codes = focal_codes, lookup = focal_lookup) {
  # Fail with a readable message when a wave uses a different variable name.
  absent <- setdiff(c(country_var, weight_var), names(df))
  if (length(absent) > 0) {
    stop("Column(s) not found in data: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  df |>
    mutate(
      country = as.integer(zap_labels(.data[[country_var]])),
      wt      = as.numeric(zap_labels(.data[[weight_var]]))
    ) |>
    # `.env$` makes sure the argument is used even if `df` has a `codes` column.
    filter(country %in% .env$codes, !is.na(wt)) |>
    left_join(lookup, by = "country")
}

# Working files per wave. Arguments: raw data, country-code column,
# weight column (the variable names differ between waves).
w3 <- make_workfile(wvs3, "V2",       "V236")
w5 <- make_workfile(wvs5, "V2",       "V259")
w6 <- make_workfile(wvs6, "V2",       "V258")
w7 <- make_workfile(wvs7, "B_COUNTRY", "W_WEIGHT")

# Coverage check
# One logical column per wave: TRUE when the focal country has at least one
# respondent in that wave's working file.
coverage <- focal_lookup |>
  mutate(
    W3_1996 = country %in% unique(w3$country),
    W5_2006 = country %in% unique(w5$country),
    W6_2012 = country %in% unique(w6$country),
    W7_2022 = country %in% unique(w7$country)
  )

kable(coverage, caption = "Focal country coverage across waves") |>
  kable_styling(full_width = FALSE)

Focal country coverage across waves
country	country_name	W3_1996	W5_2006	W6_2012	W7_2022
356	India	TRUE	TRUE	TRUE	TRUE
156	China	TRUE	TRUE	TRUE	TRUE
840	USA	TRUE	TRUE	TRUE	TRUE
276	Germany	TRUE	TRUE	TRUE	TRUE
710	South Africa	TRUE	TRUE	TRUE	FALSE
458	Malaysia	FALSE	TRUE	TRUE	TRUE

# ── 1. Raw N per country per wave ────────────────────────────────────────────
# Unweighted respondent counts, stacked across waves and then spread to one
# column per wave (column names are built as <Wave>_<Year>). A country that is
# absent from a wave ends up as NA in that wave's column.
n_table <- bind_rows(
  w3 |> count(country_name) |> mutate(Wave = "W3", Year = 1996),
  w5 |> count(country_name) |> mutate(Wave = "W5", Year = 2006),
  w6 |> count(country_name) |> mutate(Wave = "W6", Year = 2012),
  w7 |> count(country_name) |> mutate(Wave = "W7", Year = 2022)
) |>
  pivot_wider(names_from = c(Wave, Year), values_from = n) |>
  rename(Country = country_name)

kable(n_table, caption = "Raw N per country per wave (unweighted)",
      format = "html") |>
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed"))

Raw N per country per wave (unweighted)
Country	W3_1996	W5_2006	W6_2012	W7_2022
China	1500	1991	2300	3036
Germany	2026	2064	2046	1528
India	2040	2001	4078	1692
South Africa	2935	2988	3531	NA
USA	1542	1249	2232	2596
Malaysia	NA	1201	1300	1313

Demographics across countries

# Add a cleaned numeric age (`age_raw`) and a three-band factor (`age_cat`).
#
# Args:
#   df       Working file for one wave.
#   age_var  Name (string) of the age column in `df`.
#
# Negative codes are treated as missing. Ages that fall in none of the bands
# (below 18, or missing) get NA in `age_cat`.
recode_age <- function(df, age_var) {
  with_age <- mutate(
    df,
    age_raw = as.numeric(zap_labels(.data[[age_var]])),
    age_raw = replace(age_raw, which(age_raw < 0), NA_real_)
  )

  mutate(
    with_age,
    age_cat = case_when(
      between(age_raw, 18, 34) ~ "Young (18–34)",
      between(age_raw, 35, 54) ~ "Middle (35–54)",
      age_raw >= 55            ~ "Elderly (55+)",
      TRUE                     ~ NA_character_
    ),
    # Fixed level order so tables and stacked bars always sort young -> old.
    age_cat = factor(
      age_cat,
      levels = c("Young (18–34)", "Middle (35–54)", "Elderly (55+)")
    )
  )
}

# Add a cleaned numeric education code (`educ_raw`) and a three-level factor
# (`educ_cat`).
#
# Args:
#   df        Working file for one wave.
#   educ_var  Name (string) of the education column in `df`.
#   wave      Single wave label. "W7" selects the 0–8 coding; any other value
#             selects the 1–9 coding used for the earlier waves.
#
# Negative codes are treated as missing; codes outside the expected range
# become NA in `educ_cat`.
#
# NOTE(review): the lowest band is labelled "Below Primary" but spans three
# codes in both schemes — confirm against the codebooks that the label matches
# what those codes mean.
recode_educ <- function(df, educ_var, wave) {
  stopifnot(length(wave) == 1L, !is.na(wave))

  # Resolve the scalar wave switch here, outside the data mask, so that a
  # column called `wave` in `df` can never shadow the argument.
  first_code <- if (wave == "W7") 0L else 1L
  low_codes  <- first_code + 0:2
  mid_codes  <- first_code + 3:5
  high_codes <- first_code + 6:8

  df |>
    mutate(
      educ_raw = as.numeric(zap_labels(.data[[educ_var]])),
      educ_raw = if_else(educ_raw < 0, NA_real_, educ_raw),
      educ_cat = case_when(
        educ_raw %in% .env$low_codes  ~ "Below Primary",
        educ_raw %in% .env$mid_codes  ~ "Secondary",
        educ_raw %in% .env$high_codes ~ "Higher Education",
        TRUE                          ~ NA_character_
      ),
      educ_cat = factor(educ_cat,
                        levels = c("Below Primary", "Secondary", "Higher Education"))
    )
}

# Apply the age and education recodes to every wave; the source variable names
# differ per wave, and only W7 uses the "W7" education coding.
# W5 age variable is V237 not V242 — V242 is interview year in W5
w3 <- w3 |> recode_age("V216") |> recode_educ("V217", "W3")
w5 <- w5 |> recode_age("V237") |> recode_educ("V238", "W5")
w6 <- w6 |> recode_age("V242") |> recode_educ("V248", "W6")
w7 <- w7 |> recode_age("Q262") |> recode_educ("Q275", "W7")

# ── Verification: % computed WITHIN each variable separately ─────────────────
# Tabulate the recoded age and education categories for one wave.
#
# Args:
#   df    Working file that already has `age_cat` and `educ_cat`.
#   wave  Wave label stored in the output.
#   year  Survey year stored in the output.
#
# Returns one row per category (NA included as its own row) with the
# unweighted count `n` and `pct`, the share within that variable only.
verify_cats <- function(df, wave, year) {
  share_table <- function(cat_col, var_label) {
    counts <- count(df, category = .data[[cat_col]])
    mutate(counts, var = var_label, pct = round(n / sum(n) * 100, 1))
  }

  stacked <- bind_rows(
    share_table("age_cat", "Age"),
    share_table("educ_cat", "Educ")
  )

  mutate(stacked, wave = wave, year = year)
}

# Stack the per-wave verification tables (all focal countries pooled).
verify_all <- bind_rows(
  verify_cats(w3, "W3", 1996),
  verify_cats(w5, "W5", 2006),
  verify_cats(w6, "W6", 2012),
  verify_cats(w7, "W7", 2022)
)

# Render with display-friendly column names; collapse_rows() merges the
# repeated Wave / Year / Variable cells.
verify_all |>
  select(Wave = wave, Year = year, Variable = var, Category = category, N = n, `%` = pct) |>
  kable(format = "html", caption = "Age and education category distributions across waves") |>
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed")) |>
  collapse_rows(columns = 1:3, valign = "top")

Age and education category distributions across waves
Wave	Year	Variable	Category	N	%
W3	1996	Age	Young (18–34)	3978	39.6
			Middle (35–54)	3838	38.2
			Elderly (55+)	2037	20.3
			NA	190	1.9
		Educ	Below Primary	3154	31.4
			Secondary	3378	33.6
			Higher Education	3460	34.5
			NA	51	0.5
W5	2006	Age	Young (18–34)	3730	32.5
			Middle (35–54)	4588	39.9
			Elderly (55+)	2872	25.0
			NA	304	2.6
		Educ	Below Primary	3735	32.5
			Secondary	3884	33.8
			Higher Education	3825	33.3
			NA	50	0.4
W6	2012	Age	Young (18–34)	5502	35.5
			Middle (35–54)	6068	39.2
			Elderly (55+)	3800	24.5
			NA	117	0.8
		Educ	Below Primary	3884	25.1
			Secondary	5070	32.7
			Higher Education	6479	41.8
			NA	54	0.3
W7	2022	Age	Young (18–34)	3349	32.9
			Middle (35–54)	3819	37.6
			Elderly (55+)	2676	26.3
			NA	321	3.2
		Educ	Below Primary	3090	30.4
			Secondary	4266	42.0
			Higher Education	2739	26.9
			NA	70	0.7

library(ggplot2)
library(scales)

# ── Build long demographic table ─────────────────────────────────────────────
# Per-country distribution of one categorical variable, in long format.
#
# Args:
#   df       Working file for one wave (needs `country_name`).
#   wave     Wave label stored in the output.
#   year     Survey year stored in the output.
#   cat_var  Name (string) of the categorical column to tabulate.
#   label    Display name of the variable, stored in column `variable`.
#
# Rows with a missing category are dropped first, so `pct` (unweighted) sums
# to 100 within each country.
build_demo_long <- function(df, wave, year, cat_var, label) {
  observed <- filter(df, !is.na(.data[[cat_var]]))
  tallies  <- count(observed, country_name, category = .data[[cat_var]])

  shares <- tallies |>
    group_by(country_name) |>
    mutate(pct = n / sum(n) * 100) |>
    ungroup()

  mutate(shares, wave = wave, year = year, variable = label)
}

# Age and education distributions for every wave, stacked into one long table.
demo_long <- bind_rows(
  # Age
  build_demo_long(w3, "W3", 1996, "age_cat",  "Age"),
  build_demo_long(w5, "W5", 2006, "age_cat",  "Age"),
  build_demo_long(w6, "W6", 2012, "age_cat",  "Age"),
  build_demo_long(w7, "W7", 2022, "age_cat",  "Age"),
  # Education
  build_demo_long(w3, "W3", 1996, "educ_cat", "Education"),
  build_demo_long(w5, "W5", 2006, "educ_cat", "Education"),
  build_demo_long(w6, "W6", 2012, "educ_cat", "Education"),
  build_demo_long(w7, "W7", 2022, "educ_cat", "Education")
) |>
  mutate(
    # Two-line axis label ("W3\n(1996)"), as an ordered factor so the x axis
    # is chronological rather than alphabetical.
    wave_label = paste0(wave, "\n(", year, ")"),
    wave_label = factor(wave_label,
                        levels = c("W3\n(1996)", "W5\n(2006)",
                                   "W6\n(2012)", "W7\n(2022)"))
  )

# ── Colour palettes ───────────────────────────────────────────────────────────
# Named vectors: the names must match the factor levels of `age_cat` /
# `educ_cat` exactly, because scale_fill_manual() matches colours by name.
age_colors  <- c("Young (18–34)"   = "#4E79A7",
                 "Middle (35–54)"  = "#F28E2B",
                 "Elderly (55+)"   = "#E15759")

educ_colors <- c("Below Primary"   = "#FABFD2",
                 "Secondary"       = "#8CD17D",
                 "Higher Education"= "#499894")

# ── Plot 1: Age distribution ──────────────────────────────────────────────────
# Stacked bars (one bar per wave, one panel per country). Percentages are the
# unweighted shares from `demo_long`.
p_age <- demo_long |>
  filter(variable == "Age") |>
  ggplot(aes(x = wave_label, y = pct, fill = category)) +
  geom_col(position = "stack", width = 0.7) +
  # Segments under 8% are left unlabelled (empty string) to avoid clutter.
  geom_text(aes(label = ifelse(pct >= 8, paste0(round(pct, 0), "%"), "")),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  facet_wrap(~country_name, nrow = 1) +
  scale_fill_manual(values = age_colors, name = NULL) +
  # `pct` is already on a 0–100 scale, hence scale = 1.
  scale_y_continuous(labels = label_percent(scale = 1)) +
  labs(
    title    = "Age Distribution by Country and Wave",
    subtitle = "World Values Survey — W3 (1996) to W7 (2022)",
    x = NULL, y = "% of respondents"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position  = "bottom",
    strip.text       = element_text(face = "bold", size = 10),
    panel.grid.major.x = element_blank(),
    plot.title       = element_text(face = "bold", size = 13),
    plot.subtitle    = element_text(colour = "grey40", size = 10)
  )

# ── Plot 2: Education distribution ───────────────────────────────────────────
# Same layout as the age plot: stacked bars per wave, one panel per country,
# using the unweighted shares from `demo_long`.
p_educ <- demo_long |>
  filter(variable == "Education") |>
  ggplot(aes(x = wave_label, y = pct, fill = category)) +
  geom_col(position = "stack", width = 0.7) +
  # Segments under 8% are left unlabelled (empty string) to avoid clutter.
  geom_text(aes(label = ifelse(pct >= 8, paste0(round(pct, 0), "%"), "")),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  facet_wrap(~country_name, nrow = 1) +
  scale_fill_manual(values = educ_colors, name = NULL) +
  # `pct` is already on a 0–100 scale, hence scale = 1.
  scale_y_continuous(labels = label_percent(scale = 1)) +
  labs(
    title    = "Education Distribution by Country and Wave",
    subtitle = "World Values Survey — W3 (1996) to W7 (2022)",
    x = NULL, y = "% of respondents"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position  = "bottom",
    strip.text       = element_text(face = "bold", size = 10),
    panel.grid.major.x = element_blank(),
    plot.title       = element_text(face = "bold", size = 13),
    plot.subtitle    = element_text(colour = "grey40", size = 10)
  )

# ── Recode sex variable ───────────────────────────────────────────────────────
# 1 = Male, 2 = Female across all waves (W3: V214, W5: V235, W6: V240, W7: Q260)
# The W6 sex item is V240. V235 is the sex item only in W5; in the W6
# questionnaire V235 is the "chief wage earner" yes/no item, which is why
# reading it as sex gave implausible splits (e.g. Germany 63% "male").

# Add the raw numeric sex code (`sex_raw`) and a two-level factor (`sex_cat`).
#
# Args:
#   df       Working file for one wave.
#   sex_var  Name (string) of the sex column in `df`.
#
# Any code other than 1 or 2 (including negative missing codes) becomes NA in
# `sex_cat`; `sex_raw` keeps the original numeric code.
recode_sex <- function(df, sex_var) {
  df |>
    mutate(
      sex_raw = as.numeric(zap_labels(.data[[sex_var]])),
      sex_cat = case_when(
        sex_raw == 1 ~ "Male",
        sex_raw == 2 ~ "Female",
        TRUE         ~ NA_character_
      ),
      sex_cat = factor(sex_cat, levels = c("Male", "Female"))
    )
}

w3 <- w3 |> recode_sex("V214")
w5 <- w5 |> recode_sex("V235")
w6 <- w6 |> recode_sex("V240")
w7 <- w7 |> recode_sex("Q260")

# ── Build long table ──────────────────────────────────────────────────────────
# Per-country gender shares for every wave (rows with missing `sex_cat` are
# dropped inside build_demo_long()).
gender_long <- bind_rows(
  build_demo_long(w3, "W3", 1996, "sex_cat", "Gender"),
  build_demo_long(w5, "W5", 2006, "sex_cat", "Gender"),
  build_demo_long(w6, "W6", 2012, "sex_cat", "Gender"),
  build_demo_long(w7, "W7", 2022, "sex_cat", "Gender")
) |>
  mutate(
    # Ordered two-line wave label so the x axis is chronological.
    wave_label = paste0(wave, "\n(", year, ")"),
    wave_label = factor(wave_label,
                        levels = c("W3\n(1996)", "W5\n(2006)",
                                   "W6\n(2012)", "W7\n(2022)"))
  )

# ── Plot ──────────────────────────────────────────────────────────────────────
# Names must match the levels of `sex_cat`.
gender_colors <- c("Male" = "#4E79A7", "Female" = "#E15759")

# Stacked bars per wave, one panel per country; every segment is labelled
# (no minimum-size threshold, unlike the age and education plots).
p_gender <- gender_long |>
  ggplot(aes(x = wave_label, y = pct, fill = category)) +
  geom_col(position = "stack", width = 0.7) +
  geom_text(aes(label = paste0(round(pct, 0), "%")),
            position = position_stack(vjust = 0.5),
            size = 3, colour = "white", fontface = "bold") +
  facet_wrap(~country_name, nrow = 1) +
  scale_fill_manual(values = gender_colors, name = NULL) +
  # `pct` is already on a 0–100 scale, hence scale = 1.
  scale_y_continuous(labels = label_percent(scale = 1)) +
  labs(
    title    = "Gender Distribution by Country and Wave",
    subtitle = "World Values Survey — W3 (1996) to W7 (2022)",
    x = NULL, y = "% of respondents"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position    = "bottom",
    strip.text         = element_text(face = "bold", size = 10),
    panel.grid.major.x = element_blank(),
    plot.title         = element_text(face = "bold", size = 13),
    plot.subtitle      = element_text(colour = "grey40", size = 10)
  )

# Draw the three demographic plots.
print(p_gender)

print(p_age)

print(p_educ)

# Raw unrecoded sex variable check per country per wave
# Tabulate the raw (unrecoded) numeric sex codes per country for one wave.
#
# Args:
#   df       Working file for one wave (needs `country_name`).
#   wave     Wave label stored in the output.
#   year     Survey year stored in the output.
#   sex_var  Name (string) of the sex column in `df`.
#
# Every code, including negative and NA values, keeps its own row; `pct` is
# the unweighted share within the country, rounded to one decimal.
check_sex <- function(df, wave, year, sex_var) {
  code_counts <- df |>
    mutate(sex_raw = as.numeric(zap_labels(.data[[sex_var]]))) |>
    count(country_name, sex_raw)

  code_counts |>
    group_by(country_name) |>
    mutate(pct = round(prop.table(n) * 100, 1)) |>
    ungroup() |>
    mutate(wave = wave, year = year)
}

# Raw sex codes per wave. W6 reads V240: V235 is the sex item only in W5,
# while in W6 it is the "chief wage earner" item, which produced the odd
# Germany / India splits this check was written to investigate.
bind_rows(
  check_sex(w3, "W3", 1996, "V214"),
  check_sex(w5, "W5", 2006, "V235"),
  check_sex(w6, "W6", 2012, "V240"),
  check_sex(w7, "W7", 2022, "Q260")
) |>
  filter(country_name %in% c("Germany", "India")) |>  # focus on odd ones
  arrange(wave, country_name, sex_raw) |>
  kable(format = "html", caption = "Raw sex variable counts — Germany & India") |>
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed"))

Raw sex variable counts — Germany & India
country_name	sex_raw	n	pct	wave	year
Germany	1	928	45.8	W3	1996
Germany	2	1098	54.2	W3	1996
India	1	1120	54.9	W3	1996
India	2	920	45.1	W3	1996
Germany	1	911	44.1	W5	2006
Germany	2	1153	55.9	W5	2006
India	-2	3	0.1	W5	2006
India	1	1137	56.8	W5	2006
India	2	861	43.0	W5	2006
Germany	1	1297	63.4	W6	2012
Germany	2	710	34.7	W6	2012
Germany	NA	39	1.9	W6	2012
India	1	1590	39.0	W6	2012
India	2	2488	61.0	W6	2012
Germany	1	743	48.6	W7	2022
Germany	2	785	51.4	W7	2022
India	1	958	56.6	W7	2022
India	2	734	43.4	W7	2022

Economic values

Scale reference (1–10 scale)

Income equality — 1 = Incomes should be more equal; 10 = We need larger income differences as incentives
Govt vs private ownership — 1 = Private ownership of business should be increased; 10 = Government ownership should be increased
State vs individual responsibility — 1 = The state should take more responsibility; 10 = Individuals should take more responsibility
Competition good/harmful — 1 = Competition is good, it stimulates people to work hard; 10 = Competition is harmful, it brings out the worst in people
Hard work vs luck — 1 = Hard work brings a better life; 10 = Hard work doesn’t bring success — it’s more a matter of luck and connections
Wealth accumulation (W3/W5/W6 only) — 1 = Wealth grows so there’s enough for everyone; 10 = Accumulation of wealth harms poor countries

# ── Variable mapping ──────────────────────────────────────────────────────────
# All are 10-point scales: negative codes = missing
# W3: V125–V130 | W5: V116–V121 | W6: V96–V101 | W7: Q106–Q110
# One named character vector per wave: names are the display labels (these
# must match the factor levels used for `question` further down), values are
# the wave-specific column names. W7 has no "Wealth accumulation" entry.

econ_vars <- list(
  W3 = c("Income equality"            = "V125",
          "Govt vs private ownership"  = "V126",
          "State vs individual resp."  = "V127",
          "Competition good/harmful"   = "V128",
          "Hard work vs luck"          = "V129",
          "Wealth accumulation"        = "V130"),   # W3/W5/W6 only
  W5 = c("Income equality"            = "V116",
          "Govt vs private ownership"  = "V117",
          "State vs individual resp."  = "V118",
          "Competition good/harmful"   = "V119",
          "Hard work vs luck"          = "V120",
          "Wealth accumulation"        = "V121"),
  W6 = c("Income equality"            = "V96",
          "Govt vs private ownership"  = "V97",
          "State vs individual resp."  = "V98",
          "Competition good/harmful"   = "V99",
          "Hard work vs luck"          = "V100",
          "Wealth accumulation"        = "V101"),
  W7 = c("Income equality"            = "Q106",
          "Govt vs private ownership"  = "Q107",
          "State vs individual resp."  = "Q108",
          "Competition good/harmful"   = "Q109",
          "Hard work vs luck"          = "Q110")
)
# ── Clean helper: 10-pt scale, anything outside 1–10 = NA ────────────────────
# Strips haven labels, converts to numeric and keeps only codes on the
# documented 1–10 scale. Negative missing codes become NA as before; a stray
# 0 or a code above 10 is now also set to NA instead of entering the mean,
# matching the whitelist approach of the other cleaning helpers.
#
# Args:
#   x  Vector of item responses (plain or haven-labelled).
# Returns a numeric vector of the same length as `x`.
clean_10pt <- function(x) {
  x <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  x[!is.na(x) & (x < 1 | x > 10)] <- NA_real_
  x
}

# ── Compute weighted mean per country per wave ────────────────────────────────
# Args:
#   df    Working file for one wave (needs `wt` and `country_name`).
#   wave  Wave label stored in the output.
#   year  Survey year stored in the output.
#   vars  Named character vector: display label -> column name in `df`.
#
# Returns one row per country and item with the weighted mean rounded to two
# decimals. An item with no usable responses contributes no rows.
compute_econ <- function(df, wave, year, vars) {
  mean_for_item <- \(v, label) {
    scored <- mutate(df, item = clean_10pt(.data[[v]]), wt2 = as.numeric(wt))
    scored <- filter(scored, !is.na(item), !is.na(wt2))

    if (nrow(scored) == 0) {
      return(tibble())
    }

    # Weights only; no clustering or strata are specified.
    design <- svydesign(ids = ~1, weights = ~wt2, data = scored)
    by_country <- svyby(~item, by = ~country_name, design = design,
                        FUN = svymean, na.rm = TRUE)

    transmute(
      as_tibble(by_country),
      country  = country_name,
      question = label,
      mean     = round(item, 2),
      wave     = wave,
      year     = year
    )
  }

  bind_rows(imap(vars, mean_for_item))
}

# Weighted item means for all waves, stacked into one long table.
econ_long <- bind_rows(
  compute_econ(w3, "W3", 1996, econ_vars$W3),
  compute_econ(w5, "W5", 2006, econ_vars$W5),
  compute_econ(w6, "W6", 2012, econ_vars$W6),
  compute_econ(w7, "W7", 2022, econ_vars$W7)
) |>
  # Defensive: drops any Malaysia rows for 1996 (the coverage table reports
  # no Malaysian respondents in W3).
  filter(!(country == "Malaysia" & year == 1996)) |>
  mutate(
    # Fixed factor orders control legend order and facet order in the plot.
    country  = factor(country,
                      levels = c("India", "China", "USA",
                                 "Germany", "South Africa", "Malaysia")),
    question = factor(question,
                      levels = c("Income equality",
                                 "Govt vs private ownership",
                                 "State vs individual resp.",
                                 "Competition good/harmful",
                                 "Hard work vs luck",
                                 "Wealth accumulation"))
  )

# ── Plot ──────────────────────────────────────────────────────────────────────
# One colour per focal country; names must match the levels of `country`.
country_colors <- c(
  "India"        = "#E15759",
  "China"        = "#F28E2B",
  "USA"          = "#4E79A7",
  "Germany"      = "#76B7B2",
  "South Africa" = "#59A14F",
  "Malaysia"     = "#B07AA1"
)
# Line chart of weighted means over time, one panel per item.
# NOTE(review): `scales = "free_y"` is combined with fixed `limits = c(1, 10)`,
# so every panel still shows the same 1–10 range; the y-axis label "scale
# varies by item" therefore refers to item wording, not the numeric range —
# confirm this is the intended presentation.
p_econ <- econ_long |>
  ggplot(aes(x = year, y = mean, colour = country, group = country)) +
  geom_line(linewidth = 0.9) +
  geom_point(size = 2.5) +
  facet_wrap(~question, nrow = 2, scales = "free_y") +
  scale_colour_manual(values = country_colors, name = NULL) +
  scale_x_continuous(breaks = c(1996, 2006, 2012, 2022)) +
  scale_y_continuous(limits = c(1, 10), breaks = c(1, 5, 10)) +
  labs(
    title    = "Economic Values: Country Means Across Waves (1–10 scale)",
    subtitle = "World Values Survey W3–W7 | Weighted means",
    x = NULL, y = "Mean (scale varies by item)"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position    = "bottom",
    strip.text         = element_text(face = "bold", size = 10),
    panel.grid.minor   = element_blank(),
    plot.title         = element_text(face = "bold", size = 13),
    plot.subtitle      = element_text(colour = "grey40", size = 10),
    axis.text.x        = element_text(size = 9)
  ) +
  guides(colour = guide_legend(nrow = 1))

print(p_econ)

Regime Preferences

Scale reference (1–4 scale)

All four items (Strong leader, Expert rule, Army rule, Democracy) — 1 = Very good; 2 = Fairly good; 3 = Fairly bad; 4 = Very bad
We code 1–2 as acceptable and 3–4 as not acceptable and report the % acceptable

# Standard 4-point: 1=Very good, 2=Fairly good, 3=Fairly bad, 4=Very bad
# Collapses the item to an "acceptable" indicator: 1L for codes 1–2, 0L for
# codes 3–4, NA for everything else (missing, non-positive or unknown codes).
# Returns an integer vector of the same length as `x`.
clean_regime_4pt <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))

  acceptable <- rep(NA_integer_, length(codes))
  acceptable[codes %in% c(1, 2)] <- 1L
  acceptable[codes %in% c(3, 4)] <- 0L
  acceptable
}

# W3 Democracy (V157): 3-point: 1=Fairly good, 2=Fairly bad, 3=Very bad
# Collapses the item to an "acceptable" indicator: 1L for code 1, 0L for
# codes 2–3, NA for everything else (including a code of 4).
# NOTE(review): this assumes V157 in the W3 file really is coded on three
# points — verify against the W3 codebook before relying on the comparison
# with later waves.
clean_regime_3pt <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))

  acceptable <- rep(NA_integer_, length(codes))
  acceptable[codes %in% 1] <- 1L          # Fairly good = acceptable
  acceptable[codes %in% c(2, 3)] <- 0L
  acceptable
}

# ── 1. Variable mapping ───────────────────────────────────────────────────────
# One named character vector per wave: names are the display labels (they
# must match the `question` factor levels used below), values are the
# wave-specific column names.
regime_vars <- list(
  W3 = c("Strong leader" = "V154", "Expert rule" = "V155",
          "Army rule"     = "V156", "Democracy"   = "V157"),
  W5 = c("Strong leader" = "V148", "Expert rule" = "V149",
          "Army rule"     = "V150", "Democracy"   = "V151"),
  W6 = c("Strong leader" = "V127", "Expert rule" = "V128",
          "Army rule"     = "V129", "Democracy"   = "V130"),
  W7 = c("Strong leader" = "Q235", "Expert rule" = "Q236",
          "Army rule"     = "Q237", "Democracy"   = "Q238")
)

# ── 2. Compute per country per wave ──────────────────────────────────────────
# Weighted share of respondents rating each regime type as acceptable.
#
# Args:
#   df    Working file for one wave (needs `wt` and `country_name`).
#   wave  Single wave label; also selects the cleaner for W3 Democracy.
#   year  Survey year stored in the output.
#   vars  Named character vector: display label -> column name in `df`.
#
# Returns one row per country and item with `pct` (0–100, one decimal). An
# item with no usable responses contributes no rows.
compute_regime <- function(df, wave, year, vars) {
  imap_dfr(vars, function(v, label) {

    # Use 3pt cleaner only for W3 Democracy.
    # `&&` (scalar, short-circuit) is the correct operator inside `if`;
    # the vectorised `&` would silently accept non-scalar input.
    cleaner <- if (wave == "W3" && label == "Democracy") {
      clean_regime_3pt
    } else {
      clean_regime_4pt
    }

    df2 <- df |>
      mutate(
        item = cleaner(.data[[v]]),
        wt2  = as.numeric(wt)
      ) |>
      filter(!is.na(item), !is.na(wt2))

    if (nrow(df2) == 0) return(tibble())

    # Weights only; no clustering or strata are specified.
    des <- svydesign(ids = ~1, weights = ~wt2, data = df2)

    # `item` is 0/1, so its weighted mean is the weighted share acceptable.
    svyby(~item, by = ~country_name, design = des,
          FUN = svymean, na.rm = TRUE) |>
      as_tibble() |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(item * 100, 1),
        wave     = wave,
        year     = year
      )
  })
}

# Weighted "% acceptable" for all waves, stacked into one long table.
regime_long <- bind_rows(
  compute_regime(w3, "W3", 1996, regime_vars$W3),
  compute_regime(w5, "W5", 2006, regime_vars$W5),
  compute_regime(w6, "W6", 2012, regime_vars$W6),
  compute_regime(w7, "W7", 2022, regime_vars$W7)
) |>
  mutate(
    # Fixed factor orders control facet order and legend order in the plot.
    question = factor(question,
                      levels = c("Democracy", "Strong leader",
                                 "Expert rule", "Army rule")),
    country = factor(country,
                     levels = c("India", "China", "USA",
                                "Germany", "South Africa", "Malaysia"))
  )

# ── 3. Line chart: all countries across waves, faceted by question ────────────
# Same country palette as the economic-values plot, re-defined here so this
# section can be run on its own; keep the two definitions identical.
country_colors <- c(
  "India"        = "#E15759",
  "China"        = "#F28E2B",
  "USA"          = "#4E79A7",
  "Germany"      = "#76B7B2",
  "South Africa" = "#59A14F",
  "Malaysia"     = "#B07AA1"
)

p_regime <- regime_long |>
  filter(!(country == "Malaysia" & year == 1996)) |>   # defensive: Malaysia is not covered in W3
  ggplot(aes(x = year, y = pct, colour = country, group = country)) +
  geom_line(linewidth = 0.9) +
  geom_point(size = 2.5) +
  facet_wrap(~question, nrow = 2, scales = "free_y") +
  scale_colour_manual(values = country_colors, name = NULL) +
  scale_x_continuous(breaks = c(1996, 2006, 2012, 2022)) +
  scale_y_continuous(labels = label_percent(scale = 1),
                     limits = c(0, 100)) +              # same 0–100 range in every panel
  labs(
    title    = "Regime Preferences: % Finding Each System 'Acceptable'",
    subtitle = "World Values Survey W3–W7 | Weighted country means",
    x = NULL, y = "% acceptable"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position    = "bottom",
    strip.text         = element_text(face = "bold", size = 11),
    panel.grid.minor   = element_blank(),
    plot.title         = element_text(face = "bold", size = 13),
    plot.subtitle      = element_text(colour = "grey40", size = 10),
    axis.text.x        = element_text(size = 9)
  ) +
  guides(colour = guide_legend(nrow = 1))

print(p_regime)

Institutional Trust

(1–4 scale)

1 = A great deal of confidence; 2 = Quite a lot; 3 = Not very much; 4 = None at all
We collapse 1–2 as high confidence and report the % with high confidence

# ── Variable mapping ──────────────────────────────────────────────────────────
# All waves: 1=Great deal, 2=Quite a lot, 3=Not very much, 4=None at all
# Negatives = missing
# W3: V135–V145 | W5: V131–V141 | W6: V108–V118 | W7: Q64–Q74
# One named character vector per wave: names are the display labels (they
# must match the `question` factor levels used below), values are the
# wave-specific column names. Ten institutions are mapped per wave, so one
# code inside each range above is deliberately left out, and the item order
# in W3 differs from the later waves.

trust_vars <- list(
  W3 = c("Religious institutions" = "V135",
          "Armed forces"           = "V136",
          "Legal system/Courts"    = "V137",
          "Press"                  = "V138",
          "Television"             = "V139",
          "Police"                 = "V141",
          "Government"             = "V142",
          "Political parties"      = "V143",
          "Parliament"             = "V144",
          "Civil service"          = "V145"),
  W5 = c("Religious institutions" = "V131",
          "Armed forces"           = "V132",
          "Press"                  = "V133",
          "Television"             = "V134",
          "Police"                 = "V136",
          "Legal system/Courts"    = "V137",
          "Government"             = "V138",
          "Political parties"      = "V139",
          "Parliament"             = "V140",
          "Civil service"          = "V141"),
  W6 = c("Religious institutions" = "V108",
          "Armed forces"           = "V109",
          "Press"                  = "V110",
          "Television"             = "V111",
          "Police"                 = "V113",
          "Legal system/Courts"    = "V114",
          "Government"             = "V115",
          "Political parties"      = "V116",
          "Parliament"             = "V117",
          "Civil service"          = "V118"),
  W7 = c("Religious institutions" = "Q64",
          "Armed forces"           = "Q65",
          "Press"                  = "Q66",
          "Television"             = "Q67",
          "Police"                 = "Q69",
          "Legal system/Courts"    = "Q70",
          "Government"             = "Q71",
          "Political parties"      = "Q72",
          "Parliament"             = "Q73",
          "Civil service"          = "Q74")
)

# ── Clean helper: 1/2 = high confidence, 3/4 = low, else NA ──────────────────
# Returns an integer vector of the same length as `x`: 1L for codes 1–2,
# 0L for codes 3–4, NA for missing, non-positive or any other code.
clean_trust <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))

  # Start from the high-confidence flag (1L / 0L), then blank out every
  # response that is not one of the four valid codes.
  high_conf <- as.integer(codes %in% c(1, 2))
  high_conf[!(codes %in% c(1, 2, 3, 4))] <- NA_integer_
  high_conf
}

# ── Compute weighted % high confidence per country per wave ───────────────────
# Args:
#   df    Working file for one wave (needs `wt` and `country_name`).
#   wave  Wave label stored in the output.
#   year  Survey year stored in the output.
#   vars  Named character vector: institution label -> column name in `df`.
#
# Returns one row per country and institution with `pct` (0–100, one decimal).
# An institution with no usable responses contributes no rows.
compute_trust <- function(df, wave, year, vars) {
  share_for_item <- \(v, label) {
    answered <- df |>
      mutate(item = clean_trust(.data[[v]]), wt2 = as.numeric(wt)) |>
      filter(!is.na(item), !is.na(wt2))

    if (nrow(answered) == 0) {
      return(tibble())
    }

    # Weights only; no clustering or strata are specified.
    design <- svydesign(ids = ~1, weights = ~wt2, data = answered)

    # `item` is 0/1, so its weighted mean is the weighted share with high
    # confidence.
    country_means <- as_tibble(
      svyby(~item, by = ~country_name, design = design,
            FUN = svymean, na.rm = TRUE)
    )

    transmute(
      country_means,
      country  = country_name,
      question = label,
      pct      = round(item * 100, 1),
      wave     = wave,
      year     = year
    )
  }

  bind_rows(imap(vars, share_for_item))
}

# Weighted "% high confidence" for all waves, stacked into one long table.
trust_long <- bind_rows(
  compute_trust(w3, "W3", 1996, trust_vars$W3),
  compute_trust(w5, "W5", 2006, trust_vars$W5),
  compute_trust(w6, "W6", 2012, trust_vars$W6),
  compute_trust(w7, "W7", 2022, trust_vars$W7)
) |>
  # Defensive filters: drop any Malaysia rows for 1996 and any percentage
  # that is missing or outside 0–100.
  filter(!(country == "Malaysia" & year == 1996),
         pct >= 0, pct <= 100) |>
  mutate(
    country  = factor(country,
                      levels = c("India", "China", "USA",
                                 "Germany", "South Africa", "Malaysia")),
    # Institution order: political bodies first, then security / legal,
    # then media and religion.
    question = factor(question,
                      levels = c("Parliament", "Government",
                                 "Political parties", "Civil service",
                                 "Armed forces", "Police",
                                 "Legal system/Courts", "Press",
                                 "Television", "Religious institutions"))
  )

# ── Plot ──────────────────────────────────────────────────────────────────────
# ── Colour helper ─────────────────────────────────────────────────────────────
# Map percentages to table-cell background colours.
#
# Args:
#   val  Numeric vector of percentages (any length, NA allowed).
# Returns a character vector of the same length:
#   NA -> "white"; >= 75 -> green; >= 50 -> yellow; >= 25 -> light orange;
#   below 25 -> orange.
#
# Vectorised: the previous scalar `if` chain failed on inputs longer than one
# and on length-0 input. Results for a single value are unchanged.
colour_cell <- function(val) {
  shades <- c("#fc8d59", "#fdd0a2", "#ffffb2", "#c7e9c0")
  # findInterval() returns 0..3 for the bands <25, [25,50), [50,75), >=75.
  band <- findInterval(as.numeric(val), c(25, 50, 75)) + 1L
  out <- shades[band]
  out[is.na(val)] <- "white"
  out
}

# ── Reshape wide ──────────────────────────────────────────────────────────────
# Display order of the country columns in the final table.
country_cols <- c("India", "China", "USA", "Germany", "South Africa", "Malaysia")

# One row per institution x wave, one column per country.
# NOTE(review): `question` is converted to character before arrange(), so rows
# are sorted alphabetically by institution and the factor order set when
# `trust_long` was built is not used here — confirm that is intended.
trust_wide <- trust_long |>
  mutate(
    country  = as.character(country),
    question = as.character(question),
    wave_yr  = paste0(wave, " (", year, ")")
  ) |>
  select(question, wave_yr, country, pct) |>
  pivot_wider(
    names_from  = country,
    values_from = pct,
    values_fill = NA,
    names_sort  = FALSE          # don't let pivot sort alphabetically
  ) |>
  # Force exact column order regardless of pivot output order
  # (any_of() tolerates a country that never appears in the data).
  select(question, wave_yr, any_of(country_cols)) |>
  arrange(question, wave_yr) |>
  rename(Institution = question, Wave = wave_yr)

# Verify no NAs for USA W7
trust_wide |> filter(grepl("2022", Wave)) |> select(Institution, Wave, USA)

## # A tibble: 10 × 3
##    Institution            Wave        USA
##    <chr>                  <chr>     <dbl>
##  1 Armed forces           W7 (2022)  81.3
##  2 Civil service          W7 (2022)  42  
##  3 Government             W7 (2022)  33.7
##  4 Legal system/Courts    W7 (2022)  57.8
##  5 Parliament             W7 (2022)  15.1
##  6 Police                 W7 (2022)  68.8
##  7 Political parties      W7 (2022)  11.3
##  8 Press                  W7 (2022)  29.7
##  9 Religious institutions W7 (2022)  53.7
## 10 Television             W7 (2022)  22.6

# ── Apply cell_spec colouring BEFORE kable ────────────────────────────────────
# Each country column is replaced by HTML produced by cell_spec(): the value
# is formatted as "<pct>%" (or an em dash when missing) on a background colour
# chosen by colour_cell(). The columns therefore become character and the
# table must later be rendered with `escape = FALSE`.
trust_coloured <- trust_wide |>
  mutate(across(all_of(country_cols), function(x) {
    cell_spec(
      ifelse(is.na(x), "—", paste0(round(x, 1), "%")),
      # vapply() guarantees a character vector even for an empty column,
      # where sapply() would return a list.
      background = vapply(x, colour_cell, character(1)),
      color      = "black",
      bold       = FALSE
    )
  }))

# ── Render table ──────────────────────────────────────────────────────────────
# `escape = FALSE` is required because the cells already contain the HTML
# generated by cell_spec(). `col.names` and `align` assume that all six
# country columns are present.
trust_coloured |>
  kable(format  = "html", escape = FALSE,
        caption = "Institutional Trust: % High Confidence by Country and Wave",
        col.names = c("Institution", "Wave", country_cols),
        align     = c("l", "l", rep("c", 6))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  # Merge the repeated institution cells so each name appears once.
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em")

Institutional Trust: % High Confidence by Country and Wave
Institution	Wave	India	China	USA	Germany	South Africa	Malaysia
Armed forces	W3 (1996)	84.5%	—	86.4%	45.3%	56.8%	—
	W5 (2006)	83.3%	91.5%	82.3%	50.1%	63.8%	84.4%
	W6 (2012)	87.1%	92.6%	83.2%	65.9%	50.1%	80.1%
	W7 (2022)	86.9%	95.4%	81.3%	58.1%	—	74.5%
Civil service	W3 (1996)	69.7%	—	51.8%	44.6%	58.6%	—
	W5 (2006)	54.3%	85.6%	41.5%	31%	55.9%	69.8%
	W6 (2012)	61%	77.9%	46.2%	55.3%	44.8%	77.7%
	W7 (2022)	80.6%	87%	42%	64.1%	—	60%
Government	W3 (1996)	58.2%	—	30.6%	20.6%	68.9%	—
	W5 (2006)	54.9%	92.4%	38.2%	23.5%	70.7%	75.4%
	W6 (2012)	50.4%	92.2%	33.3%	45.1%	47.9%	75.2%
	W7 (2022)	65.1%	94.9%	33.7%	45.5%	—	50.2%
Legal system/Courts	W3 (1996)	78.3%	—	36.3%	43.8%	63.6%	—
	W5 (2006)	68.9%	81.8%	57.4%	57.5%	66.4%	77.7%
	W6 (2012)	64.5%	80.2%	54.9%	72.9%	51.8%	80.3%
	W7 (2022)	74.6%	86.3%	57.8%	74.9%	—	62.7%
Parliament	W3 (1996)	65.3%	—	30.3%	23%	68.8%	—
	W5 (2006)	62.4%	91.7%	20.6%	21.9%	65.6%	67.6%
	W6 (2012)	58.4%	87%	20.8%	44.7%	45%	69.1%
	W7 (2022)	73.8%	92.7%	15.1%	44.2%	—	42.3%
Police	W3 (1996)	41.1%	—	71.2%	61.3%	74.8%	—
	W5 (2006)	64.1%	78.4%	70.3%	73.9%	61.9%	74.6%
	W6 (2012)	51.2%	73.3%	69.4%	82.6%	47%	73.7%
	W7 (2022)	66.8%	84.8%	68.8%	86.8%	—	59.5%
Political parties	W3 (1996)	47%	—	21.2%	12.1%	46.9%	—
	W5 (2006)	46.4%	86.4%	15.3%	12.8%	43.8%	58.3%
	W6 (2012)	37.4%	85%	12.8%	24.6%	38.1%	62%
	W7 (2022)	42.3%	91%	11.3%	24.2%	—	31.5%
Press	W3 (1996)	63.1%	—	27.4%	17.5%	54%	—
	W5 (2006)	75.8%	70.8%	23.8%	29.3%	60.9%	63.7%
	W6 (2012)	71.8%	69.9%	23.1%	44.8%	55.3%	68.6%
	W7 (2022)	65.8%	68.5%	29.7%	37%	—	38.4%
Religious institutions	W3 (1996)	72.7%	—	75.9%	27.8%	84.6%	—
	W5 (2006)	83.4%	39.9%	66.3%	37.6%	84.9%	90.8%
	W6 (2012)	96.1%	24.3%	58.7%	38.2%	77.9%	91.4%
	W7 (2022)	89.7%	20.1%	53.7%	36.2%	—	85%
Television	W3 (1996)	56.7%	—	29.3%	21.8%	66.8%	—
	W5 (2006)	74.9%	75%	25.3%	34.2%	74.3%	71.9%
	W6 (2012)	74.5%	72.1%	24.3%	47.8%	65.5%	70.7%
	W7 (2022)	63.4%	73.2%	22.6%	32.3%	—	39.9%

Neighbour

Neighbour Rejection (binary)

1 = Mentioned (would not want as neighbour); 2 = Not mentioned
We report the % who mentioned each group

# ── Variable mapping ──────────────────────────────────────────────────────────
# 1 = Mentioned, 2 = Not mentioned, negatives = missing
# W3: V51–V60 | W5: V34–V39 | W6: V36–V41 | W7: Q18–Q23
#
# Each element is a named character vector: names are the row labels used in
# the table, values are the column names in that wave's data set.
#
# NOTE: in the Wave 7 questionnaire the "would not like as neighbours" battery
# is Q18–Q26 (it follows the child-qualities items Q7–Q17). Q27–Q32 are 1–4
# agree/disagree items, so mapping them here made clean_neighbour() treat
# "agree strongly"/"agree" as mentioned/not mentioned and produced spurious
# W7 percentages.

neighbour_vars <- list(
  W3 = c("Drug addicts"               = "V59",
          "Different race"             = "V52",
          "People with AIDS"           = "V58",
          "Immigrants/Foreign workers" = "V57",
          "Homosexuals"                = "V60"),
  # ↑ "Different religion" intentionally omitted from W3 — no equivalent variable
  W5 = c("Drug addicts"               = "V34",
          "Different race"             = "V35",
          "People with AIDS"           = "V36",
          "Immigrants/Foreign workers" = "V37",
          "Homosexuals"                = "V38",
          "Different religion"         = "V39"),
  W6 = c("Drug addicts"               = "V36",
          "Different race"             = "V37",
          "People with AIDS"           = "V38",
          "Immigrants/Foreign workers" = "V39",
          "Homosexuals"                = "V40",
          "Different religion"         = "V41"),
  W7 = c("Drug addicts"               = "Q18",
          "Different race"             = "Q19",
          "People with AIDS"           = "Q20",
          "Immigrants/Foreign workers" = "Q21",
          "Homosexuals"                = "Q22",
          "Different religion"         = "Q23")
)

# ── Clean helper: 1 = mentioned (intolerant), 2 = not mentioned ──────────────
# Strips value labels, coerces to numeric and recodes to an integer indicator:
# code 1 becomes 1L, code 2 becomes 0L, every other value (including NA and
# anything that fails numeric coercion) becomes NA.
clean_neighbour <- function(x) {
  codes <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  flag  <- rep(NA_integer_, length(codes))
  # %in% never yields NA, so missing codes simply stay NA in `flag`
  flag[codes %in% 1] <- 1L
  flag[codes %in% 2] <- 0L
  flag
}

# ── Compute weighted % mentioning per country per wave ────────────────────────
# df   : respondent-level data for one wave; the code reads the columns named
#        in `vars` plus `wt` and `country_name`
# wave : wave label copied into the output
# year : survey year copied into the output
# vars : named character vector (names = group labels, values = column names)
# Returns one row per country and group with columns country, question, pct,
# wave and year. An item with no usable rows contributes nothing.
compute_neighbour <- function(df, wave, year, vars) {
  estimate_item <- function(v, label) {
    df2 <- df |>
      mutate(
        item = clean_neighbour(.data[[v]]),
        wt2  = as.numeric(wt)
      ) |>
      filter(!(is.na(item) | is.na(wt2)))

    # Nothing left to estimate for this item
    if (nrow(df2) == 0) {
      return(tibble())
    }

    # ids = ~1: no cluster ids supplied; weights come from wt2
    des <- svydesign(ids = ~1, weights = ~wt2, data = df2)
    by_country <- svyby(~item, by = ~country_name, design = des,
                        FUN = svymean, na.rm = TRUE)

    by_country |>
      as_tibble() |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(item * 100, 1),  # mean of a 0/1 indicator → percent
        wave     = wave,
        year     = year
      )
  }

  imap_dfr(vars, estimate_item)
}

# ── Combine waves ─────────────────────────────────────────────────────────────
# Stack the per-wave estimates, then keep only rows whose percentage lies in
# [0, 100] (rows with a missing pct are dropped by filter() as well).
neighbour_long <- list(
  compute_neighbour(w3, "W3", 1996, neighbour_vars$W3),
  compute_neighbour(w5, "W5", 2006, neighbour_vars$W5),
  compute_neighbour(w6, "W6", 2012, neighbour_vars$W6),
  compute_neighbour(w7, "W7", 2022, neighbour_vars$W7)
) |>
  bind_rows() |>
  filter(pct >= 0 & pct <= 100)

# ── Colour helper — HIGH % = MORE intolerant = RED ───────────────────────────
# Maps percentages to background colours:
#   NA -> "white", >= 75 -> red, >= 50 -> orange, >= 25 -> yellow, else green.
# Vectorised: accepts a vector of any length (including 0) and returns a
# character vector of the same length. A single value gives the same string
# as before, so existing sapply()/vapply() callers keep working.
colour_intol <- function(val) {
  shades <- c("#91cf60", "#ffffb2", "#fc8d59", "#d73027")
  # findInterval() is left-closed: 0 below 25, 1 for [25, 50), 2 for [50, 75),
  # 3 from 75 upwards; NA input yields NA
  band <- findInterval(val, c(25, 50, 75)) + 1L
  out <- shades[band]
  out[is.na(val)] <- "white"
  out
}

# ── Reshape wide ──────────────────────────────────────────────────────────────
# Column order of the country part of the table
country_cols <- c(
  "India", "China", "USA",
  "Germany", "South Africa", "Malaysia"
)

# Row order of the groups (used as factor levels so arrange() follows it)
question_order <- c(
  "Drug addicts",
  "Different race",
  "People with AIDS",
  "Immigrants/Foreign workers",
  "Homosexuals",
  "Different religion"
)

# One row per group × wave, one column per country that has estimates
neighbour_wide <- neighbour_long |>
  mutate(
    wave_yr  = paste0(wave, " (", year, ")"),
    question = factor(question, levels = question_order),
    country  = as.character(country)
  ) |>
  select(question, wave_yr, country, pct) |>
  pivot_wider(names_from = country, values_from = pct, values_fill = NA) |>
  # any_of(): countries absent from the data are skipped rather than erroring
  select(Group = question, Wave = wave_yr, any_of(country_cols)) |>
  arrange(Group, Wave)

# ── Apply cell_spec colouring ─────────────────────────────────────────────────
# Only the country columns actually present in neighbour_wide are formatted.
# Each value becomes cell_spec() HTML: rounded percentage text (em dash for
# NA) on the background returned by colour_intol().
neighbour_coloured <- neighbour_wide |>
  mutate(across(all_of(intersect(country_cols, names(neighbour_wide))), function(x) {
    cell_spec(
      ifelse(is.na(x), "—", paste0(round(x, 1), "%")),
      # vapply() guarantees one string per cell; sapply() would return a list
      # for a zero-row column.
      background = vapply(x, colour_intol, character(1), USE.NAMES = FALSE),
      color      = "black",
      bold       = FALSE
    )
  }))

# ── Render table ──────────────────────────────────────────────────────────────
# escape = FALSE keeps the cell_spec() HTML intact.
# `align` is sized from the table itself (all columns after Group and Wave),
# so it stays in step with `col.names` when a country column is missing.
# The footnote states that the W3 "Different religion" row is absent, which is
# what the table shows (no W3 estimate exists, so no row is created).
neighbour_coloured |>
  kable(format    = "html", escape = FALSE,
        caption   = "Neighbour Intolerance: % Who Would NOT Want as Neighbour",
        col.names = c("Group", "Wave", intersect(country_cols, names(neighbour_coloured))),
        align     = c("l", "l", rep("c", ncol(neighbour_coloured) - 2L))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em") |>
  footnote(general = paste0(
    "Higher % = more intolerant. ",
    "Green <25%, Yellow 25–49%, Orange 50–74%, Red ≥75%. ",
    "W3 (1996) does not include a 'Different religion' item, so that group has no W3 row."
  ))

Neighbour Intolerance: % Who Would NOT Want as Neighbour
Group	Wave	India	China	USA	Germany	South Africa	Malaysia
Drug addicts	W3 (1996)	65.9%	70.8%	82.8%	51.8%	74.8%	—
	W5 (2006)	53.8%	92.9%	90.7%	62.6%	89.5%	87.3%
	W6 (2012)	93.6%	96.5%	88.3%	66.4%	85.7%	73.8%
	W7 (2022)	83.4%	30.7%	38.6%	36.5%	—	63.6%
Different race	W3 (1996)	36%	22.9%	6.6%	2.8%	11.2%	—
	W5 (2006)	43.5%	12.6%	4%	7.2%	8.1%	20.6%
	W6 (2012)	25.6%	10.5%	5.6%	14.9%	19.2%	31.3%
	W7 (2022)	48%	22.7%	15.9%	18.9%	—	30%
People with AIDS	W3 (1996)	60.3%	62.3%	18.6%	6.5%	44%	—
	W5 (2006)	43.7%	74.5%	15.4%	15.4%	7.3%	69.8%
	W6 (2012)	61.5%	73.2%	13.4%	24%	17.7%	59.5%
	W7 (2022)	44.5%	19.6%	21.4%	15.4%	—	35.6%
Immigrants/Foreign workers	W3 (1996)	33.1%	20.3%	9.7%	7.2%	21.3%	—
	W5 (2006)	35%	17.2%	12.7%	13.3%	24.9%	56.7%
	W6 (2012)	47.1%	12.2%	13.6%	21.5%	40.9%	59.7%
	W7 (2022)	48.1%	24.4%	21%	15.6%	—	34.2%
Homosexuals	W3 (1996)	61.4%	60.7%	29.5%	13.3%	50.5%	—
	W5 (2006)	40.3%	69.6%	25.1%	14.6%	45.9%	70.6%
	W6 (2012)	65%	52.7%	20.4%	22.4%	37.9%	58.7%
	W7 (2022)	42.2%	14.9%	17.3%	10%	—	28.4%
Different religion	W5 (2006)	44%	15.7%	2.5%	4.3%	4.5%	22.4%
	W6 (2012)	28.4%	9.2%	3.4%	14.1%	16%	30.3%
	W7 (2022)	47.2%	15.6%	32%	32.9%	—	25.5%
Note:
Higher % = more intolerant. Green <25%, Yellow 25–49%, Orange 50–74%, Red ≥75%. W3 (1996) does not include a ‘Different religion’ item — shown as — for that wave.

Child qualities

1 = Mentioned; 2 = Not mentioned
We report the % who mentioned each quality

# ── Variable mapping ──────────────────────────────────────────────────────────
# Names are the row labels used in the table, values are the column names in
# each wave's data set. Within a wave the codes are consecutive, so they are
# generated from ranges:
#   W3: V15–V21 (7 items) | W5: V12–V21 (10 items) | W7: Q7–Q17 (11 items)
child_vars <- local({
  core_items <- c(
    "Independence", "Hard work", "Responsibility", "Imagination",
    "Tolerance & respect", "Thrift", "Determination"
  )
  later_items <- c("Religious faith", "Unselfishness", "Obedience")

  list(
    W3 = setNames(paste0("V", 15:21), core_items),
    W5 = setNames(paste0("V", 12:21), c(core_items, later_items)),
    W7 = setNames(paste0("Q", 7:17), c("Good manners", core_items, later_items))
  )
})

# ── Clean helper ──────────────────────────────────────────────────────────────
# Strips value labels, coerces to numeric and recodes: 1 -> 1L (mentioned),
# 2 -> 0L (not mentioned), anything else -> NA.
clean_child <- function(x) {
  raw <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  # match() gives position 1 for code 2 and position 2 for code 1 (NA for any
  # other value); subtracting 1 turns those positions into 0L / 1L.
  match(raw, c(2, 1)) - 1L
}

# ── Compute weighted % per country per wave ───────────────────────────────────
# df   : respondent-level data for one wave; reads the columns named in `vars`
#        plus `wt` and `country_name`
# wave : wave label copied into the output
# year : survey year copied into the output
# vars : named character vector (names = quality labels, values = column names)
# Returns one row per country and quality: country, question, pct, wave, year.
compute_child <- function(df, wave, year, vars) {
  imap_dfr(vars, \(v, label) {
    scored <- mutate(
      df,
      item = clean_child(.data[[v]]),
      wt2  = as.numeric(wt)
    )
    usable <- filter(scored, !is.na(item), !is.na(wt2))

    # No valid responses for this item: contribute no rows
    if (nrow(usable) == 0) {
      return(tibble())
    }

    # Weighted design without cluster ids; the weighted mean of the 0/1
    # indicator is the share of respondents mentioning the quality
    design <- svydesign(ids = ~1, weights = ~wt2, data = usable)
    shares <- as_tibble(
      svyby(~item, by = ~country_name, design = design,
            FUN = svymean, na.rm = TRUE)
    )

    transmute(
      shares,
      country  = country_name,
      question = label,
      pct      = round(item * 100, 1),
      wave     = wave,
      year     = year
    )
  })
}

# ── Combine waves (no W6) ─────────────────────────────────────────────────────
# Stack W3, W5 and W7 estimates and keep percentages inside [0, 100].
child_long <- bind_rows(
  compute_child(w3, "W3", 1996, child_vars$W3),
  compute_child(w5, "W5", 2006, child_vars$W5),
  compute_child(w7, "W7", 2022, child_vars$W7)
)
child_long <- filter(child_long, pct >= 0, pct <= 100)

# ── Colour helper — white (low) → green (high) ───────────────────────────────
# Maps percentages to background colours:
#   NA -> "#f5f5f5", >= 75 -> dark green, >= 50 -> medium green,
#   >= 25 -> yellow-green, otherwise white.
# Vectorised: works on a vector of any length (including 0) and returns a
# character vector of the same length; a single value gives the same string
# as before.
colour_child <- function(val) {
  out <- rep("#ffffff", length(val))
  # Thresholds are applied in ascending order so the highest one reached wins;
  # which() ignores NA comparisons.
  out[which(val >= 25)] <- "#d9ef8b"
  out[which(val >= 50)] <- "#a6d96a"
  out[which(val >= 75)] <- "#1a9850"
  out[is.na(val)] <- "#f5f5f5"
  out
}

# ── Row order ─────────────────────────────────────────────────────────────────
# Used as factor levels so that arrange() follows this order
question_order <- c(
  "Good manners",
  "Independence",
  "Hard work",
  "Responsibility",
  "Imagination",
  "Tolerance & respect",
  "Thrift",
  "Determination",
  "Religious faith",
  "Unselfishness",
  "Obedience"
)

# Column order of the country part of the table
country_cols <- c(
  "India", "China", "USA",
  "Germany", "South Africa", "Malaysia"
)

# ── Reshape wide ──────────────────────────────────────────────────────────────
# One row per quality × wave, one column per country that has estimates
child_wide <- child_long |>
  mutate(
    wave_yr  = paste0(wave, " (", year, ")"),
    question = factor(question, levels = question_order),
    country  = as.character(country)
  ) |>
  select(question, wave_yr, country, pct) |>
  pivot_wider(names_from = country, values_from = pct, values_fill = NA) |>
  # any_of(): countries absent from the data are skipped rather than erroring
  select(Quality = question, Wave = wave_yr, any_of(country_cols)) |>
  arrange(Quality, Wave)

# ── Apply cell_spec colouring ─────────────────────────────────────────────────
# Only the country columns actually present in child_wide are formatted.
# Text: rounded percentage, or an em dash for NA. Background: colour_child().
# Font colour switches to white for values >= 75, i.e. the cells that get the
# darkest background.
child_coloured <- child_wide |>
  mutate(across(all_of(intersect(country_cols, names(child_wide))), function(x) {
    cell_spec(
      ifelse(is.na(x), "—", paste0(round(x, 1), "%")),
      # vapply() guarantees one string per cell; sapply() would return a list
      # for a zero-row column.
      background = vapply(x, colour_child, character(1), USE.NAMES = FALSE),
      color      = ifelse(is.na(x), "black", ifelse(x >= 75, "white", "black")),
      bold       = FALSE
    )
  }))

# ── Render table ──────────────────────────────────────────────────────────────
# escape = FALSE keeps the cell_spec() HTML intact.
# `align` is sized from the table itself (all columns after Quality and Wave),
# so it stays in step with `col.names` when a country column is missing.
child_coloured |>
  kable(format    = "html", escape = FALSE,
        caption   = "Child Qualities: % Mentioning as Important (survey-weighted)",
        col.names = c("Quality", "Wave", intersect(country_cols, names(child_coloured))),
        align     = c("l", "l", rep("c", ncol(child_coloured) - 2L))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em") |>
  footnote(general = paste0(
    "Higher % = more frequently cited as important. ",
    "W3 (1996) includes 7 items only; 'Good manners' available in W7 only. ",
    "Green scale: Dark green ≥75%, Medium green 50–74%, Yellow-green 25–49%, White <25%. ",
    "W6 (2012) excluded due to unavailability of comparable items."
  ))

Child Qualities: % Mentioning as Important (survey-weighted)
Quality	Wave	India	China	USA	Germany	South Africa	Malaysia
Good manners	W7 (2022)	80.7%	84.1%	51.8%	83.8%	—	81.6%
Independence	W3 (1996)	37.4%	50.1%	45.2%	51.5%	31.2%	—
	W5 (2006)	66.9%	69.8%	53.8%	77.6%	58.8%	78.7%
	W7 (2022)	57.6%	78.6%	55.5%	69.8%	—	55.1%
Hard work	W3 (1996)	71.6%	72.7%	52.5%	9.9%	58.5%	—
	W5 (2006)	81.3%	83.4%	61.7%	26%	70.7%	49.1%
	W7 (2022)	74.2%	70.7%	68%	39.7%	—	32.9%
Responsibility	W3 (1996)	48.2%	34.5%	69%	92.3%	48.8%	—
	W5 (2006)	68%	67.2%	72.2%	85.1%	54.8%	78.6%
	W7 (2022)	65.3%	78.8%	59.4%	79.7%	—	74.9%
Imagination	W3 (1996)	10.8%	22.1%	26.5%	33.5%	10.6%	—
	W5 (2006)	25.4%	25.3%	31.7%	39.6%	15.6%	21.6%
	W7 (2022)	22.3%	21.7%	29.8%	23.1%	—	9.3%
Tolerance & respect	W3 (1996)	51.6%	43%	75%	88.3%	70%	—
	W5 (2006)	55.8%	64.7%	78.3%	74.6%	77.9%	73.7%
	W7 (2022)	44.1%	60.8%	70.9%	84.1%	—	69%
Thrift	W3 (1996)	41.9%	62.2%	26.7%	52.8%	28.7%	—
	W5 (2006)	55.4%	61.6%	29.9%	48.2%	36.9%	50.7%
	W7 (2022)	32%	40.4%	27.3%	36.9%	—	38.7%
Determination	W3 (1996)	28.8%	36.3%	41.5%	41%	29.7%	—
	W5 (2006)	40.7%	24%	40.2%	64.2%	32.9%	33.3%
	W7 (2022)	30.4%	20.6%	38.7%	33.5%	—	23.5%
Religious faith	W5 (2006)	41.3%	2.4%	50.6%	9.3%	56.2%	59.6%
Religious faith	W7 (2022)	26.8%	1.1%	32.2%	9.5%	—	59.7%
Unselfishness	W5 (2006)	34.2%	30.8%	37.6%	6.8%	30.6%	30.1%
Unselfishness	W7 (2022)	21.3%	28.8%	28.3%	5.6%	—	18.5%
Obedience	W5 (2006)	55.9%	13.8%	28.3%	15.8%	46.8%	25.9%
Obedience	W7 (2022)	22.1%	5.8%	20.5%	11.8%	—	13%
Note:
Higher % = more frequently cited as important. W3 (1996) includes 7 items only; ‘Good manners’ available in W7 only. Green scale: Dark green ≥75%, Medium green 50–74%, Yellow-green 25–49%, White <25%. W6 (2012) excluded due to unavailability of comparable items.

Moral permissiveness

(1–10 scale)

1 = Never justifiable; 10 = Always justifiable. We report the % scoring 1–3 (the "never justifiable" end of the scale) as the conservative/strict moral stance.

# ── Variable mapping ──────────────────────────────────────────────────────────
# Scale: 1 = Never justifiable → 10 = Always justifiable
# We report % scoring 1, 2, or 3 (strongly opposed)
# W3: 5 items only | W5/W6/W7: 11 items
#
# Names are the row labels used in the table, values are the column names in
# each wave's data set. W6 and W7 codes are not consecutive and Euthanasia /
# Suicide are listed in table order, not code order.
moral_vars <- local({
  all_items <- c(
    "Government benefits", "Public transport fare", "Tax evasion", "Bribing",
    "Homosexuality", "Prostitution", "Abortion", "Divorce",
    "Euthanasia", "Suicide", "Domestic violence"
  )
  w3_items <- c("Prostitution", "Abortion", "Divorce", "Euthanasia", "Suicide")

  list(
    W3 = setNames(paste0("V", 198:202), w3_items),
    W5 = setNames(paste0("V", 198:208), all_items),
    W6 = setNames(
      c("V198", "V199", "V201", "V202", "V203", "V203A",
        "V204", "V205", "V207A", "V207", "V208"),
      all_items
    ),
    W7 = setNames(
      c("Q177", "Q178", "Q180", "Q181", "Q182", "Q183",
        "Q184", "Q185", "Q188", "Q187", "Q189"),
      all_items
    )
  )
})

# ── Clean helper: keep 1–10, everything else NA ───────────────────────────────
# Strips value labels and coerces to numeric; values outside the 1–10 range
# (and values that cannot be coerced) come back as NA.
clean_moral <- function(x) {
  score <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  ifelse(score < 1 | score > 10, NA_real_, score)
}

# ── Compute weighted % scoring 1–3 per country per wave ──────────────────────
# df   : respondent-level data for one wave; reads the columns named in `vars`
#        plus `wt` and `country_name`
# wave : wave label copied into the output
# year : survey year copied into the output
# vars : named character vector (names = behaviour labels, values = columns)
# Returns one row per country and behaviour: country, question, pct, wave,
# year, where pct is the weighted share of valid answers that are 3 or lower.
compute_moral <- function(df, wave, year, vars) {
  estimate_item <- function(v, label) {
    df2 <- df |>
      mutate(
        item    = clean_moral(.data[[v]]),
        # 1L when the cleaned score is 1–3, 0L when 4–10, NA when missing
        opposed = as.integer(item <= 3),
        wt2     = as.numeric(wt)
      ) |>
      filter(!is.na(opposed), !is.na(wt2))

    # No valid responses for this item: contribute no rows
    if (nrow(df2) == 0) {
      return(tibble())
    }

    des <- svydesign(ids = ~1, weights = ~wt2, data = df2)
    by_country <- svyby(~opposed, by = ~country_name, design = des,
                        FUN = svymean, na.rm = TRUE)

    by_country |>
      as_tibble() |>
      transmute(
        country  = country_name,
        question = label,
        pct      = round(opposed * 100, 1),
        wave     = wave,
        year     = year
      )
  }

  imap_dfr(vars, estimate_item)
}

# ── Combine all four waves ────────────────────────────────────────────────────
# Stack the per-wave estimates and keep percentages inside [0, 100].
moral_long <- bind_rows(
  compute_moral(w3, "W3", 1996, moral_vars$W3),
  compute_moral(w5, "W5", 2006, moral_vars$W5),
  compute_moral(w6, "W6", 2012, moral_vars$W6),
  compute_moral(w7, "W7", 2022, moral_vars$W7)
)
moral_long <- filter(moral_long, pct >= 0 & pct <= 100)

# ── Colour helper — HIGH % = MORE conservative = RED ─────────────────────────
# Maps percentages to background colours:
#   NA -> "#f5f5f5", >= 75 -> red, >= 50 -> orange, >= 25 -> yellow,
#   otherwise green.
# Vectorised: accepts a vector of any length (including 0) and returns a
# character vector of the same length; a single value gives the same string
# as before.
colour_moral <- function(val) {
  cutoffs <- c(25, 50, 75)
  fills   <- c("#91cf60", "#ffffb2", "#fc8d59", "#d73027")
  # Number of cutoffs reached (0–3, left-closed intervals); NA for NA input
  reached <- findInterval(val, cutoffs)
  replace(fills[reached + 1L], is.na(val), "#f5f5f5")
}

# ── Row order ─────────────────────────────────────────────────────────────────
# Used as factor levels so that arrange() follows this order
question_order <- c(
  "Government benefits",
  "Public transport fare",
  "Tax evasion",
  "Bribing",
  "Homosexuality",
  "Prostitution",
  "Abortion",
  "Divorce",
  "Euthanasia",
  "Suicide",
  "Domestic violence"
)

# Column order of the country part of the table
country_cols <- c(
  "India", "China", "USA",
  "Germany", "South Africa", "Malaysia"
)

# ── Reshape wide ──────────────────────────────────────────────────────────────
# One row per behaviour × wave, one column per country that has estimates
moral_wide <- moral_long |>
  mutate(
    wave_yr  = paste0(wave, " (", year, ")"),
    question = factor(question, levels = question_order),
    country  = as.character(country)
  ) |>
  select(question, wave_yr, country, pct) |>
  pivot_wider(names_from = country, values_from = pct, values_fill = NA) |>
  # any_of(): countries absent from the data are skipped rather than erroring
  select(Behaviour = question, Wave = wave_yr, any_of(country_cols)) |>
  arrange(Behaviour, Wave)

# ── Apply cell_spec colouring ─────────────────────────────────────────────────
# Only the country columns actually present in moral_wide are formatted.
# Text: rounded percentage, or an em dash for NA. Background: colour_moral().
# Font colour switches to white for values >= 75, i.e. the cells that get the
# red background.
moral_coloured <- moral_wide |>
  mutate(across(all_of(intersect(country_cols, names(moral_wide))), function(x) {
    cell_spec(
      ifelse(is.na(x), "—", paste0(round(x, 1), "%")),
      # vapply() guarantees one string per cell; sapply() would return a list
      # for a zero-row column.
      background = vapply(x, colour_moral, character(1), USE.NAMES = FALSE),
      color      = ifelse(is.na(x), "black", ifelse(x >= 75, "white", "black")),
      bold       = FALSE
    )
  }))

# ── Render table ──────────────────────────────────────────────────────────────
# escape = FALSE keeps the cell_spec() HTML intact.
# `align` is sized from the table itself (all columns after Behaviour and
# Wave), so it stays in step with `col.names` when a country column is missing.
moral_coloured |>
  kable(format    = "html", escape = FALSE,
        caption   = "Moral Permissiveness: % Rating Behaviour as Never Justifiable (scores 1–3 on 1–10 scale)",
        col.names = c("Behaviour", "Wave", intersect(country_cols, names(moral_coloured))),
        align     = c("l", "l", rep("c", ncol(moral_coloured) - 2L))) |>
  kable_styling(full_width        = TRUE,
                bootstrap_options = c("striped", "hover", "condensed"),
                font_size         = 12,
                fixed_thead       = TRUE) |>
  collapse_rows(columns = 1, valign = "top") |>
  column_spec(1, bold = TRUE, width = "15em") |>
  column_spec(2, width = "8em") |>
  footnote(general = paste0(
    "Higher % = more conservative (never justifiable). ",
    "Red ≥75%, Orange 50–74%, Yellow 25–49%, Green <25%. ",
    "W3 (1996) includes 5 items only: Prostitution, Abortion, Divorce, Euthanasia, Suicide. ",
    "Survey-weighted estimates."
  ))

Moral Permissiveness: % Rating Behaviour as Never Justifiable (scores 1–3 on 1–10 scale)
Behaviour	Wave	India	China	USA	Germany	South Africa	Malaysia
Government benefits	W5 (2006)	60%	64.7%	79%	81%	75.6%	43.2%
	W6 (2012)	78.5%	58.8%	78.7%	86.8%	48.1%	68.5%
	W7 (2022)	75.7%	60%	73.4%	93.9%	—	39.5%
Public transport fare	W5 (2006)	58.1%	87.1%	70.9%	79.8%	72.2%	50.8%
	W6 (2012)	89.3%	76.6%	72.7%	85.9%	47.3%	72.3%
	W7 (2022)	80.5%	92.5%	65.8%	87.7%	—	49.5%
Tax evasion	W5 (2006)	63.2%	87%	82.9%	80.2%	77%	52.7%
	W6 (2012)	90.9%	83.7%	85.3%	—	51.8%	73.6%
	W7 (2022)	85.4%	93.9%	83.7%	93.5%	—	59.3%
Bribing	W5 (2006)	64.2%	90.6%	88%	89.2%	79.7%	59.6%
	W6 (2012)	91.7%	86.8%	87.1%	90.1%	51.4%	76.9%
	W7 (2022)	87.4%	91.9%	88.2%	96.7%	—	66.8%
Homosexuality	W5 (2006)	63.7%	91.6%	41.3%	19.7%	66.6%	62.9%
	W6 (2012)	91%	83.7%	32.1%	27.7%	46.2%	75.5%
	W7 (2022)	72.7%	80.6%	25.6%	12.9%	—	56.1%
Prostitution	W3 (1996)	86.8%	97.8%	74.4%	33.4%	80.7%	—
	W5 (2006)	63%	94.6%	58.4%	41.3%	76.6%	64.1%
	W6 (2012)	91.3%	89.4%	—	44.1%	52%	82.5%
	W7 (2022)	82.6%	93.3%	54.7%	35.4%	—	60.5%
Abortion	W3 (1996)	74.7%	47.4%	47.6%	29%	73.6%	—
	W5 (2006)	58.9%	83.3%	38.9%	31.1%	73.5%	65.1%
	W6 (2012)	90.4%	69.7%	35.5%	40%	51.1%	83%
	W7 (2022)	68.9%	76.9%	37.5%	27.8%	—	57.6%
Divorce	W3 (1996)	64.5%	25.3%	21.6%	14%	50.5%	—
	W5 (2006)	47.2%	73.6%	15.8%	13.4%	50.6%	54.4%
	W6 (2012)	85.9%	58%	12.9%	15.8%	40%	67.6%
	W7 (2022)	63.8%	54.5%	10.9%	10%	—	32.7%
Euthanasia	W3 (1996)	71.7%	32.5%	41.3%	—	65%	—
	W5 (2006)	48.5%	68%	35%	36.3%	63.5%	59.8%
	W6 (2012)	—	62%	—	—	42.3%	—
	W7 (2022)	70.7%	55.8%	32.3%	14.7%	—	47.6%
Suicide	W3 (1996)	87.9%	69.4%	77.6%	42.8%	81.8%	—
	W5 (2006)	57.4%	88.1%	68.3%	55.1%	77.5%	65.2%
	W6 (2012)	91.3%	77.6%	64.2%	62.8%	50.9%	83.8%
	W7 (2022)	87.4%	85.4%	61.8%	49.8%	—	65%
Domestic violence	W5 (2006)	61.2%	91.3%	91.8%	88.5%	79.4%	63.9%
	W6 (2012)	88.1%	78.8%	90.9%	90.8%	53.5%	80.5%
	W7 (2022)	83.6%	92.8%	93.3%	98.9%	—	66.2%
Note:
Higher % = more conservative (never justifiable). Red ≥75%, Orange 50–74%, Yellow 25–49%, Green <25%. W3 (1996) includes 5 items only: Prostitution, Abortion, Divorce, Euthanasia, Suicide. Survey-weighted estimates.

WVS cross country comparison

Arslan

2026-03-06

Cross-country comparison

Demographics across countries

Economic values

Scale reference (1–10 scale)

Regime Preferences

Scale reference (1–4 scale)

Institutional Trust

(1–4 scale)

Neighbour

Neighbour Rejection (binary)

Child qualities

Moral permissiveness

(1–10 scale)