library(tidyverse)
library(haven)
library(labelled)
library(survey)
library(knitr)
library(kableExtra)


datafile_w5 <- "D:/Populism and Democrary/World value survey/WVS 2006/F00007944-WV5_Data_R_v20180912.rds"
stopifnot(file.exists(datafile_w5))

wvs5 <- readRDS(datafile_w5)

# W6 (2012)
datafile_w6 <- "D:/Populism and Democrary/World value survey/WVS 2012/WV6_Data_R_v20201117.rdata"
stopifnot(file.exists(datafile_w6))

load(datafile_w6)

wvs6 <- WV6_Data_R_v20201117

# W7 (2022)
datafile_w7 <- "D:/Populism and Democrary/World value survey/WVS 2017/WVS_Cross-National_Wave_7_Rdata_v6_0.rdata"
stopifnot(file.exists(datafile_w7))

load(datafile_w7)

wvs7 <- `WVS_Cross-National_Wave_7_v6_0`

stopifnot(exists("wvs5"), exists("wvs6"), exists("wvs7"))

dims <- tibble(
  wave = c("W5","W6","W7"),
  year = c(2006, 2012, 2022),
  n_rows = c(nrow(wvs5), nrow(wvs6), nrow(wvs7)),
  n_cols = c(ncol(wvs5), ncol(wvs6), ncol(wvs7))
)
kable(dims, caption = "Sanity check: datasets loaded (rows/cols)") %>%
  kable_styling(full_width = FALSE)
Sanity check: datasets loaded (rows/cols)
wave year n_rows n_cols
W5 2006 83975 414
W6 2012 89565 442
W7 2022 97220 613

Macro regions

## [1] 71
## # A tibble: 10 × 2
##       V2 macroregion        
##    <dbl> <chr>              
##  1   356 India              
##  2   156 East Southeast Asia
##  3   344 East Southeast Asia
##  4   392 East Southeast Asia
##  5   410 East Southeast Asia
##  6   158 East Southeast Asia
##  7   458 East Southeast Asia
##  8   608 East Southeast Asia
##  9   764 East Southeast Asia
## 10   360 East Southeast Asia
## # A tibble: 7 × 2
##   macroregion                       n
##   <chr>                         <int>
## 1 Western Europe Offshoots      20499
## 2 East Southeast Asia           13011
## 3 Latin America Caribbean       12589
## 4 Middle East North Africa MENA 12165
## 5 Sub-Saharan Africa            12097
## 6 Eastern Europe Post-Soviet    11613
## 7 India                          2001
##                     macroregion     n
## 1           East Southeast Asia 23218
## 2      Western Europe Offshoots 19417
## 3       Latin America Caribbean 17439
## 4    Eastern Europe Post-Soviet 13951
## 5 Middle East North Africa MENA 12321
## 6            Sub-Saharan Africa  4948
## 7                    South Asia  4234
## 8                         India  1692
##                          region     n
## 1    Eastern Europe Post-Soviet 18410
## 2 Middle East North Africa MENA 16827
## 3           East Southeast Asia 13853
## 4       Latin America Caribbean 13435
## 5      Western Europe Offshoots 11893
## 6            Sub-Saharan Africa  9869
## 7                         India  4078
## 8                      Pakistan  1200
##                   macroregion_h     n
## 1    Eastern Europe Post-Soviet 18410
## 2 Middle East North Africa MENA 16827
## 3           East Southeast Asia 13853
## 4       Latin America Caribbean 13435
## 5      Western Europe Offshoots 11893
## 6            Sub-Saharan Africa  9869
## 7                         India  4078
## 8                    South Asia  1200
# W5 (2006): V2 -> macroregion
w5_unmapped <- wvs5 %>%
  distinct(V2) %>%
  filter(!is.na(V2)) %>%
  left_join(macromapw5 %>% distinct(V2), by = "V2") %>%
  mutate(mapped = V2 %in% macromapw5$V2) %>%
  filter(!mapped)

nrow(w5_unmapped)
## [1] 0
w5_unmapped %>% arrange(V2)
## # A tibble: 0 × 2
## # ℹ 2 variables: V2 <dbl>, mapped <lgl>
# Also check for any respondents with NA macroregion (should be 0 or very small if mapping complete)
wvs5 %>% summarise(n_na_macroregion = sum(is.na(macroregion)))
## # A tibble: 1 × 1
##   n_na_macroregion
##              <int>
## 1                0
# W6 (2012): V2 -> region
w6_unmapped <- wvs6 %>%
  distinct(V2) %>%
  filter(!is.na(V2)) %>%
  left_join(countrymap_w6 %>% distinct(V2), by = "V2") %>%
  mutate(mapped = V2 %in% countrymap_w6$V2) %>%
  filter(!mapped)

nrow(w6_unmapped)
## [1] 0
w6_unmapped %>% arrange(V2)
## [1] V2     mapped
## <0 rows> (or 0-length row.names)
wvs6 %>% summarise(n_na_region = sum(is.na(region)))
##   n_na_region
## 1           0
# W7 (2022): B_COUNTRY -> macroregion
w7_unmapped <- wvs7 %>%
  distinct(B_COUNTRY) %>%
  filter(!is.na(B_COUNTRY)) %>%
  left_join(macromapw7 %>% distinct(B_COUNTRY), by = "B_COUNTRY") %>%
  mutate(mapped = B_COUNTRY %in% macromapw7$B_COUNTRY) %>%
  filter(!mapped)

nrow(w7_unmapped)
## [1] 0
w7_unmapped %>% arrange(B_COUNTRY)
## [1] B_COUNTRY mapped   
## <0 rows> (or 0-length row.names)
wvs7 %>% summarise(n_na_macroregion = sum(is.na(macroregion)))
##   n_na_macroregion
## 1                0
# W5: already has macroregion
wvs5 <- wvs5 %>%
  mutate(macroregion_h = macroregion)

# W6: fold Pakistan into South Asia for harmonized displays
wvs6 <- wvs6 %>%
  mutate(macroregion_h = dplyr::recode(region, "Pakistan" = "South Asia", .default = region))

# W7: already has macroregion (includes South Asia)
wvs7 <- wvs7 %>%
  mutate(macroregion_h = macroregion)

# Quick verify
wvs5 %>% count(macroregion_h, sort = TRUE)
## # A tibble: 7 × 2
##   macroregion_h                     n
##   <chr>                         <int>
## 1 Western Europe Offshoots      20499
## 2 East Southeast Asia           13011
## 3 Latin America Caribbean       12589
## 4 Middle East North Africa MENA 12165
## 5 Sub-Saharan Africa            12097
## 6 Eastern Europe Post-Soviet    11613
## 7 India                          2001
wvs6 %>% count(macroregion_h, sort = TRUE)   # should include South Asia, not Pakistan
##                   macroregion_h     n
## 1    Eastern Europe Post-Soviet 18410
## 2 Middle East North Africa MENA 16827
## 3           East Southeast Asia 13853
## 4       Latin America Caribbean 13435
## 5      Western Europe Offshoots 11893
## 6            Sub-Saharan Africa  9869
## 7                         India  4078
## 8                    South Asia  1200
wvs7 %>% count(macroregion_h, sort = TRUE)
##                   macroregion_h     n
## 1           East Southeast Asia 23218
## 2      Western Europe Offshoots 19417
## 3       Latin America Caribbean 17439
## 4    Eastern Europe Post-Soviet 13951
## 5 Middle East North Africa MENA 12321
## 6            Sub-Saharan Africa  4948
## 7                    South Asia  4234
## 8                         India  1692

Economic ranking

clean_0_to_na <- function(x) {
  x <- suppressWarnings(as.numeric(x))
  ifelse(x == 0, NA, x)
}

compute_country_means <- function(df, country_var, weight_var, vars) {
  df2 <- df %>%
    mutate(across(all_of(vars), clean_0_to_na)) %>%
    filter(!is.na(.data[[country_var]]), !is.na(.data[[weight_var]]))

  des <- svydesign(ids = ~1, weights = as.formula(paste0("~", weight_var)), data = df2)

  purrr::map_dfr(vars, function(v) {
    out <- svyby(
      formula = as.formula(paste0("~", v)),
      by      = as.formula(paste0("~", country_var)),
      design  = des,
      FUN     = svymean,
      na.rm   = TRUE
    ) %>%
      as_tibble() %>%
      rename(country = all_of(country_var)) %>%
      transmute(variable = v, country, mean = .data[[v]])
    out
  })
}

compute_region_means_from_countrymeans <- function(country_means, country_to_region_df, region_var) {
  country_means %>%
    left_join(country_to_region_df, by = "country") %>%
    filter(!is.na(.data[[region_var]])) %>%
    group_by(.data[[region_var]], variable) %>%
    summarise(
      region_mean = mean(mean, na.rm = TRUE),
      n_countries = n_distinct(country),
      .groups = "drop"
    ) %>%
    rename(region = all_of(region_var))
}

compute_india_stats <- function(country_means, india_code = 356) {
  country_means %>%
    group_by(variable) %>%
    mutate(
      n_countries = n_distinct(country),
      rank_desc = dense_rank(desc(mean))
    ) %>%
    ungroup() %>%
    filter(country == india_code) %>%
    transmute(variable, india_mean = mean, n_countries, india_rank = rank_desc)
}
# 0) Helpers (if not already defined in your cross-wave Rmd)
clean_0_to_na <- function(x) {
  x <- suppressWarnings(as.numeric(x))
  ifelse(x == 0, NA, x)
}

compute_country_means <- function(df, country_var, weight_var, vars) {
  df2 <- df %>%
    mutate(across(all_of(vars), clean_0_to_na)) %>%
    filter(!is.na(.data[[country_var]]), !is.na(.data[[weight_var]]))

  des <- survey::svydesign(ids = ~1, weights = as.formula(paste0("~", weight_var)), data = df2)

  purrr::map_dfr(vars, function(v) {
    survey::svyby(
      formula = as.formula(paste0("~", v)),
      by      = as.formula(paste0("~", country_var)),
      design  = des,
      FUN     = survey::svymean,
      na.rm   = TRUE
    ) %>%
      as_tibble() %>%
      rename(country = all_of(country_var)) %>%
      transmute(variable = v, country, mean = .data[[v]])
  })
}

compute_region_means_from_countrymeans <- function(country_means, country_to_region_df, region_var) {
  country_means %>%
    left_join(country_to_region_df, by = "country") %>%
    filter(!is.na(.data[[region_var]])) %>%
    group_by(.data[[region_var]], variable) %>%
    summarise(
      region_mean = mean(mean, na.rm = TRUE),
      n_countries = n_distinct(country),
      .groups = "drop"
    ) %>%
    rename(region = all_of(region_var))
}

compute_india_stats <- function(country_means, india_code = 356) {
  country_means %>%
    group_by(variable) %>%
    mutate(
      n_countries = n_distinct(country),
      india_rank  = dense_rank(desc(mean))
    ) %>%
    ungroup() %>%
    filter(country == india_code) %>%
    transmute(variable, india_mean = mean, n_countries, india_rank)
}

compute_wave_outputs <- function(df, wave, year, country_var, weight_var, region_var, vars) {

  df_work <- df %>%
    filter(!is.na(.data[[region_var]]), !is.na(.data[[weight_var]]))

  country_means <- compute_country_means(
    df = df_work,
    country_var = country_var,
    weight_var  = weight_var,
    vars        = vars
  )

  country_to_region <- df_work %>%
    distinct(country = .data[[country_var]], region = .data[[region_var]]) %>%
    rename(!!region_var := region)

  region_means <- compute_region_means_from_countrymeans(
    country_means = country_means,
    country_to_region_df = country_to_region,
    region_var = region_var
  )

  india_stats <- compute_india_stats(country_means, india_code = 356)

  list(
    country_means = country_means %>% mutate(wave = wave, year = year),
    region_means  = region_means  %>% mutate(wave = wave, year = year),
    india_stats   = india_stats   %>% mutate(wave = wave, year = year)
  )
}

# 1) Economic item lists (match your wave files)
vars_w5 <- paste0("V", 116:121)   # W5 economic system [file:1]
vars_w6 <- paste0("V", 96:101)    # W6 economic system (NOT V95) [file:6]
vars_w7 <- paste0("Q", 106:111)   # W7 economic system [file:2]

# 2) Run per-wave computations (using harmonized region var macroregion_h)
out_w5 <- compute_wave_outputs(wvs5, "W5", 2006, "V2",       "V259",    "macroregion_h", vars_w5)
out_w6 <- compute_wave_outputs(wvs6, "W6", 2012, "V2",       "V258",    "macroregion_h", vars_w6)
out_w7 <- compute_wave_outputs(wvs7, "W7", 2022, "B_COUNTRY","W_WEIGHT", "macroregion_h", vars_w7)

# 3) Combine India outputs into one table (rank is within-wave, among countries)
india_table <- bind_rows(out_w5$india_stats, out_w6$india_stats, out_w7$india_stats) %>%
  arrange(variable, year)

india_table
## # A tibble: 18 × 6
##    variable india_mean n_countries india_rank wave   year
##    <chr>         <dbl>       <int>      <int> <chr> <dbl>
##  1 Q106           6.94          66         15 W7     2022
##  2 Q107           6.23          66          8 W7     2022
##  3 Q108           5.22          66         24 W7     2022
##  4 Q109           4.82          66          6 W7     2022
##  5 Q110           4.44          66         31 W7     2022
##  6 Q111           1.18          66         60 W7     2022
##  7 V100           3.30          60         52 W6     2012
##  8 V101           5.05          60         59 W6     2012
##  9 V116           3.75          58         57 W5     2006
## 10 V117           3.90          58         50 W5     2006
## 11 V118           3.50          58         52 W5     2006
## 12 V119           2.01          58         57 W5     2006
## 13 V120           2.88          58         52 W5     2006
## 14 V121           3.68          58         55 W5     2006
## 15 V96            2.92          60         60 W6     2012
## 16 V97            5.29          60         41 W6     2012
## 17 V98            3.36          60         52 W6     2012
## 18 V99            2.73          60         57 W6     2012
# 4) Combine region means into one table
region_table <- bind_rows(out_w5$region_means, out_w6$region_means, out_w7$region_means) %>%
  arrange(year, variable, region)
library(dplyr)
library(tidyr)
library(stringr)

# Uses your existing helper
# clean_0_to_na <- function(x) { x <- suppressWarnings(as.numeric(x)); ifelse(x == 0, NA, x) }

# 1) Harmonised question key (concept-level; unmatched stay separate)
harm_key <- tibble::tribble(
  ~wave, ~variable, ~Questions,
  "W5", "V116", "Income equality vs inequality incentives",
  "W6", "V96",  "Income equality vs inequality incentives",
  "W7", "Q106", "Income equality vs inequality incentives",

  "W5", "V117", "Private vs government ownership of business",
  "W6", "V97",  "Private vs government ownership of business",
  "W7", "Q107", "Private vs government ownership of business",

  "W5", "V118", "Government vs individual responsibility",
  "W6", "V98",  "Government vs individual responsibility",
  "W7", "Q108", "Government vs individual responsibility",

  "W5", "V119", "Competition good vs harmful",
  "W6", "V99",  "Competition good vs harmful",
  "W7", "Q109", "Competition good vs harmful",

  "W5", "V120", "Success hard work vs luck",
  "W6", "V100", "Success hard work vs luck",
  "W7", "Q110", "Success hard work vs luck",

  "W5", "V121", "Wealth accumulation (others expense vs grows)",
  "W6", "V101", "Wealth accumulation (others expense vs grows)",

  "W7", "Q111", "Environment vs economic growth"   # unmatched (Wave 7 only)
)

# 2) India valid N per question (unweighted N; after recoding 0->NA; requires non-missing weight too)
get_india_n_by_item <- function(df, wave, year, country_col, weight_col, vars) {
  df %>%
    filter(.data[[country_col]] == 356) %>%
    mutate(across(all_of(vars), clean_0_to_na)) %>%
    pivot_longer(cols = all_of(vars), names_to = "variable", values_to = "value") %>%
    summarise(
      india_n = sum(!is.na(value) & !is.na(.data[[weight_col]])),
      .by = variable
    ) %>%
    mutate(wave = wave, year = year)
}

india_n_w5 <- get_india_n_by_item(wvs5, "W5", 2006, "V2",        "V259",    vars_w5)
india_n_w6 <- get_india_n_by_item(wvs6, "W6", 2012, "V2",        "V258",    vars_w6)
india_n_w7 <- get_india_n_by_item(wvs7, "W7", 2022, "B_COUNTRY", "W_WEIGHT", vars_w7)

india_n_all <- bind_rows(india_n_w5, india_n_w6, india_n_w7)

# 3) India mean + India rank AMONG MACROREGIONS (computed from region_table)
india_macroregion_stats <- region_table %>%
  group_by(wave, year, variable) %>%
  mutate(
    n_macroregions = n_distinct(region),
    india_rank = dense_rank(desc(region_mean))  # 1 = highest mean among macroregions
  ) %>%
  ungroup() %>%
  filter(region == "India") %>%
  transmute(wave, year, variable,
            india_mean = round(region_mean, 2),
            india_rank,
            n_macroregions)

# 4) Build final long -> wide
india_final_table <- india_macroregion_stats %>%
  left_join(india_n_all, by = c("wave","year","variable")) %>%
  left_join(harm_key,     by = c("wave","variable")) %>%
  mutate(Questions = coalesce(Questions, variable)) %>%
  select(Questions, year, india_rank, india_mean, india_n) %>%
  mutate(year = as.character(year)) %>%
  pivot_wider(
    names_from = year,
    values_from = c(india_rank, india_mean, india_n),
    names_glue = "{year}_{.value}"
  ) %>%
  select(
    Questions,
    `2006_india_rank`, `2012_india_rank`, `2022_india_rank`,
    `2006_india_mean`, `2012_india_mean`, `2022_india_mean`,
    `2006_india_n`,    `2012_india_n`,    `2022_india_n`
  ) %>%
  rename(
    `2006_rank` = `2006_india_rank`,
    `2012_rank` = `2012_india_rank`,
    `2022_rank` = `2022_india_rank`,
    `2006_mean` = `2006_india_mean`,
    `2012_mean` = `2012_india_mean`,
    `2022_mean` = `2022_india_mean`,
    `2006_n`    = `2006_india_n`,
    `2012_n`    = `2012_india_n`,
    `2022_n`    = `2022_india_n`
  ) %>%
  arrange(Questions)

india_final_table %>%
  kable(
    format  = "html",
    digits  = 2,
    caption = "India across waves: mean and rank among macroregions",
    align   = "lccccccccc"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "350px", width = "100%")
India across waves: mean and rank among macroregions
Questions 2006_rank 2012_rank 2022_rank 2006_mean 2012_mean 2022_mean 2006_n 2012_n 2022_n
Competition good vs harmful 6 8 1 2.01 2.73 4.82 2001 3682 1692
Environment vs economic growth NA NA 8 NA NA 1.18 NA NA 1692
Government vs individual responsibility 6 8 3 3.50 3.36 5.22 2001 3723 1692
Income equality vs inequality incentives 7 8 2 3.75 2.92 6.94 2001 3780 1692
Private vs government ownership of business 6 6 1 3.90 5.29 6.23 2001 3583 1692
Success hard work vs luck 6 8 3 2.88 3.30 4.44 2001 3828 1692
Wealth accumulation (others expense vs grows) 6 8 NA 3.68 5.05 NA 2001 3419 NA

Democratic system

# Assumes already in memory:
# wvs5, wvs6, wvs7
# macroregion_h exists in each wave data
# Wave 7 uses B_COUNTRY and W_WEIGHT (your chosen convention)

# ----------------------------
# 0) Item lists (Democratic system)
# ----------------------------
vars_w5_dem <- paste0("V", 152:162)   # W5 democratic system block[1]
vars_w6_dem <- paste0("V", 131:140)   # W6 democratic system block[2]
vars_w7_dem <- paste0("Q", 241:250)   # W7 democratic system items (excluding Q240 left-right)[3]

# Guardrails (fail loudly if names not present)
stopifnot(all(vars_w5_dem %in% names(wvs5)))
stopifnot(all(vars_w6_dem %in% names(wvs6)))
stopifnot(all(vars_w7_dem %in% names(wvs7)))
stopifnot(all(c("B_COUNTRY","W_WEIGHT","macroregion_h") %in% names(wvs7)))

# ----------------------------
# 1) Cleaning rule for demo variables
#    W5 uses 0 as invalid; W6/W7 also have negative codes in some items.
#    We'll treat <=0 as missing for all waves.
# ----------------------------
clean_demo_to_na <- function(x) {
  x <- suppressWarnings(as.numeric(x))
  ifelse(x <= 0, NA, x)
}

# ----------------------------
# 2) Country means (survey-weighted)
# ----------------------------
compute_country_means <- function(df, country_var, weight_var, vars) {
  df2 <- df %>%
    mutate(across(all_of(vars), clean_demo_to_na)) %>%
    filter(!is.na(.data[[country_var]]), !is.na(.data[[weight_var]]))

  des <- svydesign(ids = ~1, weights = as.formula(paste0("~", weight_var)), data = df2)

  purrr::map_dfr(vars, function(v) {
    svyby(
      formula = as.formula(paste0("~", v)),
      by      = as.formula(paste0("~", country_var)),
      design  = des,
      FUN     = svymean,
      na.rm   = TRUE
    ) %>%
      as_tibble() %>%
      rename(country = all_of(country_var)) %>%
      transmute(variable = v, country, mean = .data[[v]])
  })
}

# ----------------------------
# 3) Macroregion mean = average of country means
# ----------------------------
compute_region_means_from_countrymeans <- function(country_means, country_to_region_df, region_var) {
  country_means %>%
    left_join(country_to_region_df, by = "country") %>%
    filter(!is.na(.data[[region_var]])) %>%
    group_by(.data[[region_var]], variable) %>%
    summarise(
      region_mean = mean(mean, na.rm = TRUE),
      n_countries = n_distinct(country),
      .groups = "drop"
    ) %>%
    rename(region = all_of(region_var))
}

compute_wave_region_table <- function(df, wave, year, country_var, weight_var, region_var, vars) {

  df_work <- df %>%
    filter(!is.na(.data[[region_var]]), !is.na(.data[[weight_var]]))

  country_means <- compute_country_means(
    df = df_work,
    country_var = country_var,
    weight_var  = weight_var,
    vars        = vars
  )

  country_to_region <- df_work %>%
    distinct(country = .data[[country_var]], region = .data[[region_var]]) %>%
    rename(!!region_var := region)

  compute_region_means_from_countrymeans(
    country_means = country_means,
    country_to_region_df = country_to_region,
    region_var = region_var
  ) %>%
    mutate(wave = wave, year = year)
}

region_table_demo <- bind_rows(
  compute_wave_region_table(wvs5, "W5", 2006, "V2",        "V259",    "macroregion_h", vars_w5_dem),
  compute_wave_region_table(wvs6, "W6", 2012, "V2",        "V258",    "macroregion_h", vars_w6_dem),
  compute_wave_region_table(wvs7, "W7", 2022, "B_COUNTRY", "W_WEIGHT","macroregion_h", vars_w7_dem)
) %>%
  arrange(year, variable, region)

# ----------------------------
# 4) Harmonised label key (concept-level)
#    (Shared across waves; W5 has a few extras, kept as W5-only rows.)
# ----------------------------
harm_key_demo <- tibble::tribble(
  ~wave, ~variable, ~Questions,
  "W5","V152","Democracy: Tax rich subsidize poor (essential)",
  "W6","V131","Democracy: Tax rich subsidize poor (essential)",
  "W7","Q241","Democracy: Tax rich subsidize poor (essential)",

  "W5","V153","Democracy: Religious authorities interpret laws (essential)",
  "W6","V132","Democracy: Religious authorities interpret laws (essential)",
  "W7","Q242","Democracy: Religious authorities interpret laws (essential)",

  "W5","V154","Democracy: Free elections for leaders (essential)",
  "W6","V133","Democracy: Free elections for leaders (essential)",
  "W7","Q243","Democracy: Free elections for leaders (essential)",

  "W5","V155","Democracy: State aid for unemployment (essential)",
  "W6","V134","Democracy: State aid for unemployment (essential)",
  "W7","Q244","Democracy: State aid for unemployment (essential)",

  "W5","V156","Democracy: Army takeover when govt incompetent (essential)",
  "W6","V135","Democracy: Army takeover when govt incompetent (essential)",
  "W7","Q245","Democracy: Army takeover when govt incompetent (essential)",

  "W5","V157","Democracy: Civil rights protect against oppression (essential)",
  "W6","V136","Democracy: Civil rights protect against oppression (essential)",
  "W7","Q246","Democracy: Civil rights protect against oppression (essential)",

  "W5","V158","Democracy: Economy is prospering (essential)",            # W5-only[1]
  "W5","V159","Democracy: Criminals are severely punished (essential)",  # W5-only[1]
  "W5","V160","Democracy: Referendums can change laws (essential)",      # W5-only[1]

  "W5","V161","Democracy: Women have same rights as men (essential)",
  "W6","V139","Democracy: Women equal rights (essential)",
  "W7","Q249","Democracy: Women equal rights (essential)",

  "W5","V162","Importance of democracy",
  "W6","V140","Importance of democracy",
  "W7","Q250","Importance of democratic country"
)

# ----------------------------
# 5) India valid N per item (unweighted N; requires non-missing weight; <=0 -> NA)
# ----------------------------
get_india_n_by_item <- function(df, wave, year, country_col, weight_col, vars) {
  df %>%
    filter(.data[[country_col]] == 356) %>%
    mutate(across(all_of(vars), clean_demo_to_na)) %>%
    pivot_longer(cols = all_of(vars), names_to = "variable", values_to = "value") %>%
    summarise(india_n = sum(!is.na(value) & !is.na(.data[[weight_col]])), .by = variable) %>%
    mutate(wave = wave, year = year)
}

india_n_demo <- bind_rows(
  get_india_n_by_item(wvs5, "W5", 2006, "V2",        "V259",     vars_w5_dem),
  get_india_n_by_item(wvs6, "W6", 2012, "V2",        "V258",     vars_w6_dem),
  get_india_n_by_item(wvs7, "W7", 2022, "B_COUNTRY", "W_WEIGHT", vars_w7_dem)
)

# ----------------------------
# 6) India mean + India rank among macroregions (within wave×variable)
# ----------------------------
india_macro_stats_demo <- region_table_demo %>%
  group_by(wave, year, variable) %>%
  mutate(
    n_macroregions = n_distinct(region),
    india_rank = dense_rank(desc(region_mean))   # 1 = highest mean among macroregions
  ) %>%
  ungroup() %>%
  filter(region == "India") %>%
  transmute(
    wave, year, variable,
    india_mean = round(region_mean, 2),
    india_rank,
    n_macroregions
  )

# ----------------------------
# 7) Final long -> wide table + scrollable kable
# ----------------------------
india_demo_table <- india_macro_stats_demo %>%
  left_join(india_n_demo,  by = c("wave","year","variable")) %>%
  left_join(harm_key_demo, by = c("wave","variable")) %>%
  mutate(Questions = coalesce(Questions, variable)) %>%
  select(Questions, year, india_rank, india_mean, india_n) %>%
  mutate(year = as.character(year)) %>%
  pivot_wider(
    names_from  = year,
    values_from = c(india_rank, india_mean, india_n),
    names_glue  = "{year}_{.value}"
  ) %>%
  transmute(
    Questions,
    `2006_rank` = `2006_india_rank`,
    `2012_rank` = `2012_india_rank`,
    `2022_rank` = `2022_india_rank`,
    `2006_mean` = `2006_india_mean`,
    `2012_mean` = `2012_india_mean`,
    `2022_mean` = `2022_india_mean`,
    `2006_n`    = `2006_india_n`,
    `2012_n`    = `2012_india_n`,
    `2022_n`    = `2022_india_n`
  ) %>%
  arrange(Questions)

india_demo_table %>%
  kable(
    format  = "html",
    digits  = 2,
    caption = "India across waves: Democratic system (mean + rank among macroregions)"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "450px", width = "100%")
India across waves: Democratic system (mean + rank among macroregions)
Questions 2006_rank 2012_rank 2022_rank 2006_mean 2012_mean 2022_mean 2006_n 2012_n 2022_n
Democracy: Army takeover when govt incompetent (essential) 2 2 3 5.47 5.70 5.49 1362 4005 1309
Democracy: Civil rights protect against oppression (essential) 3 6 5 7.95 7.27 7.54 1420 4004 1476
Democracy: Criminals are severely punished (essential) 4 NA NA 7.53 NA NA 1578 NA NA
Democracy: Economy is prospering (essential) 5 NA NA 7.39 NA NA 1441 NA NA
Democracy: Free elections for leaders (essential) 2 6 3 8.62 7.82 8.12 1697 4009 1581
Democracy: Referendums can change laws (essential) 4 NA NA 7.36 NA NA 1446 NA NA
Democracy: Religious authorities interpret laws (essential) 4 5 4 4.17 4.00 4.69 1436 4003 1203
Democracy: State aid for unemployment (essential) 1 3 5 8.30 7.37 7.19 1663 4001 1569
Democracy: Tax rich subsidize poor (essential) 1 2 2 8.13 7.05 7.35 1687 4014 1512
Democracy: Women equal rights (essential) NA 3 3 NA 8.11 8.18 NA 4012 1601
Democracy: Women have same rights as men (essential) 2 NA NA 8.21 NA NA 1620 NA NA
Importance of democracy 7 8 NA 7.08 7.77 NA 1609 4045 NA
Importance of democratic country NA NA 3 NA NA 8.43 NA NA 1612
Q247 NA NA 3 NA NA 6.51 NA NA 1488
Q248 NA NA 3 NA NA 6.65 NA NA 1519
V137 NA 1 NA NA 7.10 NA NA 3993 NA
V138 NA 4 NA NA 6.41 NA NA 3996 NA

Neighbour

calc_india_props <- function(dat, year, country_var, weight_var, items_named_vec) {

  vars <- names(items_named_vec)

  # Clean only necessary variables
  dat_i <- dat %>%
    mutate(
      across(all_of(c(country_var, weight_var, vars)),
             ~ suppressWarnings(as.numeric(zap_labels(.x))))
    ) %>%
    filter(.data[[country_var]] == 356,
           !is.na(.data[[weight_var]]))

  # Survey design
  des <- svydesign(
    ids = ~1,
    weights = as.formula(paste0("~", weight_var)),
    data = dat_i
  )

  # Estimate proportion where response == 1 (REJECTION)
  purrr::imap_dfr(items_named_vec, function(label, v) {

    est <- svymean(
      as.formula(paste0("~I(", v, " == 1)")),
      des,
      na.rm = TRUE
    )

    rejection_rate <- as.numeric(coef(est)[1]) * 100

    tibble(
      year  = as.character(year),
      item  = label,
      value = round(rejection_rate)   # nearest integer
    )
  })
}


# ---------------------------
# CORRECT VARIABLE MAPPING
# ---------------------------

items_w5 <- c(
  V34 = "Drug addicts",
  V35 = "People of a different race",
  V36 = "People who have AIDS",
  V37 = "Immigrants/foreign workers",
  V38 = "Homosexuals",
  V39 = "People of a different religion"
)

items_w6 <- c(
  V36 = "Drug addicts",
  V37 = "People of a different race",
  V38 = "People who have AIDS",
  V39 = "Immigrants/foreign workers",
  V40 = "Homosexuals",
  V41 = "People of a different religion"
)

items_w7 <- c(
  Q27 = "Drug addicts",
  Q28 = "People of a different race",
  Q29 = "People who have AIDS",
  Q30 = "Immigrants/foreign workers",
  Q31 = "Homosexuals",
  Q32 = "People of a different religion"
)

# ---------------------------
# Resolve W7 column names
# ---------------------------

country_var_w7 <- if ("B_COUNTRY" %in% names(wvs7)) "B_COUNTRY" else "BCOUNTRY"
weight_var_w7  <- if ("W_WEIGHT" %in% names(wvs7)) "W_WEIGHT" else "WWEIGHT"

# ---------------------------
# Combine waves
# ---------------------------

res_long <- bind_rows(
  calc_india_props(wvs5, 2006, "V2", "V259", items_w5),
  calc_india_props(wvs6, 2012, "V2", "V258", items_w6),
  calc_india_props(wvs7, 2022, country_var_w7, weight_var_w7, items_w7)
)

# ---------------------------
# FINAL TABLE FORMAT
# ---------------------------

res_wide <- res_long %>%
  pivot_wider(names_from = item, values_from = value) %>%
  arrange(year)

res_wide %>%
  knitr::kable(
    format = "html",
    digits = 1,
    caption = "India: % mentioning each group as undesirable neighbour (survey-weighted)"
  ) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped","hover","condensed","responsive")
  )
India: % mentioning each group as undesirable neighbour (survey-weighted)
year Drug addicts People of a different race People who have AIDS Immigrants/foreign workers Homosexuals People of a different religion
2006 46 56 56 65 60 56
2012 6 74 38 53 35 72
2022 22 66 77 83 79 75

Moral permisiveness

moral_w5_named <- c(
  V198 = "Government benefits",
  V199 = "Public transport fare",
  V200 = "Tax evasion",
  V201 = "Bribing",
  V202 = "Homosexuality",
  V203 = "Prostitution",
  V204 = "Abortion",
  V205 = "Divorce",
  V206 = "Euthanasia",
  V207 = "Suicide",
  V208 = "Domestic violence"
 
)

moral_w6_named <- c(
  V198  = "Government benefits",
  V199  = "Public transport fare",
  V201  = "Tax evasion",
  V202  = "Bribing",
  V203  = "Homosexuality",
  V203A = "Prostitution",
  V204  = "Abortion",
  V205  = "Divorce",
  V206  = "Premarital sex",
  V207  = "Suicide",
  V207A = "Euthanasia",
  V208  = "Domestic violence"
)

moral_w7_named <- c(
  Q177 = "Government benefits",
  Q178 = "Public transport fare",
  Q180 = "Tax evasion",
  Q181 = "Bribing",
  Q182 = "Homosexuality",
  Q183 = "Prostitution",
  Q184 = "Abortion",
  Q185 = "Divorce",
  Q186 = "Premarital sex",
  Q187 = "Suicide",
  Q188 = "Euthanasia",
  Q189 = "Domestic violence"
)
# ── Sanity check ───────────────────────────────────────────────────────────────
stopifnot(all(names(moral_w5_named) %in% names(wvs5)))
stopifnot(all(names(moral_w6_named) %in% names(wvs6)))
stopifnot(all(names(moral_w7_named) %in% names(wvs7)))

# ── Survey-weighted India means ────────────────────────────────────────────────
calc_india_moral <- function(dat, year, country_var, weight_var, named_vars) {

  vars <- names(named_vars)

  dat_i <- dat %>%
    mutate(across(
      all_of(vars),
      ~ { x <- suppressWarnings(as.numeric(zap_labels(.x))); ifelse(x <= 0, NA, x) }
    )) %>%
    filter(.data[[country_var]] == 356,
           !is.na(.data[[weight_var]]))

  des <- svydesign(
    ids     = ~1,
    weights = as.formula(paste0("~", weight_var)),
    data    = dat_i
  )

  imap_dfr(named_vars, function(label, v) {

    if (sum(!is.na(dat_i[[v]])) == 0)
      return(tibble(year = as.character(year), item = label, value = NA_real_))

    est <- svymean(as.formula(paste0("~", v)), des, na.rm = TRUE)

    tibble(
      year  = as.character(year),
      item  = label,
      value = round(as.numeric(coef(est)[1]), 1)
    )
  })
}

# ── Run all waves ──────────────────────────────────────────────────────────────
moral_long <- bind_rows(
  calc_india_moral(wvs5, 2006, "V2",        "V259",     moral_w5_named),
  calc_india_moral(wvs6, 2012, "V2",        "V258",     moral_w6_named),
  calc_india_moral(wvs7, 2022, "B_COUNTRY", "W_WEIGHT", moral_w7_named)
)

# ── Wide: Year as rows, items as columns ───────────────────────────────────────
item_order <- c(
  "Government benefits",
  "Public transport fare",
  "Tax evasion",
  "Bribing",
  "Homosexuality",
  "Prostitution",
  "Abortion",
  "Divorce",
  "Premartial sex",
  "Suicide",
  "Euthanasia",
  "Domestic voilence"
)

moral_table <- moral_long %>%
  mutate(item = factor(item, levels = item_order)) %>%
  pivot_wider(names_from = item, values_from = value) %>%
  arrange(year) %>%
  rename(Year = year)
## Warning: Values from `value` are not uniquely identified; output will contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
##   {data} |>
##   dplyr::summarise(n = dplyr::n(), .by = c(year, item)) |>
##   dplyr::filter(n > 1L)
# ── Render ─────────────────────────────────────────────────────────────────────
moral_table %>%
  kable(
    format  = "html",
    digits  = 1,
    caption = "India: Moral Permissiveness (1 = Never justifiable, 10 = Always justifiable)"
  ) %>%
  kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  scroll_box(height = "200px", width = "100%")
India: Moral Permissiveness (1 = Never justifiable, 10 = Always justifiable)
Year Government benefits Public transport fare Tax evasion Bribing Homosexuality Prostitution Abortion Divorce Euthanasia Suicide NA
2006 3.1 3.3 3 3 3 3.1 3.3 3.9 3.9 3.4 3.1
2012 2.5 1.7 1.7 1.6 1.6 1.6 1.6 1.9 NA 1.6 1.6, 1.8
2022 2.7 2.3 2 1.9 2.7 2.2 2.8 3.1 2.9 1.9 2.2, 2.0

Institutional trust

trust_w5_named <- c(
  V131 = "Religious institutions",
  V132 = "Armed forces",
  V133 = "Press",
  V134 = "Television",
  V136 = "Police",
  V137 = "Courts",
  V138 = "Government",
  V139 = "Political parties",
  V140 = "Parliament",
  V141 = "Civil service"
)

trust_w6_named <- c(
  V108 = "Religious institutions",
  V109 = "Armed forces",
  V110 = "Press",
  V111 = "Television",
  V113 = "Police",
  V114 = "Courts",
  V115 = "Government",
  V116 = "Political parties",
  V117 = "Parliament",
  V118 = "Civil service"
)

trust_w7_named <- c(
  Q64 = "Religious institutions",
  Q65 = "Armed forces",
  Q66 = "Press",
  Q67 = "Television",
  Q69 = "Police",
  Q70 = "Courts",
  Q71 = "Government",
  Q72 = "Political parties",
  Q73 = "Parliament",
  Q74 = "Civil service"
)
# ─────────────────────────────────────────────
# INSTITUTIONAL TRUST – HIGH CONFIDENCE SHARE
# (1 & 2 collapsed)
# ─────────────────────────────────────────────

# ── Sanity check ─────────────────────────────
stopifnot(all(names(trust_w5_named) %in% names(wvs5)))
stopifnot(all(names(trust_w6_named) %in% names(wvs6)))
stopifnot(all(names(trust_w7_named) %in% names(wvs7)))

calc_india_trust <- function(dat, year, country_var, weight_var, named_vars) {

  vars <- names(named_vars)

  dat_i <- dat %>%
    mutate(across(
      all_of(vars),
      ~ {
        x <- suppressWarnings(as.numeric(zap_labels(.x)))
        case_when(
          x %in% c(1, 2) ~ 1,   # High confidence
          x %in% c(3, 4) ~ 0,   # Low confidence
          TRUE ~ NA_real_
        )
      }
    )) %>%
    filter(.data[[country_var]] == 356,
           !is.na(.data[[weight_var]]))

  des <- svydesign(
    ids     = ~1,
    weights = as.formula(paste0("~", weight_var)),
    data    = dat_i
  )

  imap_dfr(named_vars, function(label, v) {

    if (sum(!is.na(dat_i[[v]])) == 0)
      return(tibble(year = as.character(year), item = label, value = NA_real_))

    est <- svymean(as.formula(paste0("~", v)), des, na.rm = TRUE)

    tibble(
      year  = as.character(year),
      item  = label,
      value = round(as.numeric(coef(est)[1]) * 100, 1)  # percentage
    )
  })
}

# ── Run all waves ────────────────────────────
trust_long <- bind_rows(
  calc_india_trust(wvs5, 2006, "V2",        "V259",     trust_w5_named),
  calc_india_trust(wvs6, 2012, "V2",        "V258",     trust_w6_named),
  calc_india_trust(wvs7, 2022, "B_COUNTRY", "W_WEIGHT", trust_w7_named)
)

institution_order <- c(
  "Parliament",
  "Government",
  "Political parties",
  "Civil service",
  "Armed forces",
  "Police",
  "Courts",
  "Press",
  "Television",
  "Religious institutions"
)

trust_table <- trust_long %>%
  mutate(item = factor(item, levels = institution_order)) %>%
  pivot_wider(names_from = item, values_from = value) %>%
  arrange(year) %>%
  rename(Year = year)

trust_table %>%
  kable(
    format  = "html",
    digits  = 1,
    caption = "India: High Confidence in Institutions (% saying 'A great deal' or 'Quite a lot').
               Harmonised across WVS 2006–2022."
  ) %>%
  kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  scroll_box(height = "200px", width = "100%")
India: High Confidence in Institutions (% saying ‘A great deal’ or ‘Quite a lot’). Harmonised across WVS 2006–2022.
Year Religious institutions Armed forces Press Television Police Courts Government Political parties Parliament Civil service
2006 83.4 83.3 75.8 74.9 64.1 68.9 54.9 46.4 62.4 54.3
2012 96.1 87.1 71.8 74.5 51.2 64.5 50.4 37.4 58.4 61.0
2022 89.7 86.9 65.8 63.4 66.8 74.6 65.1 42.3 73.8 80.6

Regime preferences

clean_reg <- function(x) {
  x <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  x[x < 0] <- NA; x
}

regime_cfg <- list(
  list(year="2006", dat=quote(wvs5), ctry="V2",        wt="V259",
       vars=c(V148="Strong leader", V149="Experts", V150="Army rule", V151="Democratic system")),
  list(year="2012", dat=quote(wvs6), ctry="V2",        wt="V258",
       vars=c(V127="Strong leader", V128="Experts", V129="Army rule", V130="Democratic system")),
  list(year="2022", dat=quote(wvs7), ctry="B_COUNTRY", wt="W_WEIGHT",
       vars=c(Q235="Strong leader", Q236="Experts", Q237="Army rule", Q238="Democratic system"))
)

compute_block <- function(cfg) {
  dat  <- eval(cfg$dat)
  vars <- names(cfg$vars)
  labs <- cfg$vars

  dat_c <- dat %>%
    mutate(
      across(all_of(c(cfg$ctry, cfg$wt)), ~suppressWarnings(as.numeric(haven::zap_labels(.x)))),
      across(all_of(vars), clean_reg)
    ) %>%
    mutate(across(all_of(vars),
                  ~as.integer(.x %in% c(1, 2)),
                  .names="acc_{.col}")) %>%
    filter(!is.na(.data[[cfg$wt]]))

  # ── India % acceptable ──────────────────────────────────────────────
  india_dat <- dat_c %>% filter(.data[[cfg$ctry]] == 356)
  des_i <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=india_dat)

  india_pct <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    est <- svymean(as.formula(paste0("~", bv)), des_i, na.rm=TRUE)
    tibble(question=lbl, india_pct=round(as.numeric(coef(est)) * 100, 1))
  })

  # ── Region country-weighted averages ───────────────────────────────
  rgn_dat <- dat_c %>% filter(!is.na(macroregion_h), macroregion_h != "India")

  region_avgs <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    d   <- rgn_dat %>% filter(!is.na(.data[[bv]]))
    des <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=d)

    svyby(as.formula(paste0("~", bv)),
          as.formula(paste0("~", cfg$ctry, " + macroregion_h")),
          des, svymean, na.rm=TRUE) %>%
      as.data.frame() %>%
      select(macroregion_h, cpct=all_of(bv)) %>%
      mutate(label=lbl)
  }) %>%
    group_by(macroregion_h, label) %>%
    summarise(rpct=mean(cpct, na.rm=TRUE) * 100, .groups="drop")

  n_entities <- n_distinct(region_avgs$macroregion_h) + 1

  # ── Rank India ──────────────────────────────────────────────────────
  ranking <- region_avgs %>%
    bind_rows(india_pct %>% transmute(macroregion_h="India", label=question, rpct=india_pct)) %>%
    group_by(label) %>%
    mutate(rank=rank(-rpct, ties.method="min")) %>%
    filter(macroregion_h=="India") %>%
    ungroup() %>%
    select(question=label, rank) %>%
    mutate(n_entities=n_entities)

  india_pct %>%
    left_join(ranking, by="question") %>%
    mutate(year=cfg$year)
}

results <- map_dfr(regime_cfg, compute_block)

stopifnot(nrow(results %>% count(question, year) %>% filter(n > 1)) == 0)

res_wide <- results %>%
  mutate(rank_fmt=paste0(rank, "/", n_entities)) %>%
  select(question, year, rank_fmt, india_pct) %>%
  pivot_wider(
    names_from  = year,
    values_from = c(rank_fmt, india_pct),
    names_glue  = "{year}_{.value}"
  ) %>%
  select(question,
         `2006_rank_fmt`, `2012_rank_fmt`, `2022_rank_fmt`,
         `2006_india_pct`, `2012_india_pct`, `2022_india_pct`) %>%
  mutate(question=factor(question,
           levels=c("Strong leader","Experts","Army rule","Democratic system"))) %>%
  arrange(question)

res_wide %>%
  knitr::kable(
    format    = "html",
    col.names = c("Regime type",
                  "2006 Rank","2012 Rank","2022 Rank",
                  "2006%","2012%","2022%"),
    caption   = "India: % finding each regime 'acceptable' (Very/Fairly good) & rank among macro-regions"
  ) %>%
  kableExtra::kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped","hover","condensed")
  ) %>%
  kableExtra::add_header_above(
    c(" "=1, "Rank among macro-regions"=3, "% Acceptable — India"=3)
  ) %>%
  kableExtra::column_spec(1, bold=TRUE)
India: % finding each regime ‘acceptable’ (Very/Fairly good) & rank among macro-regions
Rank among macro-regions
% Acceptable — India
Regime type 2006 Rank 2012 Rank 2022 Rank 2006% 2012% 2022%
Strong leader 1/7 1/8 1/8 45.6 56.4 62.9
Experts 4/7 2/8 2/8 50.0 59.4 63.2
Army rule 3/7 2/8 4/8 24.8 37.8 33.7
Democratic system 7/7 5/8 6/8 70.0 79.7 77.2

Jobs scarce

clean_reg <- function(x) {
  x <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  x[x < 0] <- NA; x
}

jobs_cfg <- list(
  # W5: 3-pt (1=Agree, 2=Neither, 3=Disagree)
  list(year="2006", dat=quote(wvs5), ctry="V2", wt="V259",
       agree_codes=c(1),
       vars=c(V44="Men over women", V45="Nationals over immigrants")),

  # W6: 3-pt (1=Agree, 2=Neither, 3=Disagree)
  list(year="2012", dat=quote(wvs6), ctry="V2", wt="V258",
       agree_codes=c(1),
       vars=c(V43="Men over women", V44="Nationals over immigrants")),

  # W7: 5-pt — collapse 1 (Strongly agree) + 2 (Agree) → "Agree"
  list(year="2022", dat=quote(wvs7), ctry="B_COUNTRY", wt="W_WEIGHT",
       agree_codes=c(1, 2),
       vars=c(Q33="Men over women", Q34="Nationals over immigrants"))
)

compute_jobs_block <- function(cfg) {
  dat  <- eval(cfg$dat)
  vars <- names(cfg$vars)
  labs <- cfg$vars
  ac   <- cfg$agree_codes

  dat_c <- dat %>%
    mutate(
      across(all_of(c(cfg$ctry, cfg$wt)), ~suppressWarnings(as.numeric(haven::zap_labels(.x)))),
      across(all_of(vars), clean_reg)
    ) %>%
    mutate(across(all_of(vars),
                  ~as.integer(.x %in% ac),
                  .names="acc_{.col}")) %>%
    filter(!is.na(.data[[cfg$wt]]))

  # ── India % agree ───────────────────────────────────────────────────
  india_dat <- dat_c %>% filter(.data[[cfg$ctry]] == 356)
  des_i <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=india_dat)

  india_pct <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    est <- svymean(as.formula(paste0("~", bv)), des_i, na.rm=TRUE)
    tibble(question=lbl, india_pct=round(as.numeric(coef(est)) * 100, 1))
  })

  # ── Region country-weighted averages ───────────────────────────────
  rgn_dat <- dat_c %>% filter(!is.na(macroregion_h), macroregion_h != "India")

  region_avgs <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    d   <- rgn_dat %>% filter(!is.na(.data[[bv]]))
    des <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=d)

    svyby(as.formula(paste0("~", bv)),
          as.formula(paste0("~", cfg$ctry, " + macroregion_h")),
          des, svymean, na.rm=TRUE) %>%
      as.data.frame() %>%
      select(macroregion_h, cpct=all_of(bv)) %>%
      mutate(label=lbl)
  }) %>%
    group_by(macroregion_h, label) %>%
    summarise(rpct=mean(cpct, na.rm=TRUE) * 100, .groups="drop")

  n_entities <- n_distinct(region_avgs$macroregion_h) + 1

  # ── Rank India ──────────────────────────────────────────────────────
  ranking <- region_avgs %>%
    bind_rows(india_pct %>% transmute(macroregion_h="India", label=question, rpct=india_pct)) %>%
    group_by(label) %>%
    mutate(rank=rank(-rpct, ties.method="min")) %>%
    filter(macroregion_h=="India") %>%
    ungroup() %>%
    select(question=label, rank) %>%
    mutate(n_entities=n_entities)

  india_pct %>%
    left_join(ranking, by="question") %>%
    mutate(year=cfg$year)
}

results_jobs <- map_dfr(jobs_cfg, compute_jobs_block)

stopifnot(nrow(results_jobs %>% count(question, year) %>% filter(n > 1)) == 0)

res_wide_jobs <- results_jobs %>%
  mutate(rank_fmt=paste0(rank, "/", n_entities)) %>%
  select(question, year, rank_fmt, india_pct) %>%
  pivot_wider(
    names_from  = year,
    values_from = c(rank_fmt, india_pct),
    names_glue  = "{year}_{.value}"
  ) %>%
  select(question,
         `2006_rank_fmt`, `2012_rank_fmt`, `2022_rank_fmt`,
         `2006_india_pct`, `2012_india_pct`, `2022_india_pct`) %>%
  mutate(question=factor(question,
           levels=c("Men over women", "Nationals over immigrants"))) %>%
  arrange(question)

res_wide_jobs %>%
  knitr::kable(
    format    = "html",
    col.names = c("Statement",
                  "2006 Rank","2012 Rank","2022 Rank",
                  "2006%","2012%","2022%"),
    caption   = "India: % agreeing jobs should go to men/nationals when scarce & rank among macro-regions"
  ) %>%
  kableExtra::kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped","hover","condensed")
  ) %>%
  kableExtra::add_header_above(
    c(" "=1, "Rank among macro-regions"=3, "% Agree — India"=3)
  ) %>%
  kableExtra::column_spec(1, bold=TRUE) %>%
  kableExtra::footnote(
    general = paste0(
      "2006 & 2012: Agree on 3-point scale (Agree / Neither / Disagree). ",
      "2022: Strongly agree + Agree collapsed on 5-point scale."
    ),
    general_title = "Note: "
  )
India: % agreeing jobs should go to men/nationals when scarce & rank among macro-regions
Rank among macro-regions
% Agree — India
Statement 2006 Rank 2012 Rank 2022 Rank 2006% 2012% 2022%
Men over women 2/7 1/8 3/8 50.5 72.7 56.8
Nationals over immigrants 1/7 1/8 6/8 74.2 32.7 71.6
Note:
2006 & 2012: Agree on 3-point scale (Agree / Neither / Disagree). 2022: Strongly agree + Agree collapsed on 5-point scale.