library(tidyverse)
library(haven)
library(labelled)
library(survey)
library(knitr)
library(kableExtra)


datafile_w5 <- "D:/Populism and Democrary/World value survey/WVS 2006/F00007944-WV5_Data_R_v20180912.rds"
stopifnot(file.exists(datafile_w5))

wvs5 <- readRDS(datafile_w5)

# W6 (2012)
datafile_w6 <- "D:/Populism and Democrary/World value survey/WVS 2012/WV6_Data_R_v20201117.rdata"
stopifnot(file.exists(datafile_w6))

load(datafile_w6)

wvs6 <- WV6_Data_R_v20201117

# W7 (2022)
datafile_w7 <- "D:/Populism and Democrary/World value survey/WVS 2017/WVS_Cross-National_Wave_7_Rdata_v6_0.rdata"
stopifnot(file.exists(datafile_w7))

load(datafile_w7)

wvs7 <- `WVS_Cross-National_Wave_7_v6_0`

stopifnot(exists("wvs5"), exists("wvs6"), exists("wvs7"))

dims <- tibble(
  wave = c("W5","W6","W7"),
  year = c(2006, 2012, 2022),
  n_rows = c(nrow(wvs5), nrow(wvs6), nrow(wvs7)),
  n_cols = c(ncol(wvs5), ncol(wvs6), ncol(wvs7))
)
kable(dims, caption = "Sanity check: datasets loaded (rows/cols)") %>%
  kable_styling(full_width = FALSE)

Sanity check: datasets loaded (rows/cols)
wave	year	n_rows	n_cols
W5	2006	83975	414
W6	2012	89565	442
W7	2022	97220	613

Macro regions

## [1] 71

## # A tibble: 10 × 2
##       V2 macroregion        
##    <dbl> <chr>              
##  1   356 India              
##  2   156 East Southeast Asia
##  3   344 East Southeast Asia
##  4   392 East Southeast Asia
##  5   410 East Southeast Asia
##  6   158 East Southeast Asia
##  7   458 East Southeast Asia
##  8   608 East Southeast Asia
##  9   764 East Southeast Asia
## 10   360 East Southeast Asia

## # A tibble: 7 × 2
##   macroregion                       n
##   <chr>                         <int>
## 1 Western Europe Offshoots      20499
## 2 East Southeast Asia           13011
## 3 Latin America Caribbean       12589
## 4 Middle East North Africa MENA 12165
## 5 Sub-Saharan Africa            12097
## 6 Eastern Europe Post-Soviet    11613
## 7 India                          2001

##                     macroregion     n
## 1           East Southeast Asia 23218
## 2      Western Europe Offshoots 19417
## 3       Latin America Caribbean 17439
## 4    Eastern Europe Post-Soviet 13951
## 5 Middle East North Africa MENA 12321
## 6            Sub-Saharan Africa  4948
## 7                    South Asia  4234
## 8                         India  1692

##                          region     n
## 1    Eastern Europe Post-Soviet 18410
## 2 Middle East North Africa MENA 16827
## 3           East Southeast Asia 13853
## 4       Latin America Caribbean 13435
## 5      Western Europe Offshoots 11893
## 6            Sub-Saharan Africa  9869
## 7                         India  4078
## 8                      Pakistan  1200

##                   macroregion_h     n
## 1    Eastern Europe Post-Soviet 18410
## 2 Middle East North Africa MENA 16827
## 3           East Southeast Asia 13853
## 4       Latin America Caribbean 13435
## 5      Western Europe Offshoots 11893
## 6            Sub-Saharan Africa  9869
## 7                         India  4078
## 8                    South Asia  1200

# W5 (2006): V2 -> macroregion
w5_unmapped <- wvs5 %>%
  distinct(V2) %>%
  filter(!is.na(V2)) %>%
  left_join(macromapw5 %>% distinct(V2), by = "V2") %>%
  mutate(mapped = V2 %in% macromapw5$V2) %>%
  filter(!mapped)

nrow(w5_unmapped)

## [1] 0

w5_unmapped %>% arrange(V2)

## # A tibble: 0 × 2
## # ℹ 2 variables: V2 <dbl>, mapped <lgl>

# Also check for any respondents with NA macroregion (should be 0 or very small if mapping complete)
wvs5 %>% summarise(n_na_macroregion = sum(is.na(macroregion)))

## # A tibble: 1 × 1
##   n_na_macroregion
##              <int>
## 1                0

# W6 (2012): V2 -> region
w6_unmapped <- wvs6 %>%
  distinct(V2) %>%
  filter(!is.na(V2)) %>%
  left_join(countrymap_w6 %>% distinct(V2), by = "V2") %>%
  mutate(mapped = V2 %in% countrymap_w6$V2) %>%
  filter(!mapped)

nrow(w6_unmapped)

## [1] 0

w6_unmapped %>% arrange(V2)

## [1] V2     mapped
## <0 rows> (or 0-length row.names)

wvs6 %>% summarise(n_na_region = sum(is.na(region)))

##   n_na_region
## 1           0

# W7 (2022): B_COUNTRY -> macroregion
w7_unmapped <- wvs7 %>%
  distinct(B_COUNTRY) %>%
  filter(!is.na(B_COUNTRY)) %>%
  left_join(macromapw7 %>% distinct(B_COUNTRY), by = "B_COUNTRY") %>%
  mutate(mapped = B_COUNTRY %in% macromapw7$B_COUNTRY) %>%
  filter(!mapped)

nrow(w7_unmapped)

## [1] 0

w7_unmapped %>% arrange(B_COUNTRY)

## [1] B_COUNTRY mapped   
## <0 rows> (or 0-length row.names)

wvs7 %>% summarise(n_na_macroregion = sum(is.na(macroregion)))

##   n_na_macroregion
## 1                0

# W5: already has macroregion
wvs5 <- wvs5 %>%
  mutate(macroregion_h = macroregion)

# W6: fold Pakistan into South Asia for harmonized displays
wvs6 <- wvs6 %>%
  mutate(macroregion_h = dplyr::recode(region, "Pakistan" = "South Asia", .default = region))

# W7: already has macroregion (includes South Asia)
wvs7 <- wvs7 %>%
  mutate(macroregion_h = macroregion)

# Quick verify
wvs5 %>% count(macroregion_h, sort = TRUE)

## # A tibble: 7 × 2
##   macroregion_h                     n
##   <chr>                         <int>
## 1 Western Europe Offshoots      20499
## 2 East Southeast Asia           13011
## 3 Latin America Caribbean       12589
## 4 Middle East North Africa MENA 12165
## 5 Sub-Saharan Africa            12097
## 6 Eastern Europe Post-Soviet    11613
## 7 India                          2001

wvs6 %>% count(macroregion_h, sort = TRUE)   # should include South Asia, not Pakistan

##                   macroregion_h     n
## 1    Eastern Europe Post-Soviet 18410
## 2 Middle East North Africa MENA 16827
## 3           East Southeast Asia 13853
## 4       Latin America Caribbean 13435
## 5      Western Europe Offshoots 11893
## 6            Sub-Saharan Africa  9869
## 7                         India  4078
## 8                    South Asia  1200

wvs7 %>% count(macroregion_h, sort = TRUE)

##                   macroregion_h     n
## 1           East Southeast Asia 23218
## 2      Western Europe Offshoots 19417
## 3       Latin America Caribbean 17439
## 4    Eastern Europe Post-Soviet 13951
## 5 Middle East North Africa MENA 12321
## 6            Sub-Saharan Africa  4948
## 7                    South Asia  4234
## 8                         India  1692

Economic ranking

clean_0_to_na <- function(x) {
  x <- suppressWarnings(as.numeric(x))
  ifelse(x == 0, NA, x)
}

compute_country_means <- function(df, country_var, weight_var, vars) {
  df2 <- df %>%
    mutate(across(all_of(vars), clean_0_to_na)) %>%
    filter(!is.na(.data[[country_var]]), !is.na(.data[[weight_var]]))

  des <- svydesign(ids = ~1, weights = as.formula(paste0("~", weight_var)), data = df2)

  purrr::map_dfr(vars, function(v) {
    out <- svyby(
      formula = as.formula(paste0("~", v)),
      by      = as.formula(paste0("~", country_var)),
      design  = des,
      FUN     = svymean,
      na.rm   = TRUE
    ) %>%
      as_tibble() %>%
      rename(country = all_of(country_var)) %>%
      transmute(variable = v, country, mean = .data[[v]])
    out
  })
}

compute_region_means_from_countrymeans <- function(country_means, country_to_region_df, region_var) {
  country_means %>%
    left_join(country_to_region_df, by = "country") %>%
    filter(!is.na(.data[[region_var]])) %>%
    group_by(.data[[region_var]], variable) %>%
    summarise(
      region_mean = mean(mean, na.rm = TRUE),
      n_countries = n_distinct(country),
      .groups = "drop"
    ) %>%
    rename(region = all_of(region_var))
}

compute_india_stats <- function(country_means, india_code = 356) {
  country_means %>%
    group_by(variable) %>%
    mutate(
      n_countries = n_distinct(country),
      rank_desc = dense_rank(desc(mean))
    ) %>%
    ungroup() %>%
    filter(country == india_code) %>%
    transmute(variable, india_mean = mean, n_countries, india_rank = rank_desc)
}

# 0) Helpers (if not already defined in your cross-wave Rmd)
clean_0_to_na <- function(x) {
  x <- suppressWarnings(as.numeric(x))
  ifelse(x == 0, NA, x)
}

compute_country_means <- function(df, country_var, weight_var, vars) {
  df2 <- df %>%
    mutate(across(all_of(vars), clean_0_to_na)) %>%
    filter(!is.na(.data[[country_var]]), !is.na(.data[[weight_var]]))

  des <- survey::svydesign(ids = ~1, weights = as.formula(paste0("~", weight_var)), data = df2)

  purrr::map_dfr(vars, function(v) {
    survey::svyby(
      formula = as.formula(paste0("~", v)),
      by      = as.formula(paste0("~", country_var)),
      design  = des,
      FUN     = survey::svymean,
      na.rm   = TRUE
    ) %>%
      as_tibble() %>%
      rename(country = all_of(country_var)) %>%
      transmute(variable = v, country, mean = .data[[v]])
  })
}

compute_region_means_from_countrymeans <- function(country_means, country_to_region_df, region_var) {
  country_means %>%
    left_join(country_to_region_df, by = "country") %>%
    filter(!is.na(.data[[region_var]])) %>%
    group_by(.data[[region_var]], variable) %>%
    summarise(
      region_mean = mean(mean, na.rm = TRUE),
      n_countries = n_distinct(country),
      .groups = "drop"
    ) %>%
    rename(region = all_of(region_var))
}

compute_india_stats <- function(country_means, india_code = 356) {
  country_means %>%
    group_by(variable) %>%
    mutate(
      n_countries = n_distinct(country),
      india_rank  = dense_rank(desc(mean))
    ) %>%
    ungroup() %>%
    filter(country == india_code) %>%
    transmute(variable, india_mean = mean, n_countries, india_rank)
}

compute_wave_outputs <- function(df, wave, year, country_var, weight_var, region_var, vars) {

  df_work <- df %>%
    filter(!is.na(.data[[region_var]]), !is.na(.data[[weight_var]]))

  country_means <- compute_country_means(
    df = df_work,
    country_var = country_var,
    weight_var  = weight_var,
    vars        = vars
  )

  country_to_region <- df_work %>%
    distinct(country = .data[[country_var]], region = .data[[region_var]]) %>%
    rename(!!region_var := region)

  region_means <- compute_region_means_from_countrymeans(
    country_means = country_means,
    country_to_region_df = country_to_region,
    region_var = region_var
  )

  india_stats <- compute_india_stats(country_means, india_code = 356)

  list(
    country_means = country_means %>% mutate(wave = wave, year = year),
    region_means  = region_means  %>% mutate(wave = wave, year = year),
    india_stats   = india_stats   %>% mutate(wave = wave, year = year)
  )
}

# 1) Economic item lists (match your wave files)
vars_w5 <- paste0("V", 116:121)   # W5 economic system [file:1]
vars_w6 <- paste0("V", 96:101)    # W6 economic system (NOT V95) [file:6]
vars_w7 <- paste0("Q", 106:111)   # W7 economic system [file:2]

# 2) Run per-wave computations (using harmonized region var macroregion_h)
out_w5 <- compute_wave_outputs(wvs5, "W5", 2006, "V2",       "V259",    "macroregion_h", vars_w5)
out_w6 <- compute_wave_outputs(wvs6, "W6", 2012, "V2",       "V258",    "macroregion_h", vars_w6)
out_w7 <- compute_wave_outputs(wvs7, "W7", 2022, "B_COUNTRY","W_WEIGHT", "macroregion_h", vars_w7)

# 3) Combine India outputs into one table (rank is within-wave, among countries)
india_table <- bind_rows(out_w5$india_stats, out_w6$india_stats, out_w7$india_stats) %>%
  arrange(variable, year)

india_table

## # A tibble: 18 × 6
##    variable india_mean n_countries india_rank wave   year
##    <chr>         <dbl>       <int>      <int> <chr> <dbl>
##  1 Q106           6.94          66         15 W7     2022
##  2 Q107           6.23          66          8 W7     2022
##  3 Q108           5.22          66         24 W7     2022
##  4 Q109           4.82          66          6 W7     2022
##  5 Q110           4.44          66         31 W7     2022
##  6 Q111           1.18          66         60 W7     2022
##  7 V100           3.30          60         52 W6     2012
##  8 V101           5.05          60         59 W6     2012
##  9 V116           3.75          58         57 W5     2006
## 10 V117           3.90          58         50 W5     2006
## 11 V118           3.50          58         52 W5     2006
## 12 V119           2.01          58         57 W5     2006
## 13 V120           2.88          58         52 W5     2006
## 14 V121           3.68          58         55 W5     2006
## 15 V96            2.92          60         60 W6     2012
## 16 V97            5.29          60         41 W6     2012
## 17 V98            3.36          60         52 W6     2012
## 18 V99            2.73          60         57 W6     2012

# 4) Combine region means into one table
region_table <- bind_rows(out_w5$region_means, out_w6$region_means, out_w7$region_means) %>%
  arrange(year, variable, region)

library(dplyr)
library(tidyr)
library(stringr)

# Uses your existing helper
# clean_0_to_na <- function(x) { x <- suppressWarnings(as.numeric(x)); ifelse(x == 0, NA, x) }

# 1) Harmonised question key (concept-level; unmatched stay separate)
harm_key <- tibble::tribble(
  ~wave, ~variable, ~Questions,
  "W5", "V116", "Income equality vs inequality incentives",
  "W6", "V96",  "Income equality vs inequality incentives",
  "W7", "Q106", "Income equality vs inequality incentives",

  "W5", "V117", "Private vs government ownership of business",
  "W6", "V97",  "Private vs government ownership of business",
  "W7", "Q107", "Private vs government ownership of business",

  "W5", "V118", "Government vs individual responsibility",
  "W6", "V98",  "Government vs individual responsibility",
  "W7", "Q108", "Government vs individual responsibility",

  "W5", "V119", "Competition good vs harmful",
  "W6", "V99",  "Competition good vs harmful",
  "W7", "Q109", "Competition good vs harmful",

  "W5", "V120", "Success hard work vs luck",
  "W6", "V100", "Success hard work vs luck",
  "W7", "Q110", "Success hard work vs luck",

  "W5", "V121", "Wealth accumulation (others expense vs grows)",
  "W6", "V101", "Wealth accumulation (others expense vs grows)",

  "W7", "Q111", "Environment vs economic growth"   # unmatched (Wave 7 only)
)

# 2) India valid N per question (unweighted N; after recoding 0->NA; requires non-missing weight too)
get_india_n_by_item <- function(df, wave, year, country_col, weight_col, vars) {
  df %>%
    filter(.data[[country_col]] == 356) %>%
    mutate(across(all_of(vars), clean_0_to_na)) %>%
    pivot_longer(cols = all_of(vars), names_to = "variable", values_to = "value") %>%
    summarise(
      india_n = sum(!is.na(value) & !is.na(.data[[weight_col]])),
      .by = variable
    ) %>%
    mutate(wave = wave, year = year)
}

india_n_w5 <- get_india_n_by_item(wvs5, "W5", 2006, "V2",        "V259",    vars_w5)
india_n_w6 <- get_india_n_by_item(wvs6, "W6", 2012, "V2",        "V258",    vars_w6)
india_n_w7 <- get_india_n_by_item(wvs7, "W7", 2022, "B_COUNTRY", "W_WEIGHT", vars_w7)

india_n_all <- bind_rows(india_n_w5, india_n_w6, india_n_w7)

# 3) India mean + India rank AMONG MACROREGIONS (computed from region_table)
india_macroregion_stats <- region_table %>%
  group_by(wave, year, variable) %>%
  mutate(
    n_macroregions = n_distinct(region),
    india_rank = dense_rank(desc(region_mean))  # 1 = highest mean among macroregions
  ) %>%
  ungroup() %>%
  filter(region == "India") %>%
  transmute(wave, year, variable,
            india_mean = round(region_mean, 2),
            india_rank,
            n_macroregions)

# 4) Build final long -> wide
india_final_table <- india_macroregion_stats %>%
  left_join(india_n_all, by = c("wave","year","variable")) %>%
  left_join(harm_key,     by = c("wave","variable")) %>%
  mutate(Questions = coalesce(Questions, variable)) %>%
  select(Questions, year, india_rank, india_mean, india_n) %>%
  mutate(year = as.character(year)) %>%
  pivot_wider(
    names_from = year,
    values_from = c(india_rank, india_mean, india_n),
    names_glue = "{year}_{.value}"
  ) %>%
  select(
    Questions,
    `2006_india_rank`, `2012_india_rank`, `2022_india_rank`,
    `2006_india_mean`, `2012_india_mean`, `2022_india_mean`,
    `2006_india_n`,    `2012_india_n`,    `2022_india_n`
  ) %>%
  rename(
    `2006_rank` = `2006_india_rank`,
    `2012_rank` = `2012_india_rank`,
    `2022_rank` = `2022_india_rank`,
    `2006_mean` = `2006_india_mean`,
    `2012_mean` = `2012_india_mean`,
    `2022_mean` = `2022_india_mean`,
    `2006_n`    = `2006_india_n`,
    `2012_n`    = `2012_india_n`,
    `2022_n`    = `2022_india_n`
  ) %>%
  arrange(Questions)

india_final_table %>%
  kable(
    format  = "html",
    digits  = 2,
    caption = "India across waves: mean and rank among macroregions",
    align   = "lccccccccc"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "350px", width = "100%")

India across waves: mean and rank among macroregions
Questions	2006_rank	2012_rank	2022_rank	2006_mean	2012_mean	2022_mean	2006_n	2012_n	2022_n
Competition good vs harmful	6	8	1	2.01	2.73	4.82	2001	3682	1692
Environment vs economic growth	NA	NA	8	NA	NA	1.18	NA	NA	1692
Government vs individual responsibility	6	8	3	3.50	3.36	5.22	2001	3723	1692
Income equality vs inequality incentives	7	8	2	3.75	2.92	6.94	2001	3780	1692
Private vs government ownership of business	6	6	1	3.90	5.29	6.23	2001	3583	1692
Success hard work vs luck	6	8	3	2.88	3.30	4.44	2001	3828	1692
Wealth accumulation (others expense vs grows)	6	8	NA	3.68	5.05	NA	2001	3419	NA

Democratic system

# Assumes already in memory:
# wvs5, wvs6, wvs7
# macroregion_h exists in each wave data
# Wave 7 uses B_COUNTRY and W_WEIGHT (your chosen convention)

# ----------------------------
# 0) Item lists (Democratic system)
# ----------------------------
vars_w5_dem <- paste0("V", 152:162)   # W5 democratic system block[1]
vars_w6_dem <- paste0("V", 131:140)   # W6 democratic system block[2]
vars_w7_dem <- paste0("Q", 241:250)   # W7 democratic system items (excluding Q240 left-right)[3]

# Guardrails (fail loudly if names not present)
stopifnot(all(vars_w5_dem %in% names(wvs5)))
stopifnot(all(vars_w6_dem %in% names(wvs6)))
stopifnot(all(vars_w7_dem %in% names(wvs7)))
stopifnot(all(c("B_COUNTRY","W_WEIGHT","macroregion_h") %in% names(wvs7)))

# ----------------------------
# 1) Cleaning rule for demo variables
#    W5 uses 0 as invalid; W6/W7 also have negative codes in some items.
#    We'll treat <=0 as missing for all waves.
# ----------------------------
clean_demo_to_na <- function(x) {
  x <- suppressWarnings(as.numeric(x))
  ifelse(x <= 0, NA, x)
}

# ----------------------------
# 2) Country means (survey-weighted)
# ----------------------------
compute_country_means <- function(df, country_var, weight_var, vars) {
  df2 <- df %>%
    mutate(across(all_of(vars), clean_demo_to_na)) %>%
    filter(!is.na(.data[[country_var]]), !is.na(.data[[weight_var]]))

  des <- svydesign(ids = ~1, weights = as.formula(paste0("~", weight_var)), data = df2)

  purrr::map_dfr(vars, function(v) {
    svyby(
      formula = as.formula(paste0("~", v)),
      by      = as.formula(paste0("~", country_var)),
      design  = des,
      FUN     = svymean,
      na.rm   = TRUE
    ) %>%
      as_tibble() %>%
      rename(country = all_of(country_var)) %>%
      transmute(variable = v, country, mean = .data[[v]])
  })
}

# ----------------------------
# 3) Macroregion mean = average of country means
# ----------------------------
compute_region_means_from_countrymeans <- function(country_means, country_to_region_df, region_var) {
  country_means %>%
    left_join(country_to_region_df, by = "country") %>%
    filter(!is.na(.data[[region_var]])) %>%
    group_by(.data[[region_var]], variable) %>%
    summarise(
      region_mean = mean(mean, na.rm = TRUE),
      n_countries = n_distinct(country),
      .groups = "drop"
    ) %>%
    rename(region = all_of(region_var))
}

compute_wave_region_table <- function(df, wave, year, country_var, weight_var, region_var, vars) {

  df_work <- df %>%
    filter(!is.na(.data[[region_var]]), !is.na(.data[[weight_var]]))

  country_means <- compute_country_means(
    df = df_work,
    country_var = country_var,
    weight_var  = weight_var,
    vars        = vars
  )

  country_to_region <- df_work %>%
    distinct(country = .data[[country_var]], region = .data[[region_var]]) %>%
    rename(!!region_var := region)

  compute_region_means_from_countrymeans(
    country_means = country_means,
    country_to_region_df = country_to_region,
    region_var = region_var
  ) %>%
    mutate(wave = wave, year = year)
}

region_table_demo <- bind_rows(
  compute_wave_region_table(wvs5, "W5", 2006, "V2",        "V259",    "macroregion_h", vars_w5_dem),
  compute_wave_region_table(wvs6, "W6", 2012, "V2",        "V258",    "macroregion_h", vars_w6_dem),
  compute_wave_region_table(wvs7, "W7", 2022, "B_COUNTRY", "W_WEIGHT","macroregion_h", vars_w7_dem)
) %>%
  arrange(year, variable, region)

# ----------------------------
# 4) Harmonised label key (concept-level)
#    (Shared across waves; W5 has a few extras, kept as W5-only rows.)
# ----------------------------
harm_key_demo <- tibble::tribble(
  ~wave, ~variable, ~Questions,
  "W5","V152","Democracy: Tax rich subsidize poor (essential)",
  "W6","V131","Democracy: Tax rich subsidize poor (essential)",
  "W7","Q241","Democracy: Tax rich subsidize poor (essential)",

  "W5","V153","Democracy: Religious authorities interpret laws (essential)",
  "W6","V132","Democracy: Religious authorities interpret laws (essential)",
  "W7","Q242","Democracy: Religious authorities interpret laws (essential)",

  "W5","V154","Democracy: Free elections for leaders (essential)",
  "W6","V133","Democracy: Free elections for leaders (essential)",
  "W7","Q243","Democracy: Free elections for leaders (essential)",

  "W5","V155","Democracy: State aid for unemployment (essential)",
  "W6","V134","Democracy: State aid for unemployment (essential)",
  "W7","Q244","Democracy: State aid for unemployment (essential)",

  "W5","V156","Democracy: Army takeover when govt incompetent (essential)",
  "W6","V135","Democracy: Army takeover when govt incompetent (essential)",
  "W7","Q245","Democracy: Army takeover when govt incompetent (essential)",

  "W5","V157","Democracy: Civil rights protect against oppression (essential)",
  "W6","V136","Democracy: Civil rights protect against oppression (essential)",
  "W7","Q246","Democracy: Civil rights protect against oppression (essential)",

  "W5","V158","Democracy: Economy is prospering (essential)",            # W5-only[1]
  "W5","V159","Democracy: Criminals are severely punished (essential)",  # W5-only[1]
  "W5","V160","Democracy: Referendums can change laws (essential)",      # W5-only[1]

  "W5","V161","Democracy: Women have same rights as men (essential)",
  "W6","V139","Democracy: Women equal rights (essential)",
  "W7","Q249","Democracy: Women equal rights (essential)",

  "W5","V162","Importance of democracy",
  "W6","V140","Importance of democracy",
  "W7","Q250","Importance of democratic country"
)

# ----------------------------
# 5) India valid N per item (unweighted N; requires non-missing weight; <=0 -> NA)
# ----------------------------
get_india_n_by_item <- function(df, wave, year, country_col, weight_col, vars) {
  df %>%
    filter(.data[[country_col]] == 356) %>%
    mutate(across(all_of(vars), clean_demo_to_na)) %>%
    pivot_longer(cols = all_of(vars), names_to = "variable", values_to = "value") %>%
    summarise(india_n = sum(!is.na(value) & !is.na(.data[[weight_col]])), .by = variable) %>%
    mutate(wave = wave, year = year)
}

india_n_demo <- bind_rows(
  get_india_n_by_item(wvs5, "W5", 2006, "V2",        "V259",     vars_w5_dem),
  get_india_n_by_item(wvs6, "W6", 2012, "V2",        "V258",     vars_w6_dem),
  get_india_n_by_item(wvs7, "W7", 2022, "B_COUNTRY", "W_WEIGHT", vars_w7_dem)
)

# ----------------------------
# 6) India mean + India rank among macroregions (within wave×variable)
# ----------------------------
india_macro_stats_demo <- region_table_demo %>%
  group_by(wave, year, variable) %>%
  mutate(
    n_macroregions = n_distinct(region),
    india_rank = dense_rank(desc(region_mean))   # 1 = highest mean among macroregions
  ) %>%
  ungroup() %>%
  filter(region == "India") %>%
  transmute(
    wave, year, variable,
    india_mean = round(region_mean, 2),
    india_rank,
    n_macroregions
  )

# ----------------------------
# 7) Final long -> wide table + scrollable kable
# ----------------------------
india_demo_table <- india_macro_stats_demo %>%
  left_join(india_n_demo,  by = c("wave","year","variable")) %>%
  left_join(harm_key_demo, by = c("wave","variable")) %>%
  mutate(Questions = coalesce(Questions, variable)) %>%
  select(Questions, year, india_rank, india_mean, india_n) %>%
  mutate(year = as.character(year)) %>%
  pivot_wider(
    names_from  = year,
    values_from = c(india_rank, india_mean, india_n),
    names_glue  = "{year}_{.value}"
  ) %>%
  transmute(
    Questions,
    `2006_rank` = `2006_india_rank`,
    `2012_rank` = `2012_india_rank`,
    `2022_rank` = `2022_india_rank`,
    `2006_mean` = `2006_india_mean`,
    `2012_mean` = `2012_india_mean`,
    `2022_mean` = `2022_india_mean`,
    `2006_n`    = `2006_india_n`,
    `2012_n`    = `2012_india_n`,
    `2022_n`    = `2022_india_n`
  ) %>%
  arrange(Questions)

india_demo_table %>%
  kable(
    format  = "html",
    digits  = 2,
    caption = "India across waves: Democratic system (mean + rank among macroregions)"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "450px", width = "100%")

India across waves: Democratic system (mean + rank among macroregions)
Questions	2006_rank	2012_rank	2022_rank	2006_mean	2012_mean	2022_mean	2006_n	2012_n	2022_n
Democracy: Army takeover when govt incompetent (essential)	2	2	3	5.47	5.70	5.49	1362	4005	1309
Democracy: Civil rights protect against oppression (essential)	3	6	5	7.95	7.27	7.54	1420	4004	1476
Democracy: Criminals are severely punished (essential)	4	NA	NA	7.53	NA	NA	1578	NA	NA
Democracy: Economy is prospering (essential)	5	NA	NA	7.39	NA	NA	1441	NA	NA
Democracy: Free elections for leaders (essential)	2	6	3	8.62	7.82	8.12	1697	4009	1581
Democracy: Referendums can change laws (essential)	4	NA	NA	7.36	NA	NA	1446	NA	NA
Democracy: Religious authorities interpret laws (essential)	4	5	4	4.17	4.00	4.69	1436	4003	1203
Democracy: State aid for unemployment (essential)	1	3	5	8.30	7.37	7.19	1663	4001	1569
Democracy: Tax rich subsidize poor (essential)	1	2	2	8.13	7.05	7.35	1687	4014	1512
Democracy: Women equal rights (essential)	NA	3	3	NA	8.11	8.18	NA	4012	1601
Democracy: Women have same rights as men (essential)	2	NA	NA	8.21	NA	NA	1620	NA	NA
Importance of democracy	7	8	NA	7.08	7.77	NA	1609	4045	NA
Importance of democratic country	NA	NA	3	NA	NA	8.43	NA	NA	1612
Q247	NA	NA	3	NA	NA	6.51	NA	NA	1488
Q248	NA	NA	3	NA	NA	6.65	NA	NA	1519
V137	NA	1	NA	NA	7.10	NA	NA	3993	NA
V138	NA	4	NA	NA	6.41	NA	NA	3996	NA

Neighbour

calc_india_props <- function(dat, year, country_var, weight_var, items_named_vec) {

  vars <- names(items_named_vec)

  # Clean only necessary variables
  dat_i <- dat %>%
    mutate(
      across(all_of(c(country_var, weight_var, vars)),
             ~ suppressWarnings(as.numeric(zap_labels(.x))))
    ) %>%
    filter(.data[[country_var]] == 356,
           !is.na(.data[[weight_var]]))

  # Survey design
  des <- svydesign(
    ids = ~1,
    weights = as.formula(paste0("~", weight_var)),
    data = dat_i
  )

  # Estimate proportion where response == 1 (REJECTION)
  purrr::imap_dfr(items_named_vec, function(label, v) {

    est <- svymean(
      as.formula(paste0("~I(", v, " == 1)")),
      des,
      na.rm = TRUE
    )

    rejection_rate <- as.numeric(coef(est)[1]) * 100

    tibble(
      year  = as.character(year),
      item  = label,
      value = round(rejection_rate)   # nearest integer
    )
  })
}


# ---------------------------
# CORRECT VARIABLE MAPPING
# ---------------------------

items_w5 <- c(
  V34 = "Drug addicts",
  V35 = "People of a different race",
  V36 = "People who have AIDS",
  V37 = "Immigrants/foreign workers",
  V38 = "Homosexuals",
  V39 = "People of a different religion"
)

items_w6 <- c(
  V36 = "Drug addicts",
  V37 = "People of a different race",
  V38 = "People who have AIDS",
  V39 = "Immigrants/foreign workers",
  V40 = "Homosexuals",
  V41 = "People of a different religion"
)

items_w7 <- c(
  Q27 = "Drug addicts",
  Q28 = "People of a different race",
  Q29 = "People who have AIDS",
  Q30 = "Immigrants/foreign workers",
  Q31 = "Homosexuals",
  Q32 = "People of a different religion"
)

# ---------------------------
# Resolve W7 column names
# ---------------------------

country_var_w7 <- if ("B_COUNTRY" %in% names(wvs7)) "B_COUNTRY" else "BCOUNTRY"
weight_var_w7  <- if ("W_WEIGHT" %in% names(wvs7)) "W_WEIGHT" else "WWEIGHT"

# ---------------------------
# Combine waves
# ---------------------------

res_long <- bind_rows(
  calc_india_props(wvs5, 2006, "V2", "V259", items_w5),
  calc_india_props(wvs6, 2012, "V2", "V258", items_w6),
  calc_india_props(wvs7, 2022, country_var_w7, weight_var_w7, items_w7)
)

# ---------------------------
# FINAL TABLE FORMAT
# ---------------------------

res_wide <- res_long %>%
  pivot_wider(names_from = item, values_from = value) %>%
  arrange(year)

res_wide %>%
  knitr::kable(
    format = "html",
    digits = 1,
    caption = "India: % mentioning each group as undesirable neighbour (survey-weighted)"
  ) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped","hover","condensed","responsive")
  )

India: % mentioning each group as undesirable neighbour (survey-weighted)
year	Drug addicts	People of a different race	People who have AIDS	Immigrants/foreign workers	Homosexuals	People of a different religion
2006	46	56	56	65	60	56
2012	6	74	38	53	35	72
2022	22	66	77	83	79	75

Moral permisiveness

moral_w5_named <- c(
  V198 = "Government benefits",
  V199 = "Public transport fare",
  V200 = "Tax evasion",
  V201 = "Bribing",
  V202 = "Homosexuality",
  V203 = "Prostitution",
  V204 = "Abortion",
  V205 = "Divorce",
  V206 = "Euthanasia",
  V207 = "Suicide",
  V208 = "Domestic violence"
 
)

moral_w6_named <- c(
  V198  = "Government benefits",
  V199  = "Public transport fare",
  V201  = "Tax evasion",
  V202  = "Bribing",
  V203  = "Homosexuality",
  V203A = "Prostitution",
  V204  = "Abortion",
  V205  = "Divorce",
  V206  = "Premarital sex",
  V207  = "Suicide",
  V207A = "Euthanasia",
  V208  = "Domestic violence"
)

moral_w7_named <- c(
  Q177 = "Government benefits",
  Q178 = "Public transport fare",
  Q180 = "Tax evasion",
  Q181 = "Bribing",
  Q182 = "Homosexuality",
  Q183 = "Prostitution",
  Q184 = "Abortion",
  Q185 = "Divorce",
  Q186 = "Premarital sex",
  Q187 = "Suicide",
  Q188 = "Euthanasia",
  Q189 = "Domestic violence"
)
# ── Sanity check ───────────────────────────────────────────────────────────────
stopifnot(all(names(moral_w5_named) %in% names(wvs5)))
stopifnot(all(names(moral_w6_named) %in% names(wvs6)))
stopifnot(all(names(moral_w7_named) %in% names(wvs7)))

# ── Survey-weighted India means ────────────────────────────────────────────────
calc_india_moral <- function(dat, year, country_var, weight_var, named_vars) {

  vars <- names(named_vars)

  dat_i <- dat %>%
    mutate(across(
      all_of(vars),
      ~ { x <- suppressWarnings(as.numeric(zap_labels(.x))); ifelse(x <= 0, NA, x) }
    )) %>%
    filter(.data[[country_var]] == 356,
           !is.na(.data[[weight_var]]))

  des <- svydesign(
    ids     = ~1,
    weights = as.formula(paste0("~", weight_var)),
    data    = dat_i
  )

  imap_dfr(named_vars, function(label, v) {

    if (sum(!is.na(dat_i[[v]])) == 0)
      return(tibble(year = as.character(year), item = label, value = NA_real_))

    est <- svymean(as.formula(paste0("~", v)), des, na.rm = TRUE)

    tibble(
      year  = as.character(year),
      item  = label,
      value = round(as.numeric(coef(est)[1]), 1)
    )
  })
}

# ── Run all waves ──────────────────────────────────────────────────────────────
moral_long <- bind_rows(
  calc_india_moral(wvs5, 2006, "V2",        "V259",     moral_w5_named),
  calc_india_moral(wvs6, 2012, "V2",        "V258",     moral_w6_named),
  calc_india_moral(wvs7, 2022, "B_COUNTRY", "W_WEIGHT", moral_w7_named)
)

# ── Wide: Year as rows, items as columns ───────────────────────────────────────
item_order <- c(
  "Government benefits",
  "Public transport fare",
  "Tax evasion",
  "Bribing",
  "Homosexuality",
  "Prostitution",
  "Abortion",
  "Divorce",
  "Premartial sex",
  "Suicide",
  "Euthanasia",
  "Domestic voilence"
)

moral_table <- moral_long %>%
  mutate(item = factor(item, levels = item_order)) %>%
  pivot_wider(names_from = item, values_from = value) %>%
  arrange(year) %>%
  rename(Year = year)

## Warning: Values from `value` are not uniquely identified; output will contain list-cols.
## • Use `values_fn = list` to suppress this warning.
## • Use `values_fn = {summary_fun}` to summarise duplicates.
## • Use the following dplyr code to identify duplicates.
##   {data} |>
##   dplyr::summarise(n = dplyr::n(), .by = c(year, item)) |>
##   dplyr::filter(n > 1L)

# ── Render ─────────────────────────────────────────────────────────────────────
moral_table %>%
  kable(
    format  = "html",
    digits  = 1,
    caption = "India: Moral Permissiveness (1 = Never justifiable, 10 = Always justifiable)"
  ) %>%
  kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  scroll_box(height = "200px", width = "100%")

India: Moral Permissiveness (1 = Never justifiable, 10 = Always justifiable)
Year	Government benefits	Public transport fare	Tax evasion	Bribing	Homosexuality	Prostitution	Abortion	Divorce	Euthanasia	Suicide	NA
2006	3.1	3.3	3	3	3	3.1	3.3	3.9	3.9	3.4	3.1
2012	2.5	1.7	1.7	1.6	1.6	1.6	1.6	1.9	NA	1.6	1.6, 1.8
2022	2.7	2.3	2	1.9	2.7	2.2	2.8	3.1	2.9	1.9	2.2, 2.0

Institutional trust

trust_w5_named <- c(
  V131 = "Religious institutions",
  V132 = "Armed forces",
  V133 = "Press",
  V134 = "Television",
  V136 = "Police",
  V137 = "Courts",
  V138 = "Government",
  V139 = "Political parties",
  V140 = "Parliament",
  V141 = "Civil service"
)

trust_w6_named <- c(
  V108 = "Religious institutions",
  V109 = "Armed forces",
  V110 = "Press",
  V111 = "Television",
  V113 = "Police",
  V114 = "Courts",
  V115 = "Government",
  V116 = "Political parties",
  V117 = "Parliament",
  V118 = "Civil service"
)

trust_w7_named <- c(
  Q64 = "Religious institutions",
  Q65 = "Armed forces",
  Q66 = "Press",
  Q67 = "Television",
  Q69 = "Police",
  Q70 = "Courts",
  Q71 = "Government",
  Q72 = "Political parties",
  Q73 = "Parliament",
  Q74 = "Civil service"
)

# ─────────────────────────────────────────────
# INSTITUTIONAL TRUST – HIGH CONFIDENCE SHARE
# (1 & 2 collapsed)
# ─────────────────────────────────────────────

# ── Sanity check ─────────────────────────────
stopifnot(all(names(trust_w5_named) %in% names(wvs5)))
stopifnot(all(names(trust_w6_named) %in% names(wvs6)))
stopifnot(all(names(trust_w7_named) %in% names(wvs7)))

calc_india_trust <- function(dat, year, country_var, weight_var, named_vars) {

  vars <- names(named_vars)

  dat_i <- dat %>%
    mutate(across(
      all_of(vars),
      ~ {
        x <- suppressWarnings(as.numeric(zap_labels(.x)))
        case_when(
          x %in% c(1, 2) ~ 1,   # High confidence
          x %in% c(3, 4) ~ 0,   # Low confidence
          TRUE ~ NA_real_
        )
      }
    )) %>%
    filter(.data[[country_var]] == 356,
           !is.na(.data[[weight_var]]))

  des <- svydesign(
    ids     = ~1,
    weights = as.formula(paste0("~", weight_var)),
    data    = dat_i
  )

  imap_dfr(named_vars, function(label, v) {

    if (sum(!is.na(dat_i[[v]])) == 0)
      return(tibble(year = as.character(year), item = label, value = NA_real_))

    est <- svymean(as.formula(paste0("~", v)), des, na.rm = TRUE)

    tibble(
      year  = as.character(year),
      item  = label,
      value = round(as.numeric(coef(est)[1]) * 100, 1)  # percentage
    )
  })
}

# ── Run all waves ────────────────────────────
trust_long <- bind_rows(
  calc_india_trust(wvs5, 2006, "V2",        "V259",     trust_w5_named),
  calc_india_trust(wvs6, 2012, "V2",        "V258",     trust_w6_named),
  calc_india_trust(wvs7, 2022, "B_COUNTRY", "W_WEIGHT", trust_w7_named)
)

institution_order <- c(
  "Parliament",
  "Government",
  "Political parties",
  "Civil service",
  "Armed forces",
  "Police",
  "Courts",
  "Press",
  "Television",
  "Religious institutions"
)

trust_table <- trust_long %>%
  mutate(item = factor(item, levels = institution_order)) %>%
  pivot_wider(names_from = item, values_from = value) %>%
  arrange(year) %>%
  rename(Year = year)

trust_table %>%
  kable(
    format  = "html",
    digits  = 1,
    caption = "India: High Confidence in Institutions (% saying 'A great deal' or 'Quite a lot').
               Harmonised across WVS 2006–2022."
  ) %>%
  kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  column_spec(1, bold = TRUE) %>%
  scroll_box(height = "200px", width = "100%")

India: High Confidence in Institutions (% saying ‘A great deal’ or ‘Quite a lot’). Harmonised across WVS 2006–2022.
Year	Religious institutions	Armed forces	Press	Television	Police	Courts	Government	Political parties	Parliament	Civil service
2006	83.4	83.3	75.8	74.9	64.1	68.9	54.9	46.4	62.4	54.3
2012	96.1	87.1	71.8	74.5	51.2	64.5	50.4	37.4	58.4	61.0
2022	89.7	86.9	65.8	63.4	66.8	74.6	65.1	42.3	73.8	80.6

Regime preferences

clean_reg <- function(x) {
  x <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  x[x < 0] <- NA; x
}

regime_cfg <- list(
  list(year="2006", dat=quote(wvs5), ctry="V2",        wt="V259",
       vars=c(V148="Strong leader", V149="Experts", V150="Army rule", V151="Democratic system")),
  list(year="2012", dat=quote(wvs6), ctry="V2",        wt="V258",
       vars=c(V127="Strong leader", V128="Experts", V129="Army rule", V130="Democratic system")),
  list(year="2022", dat=quote(wvs7), ctry="B_COUNTRY", wt="W_WEIGHT",
       vars=c(Q235="Strong leader", Q236="Experts", Q237="Army rule", Q238="Democratic system"))
)

compute_block <- function(cfg) {
  dat  <- eval(cfg$dat)
  vars <- names(cfg$vars)
  labs <- cfg$vars

  dat_c <- dat %>%
    mutate(
      across(all_of(c(cfg$ctry, cfg$wt)), ~suppressWarnings(as.numeric(haven::zap_labels(.x)))),
      across(all_of(vars), clean_reg)
    ) %>%
    mutate(across(all_of(vars),
                  ~as.integer(.x %in% c(1, 2)),
                  .names="acc_{.col}")) %>%
    filter(!is.na(.data[[cfg$wt]]))

  # ── India % acceptable ──────────────────────────────────────────────
  india_dat <- dat_c %>% filter(.data[[cfg$ctry]] == 356)
  des_i <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=india_dat)

  india_pct <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    est <- svymean(as.formula(paste0("~", bv)), des_i, na.rm=TRUE)
    tibble(question=lbl, india_pct=round(as.numeric(coef(est)) * 100, 1))
  })

  # ── Region country-weighted averages ───────────────────────────────
  rgn_dat <- dat_c %>% filter(!is.na(macroregion_h), macroregion_h != "India")

  region_avgs <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    d   <- rgn_dat %>% filter(!is.na(.data[[bv]]))
    des <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=d)

    svyby(as.formula(paste0("~", bv)),
          as.formula(paste0("~", cfg$ctry, " + macroregion_h")),
          des, svymean, na.rm=TRUE) %>%
      as.data.frame() %>%
      select(macroregion_h, cpct=all_of(bv)) %>%
      mutate(label=lbl)
  }) %>%
    group_by(macroregion_h, label) %>%
    summarise(rpct=mean(cpct, na.rm=TRUE) * 100, .groups="drop")

  n_entities <- n_distinct(region_avgs$macroregion_h) + 1

  # ── Rank India ──────────────────────────────────────────────────────
  ranking <- region_avgs %>%
    bind_rows(india_pct %>% transmute(macroregion_h="India", label=question, rpct=india_pct)) %>%
    group_by(label) %>%
    mutate(rank=rank(-rpct, ties.method="min")) %>%
    filter(macroregion_h=="India") %>%
    ungroup() %>%
    select(question=label, rank) %>%
    mutate(n_entities=n_entities)

  india_pct %>%
    left_join(ranking, by="question") %>%
    mutate(year=cfg$year)
}

results <- map_dfr(regime_cfg, compute_block)

stopifnot(nrow(results %>% count(question, year) %>% filter(n > 1)) == 0)

res_wide <- results %>%
  mutate(rank_fmt=paste0(rank, "/", n_entities)) %>%
  select(question, year, rank_fmt, india_pct) %>%
  pivot_wider(
    names_from  = year,
    values_from = c(rank_fmt, india_pct),
    names_glue  = "{year}_{.value}"
  ) %>%
  select(question,
         `2006_rank_fmt`, `2012_rank_fmt`, `2022_rank_fmt`,
         `2006_india_pct`, `2012_india_pct`, `2022_india_pct`) %>%
  mutate(question=factor(question,
           levels=c("Strong leader","Experts","Army rule","Democratic system"))) %>%
  arrange(question)

res_wide %>%
  knitr::kable(
    format    = "html",
    col.names = c("Regime type",
                  "2006 Rank","2012 Rank","2022 Rank",
                  "2006%","2012%","2022%"),
    caption   = "India: % finding each regime 'acceptable' (Very/Fairly good) & rank among macro-regions"
  ) %>%
  kableExtra::kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped","hover","condensed")
  ) %>%
  kableExtra::add_header_above(
    c(" "=1, "Rank among macro-regions"=3, "% Acceptable — India"=3)
  ) %>%
  kableExtra::column_spec(1, bold=TRUE)

India: % finding each regime ‘acceptable’ (Very/Fairly good) & rank among macro-regions
	Rank among macro-regions			% Acceptable — India
Regime type	2006 Rank	2012 Rank	2022 Rank	2006%	2012%	2022%
Strong leader	1/7	1/8	1/8	45.6	56.4	62.9
Experts	4/7	2/8	2/8	50.0	59.4	63.2
Army rule	3/7	2/8	4/8	24.8	37.8	33.7
Democratic system	7/7	5/8	6/8	70.0	79.7	77.2

Jobs scarce

clean_reg <- function(x) {
  x <- suppressWarnings(as.numeric(haven::zap_labels(x)))
  x[x < 0] <- NA; x
}

jobs_cfg <- list(
  # W5: 3-pt (1=Agree, 2=Neither, 3=Disagree)
  list(year="2006", dat=quote(wvs5), ctry="V2", wt="V259",
       agree_codes=c(1),
       vars=c(V44="Men over women", V45="Nationals over immigrants")),

  # W6: 3-pt (1=Agree, 2=Neither, 3=Disagree)
  list(year="2012", dat=quote(wvs6), ctry="V2", wt="V258",
       agree_codes=c(1),
       vars=c(V43="Men over women", V44="Nationals over immigrants")),

  # W7: 5-pt — collapse 1 (Strongly agree) + 2 (Agree) → "Agree"
  list(year="2022", dat=quote(wvs7), ctry="B_COUNTRY", wt="W_WEIGHT",
       agree_codes=c(1, 2),
       vars=c(Q33="Men over women", Q34="Nationals over immigrants"))
)

compute_jobs_block <- function(cfg) {
  dat  <- eval(cfg$dat)
  vars <- names(cfg$vars)
  labs <- cfg$vars
  ac   <- cfg$agree_codes

  dat_c <- dat %>%
    mutate(
      across(all_of(c(cfg$ctry, cfg$wt)), ~suppressWarnings(as.numeric(haven::zap_labels(.x)))),
      across(all_of(vars), clean_reg)
    ) %>%
    mutate(across(all_of(vars),
                  ~as.integer(.x %in% ac),
                  .names="acc_{.col}")) %>%
    filter(!is.na(.data[[cfg$wt]]))

  # ── India % agree ───────────────────────────────────────────────────
  india_dat <- dat_c %>% filter(.data[[cfg$ctry]] == 356)
  des_i <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=india_dat)

  india_pct <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    est <- svymean(as.formula(paste0("~", bv)), des_i, na.rm=TRUE)
    tibble(question=lbl, india_pct=round(as.numeric(coef(est)) * 100, 1))
  })

  # ── Region country-weighted averages ───────────────────────────────
  rgn_dat <- dat_c %>% filter(!is.na(macroregion_h), macroregion_h != "India")

  region_avgs <- imap_dfr(labs, function(lbl, v) {
    bv  <- paste0("acc_", v)
    d   <- rgn_dat %>% filter(!is.na(.data[[bv]]))
    des <- svydesign(ids=~1, weights=as.formula(paste0("~", cfg$wt)), data=d)

    svyby(as.formula(paste0("~", bv)),
          as.formula(paste0("~", cfg$ctry, " + macroregion_h")),
          des, svymean, na.rm=TRUE) %>%
      as.data.frame() %>%
      select(macroregion_h, cpct=all_of(bv)) %>%
      mutate(label=lbl)
  }) %>%
    group_by(macroregion_h, label) %>%
    summarise(rpct=mean(cpct, na.rm=TRUE) * 100, .groups="drop")

  n_entities <- n_distinct(region_avgs$macroregion_h) + 1

  # ── Rank India ──────────────────────────────────────────────────────
  ranking <- region_avgs %>%
    bind_rows(india_pct %>% transmute(macroregion_h="India", label=question, rpct=india_pct)) %>%
    group_by(label) %>%
    mutate(rank=rank(-rpct, ties.method="min")) %>%
    filter(macroregion_h=="India") %>%
    ungroup() %>%
    select(question=label, rank) %>%
    mutate(n_entities=n_entities)

  india_pct %>%
    left_join(ranking, by="question") %>%
    mutate(year=cfg$year)
}

results_jobs <- map_dfr(jobs_cfg, compute_jobs_block)

stopifnot(nrow(results_jobs %>% count(question, year) %>% filter(n > 1)) == 0)

res_wide_jobs <- results_jobs %>%
  mutate(rank_fmt=paste0(rank, "/", n_entities)) %>%
  select(question, year, rank_fmt, india_pct) %>%
  pivot_wider(
    names_from  = year,
    values_from = c(rank_fmt, india_pct),
    names_glue  = "{year}_{.value}"
  ) %>%
  select(question,
         `2006_rank_fmt`, `2012_rank_fmt`, `2022_rank_fmt`,
         `2006_india_pct`, `2012_india_pct`, `2022_india_pct`) %>%
  mutate(question=factor(question,
           levels=c("Men over women", "Nationals over immigrants"))) %>%
  arrange(question)

res_wide_jobs %>%
  knitr::kable(
    format    = "html",
    col.names = c("Statement",
                  "2006 Rank","2012 Rank","2022 Rank",
                  "2006%","2012%","2022%"),
    caption   = "India: % agreeing jobs should go to men/nationals when scarce & rank among macro-regions"
  ) %>%
  kableExtra::kable_styling(
    full_width        = FALSE,
    bootstrap_options = c("striped","hover","condensed")
  ) %>%
  kableExtra::add_header_above(
    c(" "=1, "Rank among macro-regions"=3, "% Agree — India"=3)
  ) %>%
  kableExtra::column_spec(1, bold=TRUE) %>%
  kableExtra::footnote(
    general = paste0(
      "2006 & 2012: Agree on 3-point scale (Agree / Neither / Disagree). ",
      "2022: Strongly agree + Agree collapsed on 5-point scale."
    ),
    general_title = "Note: "
  )

India: % agreeing jobs should go to men/nationals when scarce & rank among macro-regions
	Rank among macro-regions			% Agree — India
Statement	2006 Rank	2012 Rank	2022 Rank	2006%	2012%	2022%
Men over women	2/7	1/8	3/8	50.5	72.7	56.8
Nationals over immigrants	1/7	1/8	6/8	74.2	32.7	71.6
Note:
2006 & 2012: Agree on 3-point scale (Agree / Neither / Disagree). 2022: Strongly agree + Agree collapsed on 5-point scale.

India analysis across waves

Arslan

2026-03-02

Macro regions

Economic ranking

Democratic system

Neighbour

Moral permisiveness

Institutional trust

Regime preferences

Jobs scarce