library(tidyverse)
library(haven)
library(labelled)
library(survey)
library(forcats)
library(openxlsx)
library(Hmisc)
library(ggplot2)
library(kableExtra)
library(dplyr)
library(tidyr)
library(stringr)
library(knitr)
library(cregg)
library(scales)
library(readxl)


# Absolute location of the WVS wave 6 workspace file on the local machine.
data_path <- "D:/Populism and Democrary/World value survey/WVS 2012/WV6_Data_R_v20201117.rdata"

# Stop immediately when the file is not where we expect it.
stopifnot(file.exists(data_path))

# load() restores the saved object(s) into the workspace under their stored
# names; the object used from here on is WV6_Data_R_v20201117.
load(data_path)

# wvs_raw: reference copy of the data exactly as loaded.
# wvs:     working copy that the rest of the script reads from.
wvs <- wvs_raw <- WV6_Data_R_v20201117

Country and responses

# Sample size: one row per respondent.
n_respondents <- nrow(wvs)

# Countries are identified by the code in V2; missing codes are ignored.
n_countries <- dplyr::n_distinct(wvs[["V2"]], na.rm = TRUE)
countries_present <- sort(unique(wvs[["V2"]][!is.na(wvs[["V2"]])]))

# Region variables: keep only those of V256 / V256B / V256C that exist in
# this file (not every dataset ships all three).
region_vars <- intersect(c("V256", "V256B", "V256C"), names(wvs))

# One row per available region variable: number of distinct non-missing
# values and number of missing entries.
regions_summary <- dplyr::bind_rows(
  lapply(region_vars, function(region_var) {
    values <- wvs[[region_var]]
    data.frame(
      var = region_var,
      n_unique = dplyr::n_distinct(values, na.rm = TRUE),
      n_missing = sum(is.na(values))
    )
  })
)

# Number of distinct V256 regions observed inside each country, largest first.
# Stays NULL when V256 is not part of the data.
country_region_summary <- NULL
if ("V256" %in% names(wvs)) {
  country_region_summary <- wvs |>
    dplyr::distinct(V2, V256) |>
    dplyr::filter(!(is.na(V2) | is.na(V256))) |>
    dplyr::count(V2, name = "n_regions_in_country") |>
    dplyr::arrange(dplyr::desc(n_regions_in_country))
}



# Lookup table: numeric country code (V2) -> country name.
# Used below to attach readable names to the per-country respondent counts.
# Codes 900-913 are sub-national or historical units (e.g. West Germany,
# Tambov, Basque Country) rather than present-day countries.
# NOTE(review): the codes look like ISO 3166-1 numeric codes plus
# survey-specific extensions -- confirm against the WVS codebook.
# NOTE: country_map is redefined further down with an added region column.
country_map <- tibble::tribble(
  ~V2, ~country,
  8, "Albania",
  12, "Algeria",
  16, "American Samoa",
  20, "Andorra",
  24, "Angola",
  28, "Antigua and Barbuda",
  31, "Azerbaijan",
  32, "Argentina",
  36, "Australia",
  40, "Austria",
  50, "Bangladesh",
  51, "Armenia",
  52, "Barbados",
  56, "Belgium",
  60, "Bermuda",
  64, "Bhutan",
  68, "Bolivia",
  70, "Bosnia",
  72, "Botswana",
  76, "Brazil",
  84, "Belize",
  100, "Bulgaria",
  104, "Myanmar",
  108, "Burundi",
  112, "Belarus",
  116, "Cambodia",
  120, "Cameroon",
  124, "Canada",
  144, "Sri Lanka",
  148, "Chad",
  152, "Chile",
  156, "China",
  158, "Taiwan",
  170, "Colombia",
  180, "Dem. Rep. of Congo",
  184, "Cook Islands",
  188, "Costa Rica",
  191, "Croatia",
  192, "Cuba",
  196, "Cyprus",
  203, "Czech Republic",
  208, "Denmark",
  214, "Dominican Republic",
  218, "Ecuador",
  222, "El Salvador",
  226, "Equatorial Guinea",
  231, "Ethiopia",
  232, "Eritrea",
  233, "Estonia",
  246, "Finland",
  250, "France",
  268, "Georgia",
  270, "Gambia",
  275, "Palestine",
  276, "Germany",
  288, "Ghana",
  292, "Gibraltar",
  300, "Greece",
  320, "Guatemala",
  324, "Guinea",
  328, "Guyana",
  332, "Haiti",
  340, "Honduras",
  344, "Hong Kong",
  348, "Hungary",
  352, "Iceland",
  356, "India",
  360, "Indonesia",
  364, "Iran",
  368, "Iraq",
  372, "Ireland",
  376, "Israel",
  380, "Italy",
  384, "Côte d'Ivoire",
  388, "Jamaica",
  392, "Japan",
  398, "Kazakhstan",
  400, "Jordan",
  404, "Kenya",
  410, "South Korea",
  414, "Kuwait",
  417, "Kyrgyzstan",
  418, "Laos",
  422, "Lebanon",
  426, "Lesotho",
  428, "Latvia",
  430, "Liberia",
  434, "Libya",
  438, "Liechtenstein",
  440, "Lithuania",
  442, "Luxembourg",
  450, "Madagascar",
  454, "Malawi",
  458, "Malaysia",
  466, "Mali",
  470, "Malta",
  474, "Martinique",
  478, "Mauritania",
  480, "Mauritius",
  484, "Mexico",
  492, "Monaco",
  496, "Mongolia",
  498, "Moldova",
  504, "Morocco",
  508, "Mozambique",
  512, "Oman",
  516, "Namibia",
  524, "Nepal",
  528, "Netherlands",
  554, "New Zealand",
  558, "Nicaragua",
  562, "Niger",
  566, "Nigeria",
  578, "Norway",
  586, "Pakistan",
  591, "Panama",
  598, "Papua New Guinea",
  600, "Paraguay",
  604, "Peru",
  608, "Philippines",
  616, "Poland",
  620, "Portugal",
  624, "Guinea-Bissau",
  626, "Timor-Leste",
  630, "Puerto Rico",
  634, "Qatar",
  642, "Romania",
  643, "Russia",
  646, "Rwanda",
  682, "Saudi Arabia",
  686, "Senegal",
  690, "Seychelles",
  694, "Sierra Leone",
  702, "Singapore",
  703, "Slovakia",
  704, "Viet Nam",
  705, "Slovenia",
  706, "Somalia",
  710, "South Africa",
  716, "Zimbabwe",
  724, "Spain",
  736, "Sudan",
  740, "Suriname",
  752, "Sweden",
  756, "Switzerland",
  760, "Syria",
  762, "Tajikistan",
  764, "Thailand",
  768, "Togo",
  780, "Trinidad",
  784, "United Arab Emirates",
  788, "Tunisia",
  792, "Turkey",
  795, "Turkmenistan",
  800, "Uganda",
  804, "Ukraine",
  807, "Macedonia",
  818, "Egypt",
  826, "Great Britain",
  834, "Tanzania",
  840, "United States",
  850, "U.S. Virgin Islands",
  858, "Uruguay",
  860, "Uzbekistan",
  862, "Venezuela",
  887, "Yemen",
  894, "Zambia",
  900, "West Germany",
  901, "East Germany",
  902, "Tambov",
  903, "Moscow",
  904, "Basque Country",
  906, "Andalusia",
  907, "Galicia",
  909, "North Ireland",
  910, "Valencia",
  911, "Serbia",
  912, "Montenegro",
  913, "SrpSka Republic"
)

# Respondents per country code (rows without a code are skipped), ordered
# from the largest sample to the smallest, with the readable name from
# country_map placed in the first column.
country_n <- wvs %>%
  filter(!is.na(V2)) %>%
  count(V2, name = "n_respondents") %>%
  arrange(desc(n_respondents)) %>%
  left_join(country_map, by = "V2") %>%
  select(country, everything())

country_n
##          country  V2 n_respondents
## 1          India 356          4078
## 2   South Africa 710          3531
## 3         Russia 643          2500
## 4          Japan 392          2443
## 5          China 156          2300
## 6  United States 840          2232
## 7          Libya 434          2131
## 8        Germany 276          2046
## 9         Mexico 484          2000
## 10         Haiti 332          1996
## 11     Singapore 702          1972
## 12   Netherlands 528          1902
## 13       Nigeria 566          1759
## 14        Turkey 792          1605
## 15         Ghana 288          1552
## 16       Belarus 112          1535
## 17       Estonia 233          1533
## 18        Rwanda 646          1527
## 19         Egypt 818          1523
## 20      Colombia 170          1512
## 21       Romania 642          1503
## 22    Kazakhstan 398          1500
## 23    Kyrgyzstan 417          1500
## 24      Zimbabwe 716          1500
## 25       Ukraine 804          1500
## 26    Uzbekistan 860          1500
## 27        Brazil  76          1486
## 28     Australia  36          1477
## 29        Kuwait 414          1303
## 30      Malaysia 458          1300
## 31        Taiwan 158          1238
## 32          Peru 604          1210
## 33        Sweden 752          1206
## 34       Tunisia 788          1205
## 35       Ecuador 218          1202
## 36       Georgia 268          1202
## 37       Algeria  12          1200
## 38          Iraq 368          1200
## 39        Jordan 400          1200
## 40   South Korea 410          1200
## 41       Lebanon 422          1200
## 42       Morocco 504          1200
## 43      Pakistan 586          1200
## 44   Philippines 608          1200
## 45      Thailand 764          1200
## 46         Spain 724          1189
## 47       Armenia  51          1100
## 48      Slovenia 705          1069
## 49         Qatar 634          1060
## 50     Argentina  32          1030
## 51    Azerbaijan  31          1002
## 52         Chile 152          1000
## 53        Cyprus 196          1000
## 54     Palestine 275          1000
## 55     Hong Kong 344          1000
## 56       Uruguay 858          1000
## 57         Yemen 887          1000
## 58      Trinidad 780           999
## 59        Poland 616           966
## 60   New Zealand 554           841

Country categorization into macro-region

# Redefines country_map: numeric country code (V2) -> country name and
# macro-region. India and Pakistan are each kept as a single-country region.
# After a left join on V2, any code that is not listed here ends up with
# NA in both country and region.
country_map <- tibble::tribble(
  ~V2, ~country, ~region,
  
  # India (own category)
  356, "India", "India",
  
  # Pakistan
  586, "Pakistan",   "Pakistan",
  
  # East & Southeast Asia
  156, "China",                "East & Southeast Asia",
  344, "Hong Kong",            "East & Southeast Asia",
  392, "Japan",                "East & Southeast Asia",
  410, "South Korea",          "East & Southeast Asia",
  158, "Taiwan",               "East & Southeast Asia",
  702, "Singapore",            "East & Southeast Asia",
  458, "Malaysia",             "East & Southeast Asia",   
  608, "Philippines",          "East & Southeast Asia",   
  764, "Thailand",             "East & Southeast Asia", 
  
  # Middle East & North Africa (MENA)
  12,  "Algeria",              "Middle East & North Africa (MENA)",
  818, "Egypt",                "Middle East & North Africa (MENA)",
  368, "Iraq",                 "Middle East & North Africa (MENA)",
  400, "Jordan",               "Middle East & North Africa (MENA)",
  414, "Kuwait",               "Middle East & North Africa (MENA)",
  422, "Lebanon",              "Middle East & North Africa (MENA)",
  434, "Libya",                "Middle East & North Africa (MENA)",
  504, "Morocco",              "Middle East & North Africa (MENA)",
  275, "Palestine",            "Middle East & North Africa (MENA)",
  634, "Qatar",                "Middle East & North Africa (MENA)",
  788, "Tunisia",              "Middle East & North Africa (MENA)",
  887, "Yemen",                "Middle East & North Africa (MENA)",
  792, "Turkey",               "Middle East & North Africa (MENA)",
  
  # Sub-Saharan Africa
  288, "Ghana",                "Sub-Saharan Africa",
  566, "Nigeria",              "Sub-Saharan Africa",
  646, "Rwanda",               "Sub-Saharan Africa",
  710, "South Africa",         "Sub-Saharan Africa",
  716, "Zimbabwe",             "Sub-Saharan Africa",
  
  # Latin America & Caribbean
  32,  "Argentina",            "Latin America & Caribbean",
  76,  "Brazil",               "Latin America & Caribbean",
  152, "Chile",                "Latin America & Caribbean",
  170, "Colombia",             "Latin America & Caribbean",
  218, "Ecuador",              "Latin America & Caribbean",
  484, "Mexico",               "Latin America & Caribbean",
  604, "Peru",                 "Latin America & Caribbean",
  858, "Uruguay",              "Latin America & Caribbean",
  332, "Haiti",                "Latin America & Caribbean",  
  780, "Trinidad",             "Latin America & Caribbean",
  
  # Western Europe & Offshoots
  36,  "Australia",            "Western Europe & Offshoots",
  196, "Cyprus",               "Western Europe & Offshoots",
  276, "Germany",              "Western Europe & Offshoots",
  528, "Netherlands",          "Western Europe & Offshoots",
  554, "New Zealand",          "Western Europe & Offshoots",
  724, "Spain",                "Western Europe & Offshoots",
  752, "Sweden",               "Western Europe & Offshoots",
  840, "United States",        "Western Europe & Offshoots",
  
  
  # Eastern Europe & Post-Soviet
  51,  "Armenia",              "Eastern Europe & Post-Soviet",
  112, "Belarus",              "Eastern Europe & Post-Soviet",
  233, "Estonia",              "Eastern Europe & Post-Soviet",
  268, "Georgia",              "Eastern Europe & Post-Soviet",
  398, "Kazakhstan",           "Eastern Europe & Post-Soviet",
  417, "Kyrgyzstan",           "Eastern Europe & Post-Soviet",
  498, "Moldova",              "Eastern Europe & Post-Soviet",
  616, "Poland",               "Eastern Europe & Post-Soviet",
  642, "Romania",              "Eastern Europe & Post-Soviet",
  643, "Russia",               "Eastern Europe & Post-Soviet",
  705, "Slovenia",             "Eastern Europe & Post-Soviet",
  804, "Ukraine",              "Eastern Europe & Post-Soviet",
  31,  "Azerbaijan",           "Eastern Europe & Post-Soviet",
  70,  "Bosnia and Herzegovina", "Eastern Europe & Post-Soviet",
  911, "Serbia",               "Eastern Europe & Post-Soviet",
  860, "Uzbekistan",           "Eastern Europe & Post-Soviet"
)

# Respondents per country code, largest sample first, now with both the
# country name and its macro-region placed ahead of the code.
country_n <- wvs %>%
  filter(!is.na(V2)) %>%
  count(V2, name = "n_respondents") %>%
  arrange(desc(n_respondents)) %>%
  left_join(country_map, by = "V2") %>%
  select(country, region, everything())

# Sanity check: country codes present in the data but absent from
# country_map. An empty result means every code has a region.
setdiff(unique(wvs[["V2"]]), country_map[["V2"]])
## integer(0)

Demographics

# Attach country name and macro-region to every respondent (matched on V2).
wvs_region <- wvs %>% left_join(country_map, by = "V2")

# Age cohorts
# age is a copy of V242; age_group bins it into six cohorts.
# Negative values are treated as missing-data codes (as done for the
# attitude items elsewhere in this script) and mapped to NA first, so they
# cannot fall into the youngest bin through `age < 25`.
# NOTE(review): any respondent younger than 18 is still labelled "18–24";
# confirm the minimum sampled age in each country.
wvs_region <- wvs_region %>%
  mutate(
    age = V242,
    age_group = case_when(
      age < 0 ~ NA_character_,
      age < 25 ~ "18–24",
      age < 35 ~ "25–34",
      age < 45 ~ "35–44",
      age < 55 ~ "45–54",
      age < 65 ~ "55–64",
      age >= 65 ~ "65+",
      TRUE ~ NA_character_
    )
  )

# Unweighted age-cohort counts per macro-region, plus each cohort's share of
# the region's respondents. Rows lacking a region or an age group are left
# out before counting.
age_region <- wvs_region %>%
  filter(!(is.na(region) | is.na(age_group))) %>%
  count(region, age_group) %>%
  group_by(region) %>%
  mutate(share = prop.table(n)) %>%
  ungroup()

kable(
  x = age_region,
  caption = "Age-group distribution by macro region",
  digits = 3
)
Age-group distribution by macro region
region age_group n share
East & Southeast Asia 18–24 1705 0.124
East & Southeast Asia 25–34 2460 0.178
East & Southeast Asia 35–44 2893 0.210
East & Southeast Asia 45–54 2741 0.199
East & Southeast Asia 55–64 2323 0.168
East & Southeast Asia 65+ 1670 0.121
Eastern Europe & Post-Soviet 18–24 2535 0.138
Eastern Europe & Post-Soviet 25–34 3660 0.199
Eastern Europe & Post-Soviet 35–44 3304 0.180
Eastern Europe & Post-Soviet 45–54 3384 0.184
Eastern Europe & Post-Soviet 55–64 2743 0.149
Eastern Europe & Post-Soviet 65+ 2780 0.151
India 18–24 422 0.104
India 25–34 1121 0.277
India 35–44 967 0.239
India 45–54 772 0.191
India 55–64 416 0.103
India 65+ 353 0.087
Latin America & Caribbean 18–24 2817 0.210
Latin America & Caribbean 25–34 3108 0.231
Latin America & Caribbean 35–44 2510 0.187
Latin America & Caribbean 45–54 2086 0.155
Latin America & Caribbean 55–64 1554 0.116
Latin America & Caribbean 65+ 1359 0.101
Middle East & North Africa (MENA) 18–24 3334 0.199
Middle East & North Africa (MENA) 25–34 4462 0.266
Middle East & North Africa (MENA) 35–44 3865 0.231
Middle East & North Africa (MENA) 45–54 2572 0.153
Middle East & North Africa (MENA) 55–64 1628 0.097
Middle East & North Africa (MENA) 65+ 901 0.054
Pakistan 18–24 297 0.248
Pakistan 25–34 333 0.278
Pakistan 35–44 331 0.276
Pakistan 45–54 148 0.123
Pakistan 55–64 68 0.057
Pakistan 65+ 23 0.019
Sub-Saharan Africa 18–24 2746 0.278
Sub-Saharan Africa 25–34 3397 0.344
Sub-Saharan Africa 35–44 1801 0.182
Sub-Saharan Africa 45–54 1012 0.103
Sub-Saharan Africa 55–64 567 0.057
Sub-Saharan Africa 65+ 346 0.035
Western Europe & Offshoots 18–24 1202 0.101
Western Europe & Offshoots 25–34 1676 0.141
Western Europe & Offshoots 35–44 1872 0.158
Western Europe & Offshoots 45–54 2169 0.183
Western Europe & Offshoots 55–64 2196 0.185
Western Europe & Offshoots 65+ 2756 0.232
# Gender
# V240: 1 -> "Male", 2 -> "Female"; every other value (including NA) gets no
# match in case_when() and therefore becomes NA.
wvs_region <- wvs_region %>%
  mutate(
    gender = case_when(
      V240 == 1 ~ "Male",
      V240 == 2 ~ "Female"
    )
  )

# Unweighted gender counts and within-region shares.
gender_region <- wvs_region %>%
  filter(!(is.na(region) | is.na(gender))) %>%
  count(region, gender) %>%
  group_by(region) %>%
  mutate(share = prop.table(n)) %>%
  ungroup()

gender_region
## # A tibble: 16 × 4
##    region                            gender     n share
##    <chr>                             <chr>  <int> <dbl>
##  1 East & Southeast Asia             Female  7109 0.514
##  2 East & Southeast Asia             Male    6710 0.486
##  3 Eastern Europe & Post-Soviet      Female 10450 0.568
##  4 Eastern Europe & Post-Soviet      Male    7957 0.432
##  5 India                             Female  1785 0.438
##  6 India                             Male    2290 0.562
##  7 Latin America & Caribbean         Female  7062 0.526
##  8 Latin America & Caribbean         Male    6373 0.474
##  9 Middle East & North Africa (MENA) Female  8493 0.506
## 10 Middle East & North Africa (MENA) Male    8289 0.494
## 11 Pakistan                          Female   578 0.482
## 12 Pakistan                          Male     622 0.518
## 13 Sub-Saharan Africa                Female  4990 0.506
## 14 Sub-Saharan Africa                Male    4879 0.494
## 15 Western Europe & Offshoots        Female  6284 0.529
## 16 Western Europe & Offshoots        Male    5603 0.471
# Immigrant status
# V245: 1 -> "Native", 2 -> "Immigrant"; every other value (including NA)
# gets no match in case_when() and therefore becomes NA.
wvs_region <- wvs_region %>%
  mutate(
    immigrant_status = case_when(
      V245 == 1 ~ "Native",
      V245 == 2 ~ "Immigrant"
    )
  )

# Unweighted native / immigrant counts and within-region shares.
immigrant_region <- wvs_region %>%
  filter(!(is.na(region) | is.na(immigrant_status))) %>%
  count(region, immigrant_status) %>%
  group_by(region) %>%
  mutate(share = prop.table(n)) %>%
  ungroup()

immigrant_region
## # A tibble: 16 × 4
##    region                            immigrant_status     n   share
##    <chr>                             <chr>            <int>   <dbl>
##  1 East & Southeast Asia             Immigrant          329 0.0407 
##  2 East & Southeast Asia             Native            7756 0.959  
##  3 Eastern Europe & Post-Soviet      Immigrant         1063 0.0579 
##  4 Eastern Europe & Post-Soviet      Native           17301 0.942  
##  5 India                             Immigrant           24 0.00590
##  6 India                             Native            4044 0.994  
##  7 Latin America & Caribbean         Immigrant          179 0.0133 
##  8 Latin America & Caribbean         Native           13251 0.987  
##  9 Middle East & North Africa (MENA) Immigrant          453 0.0312 
## 10 Middle East & North Africa (MENA) Native           14056 0.969  
## 11 Pakistan                          Immigrant           16 0.0133 
## 12 Pakistan                          Native            1184 0.987  
## 13 Sub-Saharan Africa                Immigrant          193 0.0196 
## 14 Sub-Saharan Africa                Native            9676 0.980  
## 15 Western Europe & Offshoots        Immigrant         1097 0.120  
## 16 Western Europe & Offshoots        Native            8043 0.880
# Overall number of missing values in the raw age (V242), gender (V240) and
# immigrant-status (V245) variables.
summarise(
  wvs_region,
  missing_age = sum(is.na(V242)),
  missing_gender = sum(is.na(V240)),
  missing_immigrant = sum(is.na(V245))
)
##   missing_age missing_gender missing_immigrant
## 1         180             91             10900
# Frequency of each V245 value; the NA category is always listed.
table(wvs_region[["V245"]], useNA = "always")
## 
##     1     2  <NA> 
## 75311  3354 10900
# Same missing-value counts as above, broken down by macro-region, together
# with the number of respondents in each region.
summarise(
  group_by(wvs_region, region),
  n_obs = n(),
  missing_age = sum(is.na(V242)),
  missing_gender = sum(is.na(V240)),
  missing_immigrant = sum(is.na(V245))
)
## # A tibble: 8 × 5
##   region                      n_obs missing_age missing_gender missing_immigrant
##   <chr>                       <int>       <int>          <int>             <int>
## 1 East & Southeast Asia       13853          61             34              5768
## 2 Eastern Europe & Post-Sovi… 18410           4              3                46
## 3 India                        4078          27              3                10
## 4 Latin America & Caribbean   13435           1              0                 5
## 5 Middle East & North Africa… 16827          65             45              2318
## 6 Pakistan                     1200           0              0                 0
## 7 Sub-Saharan Africa           9869           0              0                 0
## 8 Western Europe & Offshoots  11893          22              6              2753

Macro-region analysis (Economic system)

# Economic-system items V95 ... V101.
likert_vars <- paste0("V", 95:101)

# Negative values are treated as missing-data codes and set to NA.
wvs_region <- wvs_region %>%
  mutate(across(all_of(likert_vars), ~ifelse(.x < 0, NA, .x)))

# keep valid cases: a macro-region and a survey weight (V258) are required
wvs_clean <- wvs_region %>%
  filter(!is.na(region), !is.na(V258))

# Weighted design without clustering or strata; V258 is the weight.
svy_design <- svydesign(
  ids = ~1,
  weights = ~V258,
  data = wvs_clean
)

# Survey-weighted mean of every item within each country.
# Result: one row per country x item with columns
# V2, region, country_mean, se, variable.
country_means <- lapply(likert_vars, function(v) {

  svyby(
    as.formula(paste0("~", v)),
    ~V2 + region,
    svy_design,
    svymean,
    na.rm = TRUE
  ) %>%
    # svyby() names the estimate column after the analysed variable; rename
    # it by name instead of by position so the step does not depend on the
    # number of grouping columns.
    rename(country_mean = all_of(v)) %>%
    mutate(variable = v)

}) %>% bind_rows()


# Macro-region value = unweighted average of the country means, so every
# country counts equally regardless of its sample size.
region_means_country_weighted <- country_means %>%
  group_by(region, variable) %>%
  summarise(
    mean = mean(country_mean, na.rm = TRUE),
    # Count only countries that actually contribute a value, so that N
    # matches the denominator of the na.rm mean above.
    n_countries = sum(!is.na(country_mean)),
    .groups = "drop"
  )

# Display labels for the economic-system items, including the meaning of
# both scale end points.
var_labels <- c(
  V95  = "Left–Right political self-placement (1=Left, 10=Right)",
  V96  = "Income equality vs inequality incentives (1=Equal incomes, 10=Inequality as incentive)",
  V97  = "Ownership of business (1=Private ownership, 10=Government ownership)",
  V98  = "Providing sustenance (1=Government provides, 10=Individual responsibility)",
  V99  = "Views on competition (1=Competition good, 10=Competition harmful)",
  V100 = "Hard work vs luck for success (1=Hard work brings success, 10=Luck/connections)",
  V101 = "Wealth accumulation (1=Wealth at others’ expense, 10=Wealth grows for all)"
)

# Emit one HTML table per item: regions ordered from the lowest to the
# highest mean, followed by two line breaks as a spacer.
for (v in likert_vars) {

  temp_table <- region_means_country_weighted %>%
    filter(variable == v) %>%
    arrange(mean) %>%
    select(region, mean, n_countries)

  print(
    kable_styling(
      kable(
        temp_table,
        format = "html",
        digits = 2,
        caption = paste("Regional comparison:", var_labels[[v]]),
        col.names = c(
          "Region",
          "Mean (country-weighted, 1–10)",
          "N countries"
        )
      ),
      full_width = FALSE
    )
  )

  cat("<br><br>")
}
Regional comparison: Left–Right political self-placement (1=Left, 10=Right)
Region Mean (country-weighted, 1–10) N countries
East & Southeast Asia 4.48 9
Middle East & North Africa (MENA) 4.60 13
Latin America & Caribbean 5.32 10
Western Europe & Offshoots 5.33 8
Sub-Saharan Africa 5.62 5
India 5.70 1
Eastern Europe & Post-Soviet 5.73 13
Pakistan 7.44 1


Regional comparison: Income equality vs inequality incentives (1=Equal incomes, 10=Inequality as incentive)
Region Mean (country-weighted, 1–10) N countries
India 2.92 1
Western Europe & Offshoots 4.82 8
Eastern Europe & Post-Soviet 5.04 13
Latin America & Caribbean 5.10 10
East & Southeast Asia 5.77 9
Middle East & North Africa (MENA) 5.95 13
Sub-Saharan Africa 6.09 5
Pakistan 6.84 1


Regional comparison: Ownership of business (1=Private ownership, 10=Government ownership)
Region Mean (country-weighted, 1–10) N countries
Western Europe & Offshoots 4.85 8
Sub-Saharan Africa 5.24 5
India 5.29 1
East & Southeast Asia 5.61 9
Latin America & Caribbean 5.69 10
Eastern Europe & Post-Soviet 5.83 13
Pakistan 5.88 1
Middle East & North Africa (MENA) 6.01 13


Regional comparison: Providing sustenance (1=Government provides, 10=Individual responsibility)
Region Mean (country-weighted, 1–10) N countries
India 3.36 1
Middle East & North Africa (MENA) 3.78 13
Eastern Europe & Post-Soviet 4.03 13
Sub-Saharan Africa 4.44 5
Latin America & Caribbean 4.80 10
East & Southeast Asia 5.06 9
Western Europe & Offshoots 5.28 8
Pakistan 5.76 1


Regional comparison: Views on competition (1=Competition good, 10=Competition harmful)
Region Mean (country-weighted, 1–10) N countries
India 2.73 1
Middle East & North Africa (MENA) 3.15 13
Western Europe & Offshoots 3.84 8
Sub-Saharan Africa 3.95 5
East & Southeast Asia 3.98 9
Eastern Europe & Post-Soviet 4.01 13
Latin America & Caribbean 4.23 10
Pakistan 4.32 1


Regional comparison: Hard work vs luck for success (1=Hard work brings success, 10=Luck/connections)
Region Mean (country-weighted, 1–10) N countries
India 3.30 1
Middle East & North Africa (MENA) 3.59 13
Sub-Saharan Africa 3.93 5
East & Southeast Asia 4.09 9
Pakistan 4.23 1
Western Europe & Offshoots 4.35 8
Latin America & Caribbean 4.56 10
Eastern Europe & Post-Soviet 4.64 13


Regional comparison: Wealth accumulation (1=Wealth at others’ expense, 10=Wealth grows for all)
Region Mean (country-weighted, 1–10) N countries
India 5.05 1
Western Europe & Offshoots 6.01 8
Sub-Saharan Africa 6.01 5
Eastern Europe & Post-Soviet 6.17 13
Middle East & North Africa (MENA) 6.37 13
Latin America & Caribbean 6.52 10
East & Southeast Asia 6.62 9
Pakistan 7.12 1



These results differ sharply from what we saw in PALS (a complete turnaround): India leans more toward the liberal side of the spectrum than the world average (its means are lower than those of the other regions). It would be interesting to see whether this held until 2012 and then changed abruptly after 2014.

Macro region analysis (democratic system)

# democratic system variables (V131 ... V140)
demo_vars <- paste0("V", 131:140)

# remove invalid codes (-1 to -5): every negative value becomes NA
wvs_region <- wvs_region %>%
  mutate(across(all_of(demo_vars), ~ifelse(.x < 0, NA, .x)))

# keep valid cases: a macro-region and a survey weight (V258) are required
wvs_clean_demo <- wvs_region %>%
  filter(!is.na(region), !is.na(V258))

# survey design: weighted by V258, no clustering or strata
svy_design_demo <- svydesign(
  ids = ~1,
  weights = ~V258,
  data = wvs_clean_demo
)

# SURVEY-WEIGHTED COUNTRY MEANS
# Result: one row per country x item with columns
# V2, region, country_mean, se, variable.
country_means_demo <- lapply(demo_vars, function(v) {

  svyby(
    as.formula(paste0("~", v)),
    ~V2 + region,
    svy_design_demo,
    svymean,
    na.rm = TRUE
  ) %>%
    # svyby() names the estimate column after the analysed variable; rename
    # it by name instead of by position so the step does not depend on the
    # number of grouping columns.
    rename(country_mean = all_of(v)) %>%
    mutate(variable = v)

}) %>% bind_rows()


# MACRO REGION AVERAGE OF COUNTRY MEANS
# Every country counts equally, regardless of its sample size.
region_means_demo <- country_means_demo %>%
  group_by(region, variable) %>%
  summarise(
    mean = mean(country_mean, na.rm = TRUE),
    # Count only countries that actually contribute a value, so that N
    # matches the denominator of the na.rm mean above.
    n_countries = sum(!is.na(country_mean)),
    .groups = "drop"
  )

# Display labels for the democratic-system items.
demo_labels <- c(
  V131 = "Democracy: Taxing the rich & subsidizing the poor as essential",
  V132 = "Democracy: Religious authorities interpret laws",
  V133 = "Democracy: Free elections for leaders",
  V134 = "Democracy: State aid for unemployment",
  V135 = "Democracy: Army takeover when government incompetent",
  V136 = "Democracy: Civil rights protect against oppression",
  V137 = "Democracy: State makes incomes equal",
  V138 = "Democracy: People obey their rulers",
  V139 = "Democracy: Equal rights for women",
  V140 = "Importance of living in a democratically governed country"
)

# Emit one HTML table per item: regions ordered from the highest to the
# lowest mean, followed by two line breaks as a spacer.
for (v in demo_vars) {

  temp_table <- region_means_demo %>%
    filter(variable == v) %>%
    arrange(desc(mean)) %>%
    select(region, mean, n_countries)

  print(
    kable_styling(
      kable(
        temp_table,
        format = "html",
        digits = 2,
        caption = paste("Regional comparison:", demo_labels[[v]]),
        col.names = c(
          "Region",
          "Mean (country-weighted, 1–10)",
          "N countries"
        )
      ),
      full_width = FALSE
    )
  )

  cat("<br><br>")
}
Regional comparison: Democracy: Taxing the rich & subsidizing the poor as essential
Region Mean (country-weighted, 1–10) N countries
Pakistan 8.69 1
India 7.05 1
East & Southeast Asia 6.69 9
Middle East & North Africa (MENA) 6.67 13
Eastern Europe & Post-Soviet 6.42 13
Western Europe & Offshoots 6.09 8
Latin America & Caribbean 5.49 10
Sub-Saharan Africa 5.43 5


Regional comparison: Democracy: Religious authorities interpret laws
Region Mean (country-weighted, 1–10) N countries
Pakistan 7.51 1
Middle East & North Africa (MENA) 5.54 13
Sub-Saharan Africa 5.01 5
Latin America & Caribbean 4.05 10
India 4.00 1
East & Southeast Asia 3.90 9
Eastern Europe & Post-Soviet 3.75 13
Western Europe & Offshoots 2.65 8


Regional comparison: Democracy: Free elections for leaders
Region Mean (country-weighted, 1–10) N countries
Western Europe & Offshoots 8.73 8
Pakistan 8.21 1
Eastern Europe & Post-Soviet 8.19 13
Latin America & Caribbean 8.01 10
Middle East & North Africa (MENA) 7.89 13
India 7.82 1
East & Southeast Asia 7.80 9
Sub-Saharan Africa 7.72 5


Regional comparison: Democracy: State aid for unemployment
Region Mean (country-weighted, 1–10) N countries
Pakistan 8.38 1
Eastern Europe & Post-Soviet 7.56 13
India 7.37 1
Western Europe & Offshoots 7.08 8
Middle East & North Africa (MENA) 7.07 13
East & Southeast Asia 7.00 9
Sub-Saharan Africa 6.55 5
Latin America & Caribbean 6.31 10


Regional comparison: Democracy: Army takeover when government incompetent
Region Mean (country-weighted, 1–10) N countries
Pakistan 7.06 1
India 5.70 1
Sub-Saharan Africa 5.01 5
Middle East & North Africa (MENA) 4.76 13
Eastern Europe & Post-Soviet 4.43 13
East & Southeast Asia 4.11 9
Latin America & Caribbean 4.10 10
Western Europe & Offshoots 2.98 8


Regional comparison: Democracy: Civil rights protect against oppression
Region Mean (country-weighted, 1–10) N countries
Western Europe & Offshoots 7.85 8
Eastern Europe & Post-Soviet 7.78 13
Pakistan 7.42 1
Middle East & North Africa (MENA) 7.37 13
East & Southeast Asia 7.37 9
India 7.27 1
Sub-Saharan Africa 7.02 5
Latin America & Caribbean 6.90 10


Regional comparison: Democracy: State makes incomes equal
Region Mean (country-weighted, 1–10) N countries
India 7.10 1
Eastern Europe & Post-Soviet 6.62 13
Pakistan 6.61 1
Middle East & North Africa (MENA) 6.39 13
East & Southeast Asia 5.96 9
Sub-Saharan Africa 5.67 5
Latin America & Caribbean 5.46 10
Western Europe & Offshoots 4.75 8


Regional comparison: Democracy: People obey their rulers
Region Mean (country-weighted, 1–10) N countries
Sub-Saharan Africa 7.30 5
Pakistan 7.23 1
Middle East & North Africa (MENA) 6.70 13
India 6.41 1
Latin America & Caribbean 5.95 10
Eastern Europe & Post-Soviet 5.90 13
East & Southeast Asia 5.65 9
Western Europe & Offshoots 4.72 8


Regional comparison: Democracy: Equal rights for women
Region Mean (country-weighted, 1–10) N countries
Western Europe & Offshoots 8.84 8
Eastern Europe & Post-Soviet 8.18 13
India 8.11 1
Latin America & Caribbean 8.11 10
East & Southeast Asia 7.88 9
Pakistan 7.86 1
Sub-Saharan Africa 7.28 5
Middle East & North Africa (MENA) 7.06 13


Regional comparison: Importance of living in a democratically governed country
Region Mean (country-weighted, 1–10) N countries
Western Europe & Offshoots 8.86 8
Middle East & North Africa (MENA) 8.29 13
East & Southeast Asia 8.27 9
Latin America & Caribbean 8.26 10
Eastern Europe & Post-Soviet 8.24 13
Sub-Saharan Africa 8.06 5
Pakistan 7.85 1
India 7.77 1



Macro region analysis (value system)

Neighbours

# Neighbors rejection items (V36–V44): 1 = "Mentioned" as undesirable
neigh_vars <- paste0("V", 36:44)

# Clean negative codes: every negative value becomes NA
wvs_region <- wvs_region %>%
  mutate(across(all_of(neigh_vars), ~ifelse(.x < 0, NA, .x)))

# Keep respondents with a macro-region and a survey weight (V258).
wvs_clean_neigh <- wvs_region %>%
  filter(!is.na(region), !is.na(V258))

svy_design_neigh <- svydesign(
  ids = ~1, weights = ~V258, data = wvs_clean_neigh
)

# SURVEY-WEIGHTED COUNTRY % rejecting each group
# Result: one row per country x item with columns
# V2, region, country_prop, se, variable.
country_prop_neigh <- lapply(neigh_vars, function(v) {

  svyby(
    # The indicator must be numeric (1 = mentioned, 0 = not mentioned, NA
    # stays NA). svymean() expands a bare logical into one column per level
    # (FALSE first, then TRUE), in which case the third column of the svyby
    # result is the share NOT mentioning the group -- the opposite of what
    # this table is meant to report. With a 0/1 variable there is a single
    # estimate column: the weighted share who mentioned the group.
    as.formula(paste0("~I(as.numeric(", v, " == 1))")),
    ~V2 + region,
    svy_design_neigh,
    svymean,
    na.rm = TRUE
  ) %>%
    # Columns 1-2 are the grouping variables; column 3 is the estimate.
    rename(country_prop = 3) %>%
    mutate(variable = v)

}) %>% bind_rows()

# MACRO REGION AVERAGE OF COUNTRY PROPORTIONS
# Every country counts equally, regardless of its sample size.
# n_countries is the number of country rows in the region.
region_prop_neigh <- country_prop_neigh %>%
  group_by(region, variable) %>%
  summarise(
    mean_prop = mean(country_prop, na.rm = TRUE),
    n_countries = n(),
    .groups = "drop"
  )

# Readable names for the nine neighbour items.
neigh_labels <- c(
  V36 = "Drug addicts",
  V37 = "Different race",
  V38 = "People with AIDS",
  V39 = "Immigrants/foreign workers",
  V40 = "Homosexuals",
  V41 = "Different religion",
  V42 = "Heavy drinkers",
  V43 = "Unmarried couples",
  V44 = "Different language"
)

# Wide layout: one row per region, one column per neighbour item, values
# formatted as percentages with one decimal; regions with the most countries
# come first. Missing region x item cells are shown as a dash.
neigh_wide <- region_prop_neigh %>%
  transmute(
    region,
    variable = recode(variable, !!!neigh_labels),
    mean_prop = percent(mean_prop, accuracy = 0.1),
    n_countries
  ) %>%
  pivot_wider(
    names_from = variable,
    values_from = mean_prop,
    values_fill = "—"
  ) %>%
  arrange(desc(n_countries))

# Scrollable table with a spanning header over the nine item columns
# (the first two columns, region and n_countries, get a blank header).
kable(neigh_wide, digits = 2, escape = FALSE) %>%
  kable_styling(position = "center", full_width = FALSE) %>%
  add_header_above(
    setNames(c(2, 9), c(" ", "% Rejecting as neighbors"))
  ) %>%
  scroll_box(width = "100%", height = "600px")
% Rejecting as neighbors
region n_countries Drug addicts Different race People with AIDS Immigrants/foreign workers Homosexuals Different religion Heavy drinkers Unmarried couples Different language
Eastern Europe & Post-Soviet 13 11.0% 77.1% 40.9% 74.8% 33.4% 77.7% 19.7% 77.6% 84.9%
Middle East & North Africa (MENA) 13 13.1% 65.6% 33.6% 60.7% 23.6% 54.8% 21.8% 28.3% 74.8%
Latin America & Caribbean 10 28.6% 92.5% 84.5% 91.5% 68.0% 91.4% 50.5% 93.2% 90.9%
East & Southeast Asia 9 12.0% 77.8% 34.2% 66.5% 48.3% 79.5% 26.9% 74.9% 79.8%
Western Europe & Offshoots 8 20.2% 91.4% 83.8% 85.3% 84.7% 93.4% 34.4% 95.5% 90.4%
Sub-Saharan Africa 5 18.6% 85.7% 76.4% 79.9% 26.7% 84.6% 40.5% 79.1% 84.9%
India 1 6.4% 74.4% 38.5% 52.9% 35.0% 71.6% 10.0% 27.3% 67.3%
Pakistan 1 22.8% 84.3% 70.0% 79.1% 41.0% 76.2% 32.8% 51.8% 88.0%

Moral permissiveness

# Moral justifiability items (V198–V210, plus the lettered items V203A and
# V207A): 1-10 scale, higher = more permissive.
moral_vars <- c(
  paste0("V", 198:203),
  "V203A",
  paste0("V", 204:207),
  "V207A",
  paste0("V", 208:210)
)

# Sanity check: list the columns whose names match "20[3-9]" to see which of
# the lettered items (e.g. V203A, V207A) are present.
grep("20[3-9]", names(wvs_region), value = TRUE)
## [1] "V203"  "V203A" "V204"  "V205"  "V206"  "V207"  "V207A" "V208"  "V209"
# Recode negative values in the moral items to NA (done in place on
# wvs_region).
wvs_region <- wvs_region %>%
  mutate(
    across(all_of(moral_vars), function(col) ifelse(col < 0, NA, col))
  )

# Keep only respondents with a known region and a survey weight.
wvs_clean_moral <- wvs_region %>%
  filter(!is.na(region) & !is.na(V258))

# Weighted design without clustering; V258 supplies the weights.
svy_design_moral <- svydesign(
  ids = ~1,
  weights = ~V258,
  data = wvs_clean_moral
)

# Country MEANS (higher = more permissive)
#
# One weighted mean per country and moral item, stacked into a long table
# with columns V2, region, country_mean, se, variable.
#
# When a country has no valid answers for an item, svymean(na.rm = TRUE)
# appears to yield 0 rather than NA: the rendered regional table contains
# means below the scale minimum of 1. Those zeros would drag the regional
# averages down, so the country mean is set to NA whenever the country has no
# non-missing responses. The country row is kept, so downstream counts of
# countries per region are unchanged. Only `country_mean` is blanked; the `se`
# column is left as svyby() returns it.
country_moral <- lapply(moral_vars, function(v) {
  # Unweighted number of valid (non-missing) answers per country for item v.
  valid_by_country <- wvs_clean_moral %>%
    group_by(V2) %>%
    summarise(n_valid = sum(!is.na(.data[[v]])), .groups = "drop")

  svyby(
    as.formula(paste0("~", v)),
    ~V2 + region,
    svy_design_moral,
    svymean,
    na.rm = TRUE
  ) %>%
    rename(country_mean = 3) %>%  # third column holds the estimate for v
    left_join(valid_by_country, by = "V2") %>%
    mutate(
      country_mean = ifelse(n_valid > 0, country_mean, NA_real_),
      variable = v
    ) %>%
    select(-n_valid)
}) %>% bind_rows()

# Regional score = unweighted average of the country means, so each country
# counts once regardless of sample size; n_countries is the number of
# country rows in the region for that item.
region_moral <- country_moral %>%
  group_by(region, variable) %>%
  summarise(
    mean_score = mean(country_mean, na.rm = TRUE),
    n_countries = n(),
    .groups = "drop_last"
  ) %>%
  ungroup()

# Display labels for the moral justifiability items, keyed by variable name.
moral_labels <- setNames(
  c(
    "Welfare cheating", "Fare dodging", "Stealing",
    "Tax cheating", "Bribery",
    "Homosexuality", "Prostitution",
    "Abortion", "Divorce", "Premarital sex",
    "Suicide", "Euthanasia",
    "Domestic violence", "Child beating", "Violence"
  ),
  c(
    "V198", "V199", "V200",
    "V201", "V202",
    "V203", "V203A",
    "V204", "V205", "V206",
    "V207", "V207A",
    "V208", "V209", "V210"
  )
)

# Wide layout: one row per region (plus its country count), one column per
# moral item, scores rounded to one decimal. Region/item combinations with
# no row are left as NA by pivot_wider().
moral_wide <- region_moral %>%
  select(region, variable, mean_score, n_countries) %>%
  mutate(
    variable = recode(variable, !!!moral_labels),
    mean_score = round(mean_score, 1)
  ) %>%
  pivot_wider(names_from = variable, values_from = mean_score) %>%
  arrange(desc(n_countries))

# Render the moral-score table.
# kable() has no `na_print` argument (it was silently swallowed by `...`);
# knitr takes the placeholder for missing cells from the `knitr.kable.NA`
# option, so set that instead. The option stays set for later tables.
options(knitr.kable.NA = "—")

# The grouped header spans every column after region and n_countries, so its
# width is taken from the table rather than assumed from the label vector.
kable(moral_wide, digits = 1, escape = FALSE) %>%
  kable_styling(full_width = TRUE, position = "center") %>%
  add_header_above(c(" " = 2, "Mean score (1-10)" = ncol(moral_wide) - 2)) %>%
  scroll_box(height = "700px")
Mean score (1-10)
region n_countries Welfare cheating Fare dodging Stealing Tax cheating Bribery Homosexuality Prostitution Abortion Divorce Premarital sex Suicide Euthanasia Domestic violence Child beating Violence
Eastern Europe & Post-Soviet 13 2.5 2.6 1.5 2.2 1.7 2.4 2.3 3.5 4.6 4.6 1.9 0.9 1.7 2.1 1.5
Middle East & North Africa (MENA) 13 2.9 2.4 1.7 2.2 1.7 1.4 0.1 2.1 3.8 0.6 1.7 1.5 2.4 3.3 1.9
Latin America & Caribbean 10 3.3 3.2 1.9 2.3 2.0 4.2 3.0 2.8 5.4 5.7 2.1 2.1 1.8 2.9 1.9
East & Southeast Asia 9 3.4 2.7 2.0 2.2 2.1 3.6 2.2 3.2 4.3 4.1 2.5 2.4 2.1 3.2 2.2
Western Europe & Offshoots 8 2.0 2.2 1.5 1.7 1.6 6.4 3.0 5.5 6.9 7.4 3.3 2.4 1.4 1.7 1.7
Sub-Saharan Africa 5 2.5 2.5 2.2 2.5 2.5 2.2 2.3 2.4 3.1 3.5 2.3 1.2 3.1 5.1 2.5
India 1 2.5 1.7 1.6 1.7 1.6 1.6 1.6 1.6 1.9 1.6 1.6 0.0 1.8 2.9 1.6
Pakistan 1 2.0 1.8 1.7 1.8 1.7 1.5 1.4 1.5 2.2 1.5 1.5 1.6 1.6 2.0 1.5

Institutional trust

# Institutional confidence items V108-V124
trust_vars <- sprintf("V%d", 108:124)

# Display labels, in the same order as trust_vars
trust_labels <- setNames(
  c(
    "Churches/ equivalent", "Armed forces", "Press",
    "TV", "Labor unions", "Police",
    "Courts", "National government", "Political parties",
    "Parliament", "Civil service", "Universities",
    "Major companies", "Banks", "Environmental orgs",
    "Women's orgs", "Charities"
  ),
  trust_vars
)

# Respondents with a known region and weight; negative values in the
# confidence items are recoded to NA.
wvs_clean_trust <- wvs_region %>%
  filter(!is.na(region), !is.na(V258)) %>%
  mutate(
    across(all_of(trust_vars), function(col) ifelse(col < 0, NA, col))
  )

# Weighted share of respondents per region answering 1 or 2 on item `v`,
# among those with a non-missing answer. Respondents are pooled within the
# region and weighted by V258.
high_confidence_share <- function(v) {
  answered <- filter(wvs_clean_trust, !is.na(.data[[v]]))

  answered %>%
    group_by(region) %>%
    summarise(
      prop_high = sum(V258 * (.data[[v]] <= 2)) / sum(V258),
      variable = v,
      .groups = "drop"
    )
}

# Long table: one row per region x institution.
region_trust_weighted <- bind_rows(lapply(trust_vars, high_confidence_share))

# Wide layout: one row per region, one labelled column per institution,
# shares formatted as percentage strings.
trust_wide_weighted <- region_trust_weighted %>%
  select(region, variable, prop_high) %>%
  mutate(
    prop_high = scales::percent(prop_high, accuracy = 0.1),
    variable = recode(variable, !!!trust_labels)
  ) %>%
  pivot_wider(names_from = variable, values_from = prop_high)

# Render the institutional-confidence table.
# kable() has no `na_print` argument (it was silently swallowed by `...`);
# knitr takes the placeholder for missing cells from the `knitr.kable.NA`
# option, so set that instead. The option stays set for later tables.
options(knitr.kable.NA = "—")

# The grouped header spans every column after `region`, so its width is
# taken from the table rather than assumed from the label vector.
kable(
  trust_wide_weighted,
  escape = FALSE,
  caption = "Regional distribution of high institutional confidence (% respondents with high confidence: 1–2)"
) %>%
  kable_styling(full_width = TRUE, position = "center") %>%
  add_header_above(
    c(" " = 1, "% High confidence" = ncol(trust_wide_weighted) - 1)
  ) %>%
  scroll_box(height = "700px")
Regional distribution of high institutional confidence (% respondents with high confidence: 1–2)
% High confidence
region Churches/ equivalent Armed forces Press TV Labor unions Police Courts National government Political parties Parliament Civil service Universities Major companies Banks Environmental orgs Women’s orgs Charities
East & Southeast Asia 56.7% 74.1% 60.0% 62.5% 54.6% 69.8% 76.0% 61.4% 46.9% 53.5% 62.9% 79.4% 64.5% 77.4% 69.6% 69.0% 65.1%
Eastern Europe & Post-Soviet 66.4% 70.7% 42.9% 51.0% 39.9% 51.5% 46.4% 48.7% 32.8% 40.9% 52.9% 70.2% 50.3% 51.4% 59.5% 58.7% 59.3%
India 96.1% 87.1% 71.8% 74.5% 63.4% 51.2% 64.5% 50.4% 37.4% 58.4% 61.0% 80.1% 52.0% 86.6% 70.2% 75.0% 68.5%
Latin America & Caribbean 56.6% 43.7% 36.8% 38.8% 28.8% 34.9% 29.6% 34.4% 17.5% 23.8% 24.6% 67.0% 46.2% 44.6% 58.5% 57.3% 56.4%
Middle East & North Africa (MENA) 70.2% 65.4% 34.5% 41.5% 29.6% 62.3% 59.0% 43.6% 18.3% 32.7% 40.8% 59.2% 44.4% 50.5% 44.5% 40.9% 56.5%
Pakistan 98.1% 84.4% 42.4% 55.4% 46.2% 21.0% 47.1% 37.2% 31.1% 28.0% 39.2% 62.7% 51.2% 65.4% 40.4% 45.3% 41.9%
Sub-Saharan Africa 80.9% 61.1% 54.3% 62.7% 48.8% 49.5% 54.4% 51.0% 39.6% 49.6% 54.8% 68.4% 61.6% 66.8% 57.2% 59.2% 61.9%
Western Europe & Offshoots 42.4% 69.1% 32.3% 36.8% 36.6% 73.1% 62.7% 38.4% 20.5% 36.9% 46.9% 78.7% 40.1% 37.8% 59.5% 58.8% 62.8%

Child qualities

# Child-quality items V12-V22
child_vars <- sprintf("V%d", 12:22)

# Respondents with a known region and weight; negative values in the
# child-quality items are recoded to NA.
wvs_clean_child <- wvs_region %>%
  filter(!is.na(region), !is.na(V258)) %>%
  mutate(
    across(all_of(child_vars), function(col) ifelse(col < 0, NA, col))
  )

# For each child-quality item: weighted share of respondents per region whose
# answer equals 1, among those with a non-missing answer (weights: V258).
# Results are stacked into one long table (region, prop_mentioned, variable).
child_share_list <- lapply(child_vars, function(item) {
  with_answer <- wvs_clean_child %>%
    filter(!is.na(.data[[item]]))

  with_answer %>%
    group_by(region) %>%
    summarise(
      prop_mentioned = sum(V258 * (.data[[item]] == 1)) / sum(V258),
      variable = item,
      .groups = "drop"
    )
})

region_child_weighted <- bind_rows(child_share_list)

# Display labels for the child-quality items, keyed by variable name
# (V12-V22).
child_labels <- setNames(
  c(
    "Independence",
    "Hard work",
    "Responsibility",
    "Imagination",
    "Tolerance & respect",
    "Thrift",
    "Perseverance",
    "Religious faith",
    "Unselfishness",
    "Obedience",
    "Self-expression"
  ),
  paste0("V", 12:22)
)

# Wide layout: one row per region, one labelled column per child quality,
# shares formatted as percentage strings.
child_wide <- region_child_weighted %>%
  select(region, variable, prop_mentioned) %>%
  mutate(
    variable = recode(variable, !!!child_labels),
    prop_mentioned = scales::percent(prop_mentioned, accuracy = 0.1)
  ) %>%
  pivot_wider(names_from = variable, values_from = prop_mentioned)

# Render the child-qualities table.
# kable() has no `na_print` argument (it was silently swallowed by `...`);
# knitr takes the placeholder for missing cells from the `knitr.kable.NA`
# option, so set that instead. The option stays set for later tables.
options(knitr.kable.NA = "—")

# The grouped header spans every column after `region`, so its width is
# taken from the table rather than assumed from the label vector.
kable(
  child_wide,
  escape = FALSE,
  caption = "Regional distribution of qualities considered important for children (% mentioning each quality)"
) %>%
  kable_styling(full_width = TRUE, position = "center",
                bootstrap_options = c("striped","hover","condensed")) %>%
  add_header_above(c(" " = 1, "% Mentioned" = ncol(child_wide) - 1)) %>%
  scroll_box(height = "600px")
Regional distribution of qualities considered important for children (% mentioning each quality)
% Mentioned
region Independence Hard work Responsibility Imagination Tolerance & respect Thrift Perseverance Religious faith Unselfishness Obedience Self-expression
East & Southeast Asia 65.9% 57.6% 74.5% 20.9% 61.2% 51.1% 43.6% 22.6% 30.1% 20.2% 22.8%
Eastern Europe & Post-Soviet 49.4% 79.5% 78.8% 16.7% 67.7% 47.1% 43.6% 24.9% 24.0% 32.3% 31.1%
India 89.9% 94.6% 90.5% 69.2% 89.3% 85.7% 84.7% 83.1% 81.1% 89.9% 73.5%
Latin America & Caribbean 37.8% 43.0% 67.1% 19.8% 70.0% 26.2% 29.4% 37.6% 45.0% 59.2% 19.2%
Middle East & North Africa (MENA) 42.5% 52.3% 66.4% 17.3% 67.1% 29.8% 26.4% 71.0% 32.4% 47.2% 22.7%
Pakistan 43.1% 55.7% 62.3% 31.2% 52.8% 42.4% 35.9% 72.8% 36.6% 50.0% 16.7%
Sub-Saharan Africa 44.8% 74.7% 51.3% 21.8% 58.3% 28.4% 39.4% 55.5% 32.0% 56.1% 20.9%
Western Europe & Offshoots 59.3% 42.9% 75.8% 30.7% 77.4% 35.7% 43.2% 20.1% 29.1% 24.2% 28.5%
# Number of child-quality items (V12-V22) each respondent answered with 1.
# wvs_region has no `total_selected` column, so reading it with `$` returned
# NULL and mean()/max() gave NA / -Inf with warnings. The count is derived
# here before it is summarised.
total_selected <- wvs_region %>%
  mutate(
    total_selected = rowSums(
      across(all_of(child_vars), ~ .x == 1),
      na.rm = TRUE
    )
  ) %>%
  pull(total_selected)

mean(total_selected, na.rm = TRUE)
max(total_selected, na.rm = TRUE)
# Average number of child-quality items answered with 1 per respondent,
# restricted to country code V2 == 356.
# NOTE(review): 356 is presumably India; confirm against country_map.
country_356 <- filter(wvs_region, V2 == 356)

country_356 %>%
  mutate(
    total_selected = rowSums(
      across(V12:V22, function(q) q == 1),
      na.rm = TRUE
    )
  ) %>%
  summarise(mean_selected = mean(total_selected))
##   mean_selected
## 1       9.31486