PALS Survey Structure and Methods

Total questions: ~115-120 items shown per respondent (varies by randomization/filtering), organized into 6 modules (A-F) across ~90 distinct variables

A (Core: Individual Self-Determination): 3 items (A01 single scale, A02 6-group battery, A03 multi-select “live freely” list ~20 options randomized)

B (Political/Economic/Socio-Cultural Elements): 9 items (B01 5 scales, B02 3 scales, B03 3 scales, B04 1 scale, B05 4 scales, B06 1 scale, B07-B09 conjoint tasks)

C (Liberal Script Applications/Contestations): 20+ items (C01 9 scales borders, C02 multi-select levels, C03 4 scales interventions, C04 5 scales public goods, C05 7 scales scarce jobs, C06 3 scales leadership, C07 2 scales generations, C08 6 scales temporality)

D (Political Values/Attitudes): 20+ items (D01 multi-select threats ~15 randomized, D02 2 scales satisfaction, D03 5 scales evaluations, D04 3 scales deprivation, D05 3 scales identity, D06 multi-select postmaterialism, D07 6 scales RWA, D08 3 scales globalization, D09 3 vignette experiments security trade-offs)

E (Voting Behavior): 3 items (E01 participation, E02/E03 vote choice/intention country-specific parties)

F (Sociodemographics): 26+ items (F01-F27: gender, birth year, education ISCED, employment, migration, religion ~30 country-specific, income, etc.)

library(tidyverse)
library(haven)
library(labelled)
library(survey)
library(forcats)
library(openxlsx)
library(Hmisc)
library(ggplot2)

# Import the Stata file holding the PALS extended dataset.
# `PALS_extended_dataset` keeps the imported object under its original name;
# `pals` is the shorter working alias used by the analysis code.
PALS_extended_dataset <- haven::read_dta(file = "PALS extended dataset.dta")
pals <- PALS_extended_dataset

Mapping the countries with the country codes

# Lookup table: numeric PALS country code -> English country name.
# The codes run consecutively from 11 to 36, in the order of the names below.
country_lookup <- tibble(
  country = as.numeric(11:36),
  country_name = c(
    "Australia", "Brazil", "Chile", "France",
    "Germany", "Ghana", "India", "Indonesia",
    "Italy", "Japan", "Latvia", "Mexico",
    "Nigeria", "Peru", "Poland",
    "Republic of Korea", "Russian Federation",
    "Senegal", "Singapore", "South Africa",
    "Spain", "Sweden", "Tunisia",
    "Türkiye", "United Kingdom", "United States"
  )
)

# Second name for the same table, kept for code that refers to `country_codes`.
country_codes <- country_lookup

Demographic profile

1. Gender

Gender distribution shows near parity everywhere (typically 45-55% Male/Female), with minimal “Other” responses.

# Gender composition per country: respondent counts and within-country shares.
# F01 codes 1/2/3 are labelled Male/Female/Other; every other value
# (including NA) is reported as "Missing" so that no respondent is dropped.
# The result is returned ungrouped so later verbs are not silently applied
# per country.
gender_country_table <- pals %>%
  left_join(country_lookup, by = "country") %>%
  mutate(
    gender = case_when(
      F01 == 1 ~ "Male",
      F01 == 2 ~ "Female",
      F01 == 3 ~ "Other",
      TRUE ~ "Missing"
    )
  ) %>%
  count(country_name, gender, name = "count") %>%
  group_by(country_name) %>%
  # Share of each gender category within its country, in percent (1 decimal).
  mutate(pct = round(100 * count / sum(count), 1)) %>%
  ungroup()



# Stacked horizontal bars: one bar per country, segments are gender shares (%).
# Countries are ordered by their summed respondent count.
gender_country_table %>%
  ggplot(
    aes(
      x = reorder(country_name, count, FUN = sum),
      y = pct,
      fill = gender
    )
  ) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Gender Distribution by Country (%)",
    x = "Country",
    y = "Percentage"
  ) +
  theme_minimal()

2. Age Cohorts

Age cohorts (Minor <18, Young 18-34, Middle-aged 35-54, Elderly 55+) mirror expected national demographics across countries.

# Age-cohort composition per country: respondent counts and within-country
# shares. Age is derived as 2021 minus F02 and binned into
# Minor (<18), Young (18-34), Middle-aged (35-54) and Elderly (55+);
# respondents whose age cannot be computed are reported as "Missing".
# NOTE(review): 2021 is hard-coded as the reference year - confirm it matches
# the fieldwork year for every country.
# NOTE(review): if F02 contains special codes (e.g. refusals stored as large
# numbers) they would yield negative ages and land in "Minor" - verify against
# the codebook.
# The result is returned ungrouped so later verbs are not silently applied
# per country.
age_country_table <- pals %>%
  left_join(country_lookup, by = "country") %>%
  mutate(
    age = 2021 - F02,
    age_cohort = case_when(
      age < 18 ~ "Minor",
      age <= 34 ~ "Young",
      age <= 54 ~ "Middle-aged",
      age >= 55 ~ "Elderly",
      TRUE ~ "Missing"
    )
  ) %>%
  count(country_name, age_cohort, name = "count") %>%
  group_by(country_name) %>%
  # Share of each cohort within its country, in percent (1 decimal).
  mutate(pct = round(100 * count / sum(count), 1)) %>%
  ungroup()

view(age_country_table)

# Stacked horizontal bars: one bar per country, segments are age-cohort
# shares (%). Countries are ordered by their summed respondent count.
ggplot(
  age_country_table,
  aes(
    x = reorder(country_name, count, FUN = sum),
    y = pct,
    fill = age_cohort
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_brewer(type = "qual", palette = "Set2") +
  labs(
    title = "Age Cohorts by Country (%)",
    x = "Country",
    y = "Percentage",
    fill = "Age Cohort"
  ) +
  theme_minimal()

3. Religion distribution

Collapse denominations into meaningful groups to avoid over-fragmentation; the codes are taken from the PALS codebook (variable F18).

Religion reveals an interesting pattern: despite detailed Christian denomination options, many Western and Latin American respondents selected “No religion/Other”. India predictably shows Hindu dominance, African nations show Christian/Muslim splits.

# Recode table for F18 (religious denomination): original answer label and the
# collapsed major group used in the country tables.
# Code 1 (Candomblé, Umbanda) is grouped with the other traditional/spiritist
# answers rather than with the Christian denominations, in line with how
# codes 2 and 31 are treated.
religion_codes <- tribble(
  ~F18, ~religion_original, ~religion_major,

  0, "No religion", "No religion / Other",

  # Christian denominations
  3, "Baptist", "Christian",
  5, "Christian", "Christian",
  6, "Christian Free Church", "Christian",
  7, "Church of England/Anglican", "Christian",
  8, "Church of Sweden", "Christian",
  11, "Jehova's Witness", "Christian",
  13, "Lutheran", "Christian",
  14, "Mormon", "Christian",
  16, "Orthodox", "Christian",
  17, "Orthodox (Eastern Orthodox, Oriental Orthodox)", "Christian",
  18, "Orthodox Church of Greece", "Christian",
  19, "Other Christian", "Christian",
  20, "Pentecostal/Charismatic", "Christian",
  21, "Pentecostal", "Christian",
  22, "Presbyterian", "Christian",
  23, "Protestant", "Christian",
  24, "Protestant (Anglican, Uniting Church, Presbyterian and Reformed, Baptist, Pentecostal, Lutheran, other Protestant)", "Christian",
  25, "Protestant Free Church", "Christian",
  26, "Protestant or Evangelical", "Christian",
  27, "Roman Catholic", "Christian",
  28, "Russian Orthodox Church", "Christian",

  # Other major religions
  4, "Buddhist", "Buddhist",
  10, "Hindu", "Hindu",
  15, "Muslim", "Muslim",
  12, "Jewish", "Jewish",
  30, "Sikh", "Sikh",

  # Traditional/Folk religions
  1, "African traditional religions (Candomblé, Umbanda)", "No religion / Other",
  2, "Ancestral, tribal, animist, and other traditional African religion", "No religion / Other",
  9, "Ethnic or folk religion", "No religion / Other",
  29, "Shintoist", "No religion / Other",
  31, "Spiritist", "No religion / Other",
  32, "Taoist", "No religion / Other",
  33, "Traditional", "No religion / Other",

  # Other
  997, "Other", "No religion / Other"
)

# Country-wise religion table: respondent counts and within-country shares of
# each collapsed religion group.
# F18 values that have no row in `religion_codes` (NA or unlisted codes) are
# reported as "Missing" instead of being folded into "No religion / Other",
# so the substantive category is not inflated by non-answers. This matches the
# "Missing" bucket used by the other demographic tables.
religion_country_table <- pals %>%
  left_join(country_lookup, by = "country") %>%
  left_join(select(religion_codes, F18, religion_major), by = "F18") %>%
  mutate(religion_major = coalesce(religion_major, "Missing")) %>%
  count(country_name, religion = religion_major, name = "total") %>%
  group_by(country_name) %>%
  # Share of each religion group within its country, in percent (1 decimal).
  mutate(pct = round(100 * total / sum(total), 1)) %>%
  ungroup()

view(religion_country_table)

# Stacked horizontal bars: one bar per country, segments are religion-group
# shares (%). Countries are ordered by their summed respondent count.
ggplot(
  religion_country_table,
  aes(
    x = reorder(country_name, total, FUN = sum),
    y = pct,
    fill = religion
  )
) +
  geom_col() +
  coord_flip() +
  scale_fill_brewer(type = "qual", palette = "Set3") +
  labs(
    title = "Major Religion by Country (%)",
    x = "Country",
    y = "Percentage",
    fill = "Religion"
  ) +
  theme_minimal()

4. Education

Education levels vary predictably by development level—higher tertiary strong in EU/West, lower secondary dominant in developing nations.

# Education composition per country: respondent counts and within-country
# shares. F03 codes 1-7 and 97/98/99 are labelled below; any other value
# (including NA) is reported as "Missing".
# The result is returned ungrouped and sorted by country, then category.
edu_country <- pals %>%
  left_join(country_lookup, by = "country") %>%
  mutate(
    education_level = case_when(
      F03 == 1 ~ "Less than lower secondary",
      F03 == 2 ~ "Lower secondary",
      F03 == 3 ~ "Upper secondary",
      F03 == 4 ~ "Post-secondary non-tertiary",
      F03 == 5 ~ "Lower tertiary (BA)",
      F03 == 6 ~ "Higher tertiary (MA+)",
      F03 == 7 ~ "Still in education",
      F03 == 97 ~ "Other",
      F03 == 98 ~ "Prefer not to say",
      F03 == 99 ~ "Don't know",
      TRUE ~ "Missing"
    )
  ) %>%
  count(country_name, education_level, name = "total") %>%
  group_by(country_name) %>%
  # Share of each education category within its country, in percent.
  mutate(pct = round(100 * total / sum(total), 1)) %>%
  ungroup() %>%
  arrange(country_name, education_level)

View(edu_country)

# Stacked horizontal bars: one bar per country, segments are education shares
# (%). Countries are ordered by their total respondent count; education
# categories are ordered by their mean share across countries.
edu_country %>%
  ungroup() %>%
  mutate(total_n = sum(total), .by = country_name) %>%
  # NOTE(review): total_n is constant within a country, so this per-country
  # slice appears to keep every row (ties are retained). If the intent was to
  # show only the largest countries, this step needs rethinking.
  slice_max(total_n, n = 13, by = country_name) %>%
  mutate(education_level = fct_reorder(education_level, pct, .fun = mean)) %>%
  ggplot(
    aes(
      x = reorder(country_name, total_n),
      y = pct,
      fill = education_level
    )
  ) +
  geom_col() +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Education by Countries (%)",
    x = "Country",
    y = "Percentage",
    fill = "Education"
  ) +
  scale_fill_brewer(type = "qual", palette = "Set3", name = NULL)

The interesting part!

India “extremes” analysis

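India – CAPI (computer-assisted personal interviewing) method, 2,822 responses. The sample covers 20 states, using the 2011 census as the sampling frame, with one major district per state. There are 149 primary sampling units (PSUs), urban and rural, with 20 respondents sampled per PSU.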

Random probability sample, stratified by degree of urbanity.

A random sampling point was drawn within each stratum, followed by a random walk, with the “next birthday rule” used to select the interviewee aged 18 or older.

I then examined the 6-point Likert-scale questions (1 = fully disagree/liberal to 6 = fully agree/conservative). I calculated the extreme concentration (% choosing 1 + % choosing 6) for all such items, then filtered to a ≥35% threshold to focus on truly polarized questions. Post-stratification weights (w1a) were used for within-India comparisons to ensure representativeness.

Some of the key findings: • 42% on “Society should accept everyone equally” - strong equality preference.

• 47% fully accept government health data collection.

• Market economy control: Remarkably balanced at 22% (private) vs 20% (state) extremes.

• Collective self-determination questions heavily favor citizens’ opinions over those of religious leaders, elected politicians, or experts (35-42% extremes). This suggests that Indian respondents still favour the citizenry over other decision-makers.

• 35% prefer “societal openness to change” vs tradition.

# India subsample (country code 17) with an analysis weight: `weight` equals
# w1a where w1a is present and strictly positive, and NA otherwise.
india_data <- filter(pals, country == 17)
india_data <- mutate(
  india_data,
  weight = ifelse(is.na(w1a) | w1a <= 0, NA, w1a)
)

# Names of all survey items treated as 6-point scales in this analysis.
# Single items are listed by name; batteries are expanded as "<stem>_<suffix>".
# NOTE(review): only C01_a to C01_h are listed here, while the module overview
# mentions 9 C01 scales - confirm against the codebook.
six_point_vars <- local({
  battery <- function(stem, suffixes) paste0(stem, "_", suffixes)
  first_letters <- function(k) letters[seq_len(k)]

  module_a <- c("A01", battery("A02", first_letters(6)))

  module_b <- c(
    battery("B01", first_letters(5)),
    battery("B02", first_letters(3)),
    battery("B03", first_letters(3)),
    "B04",
    battery("B05", first_letters(4)),
    "B06"
  )

  module_c <- c(
    battery("C01", first_letters(8)),
    battery("C03", c("a1", "a2", "b1", "b2")),
    battery("C04", first_letters(5)),
    battery("C05", first_letters(7)),
    battery("C06", first_letters(3)),
    battery("C07", first_letters(2)),
    battery("C08", first_letters(6))
  )

  module_d <- c(
    battery("D02", first_letters(2)),
    battery("D03", first_letters(5)),
    battery("D04", first_letters(3)),
    battery("D05", first_letters(3)),
    battery("D07", first_letters(6)),
    battery("D08", first_letters(3)),
    battery("D09", first_letters(3))
  )

  c(module_a, module_b, module_c, module_d)
})

# Weighted share of answers at the two end points of a 6-point scale.
#
# Args:
#   x: response vector; only values 1 to 6 count as valid answers.
#   w: weights aligned with `x`; observations with a missing weight are dropped.
#
# Returns:
#   Named numeric vector c(pct_1, pct_6, extreme), in percent, where
#   extreme = pct_1 + pct_6. All three are NA_real_ when there is no valid
#   observation or when the valid weights do not add up to a usable
#   (finite, non-zero) total, so the result type is always numeric.
weighted_extreme_6pt <- function(x, w) {
  no_result <- c(pct_1 = NA_real_, pct_6 = NA_real_, extreme = NA_real_)

  valid <- !is.na(x) & !is.na(w) & x %in% 1:6
  if (!any(valid)) {
    return(no_result)
  }

  total_w <- sum(w[valid])
  # Guard against 0/0 (NaN) when every valid observation has zero weight.
  if (!is.finite(total_w) || total_w == 0) {
    return(no_result)
  }

  pct_1 <- 100 * sum(w[valid & x == 1]) / total_w
  pct_6 <- 100 * sum(w[valid & x == 6]) / total_w

  c(pct_1 = pct_1, pct_6 = pct_6, extreme = pct_1 + pct_6)
}

# Weighted end-point shares for every 6-point item in the India subsample:
# one row per variable with pct_1, pct_6 and their sum (`extreme`), sorted
# from most to least concentrated at the extremes.
# Values are extracted with `[[` so the columns are plain (unnamed) vectors.
india_extremes_weighted <- six_point_vars %>%
  map(function(var_name) {
    shares <- weighted_extreme_6pt(india_data[[var_name]], india_data$weight)
    tibble(
      variable = var_name,
      pct_1 = shares[["pct_1"]],
      pct_6 = shares[["pct_6"]],
      extreme = shares[["extreme"]]
    )
  }) %>%
  bind_rows() %>%
  arrange(desc(extreme))

# Keep items whose combined end-point share is at least 35%, i.e. the
# inclusive threshold described in the write-up (rows with NA are dropped).
india_extremes_35 <- india_extremes_weighted %>%
  filter(extreme >= 35)

# Look up the question wording and the end-point value labels of a variable.
#
# Args:
#   var:  name of the variable (character scalar).
#   data: data frame whose columns carry "label" (question text) and "labels"
#         (named value labels) attributes; defaults to PALS_extended_dataset.
#
# Returns:
#   One-row tibble with `question`, `meaning_1` and `meaning_6`. A piece that
#   is not available is returned as NA_character_, so the three columns are
#   always present.
get_labels <- function(var, data = PALS_extended_dataset) {
  column <- data[[var]]

  # exact = TRUE: without it, asking for "label" could partially match the
  # "labels" attribute when a variable has no question text.
  question_text <- attr(column, "label", exact = TRUE)
  value_labels <- attr(column, "labels", exact = TRUE)

  # First label attached to a given code, or NA when there is none.
  label_for <- function(code) {
    hits <- names(value_labels)[which(value_labels == code)]
    if (length(hits) == 0) NA_character_ else hits[[1]]
  }

  tibble(
    question = if (is.null(question_text)) {
      NA_character_
    } else {
      as.character(question_text)[1]
    },
    meaning_1 = label_for(1),
    meaning_6 = label_for(6)
  )
}

# Final table of highly polarised items: attaches the question wording and the
# meaning of scale points 1 and 6, and flags which end point dominates.
# On an exact tie the direction is "Balanced" and no dominant meaning is
# reported (NA), so the two columns never contradict each other.
india_extremes_final <- india_extremes_35 %>%
  mutate(labels = map(variable, get_labels)) %>%
  unnest(labels) %>%
  mutate(
    dominant_direction = case_when(
      pct_1 > pct_6 ~ "Scale 1",
      pct_6 > pct_1 ~ "Scale 6",
      TRUE ~ "Balanced"
    ),
    dominant_meaning = case_when(
      pct_1 > pct_6 ~ meaning_1,
      pct_6 > pct_1 ~ meaning_6,
      TRUE ~ NA_character_
    )
  ) %>%
  select(
    variable,
    question,
    pct_1,
    meaning_1,
    pct_6,
    meaning_6,
    dominant_direction,
    dominant_meaning,
    total_extreme = extreme
  ) %>%
  arrange(desc(total_extreme))

# Display results
View(india_extremes_final)

India v/s the World

Next, I compared India’s weighted means against global averages (across all countries). Set threshold at ≥1.0 point deviation on 1-6 scale.

Striking result: Zero questions where India scores below global average by 1+ point (closest is 0.97). On 12 questions, India is 1.0+ points higher than the global average. In other words, when India diverges strongly from the world, it does so in one direction only.

Looking at these questions, we see that they are the ones where self-interest comes into conflict with liberal values, hence the conservative shift.

For example:

C07 | Generational conflict: (b) Current generations should be allowed to take on public debt to maintain their prosperity regardless of the fact that this constitutes a burden for future generations.

C01 | Borders: (f) My country should have the right to shoot at a person who crosses the country’s border illegally.

C01 | Borders: (b) My country should have the right to hinder citizens from leaving their country.

# Long-format responses for all 6-point items across every country: one row
# per respondent and item, with the country name and the w2 weight.
# Value labels are stripped so responses are plain numbers; rows are kept only
# when the response is 1 to 6 and the w2 weight is strictly positive.
pals_long_w2 <- pals %>%
  left_join(country_lookup, by = "country") %>%
  select(country_name, w2, all_of(six_point_vars)) %>%
  mutate(across(all_of(six_point_vars), haven::zap_labels)) %>%
  pivot_longer(
    cols = -c(country_name, w2),
    names_to = "variable",
    values_to = "response"
  ) %>%
  filter(w2 > 0, response %in% 1:6)

# w2-weighted mean response per country and item.
country_means_w2 <- summarise(
  group_by(pals_long_w2, country_name, variable),
  country_mean = weighted.mean(x = response, w = w2, na.rm = TRUE),
  .groups = "drop"
)

# w2-weighted mean response per item, pooled over all respondents in
# `pals_long_w2` (every country, India included).
global_means_w2 <- summarise(
  group_by(pals_long_w2, variable),
  global_mean = weighted.mean(x = response, w = w2, na.rm = TRUE),
  .groups = "drop"
)

# India's item means next to the pooled means, with the signed gap
# (India minus pooled) and its absolute value; largest absolute gaps first.
india_vs_global_w2 <- left_join(
  filter(country_means_w2, country_name == "India"),
  global_means_w2,
  by = "variable"
) %>%
  mutate(
    gap = country_mean - global_mean,
    abs_gap = abs(gap)
  ) %>%
  arrange(desc(abs_gap))

# Items where India's weighted mean is at least 1.0 scale point away from the
# pooled mean, with the question wording attached, largest gaps first.
# The wording lookup uses vapply() so `question` is always an unnamed
# character column (also when no item passes the filter); a variable without
# a "label" attribute falls back to its variable name.
india_far_from_world_w2 <- india_vs_global_w2 %>%
  filter(abs_gap >= 1.0) %>%
  mutate(
    question = vapply(
      variable,
      function(v) {
        # exact = TRUE avoids partially matching the "labels" attribute.
        wording <- attr(pals[[v]], "label", exact = TRUE)
        if (is.null(wording)) v else as.character(wording)[1]
      },
      character(1),
      USE.NAMES = FALSE
    )
  ) %>%
  select(
    variable,
    question,
    india_mean = country_mean,
    global_mean,
    gap,
    abs_gap
  ) %>%
  arrange(desc(abs_gap))

view(india_far_from_world_w2)

# Plot input: stack the India and pooled means into one `mean_value` column,
# with `group` recording which of the two each row holds.
india_plot_data <- pivot_longer(
  india_far_from_world_w2,
  cols = all_of(c("india_mean", "global_mean")),
  names_to = "group",
  values_to = "mean_value"
)

# Dodged horizontal bars comparing India's weighted mean with the pooled mean
# for each selected question.
# Legend labels are named by group value so each label stays attached to the
# right colour regardless of the order in which the groups are sorted.
ggplot(
  india_plot_data,
  aes(
    x = reorder(question, mean_value),
    y = mean_value,
    fill = group
  )
) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_manual(
    values = c("india_mean" = "firebrick", "global_mean" = "grey60"),
    labels = c("india_mean" = "India", "global_mean" = "Global average")
  ) +
  labs(
    title = "India vs Global Average on Selected Questions (w2-weighted)",
    subtitle = "Only questions where |India − Global| ≥ 1.0",
    x = NULL,
    y = "Weighted mean response (1–6 scale)",
    fill = NULL
  ) +
  theme_minimal()

# Distribution of the absolute gaps among the selected questions.
summary(india_far_from_world_w2$abs_gap)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.023   1.110   1.217   1.271   1.430   1.541

Moving to region-level breakdowns next… stay tuned

PALS Analysis

Arslan

2026-01-09