library(tidyverse)
library(readxl)
library(janitor)
library(survey)
library(knitr)
library(kableExtra)
# ── File path ─────────────────────────────────────────────────────────────────
datafile <- "D:/Populism and Democrary/India civic behaviour survey/Survey_Data.xlsx"
stopifnot(file.exists(datafile))
# ── Load raw sheets (never modified after this point) ─────────────────────────
# Sheets are read by position: sheet 1 is treated as rural, sheet 2 as urban.
raw_rural <- read_excel(datafile, sheet = 1)
raw_urban <- read_excel(datafile, sheet = 2)
# ── Sanity check ──────────────────────────────────────────────────────────────
# Number of rows whose `id` repeats an earlier row of the same sheet.
# Returns NA (not 0) when the sheet has no column literally named `id`, so a
# missing or differently-cased id column cannot pass as "no duplicates".
count_dup_ids <- function(sheet) {
  if (!"id" %in% names(sheet)) {
    return(NA_integer_)
  }
  sum(duplicated(sheet[["id"]]))
}
dims <- tibble(
  sheet = c("Rural", "Urban"),
  rows = c(nrow(raw_rural), nrow(raw_urban)),
  cols = c(ncol(raw_rural), ncol(raw_urban)),
  dup_ids = c(count_dup_ids(raw_rural), count_dup_ids(raw_urban))
)
kable(dims, caption = "Sanity check: datasets loaded (rows × cols)") |>
  kable_styling(full_width = FALSE,
                bootstrap_options = c("striped", "hover", "condensed"))
Sanity check: datasets loaded (rows × cols)
|
sheet
|
rows
|
cols
|
dup_ids
|
|
Rural
|
4187
|
51
|
0
|
|
Urban
|
5001
|
51
|
0
|
# ── Create working copies — raw_* objects remain untouched ────────────────────
# Build an analysis-ready copy of one raw sheet.
#   df            : raw sheet as returned by read_excel()
#   sample_label  : value stored in the new `sample_type` column
#   missing_codes : numeric codes recoded to NA (default 98 and 99)
# Returns the sheet with janitor-cleaned column names, a `sample_type` column,
# and the demographic and survey-item columns coerced to numeric with
# `missing_codes` replaced by NA.
make_workfile <- function(df, sample_label, missing_codes = c(98, 99)) {
  # Coerce to numeric first so codes stored as text are recoded too
  recode_missing <- function(x) {
    x <- as.numeric(x)
    x[x %in% missing_codes] <- NA
    x
  }
  df |>
    clean_names() |>
    mutate(
      sample_type = sample_label,
      across(c(a2, a3, a4, a5, a6, a6a, a7, a7a, a8), recode_missing),
      # Survey items are an r/u prefix followed by a digit (r28, u21, ...).
      # Anchoring on the digit keeps other columns that merely begin with
      # "r" or "u" out of the numeric coercion, and covers the urban sheet's
      # u-prefixed items as well as the rural r-prefixed ones.
      across(matches("^[ru][0-9]"), recode_missing)
    )
}
# Working copies of both sheets; the two calls are independent of each other.
urban <- make_workfile(raw_urban, "Urban")
rural <- make_workfile(raw_rural, "Rural")
# ── Column alignment check before binding ─────────────────────────────────────
# identical() is TRUE only when both name vectors match exactly, order included.
col_check <- tibble(
  check = "Column names identical across sheets",
  result = identical(names(rural), names(urban))
)
col_check |>
  kable(caption = "Column alignment check") |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
Column alignment check
|
check
|
result
|
|
Column names identical across sheets
|
FALSE
|
# ── Stack into one analysis file ──────────────────────────────────────────────
# bind_rows() matches columns by name; a column present in only one sheet is
# filled with NA for the other sheet's rows.
df <- bind_rows(rural, urban)
# ── Coverage: n per state per sample type ─────────────────────────────────────
# One row per state, one count column per sample type; state/sample
# combinations with no respondents show 0.
n_table <- local({
  per_cell <- count(df, state, sample_type)
  wide <- pivot_wider(
    per_cell,
    names_from = sample_type,
    values_from = n,
    values_fill = 0
  )
  arrange(wide, state)
})
n_table |>
  kable(caption = "Raw N per state by sample type (unweighted)") |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
Raw N per state by sample type (unweighted)
|
state
|
Rural
|
Urban
|
|
Andhra Pradesh
|
217
|
215
|
|
Assam
|
202
|
205
|
|
Bihar
|
203
|
200
|
|
Chandigarh
|
0
|
400
|
|
Chhattisgarh
|
210
|
205
|
|
Gujrat
|
203
|
201
|
|
Haryana
|
209
|
215
|
|
Himachal Pradesh
|
211
|
214
|
|
Jharkhand
|
217
|
211
|
|
Karnataka
|
217
|
219
|
|
Kerala
|
207
|
211
|
|
Madhya Pradesh
|
201
|
213
|
|
Maharashtra
|
211
|
204
|
|
NCT of Delhi
|
0
|
415
|
|
Odisha
|
201
|
206
|
|
Punjab
|
215
|
210
|
|
Rajasthan
|
227
|
218
|
|
Tamil Nadu
|
211
|
213
|
|
Telangana
|
215
|
208
|
|
Uttar Pradesh
|
205
|
213
|
|
Uttarkhand
|
201
|
201
|
|
West Bengal
|
204
|
204
|
Demographics
# ── Demographic recoding ───────────────────────────────────────────────────────
# Each derived column is an ordered-by-level factor built straight from the
# numeric code; codes outside the listed values (including NA) become NA.
# NOTE(review): the code-to-label mapping is taken as given here; confirm
# against the survey codebook.
df <- df |>
  mutate(
    # Age: codes 1-2, 3-4 and 5 collapse into three cohorts
    age_cohort = factor(
      case_when(
        a2 %in% c(1, 2) ~ "Young (18–35)",
        a2 %in% c(3, 4) ~ "Middle (36–60)",
        a2 == 5 ~ "Senior (60+)"
      ),
      levels = c("Young (18–35)", "Middle (36–60)", "Senior (60+)")
    ),
    # Gender: only codes 1 and 2 are labelled
    gender = factor(a3, levels = 1:2, labels = c("Man", "Woman")),
    # Education: codes 1-4, 5-6 and 7-9 collapse into three cohorts
    edu_cohort = factor(
      case_when(
        a5 %in% 1:4 ~ "Below Secondary",
        a5 %in% 5:6 ~ "Secondary / HSC",
        a5 %in% 7:9 ~ "Graduate & Above"
      ),
      levels = c("Below Secondary", "Secondary / HSC", "Graduate & Above")
    ),
    # Religion: codes 1-6 map one-to-one onto the labels below
    religion = factor(
      a7,
      levels = 1:6,
      labels = c("Hindu", "Muslim", "Sikh", "Christian", "Atheist", "Other")
    )
  )
# ── Verification table ────────────────────────────────────────────────────────
# Frequency table for one demographic variable.
#   df    : data frame holding the variable
#   var   : unquoted column name to tabulate
#   label : text stored in the `variable` column of the result
# Returns columns variable, category, n, pct. Rows with a missing category are
# dropped before percentages are computed, so pct is a share of non-missing
# responses, rounded to one decimal.
verify_demo <- function(df, var, label) {
  tallies <- count(df, category = {{ var }})
  observed <- filter(tallies, !is.na(category))
  with_share <- mutate(
    observed,
    variable = label,
    pct = round(n / sum(n) * 100, 1)
  )
  select(with_share, variable, category, n, pct)
}
# One frequency table per derived demographic, stacked into a single table.
demo_check <- bind_rows(list(
  verify_demo(df, age_cohort, "Age Cohort"),
  verify_demo(df, gender, "Gender"),
  verify_demo(df, edu_cohort, "Education"),
  verify_demo(df, religion, "Religion")
))
demo_check |>
  kable(caption = "Demographic distributions (unweighted)") |>
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) |>
  # Merge the repeated `variable` cells so each variable name appears once
  collapse_rows(columns = 1, valign = "top")
Demographic distributions (unweighted)
|
variable
|
category
|
n
|
pct
|
|
Age Cohort
|
Young (18–35)
|
4121
|
44.9
|
|
Middle (36–60)
|
3863
|
42.0
|
|
Senior (60+)
|
1204
|
13.1
|
|
Gender
|
Man
|
4663
|
50.8
|
|
Woman
|
4525
|
49.2
|
|
Education
|
Below Secondary
|
3435
|
37.4
|
|
Secondary / HSC
|
4108
|
44.7
|
|
Graduate & Above
|
1645
|
17.9
|
|
Religion
|
Hindu
|
7703
|
83.8
|
|
Muslim
|
921
|
10.0
|
|
Sikh
|
346
|
3.8
|
|
Christian
|
197
|
2.1
|
|
Atheist
|
1
|
0.0
|
|
Other
|
20
|
0.2
|
Discrimination and diversity
Rural
# ── Variable mapping: Discrimination / Diversity — RURAL ─────────────────────
# Named character vector: names are the display labels, values are the item
# columns r28 through r33, in that order.
disc_vars_rural <- setNames(
  paste0("r", 28:33),
  c(
    "Employer right\nnot to hire by religion",
    "Free to marry —\ndifferent religions",
    "Free to marry —\ndifferent castes",
    "Villagers exclude those\nwho eat certain foods",
    "Comfortable with diff.\nreligion neighbours",
    "Lower caste workers —\nsame water/toilets"
  )
)
# ── Clean helper: 1/2 = Agree, 3/4 = Disagree ────────────────────────────────
# Collapse a four-point response code into two categories.
#   x : vector of response codes
# Returns a character vector the same length as x: "Agree" for 1 or 2,
# "Disagree" for 3 or 4, NA for anything else (including NA).
clean_agree <- function(x) {
  collapsed <- rep(NA_character_, length(x))
  collapsed[x %in% c(1, 2)] <- "Agree"
  collapsed[x %in% c(3, 4)] <- "Disagree"
  collapsed
}
# ── Compute % Agree per question by demographic ───────────────────────────────
# Agree/Disagree split for each question within each level of a grouping column.
#   df        : respondent-level data
#   vars      : named character vector; names are question labels, values are
#               the item column names
#   group_var : unquoted grouping column
# Returns one row per question x group x response with columns group, response,
# n, pct and question. Rows with a missing response or missing group are
# dropped first, so pct sums to roughly 100 within each question x group.
compute_disc <- function(df, vars, group_var) {
  tabulate_item <- function(v, label) {
    recoded <- mutate(df, response = clean_agree(.data[[v]]))
    usable <- filter(recoded, !is.na(response), !is.na({{ group_var }}))
    tallies <- count(usable, group = {{ group_var }}, response)
    tallies |>
      group_by(group) |>
      mutate(pct = round(n / sum(n) * 100, 1)) |>
      ungroup() |>
      mutate(question = label)
  }
  # imap() passes each item column as `v` and its label (the name) as `label`
  bind_rows(imap(vars, tabulate_item))
}
# ── Shared plot helper ────────────────────────────────────────────────────────
# Stacked Agree/Disagree bar chart, one facet per question. Every section
# below uses it, so the chart styling lives in one place.
#   data       : output of compute_disc() (columns group, response, n, pct,
#                question)
#   title      : plot title
#   wrap_width : width passed to str_wrap() for the facet strip labels
# Returns a ggplot object, which auto-prints when called at top level.
plot_agree_split <- function(data, title, wrap_width = 30) {
  data |>
    filter(!is.na(group)) |>
    mutate(question = str_wrap(question, wrap_width)) |>
    ggplot(aes(x = group, y = pct, fill = response)) +
    geom_col(position = "stack", width = 0.6) +
    # Segments below 8% get no label
    geom_text(aes(label = ifelse(pct >= 8, paste0(pct, "%"), "")),
              position = position_stack(vjust = 0.5),
              size = 3, colour = "white", fontface = "bold") +
    facet_wrap(~ question, nrow = 2) +
    scale_fill_manual(values = c("Agree" = "#4E79A7", "Disagree" = "#E15759"),
                      name = NULL) +
    # pct is already on a 0-100 scale, so only append the % sign
    scale_y_continuous(labels = scales::label_percent(scale = 1)) +
    labs(
      title = title,
      subtitle = "India Civic Behaviour Survey — unweighted | 1+2 = Agree, 3+4 = Disagree",
      x = NULL, y = "% of respondents"
    ) +
    theme_minimal(base_size = 11) +
    theme(
      legend.position = "bottom",
      strip.text = element_text(face = "bold", size = 9),
      panel.grid.major.x = element_blank(),
      plot.title = element_text(face = "bold", size = 13),
      plot.subtitle = element_text(colour = "grey40", size = 10)
    )
}
# ── Rural data only ───────────────────────────────────────────────────────────
rural_df <- df |> filter(sample_type == "Rural")
# ── Age cohort — Rural ────────────────────────────────────────────────────────
disc_rural_age <- compute_disc(rural_df, disc_vars_rural, age_cohort)
plot_agree_split(disc_rural_age,
                 "Discrimination & Diversity — Rural: by Age Cohort")

# ── Gender — Rural ────────────────────────────────────────────────────────────
disc_rural_gender <- compute_disc(rural_df, disc_vars_rural, gender)
plot_agree_split(disc_rural_gender,
                 "Discrimination & Diversity — Rural: by Gender")

Urban
# ── Variable mapping: Discrimination / Diversity — URBAN ─────────────────────
disc_vars_urban <- c(
  "Employer right\nnot to hire by religion" = "u28",
  "Free to marry —\ndifferent religions" = "u29",
  "Free to marry —\ndifferent castes" = "u30",
  "Housing societies\nright to ban food" = "u31",
  "Comfortable with diff.\nreligion neighbours" = "u32",
  "Domestic help —\nsame bathroom" = "u33"
)
# Urban respondents only; reused by the urban gender-attitudes section below
urban_df <- df |> filter(sample_type == "Urban")
# ── Age cohort — Urban ────────────────────────────────────────────────────────
disc_urban_age <- compute_disc(urban_df, disc_vars_urban, age_cohort)
plot_agree_split(disc_urban_age,
                 "Discrimination & Diversity — Urban: by Age Cohort")

# ── Gender — Urban ────────────────────────────────────────────────────────────
disc_urban_gender <- compute_disc(urban_df, disc_vars_urban, gender)
plot_agree_split(disc_urban_gender,
                 "Discrimination & Diversity — Urban: by Gender")

Gender attitudes
Rural
# ── Variable mapping: Gender Attitudes — RURAL ────────────────────────────────
gender_vars_rural <- c(
  "Husband justified\nto beat wife" = "r21",
  "Woman free to decide\non her earnings" = "r22",
  "Male members take\nfinal household decisions" = "r23",
  "Woman free to marry\nagainst parents' wishes" = "r24",
  "Women vote as\nmale family members" = "r25",
  "Women encouraged\nto work outside" = "r26",
  "Daughters encouraged\nto study like sons" = "r27"
)
# ── Age cohort — Rural ────────────────────────────────────────────────────────
gender_rural_age <- compute_disc(rural_df, gender_vars_rural, age_cohort)
plot_agree_split(gender_rural_age,
                 "Gender Attitudes — Rural: by Age Cohort")

# ── Gender — Rural ────────────────────────────────────────────────────────────
gender_rural_gender <- compute_disc(rural_df, gender_vars_rural, gender)
plot_agree_split(gender_rural_gender,
                 "Gender Attitudes — Rural: by Gender")

Urban
# ── Variable mapping: Gender Attitudes — URBAN ────────────────────────────────
gender_vars_urban <- c(
  "Husband justified\nto beat wife" = "u21",
  "Woman free to decide\non her earnings" = "u22",
  "Male members take\nfinal household decisions" = "u23",
  "Woman free to marry\nagainst parents' wishes" = "u24",
  "Women vote as\nmale family members" = "u25",
  "Women encouraged\nto work outside" = "u26",
  "Daughters encouraged\nto study like sons" = "u27"
)
# ── Age cohort — Urban ────────────────────────────────────────────────────────
gender_urban_age <- compute_disc(urban_df, gender_vars_urban, age_cohort)
plot_agree_split(gender_urban_age,
                 "Gender Attitudes — Urban: by Age Cohort")

# ── Gender — Urban ────────────────────────────────────────────────────────────
gender_urban_gender <- compute_disc(urban_df, gender_vars_urban, gender)
plot_agree_split(gender_urban_gender,
                 "Gender Attitudes — Urban: by Gender")
