library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr     1.2.1     âś” readr     2.2.0
## âś” forcats   1.0.1     âś” stringr   1.6.0
## âś” ggplot2   4.0.3     âś” tibble    3.3.1
## âś” lubridate 1.9.5     âś” tidyr     1.3.2
## âś” purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(haven)
library(labelled)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loading required package: survival
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(srvyr)
## 
## Attaching package: 'srvyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(skimr)
library(gtsummary)
library(flextable)
## 
## Attaching package: 'flextable'
## 
## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary
## 
## The following object is masked from 'package:purrr':
## 
##     compose
library(ggplot2)

#cleaning and preparing data for analysis from main dataset, as per the research question and variables of interest. Used dhs_codebook for variable selection and coding.

main_dataset <- read_dta("D:/DHS_research/dataraw/bangladesh/individual.DTA")
analysis_data <- main_dataset %>%
select(v001,v002,v005,v021,v022, v012, v024, v025, v106, v130, v190, v502,v394,v467b,v467c,v467d,v467f,v481,v157,v158,v159)
#demographics,healthcare, media exposure
analysis_data <- analysis_data %>% #turning into factors for analysis
mutate(across(where(is.labelled),to_factor))
#adding weight for survey desing(as per dhs rules)
analysis_data <-analysis_data %>%
mutate(weight=v005/1000000)
#access problem and binary indicator for women's healthcare access barriers
analysis_data <- analysis_data %>%
mutate(access_problem = if_else(v467b == "big problem"|v467c =="big problem"|v467d=="big problem"|v467f == "big problem","Has Access Problem","No Access Problem"))
analysis_data <- analysis_data %>% 
mutate(access_bin = if_else(access_problem == "Has Access Problem",1,0))
#survey design for analysis
dhs_design <- svydesign(ids = ~v021,strata = ~v022,weights = ~weight,data = analysis_data,nest = TRUE)
dhs_design
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (672) clusters.
## svydesign(ids = ~v021, strata = ~v022, weights = ~weight, data = analysis_data, 
##     nest = TRUE)

##women’s healthcare access problems (%) by wealth

wealth_results <- svyby(~access_bin,~v190,dhs_design,svymean,na.rm = TRUE)
wealth_results <- wealth_results %>% #cleaning the results for percentage and better understanding
mutate(access_bin = round(access_bin * 100, 2),
se = round(se * 100, 2))
wealth_results
##            v190 access_bin   se
## poorest poorest      80.12 1.08
## poorer   poorer      75.50 1.04
## middle   middle      68.04 1.09
## richer   richer      62.90 1.15
## richest richest      49.86 1.31

#wealth figure

ggplot(wealth_results,aes(x = v190, y = access_bin)) +
geom_col(width = 0.7) +geom_text(aes(label = access_bin),vjust = -0.5,size = 8) +
labs(title = "Healthcare Access Problems by Wealth Quintile",subtitle = "Bangladesh DHS Women Dataset",x = "Wealth Quintile",y = "Women With Healthcare Access Problems (%)",caption = "Weighted DHS estimates") +
ylim(0, 100) + theme_minimal(base_size = 14)

#women’s access to healthcare access(%) by education

education_results <- svyby(~access_bin,~v106,dhs_design,svymean,na.rm = TRUE)
education_results <- education_results %>%
mutate(access_bin = round(access_bin * 100, 2),se = round(se * 100, 2))
education_results
##                      v106 access_bin   se
## no education no education      77.40 1.01
## primary           primary      73.27 0.85
## secondary       secondary      64.14 0.89
## higher             higher      45.97 1.35

#education figure

ggplot(education_results,aes(x = v106, y = access_bin)) +
geom_col(width = 0.7) +geom_text(aes(label = access_bin),vjust = -0.5,size = 8) +
labs(title = "Healthcare Access Problems by Education Level",subtitle = "Bangladesh DHS Women Dataset",x = "Education Level",y = "Women With Healthcare Access Problems (%)",caption = "Weighted DHS estimates") +ylim(0, 100) +theme_minimal(base_size = 14)

#women’s access to healthcare access(%) by urban vs rural

residence_results <- svyby(
~access_bin,~v025,dhs_design,svymean,na.rm = TRUE)
residence_results <- residence_results %>%
mutate(
access_bin = round(access_bin * 100, 2),
se = round(se * 100, 2))

residence_results
##        v025 access_bin   se
## urban urban      57.92 1.28
## rural rural      70.47 0.86

#urban vs rural figure

ggplot(residence_results,aes(x = v025, y = access_bin)) +
geom_col(width = 0.6) +geom_text(aes(label = access_bin),vjust = -0.5,size = 8) +
labs(title = "Healthcare Access Problems by Residence",subtitle = "Bangladesh DHS Women Dataset",x = "Residence",y = "Women With Healthcare Access Problems (%)",
caption ="Weighted DHS estimates")+ ylim(0, 100) + theme_minimal(base_size = 14)

#women’s access to healthcare access(%) by marital status

analysis_data <- analysis_data %>%
mutate(marital_group = case_when(v502 == "currently in union/living with a man" ~ "Currently Married",v502 == "formerly in union/living with a man" ~ "Formerly Married",
TRUE ~ NA_character_))
#survey design to include marital status
dhs_design <- svydesign(ids = ~v021,strata = ~v022,weights = ~weight,data = analysis_data,nest = TRUE)
marital_results <- svyby(~access_bin,~marital_group,dhs_design,svymean,na.rm = TRUE)

marital_results 
##                       marital_group access_bin          se
## Currently Married Currently Married  0.6663951 0.007278476
## Formerly Married   Formerly Married  0.7127945 0.016542411
marital_results <- marital_results %>%
mutate(access_bin = round(access_bin * 100, 2),se = round(se * 100, 2))

marital_results 
##                       marital_group access_bin   se
## Currently Married Currently Married      66.64 0.73
## Formerly Married   Formerly Married      71.28 1.65

#women’s access to healthcare by media exposure

#coding numeric for statistical analysis
analysis_data <- analysis_data %>%
mutate(media_exposure = if_else(v157 != "not at all" |v158 != "not at all" |v159 != "not at all",1,0))
table(analysis_data$media_exposure)
## 
##     0     1 
##  6981 13146
#labeling: convinient for interpretation
analysis_data <- analysis_data %>% #c
mutate(media_exposure = case_when(media_exposure == 1 ~ "Has Media Exposure",media_exposure == 0 ~ "No Media Exposure"))
table(analysis_data$media_exposure)
## 
## Has Media Exposure  No Media Exposure 
##              13146               6981
dhs_design <- svydesign(ids = ~v021,strata = ~v022, weights = ~weight,data = analysis_data, nest = TRUE)

media_results <- svyby(~access_bin,~media_exposure,dhs_design,svymean,na.rm = TRUE)
media_results
##                        media_exposure access_bin          se
## Has Media Exposure Has Media Exposure  0.6228659 0.008076747
## No Media Exposure   No Media Exposure  0.7585671 0.009204702
media_results <- media_results %>%
mutate(access_bin = round(access_bin * 100, 2),se = round(se * 100, 2))

media_results
##                        media_exposure access_bin   se
## Has Media Exposure Has Media Exposure      62.29 0.81
## No Media Exposure   No Media Exposure      75.86 0.92

#mMedia exposure figure

ggplot(media_results,aes(x = media_exposure, y = access_bin)) + geom_col(width = 0.6) +
geom_text(aes(label = access_bin),vjust = -0.5,size = 10)+
labs(title = "Healthcare Access Problems by Media Exposure",subtitle = "Bangladesh DHS Women Dataset",
x = "Media Exposure",y = "Women With Healthcare Access Problems (%)",caption = "Weighted DHS estimates")+ylim(0, 100)+
theme_minimal(base_size = 14)

All results together for better understanding.

combined_results <- bind_rows(wealth_results %>% mutate(variable = "Wealth", category = as.character(v190)) %>% select(variable, category, access_bin, se),
education_results %>% 
  mutate(variable = "Education", category = as.character(v106)) %>% select(variable, category, access_bin, se),
residence_results %>% 
  mutate(variable = "Residence", category = as.character(v025)) %>% select(variable, category, access_bin, se),
marital_results %>% 
  mutate(variable = "Marital Status", category = as.character(marital_group)) %>% select(variable, category, access_bin, se),
media_results %>% 
  mutate(variable = "Media Exposure", category = as.character(media_exposure)) %>% select(variable, category, access_bin, se))
combined_results
##                          variable           category access_bin   se
## poorest                    Wealth            poorest      80.12 1.08
## poorer                     Wealth             poorer      75.50 1.04
## middle                     Wealth             middle      68.04 1.09
## richer                     Wealth             richer      62.90 1.15
## richest                    Wealth            richest      49.86 1.31
## no education            Education       no education      77.40 1.01
## primary                 Education            primary      73.27 0.85
## secondary               Education          secondary      64.14 0.89
## higher                  Education             higher      45.97 1.35
## urban                   Residence              urban      57.92 1.28
## rural                   Residence              rural      70.47 0.86
## Currently Married  Marital Status  Currently Married      66.64 0.73
## Formerly Married   Marital Status   Formerly Married      71.28 1.65
## Has Media Exposure Media Exposure Has Media Exposure      62.29 0.81
## No Media Exposure  Media Exposure  No Media Exposure      75.86 0.92
health_model <- svyglm(
access_bin ~v190 + v106 +v025 +marital_group + media_exposure + v012 +v130,design = dhs_design,family = quasibinomial())
summary(health_model)
## 
## Call:
## svyglm(formula = access_bin ~ v190 + v106 + v025 + marital_group + 
##     media_exposure + v012 + v130, design = dhs_design, family = quasibinomial())
## 
## Survey design:
## svydesign(ids = ~v021, strata = ~v022, weights = ~weight, data = analysis_data, 
##     nest = TRUE)
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      1.392652   0.131845  10.563  < 2e-16 ***
## v190poorer                      -0.164180   0.070124  -2.341  0.01952 *  
## v190middle                      -0.421480   0.077822  -5.416 8.66e-08 ***
## v190richer                      -0.543680   0.083911  -6.479 1.85e-10 ***
## v190richest                     -0.886498   0.089079  -9.952  < 2e-16 ***
## v106primary                     -0.158870   0.060213  -2.638  0.00853 ** 
## v106secondary                   -0.445346   0.066572  -6.690 4.91e-11 ***
## v106higher                      -0.968762   0.081799 -11.843  < 2e-16 ***
## v025rural                        0.154491   0.069307   2.229  0.02616 *  
## marital_groupFormerly Married    0.088572   0.082153   1.078  0.28138    
## media_exposureNo Media Exposure  0.158657   0.052830   3.003  0.00278 ** 
## v012                            -0.002214   0.002279  -0.972  0.33154    
## v130hinduism                     0.196544   0.085946   2.287  0.02253 *  
## v130buddhism                     0.690856   0.373924   1.848  0.06513 .  
## v130christianity                 0.794006   0.577011   1.376  0.16929    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasibinomial family taken to be 1.000078)
## 
## Number of Fisher Scoring iterations: 4

Interpretation of the results:

1.Richest women are much less likely to face healthcare access barriers compared to the poorest women.
2.Women with higher education have substantially lower odds of healthcare access problems.
3.Rural women have higher odds of healthcare access problems than urban women.
4.Women without media exposure have higher healthcare access barriers.
5.After controlling for other factors, marital status does not independently predict healthcare access problems.

Other visualization

rank vulnerbility

library(ggplot2)
library(dplyr)

combined_ranked <- combined_results %>%
  arrange(desc(access_bin)) %>%
  mutate(
    category_label = paste(variable, category, sep = ": "),
    category_label = reorder(category_label, access_bin)
  )

ggplot(combined_ranked, aes(x = category_label, y = access_bin)) +
  geom_col(width = 0.7) +
  geom_text(
    aes(label = paste0(access_bin, "%")),
    hjust = -0.1,
    size = 3.5
  ) +
  coord_flip() +
  labs(
    title = "Most Vulnerable Groups for Healthcare Access Problems",
    subtitle = "Ranked weighted estimates from Bangladesh DHS women dataset",
    x = "",
    y = "Women With Healthcare Access Problems (%)",
    caption = "Weighted DHS estimates"
  ) +
  ylim(0, 100) +
  theme_minimal(base_size = 13)

#

library(ggplot2)
library(dplyr)
library(forcats)

combined_ranked <- combined_results %>%
  arrange(desc(access_bin)) %>%
  mutate(
    category_label = paste(variable, category, sep = ": "),
    category_label = fct_reorder(category_label, access_bin),
    risk_level = case_when(
      access_bin >= 75 ~ "Very High",
      access_bin >= 65 ~ "High",
      access_bin >= 55 ~ "Moderate",
      TRUE ~ "Lower"
    )
  )

ggplot(combined_ranked,
       aes(x = category_label, y = access_bin, fill = risk_level)) +
  geom_col(width = 0.75, alpha = 0.9) +
  geom_text(
    aes(label = paste0(access_bin, "%")),
    hjust = -0.15,
    size = 4,
    fontface = "bold"
  ) +
  coord_flip() +
  scale_fill_manual(
    values = c(
      "Very High" = "#B2182B",
      "High" = "#EF8A62",
      "Moderate" = "#67A9CF",
      "Lower" = "#2166AC"
    )
  ) +
  labs(
    title = "Vulnerability Profile of Healthcare Access Problems",
    subtitle = "Ranked social groups among women in Bangladesh",
    x = "",
    y = "Women With Healthcare Access Problems (%)",
    fill = "Risk Level",
    caption = "Source: Bangladesh DHS | Weighted survey estimates"
  ) +
  ylim(0, 100) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 17),
    plot.subtitle = element_text(size = 12),
    axis.text.y = element_text(size = 11),
    legend.position = "bottom",
    panel.grid.major.y = element_blank()
  )