library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr 1.2.1 âś” readr 2.2.0
## âś” forcats 1.0.1 âś” stringr 1.6.0
## âś” ggplot2 4.0.3 âś” tibble 3.3.1
## âś” lubridate 1.9.5 âś” tidyr 1.3.2
## âś” purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(haven)
library(labelled)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: survival
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
library(srvyr)
##
## Attaching package: 'srvyr'
##
## The following object is masked from 'package:stats':
##
## filter
library(skimr)
library(gtsummary)
library(flextable)
##
## Attaching package: 'flextable'
##
## The following object is masked from 'package:gtsummary':
##
## continuous_summary
##
## The following object is masked from 'package:purrr':
##
## compose
library(ggplot2)
#cleaning and preparing data for analysis from main dataset, as per the research question and variables of interest. Used dhs_codebook for variable selection and coding.
main_dataset <- read_dta("D:/DHS_research/dataraw/bangladesh/individual.DTA")
analysis_data <- main_dataset %>%
select(v001,v002,v005,v021,v022, v012, v024, v025, v106, v130, v190, v502,v394,v467b,v467c,v467d,v467f,v481,v157,v158,v159)
#demographics,healthcare, media exposure
analysis_data <- analysis_data %>% #turning into factors for analysis
mutate(across(where(is.labelled),to_factor))
#adding weight for survey desing(as per dhs rules)
analysis_data <-analysis_data %>%
mutate(weight=v005/1000000)
#access problem and binary indicator for women's healthcare access barriers
analysis_data <- analysis_data %>%
mutate(access_problem = if_else(v467b == "big problem"|v467c =="big problem"|v467d=="big problem"|v467f == "big problem","Has Access Problem","No Access Problem"))
analysis_data <- analysis_data %>%
mutate(access_bin = if_else(access_problem == "Has Access Problem",1,0))
#survey design for analysis
dhs_design <- svydesign(ids = ~v021,strata = ~v022,weights = ~weight,data = analysis_data,nest = TRUE)
dhs_design
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (672) clusters.
## svydesign(ids = ~v021, strata = ~v022, weights = ~weight, data = analysis_data,
## nest = TRUE)
##women’s healthcare access problems (%) by wealth
wealth_results <- svyby(~access_bin,~v190,dhs_design,svymean,na.rm = TRUE)
wealth_results <- wealth_results %>% #cleaning the results for percentage and better understanding
mutate(access_bin = round(access_bin * 100, 2),
se = round(se * 100, 2))
wealth_results
## v190 access_bin se
## poorest poorest 80.12 1.08
## poorer poorer 75.50 1.04
## middle middle 68.04 1.09
## richer richer 62.90 1.15
## richest richest 49.86 1.31
#wealth figure
ggplot(wealth_results,aes(x = v190, y = access_bin)) +
geom_col(width = 0.7) +geom_text(aes(label = access_bin),vjust = -0.5,size = 8) +
labs(title = "Healthcare Access Problems by Wealth Quintile",subtitle = "Bangladesh DHS Women Dataset",x = "Wealth Quintile",y = "Women With Healthcare Access Problems (%)",caption = "Weighted DHS estimates") +
ylim(0, 100) + theme_minimal(base_size = 14)
#women’s access to healthcare access(%) by education
education_results <- svyby(~access_bin,~v106,dhs_design,svymean,na.rm = TRUE)
education_results <- education_results %>%
mutate(access_bin = round(access_bin * 100, 2),se = round(se * 100, 2))
education_results
## v106 access_bin se
## no education no education 77.40 1.01
## primary primary 73.27 0.85
## secondary secondary 64.14 0.89
## higher higher 45.97 1.35
#education figure
ggplot(education_results,aes(x = v106, y = access_bin)) +
geom_col(width = 0.7) +geom_text(aes(label = access_bin),vjust = -0.5,size = 8) +
labs(title = "Healthcare Access Problems by Education Level",subtitle = "Bangladesh DHS Women Dataset",x = "Education Level",y = "Women With Healthcare Access Problems (%)",caption = "Weighted DHS estimates") +ylim(0, 100) +theme_minimal(base_size = 14)
#women’s access to healthcare access(%) by urban vs rural
residence_results <- svyby(
~access_bin,~v025,dhs_design,svymean,na.rm = TRUE)
residence_results <- residence_results %>%
mutate(
access_bin = round(access_bin * 100, 2),
se = round(se * 100, 2))
residence_results
## v025 access_bin se
## urban urban 57.92 1.28
## rural rural 70.47 0.86
#urban vs rural figure
ggplot(residence_results,aes(x = v025, y = access_bin)) +
geom_col(width = 0.6) +geom_text(aes(label = access_bin),vjust = -0.5,size = 8) +
labs(title = "Healthcare Access Problems by Residence",subtitle = "Bangladesh DHS Women Dataset",x = "Residence",y = "Women With Healthcare Access Problems (%)",
caption ="Weighted DHS estimates")+ ylim(0, 100) + theme_minimal(base_size = 14)
#women’s access to healthcare access(%) by marital status
analysis_data <- analysis_data %>%
mutate(marital_group = case_when(v502 == "currently in union/living with a man" ~ "Currently Married",v502 == "formerly in union/living with a man" ~ "Formerly Married",
TRUE ~ NA_character_))
#survey design to include marital status
dhs_design <- svydesign(ids = ~v021,strata = ~v022,weights = ~weight,data = analysis_data,nest = TRUE)
marital_results <- svyby(~access_bin,~marital_group,dhs_design,svymean,na.rm = TRUE)
marital_results
## marital_group access_bin se
## Currently Married Currently Married 0.6663951 0.007278476
## Formerly Married Formerly Married 0.7127945 0.016542411
marital_results <- marital_results %>%
mutate(access_bin = round(access_bin * 100, 2),se = round(se * 100, 2))
marital_results
## marital_group access_bin se
## Currently Married Currently Married 66.64 0.73
## Formerly Married Formerly Married 71.28 1.65
#women’s access to healthcare by media exposure
#coding numeric for statistical analysis
analysis_data <- analysis_data %>%
mutate(media_exposure = if_else(v157 != "not at all" |v158 != "not at all" |v159 != "not at all",1,0))
table(analysis_data$media_exposure)
##
## 0 1
## 6981 13146
#labeling: convinient for interpretation
analysis_data <- analysis_data %>% #c
mutate(media_exposure = case_when(media_exposure == 1 ~ "Has Media Exposure",media_exposure == 0 ~ "No Media Exposure"))
table(analysis_data$media_exposure)
##
## Has Media Exposure No Media Exposure
## 13146 6981
dhs_design <- svydesign(ids = ~v021,strata = ~v022, weights = ~weight,data = analysis_data, nest = TRUE)
media_results <- svyby(~access_bin,~media_exposure,dhs_design,svymean,na.rm = TRUE)
media_results
## media_exposure access_bin se
## Has Media Exposure Has Media Exposure 0.6228659 0.008076747
## No Media Exposure No Media Exposure 0.7585671 0.009204702
media_results <- media_results %>%
mutate(access_bin = round(access_bin * 100, 2),se = round(se * 100, 2))
media_results
## media_exposure access_bin se
## Has Media Exposure Has Media Exposure 62.29 0.81
## No Media Exposure No Media Exposure 75.86 0.92
#mMedia exposure figure
ggplot(media_results,aes(x = media_exposure, y = access_bin)) + geom_col(width = 0.6) +
geom_text(aes(label = access_bin),vjust = -0.5,size = 10)+
labs(title = "Healthcare Access Problems by Media Exposure",subtitle = "Bangladesh DHS Women Dataset",
x = "Media Exposure",y = "Women With Healthcare Access Problems (%)",caption = "Weighted DHS estimates")+ylim(0, 100)+
theme_minimal(base_size = 14)
combined_results <- bind_rows(wealth_results %>% mutate(variable = "Wealth", category = as.character(v190)) %>% select(variable, category, access_bin, se),
education_results %>%
mutate(variable = "Education", category = as.character(v106)) %>% select(variable, category, access_bin, se),
residence_results %>%
mutate(variable = "Residence", category = as.character(v025)) %>% select(variable, category, access_bin, se),
marital_results %>%
mutate(variable = "Marital Status", category = as.character(marital_group)) %>% select(variable, category, access_bin, se),
media_results %>%
mutate(variable = "Media Exposure", category = as.character(media_exposure)) %>% select(variable, category, access_bin, se))
combined_results
## variable category access_bin se
## poorest Wealth poorest 80.12 1.08
## poorer Wealth poorer 75.50 1.04
## middle Wealth middle 68.04 1.09
## richer Wealth richer 62.90 1.15
## richest Wealth richest 49.86 1.31
## no education Education no education 77.40 1.01
## primary Education primary 73.27 0.85
## secondary Education secondary 64.14 0.89
## higher Education higher 45.97 1.35
## urban Residence urban 57.92 1.28
## rural Residence rural 70.47 0.86
## Currently Married Marital Status Currently Married 66.64 0.73
## Formerly Married Marital Status Formerly Married 71.28 1.65
## Has Media Exposure Media Exposure Has Media Exposure 62.29 0.81
## No Media Exposure Media Exposure No Media Exposure 75.86 0.92
health_model <- svyglm(
access_bin ~v190 + v106 +v025 +marital_group + media_exposure + v012 +v130,design = dhs_design,family = quasibinomial())
summary(health_model)
##
## Call:
## svyglm(formula = access_bin ~ v190 + v106 + v025 + marital_group +
## media_exposure + v012 + v130, design = dhs_design, family = quasibinomial())
##
## Survey design:
## svydesign(ids = ~v021, strata = ~v022, weights = ~weight, data = analysis_data,
## nest = TRUE)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.392652 0.131845 10.563 < 2e-16 ***
## v190poorer -0.164180 0.070124 -2.341 0.01952 *
## v190middle -0.421480 0.077822 -5.416 8.66e-08 ***
## v190richer -0.543680 0.083911 -6.479 1.85e-10 ***
## v190richest -0.886498 0.089079 -9.952 < 2e-16 ***
## v106primary -0.158870 0.060213 -2.638 0.00853 **
## v106secondary -0.445346 0.066572 -6.690 4.91e-11 ***
## v106higher -0.968762 0.081799 -11.843 < 2e-16 ***
## v025rural 0.154491 0.069307 2.229 0.02616 *
## marital_groupFormerly Married 0.088572 0.082153 1.078 0.28138
## media_exposureNo Media Exposure 0.158657 0.052830 3.003 0.00278 **
## v012 -0.002214 0.002279 -0.972 0.33154
## v130hinduism 0.196544 0.085946 2.287 0.02253 *
## v130buddhism 0.690856 0.373924 1.848 0.06513 .
## v130christianity 0.794006 0.577011 1.376 0.16929
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.000078)
##
## Number of Fisher Scoring iterations: 4
1.Richest women are much less likely to face healthcare access barriers compared to the poorest women.
2.Women with higher education have substantially lower odds of healthcare access problems.
3.Rural women have higher odds of healthcare access problems than urban women.
4.Women without media exposure have higher healthcare access barriers.
5.After controlling for other factors, marital status does not independently predict healthcare access problems.
library(ggplot2)
library(dplyr)
combined_ranked <- combined_results %>%
arrange(desc(access_bin)) %>%
mutate(
category_label = paste(variable, category, sep = ": "),
category_label = reorder(category_label, access_bin)
)
ggplot(combined_ranked, aes(x = category_label, y = access_bin)) +
geom_col(width = 0.7) +
geom_text(
aes(label = paste0(access_bin, "%")),
hjust = -0.1,
size = 3.5
) +
coord_flip() +
labs(
title = "Most Vulnerable Groups for Healthcare Access Problems",
subtitle = "Ranked weighted estimates from Bangladesh DHS women dataset",
x = "",
y = "Women With Healthcare Access Problems (%)",
caption = "Weighted DHS estimates"
) +
ylim(0, 100) +
theme_minimal(base_size = 13)
#
library(ggplot2)
library(dplyr)
library(forcats)
combined_ranked <- combined_results %>%
arrange(desc(access_bin)) %>%
mutate(
category_label = paste(variable, category, sep = ": "),
category_label = fct_reorder(category_label, access_bin),
risk_level = case_when(
access_bin >= 75 ~ "Very High",
access_bin >= 65 ~ "High",
access_bin >= 55 ~ "Moderate",
TRUE ~ "Lower"
)
)
ggplot(combined_ranked,
aes(x = category_label, y = access_bin, fill = risk_level)) +
geom_col(width = 0.75, alpha = 0.9) +
geom_text(
aes(label = paste0(access_bin, "%")),
hjust = -0.15,
size = 4,
fontface = "bold"
) +
coord_flip() +
scale_fill_manual(
values = c(
"Very High" = "#B2182B",
"High" = "#EF8A62",
"Moderate" = "#67A9CF",
"Lower" = "#2166AC"
)
) +
labs(
title = "Vulnerability Profile of Healthcare Access Problems",
subtitle = "Ranked social groups among women in Bangladesh",
x = "",
y = "Women With Healthcare Access Problems (%)",
fill = "Risk Level",
caption = "Source: Bangladesh DHS | Weighted survey estimates"
) +
ylim(0, 100) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold", size = 17),
plot.subtitle = element_text(size = 12),
axis.text.y = element_text(size = 11),
legend.position = "bottom",
panel.grid.major.y = element_blank()
)