Data: Ethiopian Demographic and Health Survey - 2011 (EDHS 2011)

Research Question: Is there any rural-urban disparity in undernutrition among women in Ethiopia?

# Load EDHS 2011

library(car)
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2. http://CRAN.R-project.org/package=stargazer
library(sjPlot)
## Visit http://strengejacke.de/sjPlot for package-vignettes.
library(ggplot2)
library(pander)
library(knitr)

dat2<-read.dta("~/Google Drive/Dem7283/DHS Resources/ET_2011_DHS_01242017_168_103714/etir61dt/ETIR61FL.dta", convert.factors = F)

# The dependent variable is Body mass index (BMI)

mean(dat2$v445, na.rm=T)
## [1] 2305.865
# devide the dependent variable (bmi) by 100.
 
dat2$bmi<-dat2$v445/100

mean(dat2$bmi, na.rm=T)
## [1] 23.05865
# Select women who were not pregnant or postpartum at the time of the survey. Since pregnancy has an effect on the measure of body mass index. 

# Recode BMI in to (1: BMI<18.5 as "Undernutrition", 0: BMI>=18.5 "Good Nutrition")

#Avoid women who are currently pregnant or pospartum/amenorrheic to avoid the impact of pregnancy or child birth on Body Mass Index(BMI)

dat3 <- dat2[dat2$v213!=1,]
dat3 <- dat3[dat3$v623!=2,]
dat3$binbmi <- 0
dat3[dat3$bmi < 18.5,]$binbmi <- 1

# recode the independent varaibles in to a meaningful manner
# Marital Status

dat3$marst<-recode(dat3$v501, recodes='0="nevermarried"; 1:2="married"; 3="widowed"; 4:5="divorced"; else=NA', as.factor.result = T)

dat3$marst<-relevel(dat3$marst, ref="married")

#Education 
dat3$educ<-recode(dat3$v106, recodes="0='0Noeducation'; 1='2Primary'; 2='3Secodary'; 3='4higher'; else=NA", as.factor.result=T)

dat3$educ<-relevel(dat3$educ, ref="2Primary")

#Occupation

dat3$occup<-recode(dat3$v717, recodes="0 ='1Unemployed'; 1:3='2Non-manual/Professional'; 4:9='3Agricultural/Manual(skilled/unskilled)'; else=NA", as.factor.result=T)

#Age
dat3$age<-recode(dat3$v013, recodes="1='15-19'; 2:3='20-29'; 4:5='30-39'; 6:7='40-49'; else='NA'", as.factor.result=T)

dat3$age<-relevel(dat3$age, ref='15-19')

#Children Ever Born 
dat3$parity<-recode(dat3$v201, recodes="0='0'; 1:2='1-2'; 3:4='3-4'; 5:18='5+'; else=NA", as.factor.result=T)


table(dat3$parity)
## 
##    0  1-2  3-4   5+ 
## 5406 2496 1540 2781
#wealth Index
dat3$wealthindex<-recode(dat3$v190, recodes="1:2='1Poor'; 3='2Middle'; 4:5='3Rich'; else=NA", as.factor.result=T)

dat3$wealthindex<-relevel(dat3$wealthindex, ref="1Poor")

table(dat3$wealthindex)
## 
##   1Poor 2Middle   3Rich 
##    3857    1561    6805
#Residence
dat3$residence<-recode(dat3$v102, recodes='1="Urban"; 2="Rural"; else=NA',  as.factor.result = T)

dat3$residence<-relevel(dat3$residence, ref="Urban")

Weighted Analysis

# Calculate the DHS sample weight 

dat3$weight<-dat3$v005/1000000

options(survey.lonely.psu = "adjust")

des<-svydesign(ids=~1, strata=~v022, weights=~weight, data = dat3[is.na(dat3$weight)==F,])

% of women by education level: a survey-corrected chi-square test for independence.

des1<-svyby(formula = ~binbmi, by =~educ, design = des, FUN = svymean, na.rm=T)
svychisq(~binbmi+educ, design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~binbmi + educ, design = des)
## F = 17.785, ndf = 2.9758, ddf = 36305.0000, p-value = 1.864e-11
qplot(x=des1$educ,y=des1$binbmi, data=des1 ,xlab="Education", ylab="%  fair/poor nutrition" )+
geom_errorbar(aes(x=educ, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Women with Fair/Poor Nutrition by Education")

svychisq(~binbmi+educ, design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~binbmi + educ, design = des)
## F = 17.785, ndf = 2.9758, ddf = 36305.0000, p-value = 1.864e-11
#calculate residence*nutrition cross tabulation, and plot it

des2<-svyby(formula = ~binbmi, by = ~residence, design = des, FUN = svymean, na.rm=T)

svychisq(~binbmi+residence, design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~binbmi + residence, design = des)
## F = 59.476, ndf = 1, ddf = 12200, p-value = 1.334e-14
qplot(x=des2$residence,y=des2$binbmi, data=des2 ,xlab="Residence", ylab="%  Fair/Poor Nutrition" )+
geom_errorbar(aes(x=residence, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Adult Women with Fair/Poor Nutrition by Residence")

# calculate age*education*nutrition cross tabulation, and plot it
 des3<-svyby(formula = ~binbmi, by = ~age, design = des, FUN = svymean, na.rm=T)
svychisq(~binbmi+age, design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~binbmi + age, design = des)
## F = 39.605, ndf = 2.9989, ddf = 36587.0000, p-value < 2.2e-16
qplot(x=des3$age,y=des3$binbmi, data=des3 ,xlab="Age", ylab="% Fair/Poor Nutrition" )+
geom_errorbar(aes(x=age, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Adult Women with Fair/Poor Nutrition by Age")

#calculate maritalstatus*nutrition cross tabulation, and plot it
 
des4<-svyby(formula = ~binbmi, by = ~marst, design = des, FUN = svymean, na.rm=T)

svychisq(~binbmi+marst, design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~binbmi + marst, design = des)
## F = 10.517, ndf = 2.9903, ddf = 36482.0000, p-value = 6.792e-07
qplot(x=des4$marst,y=des4$binbmi, data=des4 ,xlab="Marital Status", ylab="% Fair/Poor Nutrition" )+
geom_errorbar(aes(x=marst, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Adult Women with Fair/Poor Nutrition by Marital Status")

des1des2<-svyby(formula=~binbmi, by =~residence+educ, design = des, FUN = svymean, na.rm=T)
des1des2
##                    residence         educ    binbmi          se
## Urban.2Primary         Urban     2Primary 0.2104915 0.017120263
## Rural.2Primary         Rural     2Primary 0.3344066 0.011406605
## Urban.0Noeducation     Urban 0Noeducation 0.1918193 0.025013404
## Rural.0Noeducation     Rural 0Noeducation 0.2743546 0.009282359
## Urban.3Secodary        Urban    3Secodary 0.2002300 0.021745245
## Rural.3Secodary        Rural    3Secodary 0.1954375 0.034019276
## Urban.4higher          Urban      4higher 0.1241056 0.017314592
## Rural.4higher          Rural      4higher 0.2053428 0.055721494

``` # Logit model

fit.logit <-svyglm(binbmi~educ+age+residence, design=des, family=binomial)
## Warning: non-integer #successes in a binomial glm!
#summary(fit.logit)
panderOptions("digits", 2)
pander(fit.logit, covariate.labels = c("No education", "Secondary", 
"College and Higher","Age 15-19", "Age 30-39", "Age 40-49","Rural"))
Fitting generalized (binomial/logit) linear model: binbmi ~ educ + age + residence The results from the above model shows that women living in the rural parts of the country has a better chance of reporting undernutrition. Similarly, education variables also show a negative linear trend, those with more education having lower chances of reporting undernutrition (BMI<18.5) compared to those with primary level of education (reference group). The age variable also shows a negative linear trend, as women get older they have a lower chance of reporting undernutrition with the exception women in aged (15-19).
  Estimate Std. Error t value Pr(>|t|)
No education -0.052 0.075 -0.69 0.49
Secondary -0.25 0.13 -1.9 0.058
College and Higher -0.45 0.16 -2.7 0.0068
Age 15-19 -0.74 0.081 -9.1 8.1e-20
Age 30-39 -0.57 0.096 -6 2.7e-09
Age 40-49 -0.34 0.096 -3.6 0.00038
Rural 0.47 0.086 5.4 7.4e-08
(Intercept) -0.92 0.089 -10 1.3e-24

Effect Plots

sjp.glm(fit.logit,title="Odds Ratios for Fair/Poor vs Good Nutrition - Logit model" , sortOdds=F, showModelSummary=T)

Fitted Values

dat<-expand.grid(residence=levels(dat3$residence), educ=levels(dat3$educ), age=levels(dat3$age))

# generate the fitted values 
fit<-predict(fit.logit, newdat=dat,type="response")

dat$fitted.prob.lrm<-round(fit,3)
# head(dat, n=50)

Interesting cases

Compare women living in both (rural and urban) parts of the country to see the rural/urban disparity in undernutrition.

To see the rural/urban disparity among women in Ethiopia we will consider two types of women:

  1. Women aged (40-49) living in the rural areas of Ethiopia with no education

  2. Women aged (40-49) living in the urban parts of the country with college or above level of education.

dat[which(dat$educ=="0Noeducation"&dat$age=="40-49"&dat$residence =="Rural"),]
##    residence         educ   age fitted.prob.lrm
## 28     Rural 0Noeducation 40-49           0.301
dat[which(dat$educ=="4higher"&dat$age=="40-49"&dat$residence =="Urban"),]
##    residence    educ   age fitted.prob.lrm
## 31     Urban 4higher 40-49           0.154

From the above two cases we can clearly see that uneducated women aged (40-49), living in rural parts of the country has about 29% of reporting undernutrition, while women aged (40-49) with college and above level of education living in urban parts of Ethiopia has only 17% chance.