Data: Ethiopian Demographic and Health Survey - 2011 (EDHS 2011)
Research Question: Is there any rural-urban disparity in undernutrition among women in Ethiopia?
# Load EDHS 2011
library(car)
library(foreign)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2. http://CRAN.R-project.org/package=stargazer
library(sjPlot)
## Visit http://strengejacke.de/sjPlot for package-vignettes.
library(ggplot2)
library(pander)
library(knitr)
dat2<-read.dta("~/Google Drive/Dem7283/DHS Resources/ET_2011_DHS_01242017_168_103714/etir61dt/ETIR61FL.dta", convert.factors = F)
# The dependent variable is Body mass index (BMI)
mean(dat2$v445, na.rm=T)
## [1] 2305.865
# devide the dependent variable (bmi) by 100.
dat2$bmi<-dat2$v445/100
mean(dat2$bmi, na.rm=T)
## [1] 23.05865
# Select women who were not pregnant or postpartum at the time of the survey. Since pregnancy has an effect on the measure of body mass index.
# Recode BMI in to (1: BMI<18.5 as "Undernutrition", 0: BMI>=18.5 "Good Nutrition")
#Avoid women who are currently pregnant or pospartum/amenorrheic to avoid the impact of pregnancy or child birth on Body Mass Index(BMI)
dat3 <- dat2[dat2$v213!=1,]
dat3 <- dat3[dat3$v623!=2,]
dat3$binbmi <- 0
dat3[dat3$bmi < 18.5,]$binbmi <- 1
# recode the independent varaibles in to a meaningful manner
# Marital Status
dat3$marst<-recode(dat3$v501, recodes='0="nevermarried"; 1:2="married"; 3="widowed"; 4:5="divorced"; else=NA', as.factor.result = T)
dat3$marst<-relevel(dat3$marst, ref="married")
#Education
dat3$educ<-recode(dat3$v106, recodes="0='0Noeducation'; 1='2Primary'; 2='3Secodary'; 3='4higher'; else=NA", as.factor.result=T)
dat3$educ<-relevel(dat3$educ, ref="2Primary")
#Occupation
dat3$occup<-recode(dat3$v717, recodes="0 ='1Unemployed'; 1:3='2Non-manual/Professional'; 4:9='3Agricultural/Manual(skilled/unskilled)'; else=NA", as.factor.result=T)
#Age
dat3$age<-recode(dat3$v013, recodes="1='15-19'; 2:3='20-29'; 4:5='30-39'; 6:7='40-49'; else='NA'", as.factor.result=T)
dat3$age<-relevel(dat3$age, ref='15-19')
#Children Ever Born
dat3$parity<-recode(dat3$v201, recodes="0='0'; 1:2='1-2'; 3:4='3-4'; 5:18='5+'; else=NA", as.factor.result=T)
table(dat3$parity)
##
## 0 1-2 3-4 5+
## 5406 2496 1540 2781
#wealth Index
dat3$wealthindex<-recode(dat3$v190, recodes="1:2='1Poor'; 3='2Middle'; 4:5='3Rich'; else=NA", as.factor.result=T)
dat3$wealthindex<-relevel(dat3$wealthindex, ref="1Poor")
table(dat3$wealthindex)
##
## 1Poor 2Middle 3Rich
## 3857 1561 6805
#Residence
dat3$residence<-recode(dat3$v102, recodes='1="Urban"; 2="Rural"; else=NA', as.factor.result = T)
dat3$residence<-relevel(dat3$residence, ref="Urban")
# Calculate the DHS sample weight
dat3$weight<-dat3$v005/1000000
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~v022, weights=~weight, data = dat3[is.na(dat3$weight)==F,])
% of women by education level: a survey-corrected chi-square test for independence.
des1<-svyby(formula = ~binbmi, by =~educ, design = des, FUN = svymean, na.rm=T)
svychisq(~binbmi+educ, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~binbmi + educ, design = des)
## F = 17.785, ndf = 2.9758, ddf = 36305.0000, p-value = 1.864e-11
qplot(x=des1$educ,y=des1$binbmi, data=des1 ,xlab="Education", ylab="% fair/poor nutrition" )+
geom_errorbar(aes(x=educ, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Women with Fair/Poor Nutrition by Education")
svychisq(~binbmi+educ, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~binbmi + educ, design = des)
## F = 17.785, ndf = 2.9758, ddf = 36305.0000, p-value = 1.864e-11
#calculate residence*nutrition cross tabulation, and plot it
des2<-svyby(formula = ~binbmi, by = ~residence, design = des, FUN = svymean, na.rm=T)
svychisq(~binbmi+residence, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~binbmi + residence, design = des)
## F = 59.476, ndf = 1, ddf = 12200, p-value = 1.334e-14
qplot(x=des2$residence,y=des2$binbmi, data=des2 ,xlab="Residence", ylab="% Fair/Poor Nutrition" )+
geom_errorbar(aes(x=residence, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Adult Women with Fair/Poor Nutrition by Residence")
# calculate age*education*nutrition cross tabulation, and plot it
des3<-svyby(formula = ~binbmi, by = ~age, design = des, FUN = svymean, na.rm=T)
svychisq(~binbmi+age, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~binbmi + age, design = des)
## F = 39.605, ndf = 2.9989, ddf = 36587.0000, p-value < 2.2e-16
qplot(x=des3$age,y=des3$binbmi, data=des3 ,xlab="Age", ylab="% Fair/Poor Nutrition" )+
geom_errorbar(aes(x=age, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Adult Women with Fair/Poor Nutrition by Age")
#calculate maritalstatus*nutrition cross tabulation, and plot it
des4<-svyby(formula = ~binbmi, by = ~marst, design = des, FUN = svymean, na.rm=T)
svychisq(~binbmi+marst, design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~binbmi + marst, design = des)
## F = 10.517, ndf = 2.9903, ddf = 36482.0000, p-value = 6.792e-07
qplot(x=des4$marst,y=des4$binbmi, data=des4 ,xlab="Marital Status", ylab="% Fair/Poor Nutrition" )+
geom_errorbar(aes(x=marst, ymin=binbmi-se,ymax= binbmi+se), width=.25)+
ggtitle(label = "% of Ethiopian Adult Women with Fair/Poor Nutrition by Marital Status")
des1des2<-svyby(formula=~binbmi, by =~residence+educ, design = des, FUN = svymean, na.rm=T)
des1des2
## residence educ binbmi se
## Urban.2Primary Urban 2Primary 0.2104915 0.017120263
## Rural.2Primary Rural 2Primary 0.3344066 0.011406605
## Urban.0Noeducation Urban 0Noeducation 0.1918193 0.025013404
## Rural.0Noeducation Rural 0Noeducation 0.2743546 0.009282359
## Urban.3Secodary Urban 3Secodary 0.2002300 0.021745245
## Rural.3Secodary Rural 3Secodary 0.1954375 0.034019276
## Urban.4higher Urban 4higher 0.1241056 0.017314592
## Rural.4higher Rural 4higher 0.2053428 0.055721494
``` # Logit model
fit.logit <-svyglm(binbmi~educ+age+residence, design=des, family=binomial)
## Warning: non-integer #successes in a binomial glm!
#summary(fit.logit)
panderOptions("digits", 2)
pander(fit.logit, covariate.labels = c("No education", "Secondary",
"College and Higher","Age 15-19", "Age 30-39", "Age 40-49","Rural"))
| Â | Estimate | Std. Error | t value | Pr(>|t|) |
|---|---|---|---|---|
| No education | -0.052 | 0.075 | -0.69 | 0.49 |
| Secondary | -0.25 | 0.13 | -1.9 | 0.058 |
| College and Higher | -0.45 | 0.16 | -2.7 | 0.0068 |
| Age 15-19 | -0.74 | 0.081 | -9.1 | 8.1e-20 |
| Age 30-39 | -0.57 | 0.096 | -6 | 2.7e-09 |
| Age 40-49 | -0.34 | 0.096 | -3.6 | 0.00038 |
| Rural | 0.47 | 0.086 | 5.4 | 7.4e-08 |
| (Intercept) | -0.92 | 0.089 | -10 | 1.3e-24 |
sjp.glm(fit.logit,title="Odds Ratios for Fair/Poor vs Good Nutrition - Logit model" , sortOdds=F, showModelSummary=T)
dat<-expand.grid(residence=levels(dat3$residence), educ=levels(dat3$educ), age=levels(dat3$age))
# generate the fitted values
fit<-predict(fit.logit, newdat=dat,type="response")
dat$fitted.prob.lrm<-round(fit,3)
# head(dat, n=50)
Compare women living in both (rural and urban) parts of the country to see the rural/urban disparity in undernutrition.
To see the rural/urban disparity among women in Ethiopia we will consider two types of women:
Women aged (40-49) living in the rural areas of Ethiopia with no education
Women aged (40-49) living in the urban parts of the country with college or above level of education.
dat[which(dat$educ=="0Noeducation"&dat$age=="40-49"&dat$residence =="Rural"),]
## residence educ age fitted.prob.lrm
## 28 Rural 0Noeducation 40-49 0.301
dat[which(dat$educ=="4higher"&dat$age=="40-49"&dat$residence =="Urban"),]
## residence educ age fitted.prob.lrm
## 31 Urban 4higher 40-49 0.154
From the above two cases we can clearly see that uneducated women aged (40-49), living in rural parts of the country has about 29% of reporting undernutrition, while women aged (40-49) with college and above level of education living in urban parts of Ethiopia has only 17% chance.