Here, I am fitting a random intercept and random slope multilevel model using a continuous outcome in “woman weight” in Kilograms, drawn from the 2014 Ghana demographic and health survey.
library(foreign)
library(car)
## Loading required package: carData
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(arm)
## Loading required package: MASS
## Loading required package: lme4
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is D:/MY Ph.D COURSES/FALL 2018/DEM 7473 AH. MODELLING/Homeworks
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(lattice)
library(haven)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(lme4)
library(MuMIn)
library(sjstats)
##
## Attaching package: 'sjstats'
## The following objects are masked from 'package:survey':
##
## cv, deff
set.seed(1234)
ghdat_2014<-read.dta("D:/GDHS/GDHS_2014/gh_Individual_Recode/GH_Individual_Recode.dta", convert.factors = F)
# Selecting required variables
wghtdat<-subset(ghdat_2014,select=c("v005","v021", "v022","v013","v024", "v025", "v106","v130", "v131", "v190", "v437", "v501", "v714"))
# Woman weight
wghtdat$wght<-ifelse(wghtdat$v437==9994 | wghtdat$v437==9995 | wghtdat$v437==9996, NA, wghtdat$v437/10)
# Age
wghtdat$age<-Recode(wghtdat$v013, recodes="1='15-19'; 2:3='20-29'; 4:5='30-39'; 6:7='40-49'; else='NA'", as.factor=T)
# Level of education
wghtdat$edu<-Recode(wghtdat$v106, recodes="0='1No education'; 1='2Primary'; 2:3='3secodary/higher';else=NA", as.factor=T)
# Marital status
wghtdat$maristat<-Recode(wghtdat$v501, recodes="0='1Never married'; 1:2='2Married/cohabiting'; 3:5='3Sep/div/wid'; else=NA", as.factor=T)
# Religion
wghtdat$relig<-recode(wghtdat$v130, recodes="1:6='1Christian'; 7='2Islam'; 8='3Traditional/Spiritualist';9:96='Other';else=NA", as.factor=T)
# Wealth status
wghtdat$wealthstat<-Recode(wghtdat$v190, recodes="1:2='1Poor'; 3='2Middle'; 4:5='3Rich'; else=NA", as.factor=T)
# Residence
wghtdat$resid<-Recode(wghtdat$v025, recodes='1="1Urban"; 2="2Rural"; else=NA',as.factor=T)
# Current work status
wghtdat$workstat<-Recode(wghtdat$v714, recodes= '0="2Not working"; 1="1Working"; else=NA',as.factor=T)
# Rename region
wghtdat$region<-wghtdat$v024
fit0<-lm(wght~as.factor(region), data=wghtdat)
anova(fit0)
## Analysis of Variance Table
##
## Response: wght
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(region) 9 52618 5846.5 31.935 < 2.2e-16 ***
## Residuals 4742 868132 183.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
wghtdat$wghtz<-scale(wghtdat$wght, center=T, scale=T)
fit1<-lmer(wghtz~age+edu+maristat+wealthstat+relig+workstat+resid+(1|region), data=wghtdat)
summary(fit1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat +
## resid + (1 | region)
## Data: wghtdat
##
## REML criterion at convergence: 12115
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7549 -0.6541 -0.1110 0.5212 7.4306
##
## Random effects:
## Groups Name Variance Std.Dev.
## region (Intercept) 0.005111 0.07149
## Residual 0.740437 0.86049
## Number of obs: 4747, groups: region, 10
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) -1.08914 0.06334 254.26656 -17.194
## age20-29 0.28618 0.04363 4728.17513 6.559
## age30-39 0.54873 0.05323 4731.89514 10.309
## age40-49 0.81467 0.05666 4731.97539 14.378
## edu2Primary 0.22363 0.04138 4323.44090 5.404
## edu3secodary/higher 0.29894 0.03857 4392.04767 7.751
## maristat2Married/cohabiting 0.25186 0.03952 4730.26026 6.373
## maristat3Sep/div/wid 0.27288 0.05543 4727.23086 4.923
## wealthstat2Middle 0.32943 0.03860 3558.81381 8.536
## wealthstat3Rich 0.64963 0.04394 2255.99744 14.786
## relig2Islam 0.14010 0.03568 2996.73365 3.926
## relig3Traditional/Spiritualist -0.10218 0.08328 4612.98542 -1.227
## religOther -0.06945 0.08085 4730.28686 -0.859
## workstat2Not working -0.08004 0.03233 4728.54435 -2.476
## resid2Rural -0.01689 0.03442 4639.44409 -0.491
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## age20-29 5.98e-11 ***
## age30-39 < 2e-16 ***
## age40-49 < 2e-16 ***
## edu2Primary 6.87e-08 ***
## edu3secodary/higher 1.13e-14 ***
## maristat2Married/cohabiting 2.02e-10 ***
## maristat3Sep/div/wid 8.83e-07 ***
## wealthstat2Middle < 2e-16 ***
## wealthstat3Rich < 2e-16 ***
## relig2Islam 8.82e-05 ***
## relig3Traditional/Spiritualist 0.2199
## religOther 0.3904
## workstat2Not working 0.0133 *
## resid2Rural 0.6237
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
The variance compaonents for the random intercept model show a variance of 0.0051 at the regional or group level and a variance of 0.740 at the individual level. Results for the fixed effects show that age, education, marital status, wealth status, religion and work status have significant effect on the weight of women in Ghana. Weight increased significantly with increase in age, education level,and wealth status. Married and formerly married women had higher weights than women who had never married. Muslim women had increased weight compared to Christian women and others whereas unemployed women had lower weight than the employed women.
fit2<-lmer(wghtz~age+edu+maristat+wealthstat+relig+workstat+resid+(1+wealthstat|region), data=wghtdat)
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat +
## resid + (1 + wealthstat | region)
## Data: wghtdat
##
## REML criterion at convergence: 12108.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7424 -0.6529 -0.1118 0.5194 7.4121
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## region (Intercept) 0.0065866 0.08116
## wealthstat2Middle 0.0192994 0.13892 -0.67
## wealthstat3Rich 0.0005276 0.02297 0.15 0.63
## Residual 0.7381120 0.85913
## Number of obs: 4747, groups: region, 10
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) -1.08926 0.06448 70.73289 -16.892
## age20-29 0.28468 0.04359 4721.53798 6.531
## age30-39 0.54776 0.05318 4708.95447 10.300
## age40-49 0.81429 0.05662 4718.97329 14.383
## edu2Primary 0.22543 0.04128 3235.76048 5.462
## edu3secodary/higher 0.30071 0.03850 3223.82738 7.811
## maristat2Married/cohabiting 0.25274 0.03950 4711.64535 6.399
## maristat3Sep/div/wid 0.27615 0.05537 4722.30449 4.988
## wealthstat2Middle 0.33373 0.05905 8.21789 5.652
## wealthstat3Rich 0.64022 0.04509 128.47765 14.198
## relig2Islam 0.13930 0.03565 1972.04127 3.907
## relig3Traditional/Spiritualist -0.09924 0.08328 4110.16621 -1.192
## religOther -0.06476 0.08078 4721.01522 -0.802
## workstat2Not working -0.08031 0.03229 4718.04712 -2.487
## resid2Rural -0.01316 0.03458 3734.50670 -0.381
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## age20-29 7.24e-11 ***
## age30-39 < 2e-16 ***
## age40-49 < 2e-16 ***
## edu2Primary 5.07e-08 ***
## edu3secodary/higher 7.62e-15 ***
## maristat2Married/cohabiting 1.72e-10 ***
## maristat3Sep/div/wid 6.34e-07 ***
## wealthstat2Middle 0.000436 ***
## wealthstat3Rich < 2e-16 ***
## relig2Islam 9.65e-05 ***
## relig3Traditional/Spiritualist 0.233453
## religOther 0.422797
## workstat2Not working 0.012922 *
## resid2Rural 0.703493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
In the random slopes model, the variance compaonents show an increase to 0.0065 at the regional or group level and a slight reduction to 0.738 at the individual level. Results for the fixed effects were not much different from the first model as age, education, marital status, wealth status, religion and work status have significant effect on the weight of women. Again, weight increased significantly with increase in age, education level,and wealth status. Married and formerly married women had higher weights than women who had never married. Muslim women had increased weight compared to Christian women and others whereas unemployed women had lower weight than the employed women.
icc(fit1)
##
## Linear mixed model
##
## Family : gaussian (identity)
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat + resid + (1 | region)
##
## ICC (region): 0.0069
icc(fit2)
## Caution! ICC for random-slope-intercept models usually not meaningful. See 'Note' in `?icc`.
##
## Linear mixed model
##
## Family : gaussian (identity)
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat + resid + (1 + wealthstat | region)
##
## ICC (region): 0.0088
The ICC results show that the random slope in fit2 has increased the correlation among the regions, which could be mainly due to sampling of the women.
model.sel(fit1, fit2)
## Model selection table
## (Intrc) age edu mrstt relig resid wlths wrkst random df logLik
## fit1 -1.089 + + + + + + + r 17 -6057.522
## fit2 -1.089 + + + + + + + 1+w|r 22 -6054.196
## AICc delta weight
## fit1 12149.2 0.00 0.848
## fit2 12152.6 3.43 0.152
## Models ranked by AICc(x)
## Random terms:
## r = '1 | region'
## 1+w|r = '1 + wealthstat | region'
# Fake R square
r.squaredGLMM(fit1)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.
## R2m R2c
## [1,] 0.2470691 0.2522309
r.squaredGLMM(fit2)
## R2m R2c
## [1,] 0.2445882 0.2525156
Results in the model selection above show that the random intercept model(fit1) have a slightly better prediction than the fit2.
# Getting the random effects
rancoefs2<-ranef(fit2)
head(rancoefs2$region, n=10)
## (Intercept) wealthstat2Middle wealthstat3Rich
## 1 0.031612173 -0.103332690 -0.013430289
## 2 -0.008731183 0.006308209 -0.001206025
## 3 0.167144107 -0.167552492 0.012741717
## 4 0.028473142 0.022700398 0.013500283
## 5 -0.063099420 0.098908569 0.003073035
## 6 -0.039130540 0.028523258 -0.005349378
## 7 0.011147824 -0.134583820 -0.026435542
## 8 -0.094032446 0.067004614 -0.013194856
## 9 -0.023790825 0.192890113 0.035560949
## 10 -0.009592832 -0.010866159 -0.005259893
par(mfrow=c(1,1))
pairs(rancoefs2$region)
plot(rancoefs2$region[,"(Intercept)"],rancoefs2$region[,"wealthstat2Middle"], main = "Random effects for each region", xlab = "Intercept", ylab = "Wealth slope")