Here, I am fitting a random intercept and random slope multilevel model using a continuous outcome in “woman weight” in Kilograms, drawn from the 2014 Ghana demographic and health survey.

Loading packages and data

library(foreign)
library(car)

## Loading required package: carData

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(arm)

## Loading required package: MASS

## Loading required package: lme4

## 
## arm (Version 1.10-1, built: 2018-4-12)

## Working directory is D:/MY Ph.D COURSES/FALL 2018/DEM 7473 AH. MODELLING/Homeworks

## 
## Attaching package: 'arm'

## The following object is masked from 'package:car':
## 
##     logit

library(mice)

## Loading required package: lattice

## 
## Attaching package: 'mice'

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

library(lattice)
library(haven)
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(lme4)
library(MuMIn)
library(sjstats)

## 
## Attaching package: 'sjstats'

## The following objects are masked from 'package:survey':
## 
##     cv, deff

set.seed(1234)



ghdat_2014<-read.dta("D:/GDHS/GDHS_2014/gh_Individual_Recode/GH_Individual_Recode.dta", convert.factors = F)

# Selecting required variables
wghtdat<-subset(ghdat_2014,select=c("v005","v021", "v022","v013","v024", "v025", "v106","v130", "v131", "v190", "v437", "v501", "v714"))

Recoding variables

# Woman weight
wghtdat$wght<-ifelse(wghtdat$v437==9994 | wghtdat$v437==9995 | wghtdat$v437==9996, NA, wghtdat$v437/10)

# Age
wghtdat$age<-Recode(wghtdat$v013, recodes="1='15-19'; 2:3='20-29'; 4:5='30-39'; 6:7='40-49'; else='NA'", as.factor=T)

# Level of education
wghtdat$edu<-Recode(wghtdat$v106, recodes="0='1No education'; 1='2Primary'; 2:3='3secodary/higher';else=NA", as.factor=T) 

# Marital status
wghtdat$maristat<-Recode(wghtdat$v501, recodes="0='1Never married'; 1:2='2Married/cohabiting'; 3:5='3Sep/div/wid'; else=NA", as.factor=T)

# Religion
wghtdat$relig<-recode(wghtdat$v130, recodes="1:6='1Christian'; 7='2Islam'; 8='3Traditional/Spiritualist';9:96='Other';else=NA", as.factor=T)

# Wealth status
wghtdat$wealthstat<-Recode(wghtdat$v190, recodes="1:2='1Poor'; 3='2Middle'; 4:5='3Rich'; else=NA", as.factor=T)

# Residence
wghtdat$resid<-Recode(wghtdat$v025, recodes='1="1Urban"; 2="2Rural"; else=NA',as.factor=T)

# Current work status 
wghtdat$workstat<-Recode(wghtdat$v714, recodes= '0="2Not working"; 1="1Working"; else=NA',as.factor=T)

# Rename region
wghtdat$region<-wghtdat$v024

Testing for variation at the regional level with the ANOVA model

fit0<-lm(wght~as.factor(region), data=wghtdat)

anova(fit0)

## Analysis of Variance Table
## 
## Response: wght
##                     Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(region)    9  52618  5846.5  31.935 < 2.2e-16 ***
## Residuals         4742 868132   183.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fitting the random intercept model

wghtdat$wghtz<-scale(wghtdat$wght, center=T, scale=T)

fit1<-lmer(wghtz~age+edu+maristat+wealthstat+relig+workstat+resid+(1|region), data=wghtdat)

summary(fit1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat +  
##     resid + (1 | region)
##    Data: wghtdat
## 
## REML criterion at convergence: 12115
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7549 -0.6541 -0.1110  0.5212  7.4306 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  region   (Intercept) 0.005111 0.07149 
##  Residual             0.740437 0.86049 
## Number of obs: 4747, groups:  region, 10
## 
## Fixed effects:
##                                  Estimate Std. Error         df t value
## (Intercept)                      -1.08914    0.06334  254.26656 -17.194
## age20-29                          0.28618    0.04363 4728.17513   6.559
## age30-39                          0.54873    0.05323 4731.89514  10.309
## age40-49                          0.81467    0.05666 4731.97539  14.378
## edu2Primary                       0.22363    0.04138 4323.44090   5.404
## edu3secodary/higher               0.29894    0.03857 4392.04767   7.751
## maristat2Married/cohabiting       0.25186    0.03952 4730.26026   6.373
## maristat3Sep/div/wid              0.27288    0.05543 4727.23086   4.923
## wealthstat2Middle                 0.32943    0.03860 3558.81381   8.536
## wealthstat3Rich                   0.64963    0.04394 2255.99744  14.786
## relig2Islam                       0.14010    0.03568 2996.73365   3.926
## relig3Traditional/Spiritualist   -0.10218    0.08328 4612.98542  -1.227
## religOther                       -0.06945    0.08085 4730.28686  -0.859
## workstat2Not working             -0.08004    0.03233 4728.54435  -2.476
## resid2Rural                      -0.01689    0.03442 4639.44409  -0.491
##                                Pr(>|t|)    
## (Intercept)                     < 2e-16 ***
## age20-29                       5.98e-11 ***
## age30-39                        < 2e-16 ***
## age40-49                        < 2e-16 ***
## edu2Primary                    6.87e-08 ***
## edu3secodary/higher            1.13e-14 ***
## maristat2Married/cohabiting    2.02e-10 ***
## maristat3Sep/div/wid           8.83e-07 ***
## wealthstat2Middle               < 2e-16 ***
## wealthstat3Rich                 < 2e-16 ***
## relig2Islam                    8.82e-05 ***
## relig3Traditional/Spiritualist   0.2199    
## religOther                       0.3904    
## workstat2Not working             0.0133 *  
## resid2Rural                      0.6237    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

The variance compaonents for the random intercept model show a variance of 0.0051 at the regional or group level and a variance of 0.740 at the individual level. Results for the fixed effects show that age, education, marital status, wealth status, religion and work status have significant effect on the weight of women in Ghana. Weight increased significantly with increase in age, education level,and wealth status. Married and formerly married women had higher weights than women who had never married. Muslim women had increased weight compared to Christian women and others whereas unemployed women had lower weight than the employed women.

Fitting both random intercept and random slope model

fit2<-lmer(wghtz~age+edu+maristat+wealthstat+relig+workstat+resid+(1+wealthstat|region), data=wghtdat)

summary(fit2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat +  
##     resid + (1 + wealthstat | region)
##    Data: wghtdat
## 
## REML criterion at convergence: 12108.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7424 -0.6529 -0.1118  0.5194  7.4121 
## 
## Random effects:
##  Groups   Name              Variance  Std.Dev. Corr       
##  region   (Intercept)       0.0065866 0.08116             
##           wealthstat2Middle 0.0192994 0.13892  -0.67      
##           wealthstat3Rich   0.0005276 0.02297   0.15  0.63
##  Residual                   0.7381120 0.85913             
## Number of obs: 4747, groups:  region, 10
## 
## Fixed effects:
##                                  Estimate Std. Error         df t value
## (Intercept)                      -1.08926    0.06448   70.73289 -16.892
## age20-29                          0.28468    0.04359 4721.53798   6.531
## age30-39                          0.54776    0.05318 4708.95447  10.300
## age40-49                          0.81429    0.05662 4718.97329  14.383
## edu2Primary                       0.22543    0.04128 3235.76048   5.462
## edu3secodary/higher               0.30071    0.03850 3223.82738   7.811
## maristat2Married/cohabiting       0.25274    0.03950 4711.64535   6.399
## maristat3Sep/div/wid              0.27615    0.05537 4722.30449   4.988
## wealthstat2Middle                 0.33373    0.05905    8.21789   5.652
## wealthstat3Rich                   0.64022    0.04509  128.47765  14.198
## relig2Islam                       0.13930    0.03565 1972.04127   3.907
## relig3Traditional/Spiritualist   -0.09924    0.08328 4110.16621  -1.192
## religOther                       -0.06476    0.08078 4721.01522  -0.802
## workstat2Not working             -0.08031    0.03229 4718.04712  -2.487
## resid2Rural                      -0.01316    0.03458 3734.50670  -0.381
##                                Pr(>|t|)    
## (Intercept)                     < 2e-16 ***
## age20-29                       7.24e-11 ***
## age30-39                        < 2e-16 ***
## age40-49                        < 2e-16 ***
## edu2Primary                    5.07e-08 ***
## edu3secodary/higher            7.62e-15 ***
## maristat2Married/cohabiting    1.72e-10 ***
## maristat3Sep/div/wid           6.34e-07 ***
## wealthstat2Middle              0.000436 ***
## wealthstat3Rich                 < 2e-16 ***
## relig2Islam                    9.65e-05 ***
## relig3Traditional/Spiritualist 0.233453    
## religOther                     0.422797    
## workstat2Not working           0.012922 *  
## resid2Rural                    0.703493    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 15 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

In the random slopes model, the variance compaonents show an increase to 0.0065 at the regional or group level and a slight reduction to 0.738 at the individual level. Results for the fixed effects were not much different from the first model as age, education, marital status, wealth status, religion and work status have significant effect on the weight of women. Again, weight increased significantly with increase in age, education level,and wealth status. Married and formerly married women had higher weights than women who had never married. Muslim women had increased weight compared to Christian women and others whereas unemployed women had lower weight than the employed women.

Inter-class correlations

icc(fit1)

## 
## Linear mixed model
## 
## Family : gaussian (identity)
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat + resid + (1 | region)
## 
##   ICC (region): 0.0069

icc(fit2)

## Caution! ICC for random-slope-intercept models usually not meaningful. See 'Note' in `?icc`.

## 
## Linear mixed model
## 
## Family : gaussian (identity)
## Formula: wghtz ~ age + edu + maristat + wealthstat + relig + workstat + resid + (1 + wealthstat | region)
## 
##   ICC (region): 0.0088

The ICC results show that the random slope in fit2 has increased the correlation among the regions, which could be mainly due to sampling of the women.

Model selection

model.sel(fit1, fit2)

## Model selection table 
##      (Intrc) age edu mrstt relig resid wlths wrkst random df    logLik
## fit1  -1.089   +   +     +     +     +     +     +      r 17 -6057.522
## fit2  -1.089   +   +     +     +     +     +     +  1+w|r 22 -6054.196
##         AICc delta weight
## fit1 12149.2  0.00  0.848
## fit2 12152.6  3.43  0.152
## Models ranked by AICc(x) 
## Random terms: 
## r = '1 | region'
## 1+w|r = '1 + wealthstat | region'

# Fake R square
r.squaredGLMM(fit1)

## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.

##            R2m       R2c
## [1,] 0.2470691 0.2522309

r.squaredGLMM(fit2)

##            R2m       R2c
## [1,] 0.2445882 0.2525156

Results in the model selection above show that the random intercept model(fit1) have a slightly better prediction than the fit2.

Plotting the random slope model

# Getting the random effects

rancoefs2<-ranef(fit2)

head(rancoefs2$region, n=10)

##     (Intercept) wealthstat2Middle wealthstat3Rich
## 1   0.031612173      -0.103332690    -0.013430289
## 2  -0.008731183       0.006308209    -0.001206025
## 3   0.167144107      -0.167552492     0.012741717
## 4   0.028473142       0.022700398     0.013500283
## 5  -0.063099420       0.098908569     0.003073035
## 6  -0.039130540       0.028523258    -0.005349378
## 7   0.011147824      -0.134583820    -0.026435542
## 8  -0.094032446       0.067004614    -0.013194856
## 9  -0.023790825       0.192890113     0.035560949
## 10 -0.009592832      -0.010866159    -0.005259893

Plotting the random effects

par(mfrow=c(1,1))
pairs(rancoefs2$region)

plot(rancoefs2$region[,"(Intercept)"],rancoefs2$region[,"wealthstat2Middle"], main = "Random effects for each region", xlab = "Intercept", ylab = "Wealth slope")

Homework 2

Samuel Nyarko

September 23, 2018