In this example, we will use Bayesian hierarchical models for longitudinal modeling of data from the ECLS-K. Specifically, we will model changes in a student’s standardized math score from kindergarten to 8th grade, using JAGS to illustrate random slope (growth curve) models of individual change.

First, we load the data and recode the variables we will use.

load("~/Google Drive/dem7903_App_Hier/data/eclsk.Rdata")
names(eclsk)<-tolower(names(eclsk))
library(car)
library(rjags)
## Loading required package: coda
## Loading required package: lattice
## Linked to JAGS 3.4.0
## Loaded modules: basemod,bugs
#get out only the variables I'm going to use for this example
myvars<-c( "childid","gender", "race", "r1_kage","r2_kage", "r4age", "r5age", "r6age", "r7age","c1r4mtsc", "c4r4mtsc", "c5r4mtsc", "c6r4mtsc", "c7r4mtsc", "w1povrty","w1povrty","w3povrty", "w5povrty", "w8povrty","wkmomed", "s2_id", "p2fsstat","p5fsstat","p6fsstat","p7fsstat", "cregion")
eclsk<-eclsk[,myvars]

#recode the outcome: math score at each of the 5 waves
eclsk$math1<-ifelse(eclsk$c1r4mtsc==-9,NA, eclsk$c1r4mtsc)
eclsk$math2<-ifelse(eclsk$c4r4mtsc==-9,NA, eclsk$c4r4mtsc)
eclsk$math3<-ifelse(eclsk$c5r4mtsc==-9,NA, eclsk$c5r4mtsc)
eclsk$math4<-ifelse(eclsk$c6r4mtsc==-9,NA, eclsk$c6r4mtsc)
eclsk$math5<-ifelse(eclsk$c7r4mtsc==-9,NA, eclsk$c7r4mtsc)

eclsk$age1<-ifelse(eclsk$r1_kage==-9, NA, eclsk$r1_kage/12)
eclsk$age2<-ifelse(eclsk$r4age==-9, NA, eclsk$r4age/12)
#for the later waves, NCES groups the ages into ranges of months (e.g., 1 = <105 months, 2 = 105 to 108 months), so I fix each age at the midpoint of its interval and convert it to years by dividing by 12
eclsk$age3<-recode(eclsk$r5age,recodes="1=105; 2=107; 3=109; 4=112; 5=115; 6=117; -9=NA")/12
eclsk$age4<-recode(eclsk$r6age,recodes="1=118; 2=129; 3=135; 4=141; 5=155; -9=NA")/12
eclsk$age5<-recode(eclsk$r7age,recodes="1=155; 2=166; 3=172; 4=178; 5=192; -9=NA")/12

eclsk$pov1<-ifelse(eclsk$w1povrty==1,1,0)
eclsk$pov2<-ifelse(eclsk$w1povrty==1,1,0)
eclsk$pov3<-ifelse(eclsk$w3povrty==1,1,0)
eclsk$pov4<-ifelse(eclsk$w5povrty==1,1,0)
eclsk$pov5<-ifelse(eclsk$w5povrty==1,1,0)

#Recode race into dummy variables, with white, non-Hispanic as the reference category
eclsk$hisp<-recode(eclsk$race, recodes="3:4=1;-9=NA; else=0")
eclsk$black<-recode(eclsk$race, recodes="2=1;-9=NA; else=0")
eclsk$asian<-recode(eclsk$race, recodes="5=1;-9=NA; else=0")
eclsk$nahn<-recode(eclsk$race, recodes="6:7=1;-9=NA; else=0")
eclsk$other<-recode(eclsk$race, recodes="8=1;-9=NA; else=0")
eclsk$male<-recode(eclsk$gender, recodes="1=1; 2=0; -9=NA")
#Mother's education dummies: less than high school and more than high school (high school = reference)
eclsk$mlths<-recode(eclsk$wkmomed, recodes="1:2=1; 3:9=0; else=NA")
eclsk$mgths<-recode(eclsk$wkmomed, recodes="1:3=0; 4:9=1; else=NA")

To analyze the data longitudinally, we need to reshape them from the current “wide” format (repeated measures in columns) to a “long” format (repeated observations in rows). The reshape() function lets us do this easily: we specify the repeated measures and time-varying covariates, as well as the time-constant covariates.
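As a quick illustration of what reshape() does, here is a minimal toy sketch (the data frame and variable names are made up for illustration only, not part of the ECLS-K analysis):

#toy wide data: two children, two waves of a score, one time-constant covariate
toy<-data.frame(id=1:2, score1=c(10, 12), score2=c(15, 18), sex=c("m", "f"))
reshape(toy, idvar="id", varying=list(c("score1", "score2")),
        v.names="score", times=1:2, direction="long")
#each id now contributes one row per wave, with sex carried along as a time-constant covariate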

This takes a long time to run with the full sample, so I subset to children from the South census region (cregion==3).

eclsk<-subset(eclsk, cregion==3)
e.long<-reshape(eclsk, idvar="childid",
                varying=list(math=c("math1", "math2", "math3", "math4", "math5"),
                             age=c("age1", "age2", "age3", "age4", "age5"),
                             pov=c("pov1", "pov2", "pov3", "pov4", "pov5")),
                times=1:5, direction="long",
                drop=names(eclsk)[c(4:19, 22:25)])
e.long<-e.long[order(e.long$childid, e.long$time),]
head(e.long, n=20)
##             childid gender race wkmomed s2_id cregion hisp black asian
## 0536001C.1 0536001C      1    2       5  <NA>       3    0     1     0
## 0536001C.2 0536001C      1    2       5  <NA>       3    0     1     0
## 0536001C.3 0536001C      1    2       5  <NA>       3    0     1     0
## 0536001C.4 0536001C      1    2       5  <NA>       3    0     1     0
## 0536001C.5 0536001C      1    2       5  <NA>       3    0     1     0
## 0536002C.1 0536002C      2    1       2  0536       3    0     0     0
## 0536002C.2 0536002C      2    1       2  0536       3    0     0     0
## 0536002C.3 0536002C      2    1       2  0536       3    0     0     0
## 0536002C.4 0536002C      2    1       2  0536       3    0     0     0
## 0536002C.5 0536002C      2    1       2  0536       3    0     0     0
## 0536003C.1 0536003C      1    1       5  0536       3    0     0     0
## 0536003C.2 0536003C      1    1       5  0536       3    0     0     0
## 0536003C.3 0536003C      1    1       5  0536       3    0     0     0
## 0536003C.4 0536003C      1    1       5  0536       3    0     0     0
## 0536003C.5 0536003C      1    1       5  0536       3    0     0     0
## 0536004C.1 0536004C      2    1       3  0536       3    0     0     0
## 0536004C.2 0536004C      2    1       3  0536       3    0     0     0
## 0536004C.3 0536004C      2    1       3  0536       3    0     0     0
## 0536004C.4 0536004C      2    1       3  0536       3    0     0     0
## 0536004C.5 0536004C      2    1       3  0536       3    0     0     0
##            nahn other male mlths mgths time  math1      age1 pov1
## 0536001C.1    0     0    1     0     1    1 28.422  5.464167    1
## 0536001C.2    0     0    1     0     1    2     NA        NA    1
## 0536001C.3    0     0    1     0     1    3     NA        NA   NA
## 0536001C.4    0     0    1     0     1    4     NA        NA   NA
## 0536001C.5    0     0    1     0     1    5     NA        NA   NA
## 0536002C.1    0     0    0     1     0    1 38.180  5.600000    1
## 0536002C.2    0     0    0     1     0    2 26.281  7.180833    1
## 0536002C.3    0     0    0     1     0    3 30.951  9.083333    0
## 0536002C.4    0     0    0     1     0    4     NA        NA   NA
## 0536002C.5    0     0    0     1     0    5     NA        NA   NA
## 0536003C.1    0     0    1     0     1    1 34.458  5.822500    0
## 0536003C.2    0     0    1     0     1    2 39.385  7.316667    0
## 0536003C.3    0     0    1     0     1    3 47.576  9.333333    0
## 0536003C.4    0     0    1     0     1    4 48.059 11.250000    0
## 0536003C.5    0     0    1     0     1    5     NA        NA    0
## 0536004C.1    0     0    0     0     0    1 36.472  5.358333    0
## 0536004C.2    0     0    0     0     0    2     NA        NA    0
## 0536004C.3    0     0    0     0     0    3     NA        NA   NA
## 0536004C.4    0     0    0     0     0    4     NA        NA   NA
## 0536004C.5    0     0    0     0     0    5     NA        NA   NA

Models

The first model is a simple random-intercept model for each child’s mean math score, with a population-level (not child-specific) trajectory for age.
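In equation form, the JAGS code below corresponds (roughly) to:

$$\text{math}_{ij} \sim \text{Normal}(\mu_{ij}, \sigma^2), \qquad \mu_{ij} = \mathbf{x}_{ij}'\boldsymbol{\beta} + u_j, \qquad u_j \sim \text{Normal}(0, \sigma_u^2)$$

where $\mathbf{x}_{ij}$ collects the race, mother's education, poverty, and age terms for observation $i$ on child $j$, and $u_j$ is the child-specific intercept deviation. The regression coefficients get a multivariate normal prior (with a Wishart prior on their precision), and the standard deviations $\sigma$ and $\sigma_u$ get uniform priors.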

model1<-"
model{

#Likelihood
  for( i in 1:n)
    {
     math[i]~dnorm(mu[i], tau)
      mu[i]<-b[1]+b[2]*black[i]+b[3]*hisp[i]+b[4]*asian[i]+b[5]*other[i]+b[6]*lths[i]+b[7]*gths[i]+b[8]*pov[i]+b[9]*age[i]+u[childnum[i]]
    }

#priors
#Prior for random intercept
for (j in 1:nkids)
  {
    u[j] ~ dnorm(0, tauu)
  }


#regression effects, MVN prior
b[1:9]~dmnorm(meanb[], prec.beta[,])
for(j in 1:9){
meanb[j]~dnorm(0, .0001)
}
prec.beta[1:9,1:9]~dwish(Obeta[,], 9)
for(j in 1:9){ for(k in 1:9){ Obeta[j,k] <- equals(j,k)*.1 } }


#priors on the residual and random-intercept standard deviations
tau<-pow(sd, -2)
sd~dunif(0,100)
tauu<-pow(sdu, -2)
sdu~dunif(0,100)

}
"
e.long<-subset(e.long, subset = !is.na(math1) & !is.na(black) & !is.na(male) & !is.na(pov1) & !is.na(mlths) & !is.na(age1))
nkids<-table(e.long$childid)
head(nkids) #Number of observed waves (rows) for each child
## 
## 0536001C 0536002C 0536003C 0536004C 0536005C 0536006C 
##        1        3        4        1        3        2
e.long$childnum<-rep(1:length(unique(e.long$childid)), nkids)
dat<-list(math=as.numeric(scale(e.long$math1, center=T, scale=T)), sex=e.long$male, black=e.long$black, hisp=e.long$hisp, other=e.long$other, asian=e.long$asian,pov=e.long$pov1,lths=e.long$mlths,gths=e.long$mgths,age=e.long$age1, childnum=e.long$childnum,n=length(e.long$math1), nkids=length(unique(e.long$childid)))
lapply(dat , summary)
## $math
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -5.17600 -0.63890  0.05798  0.00000  0.67650  3.68900 
## 
## $sex
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.5162  1.0000  1.0000 
## 
## $black
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2276  0.0000  1.0000 
## 
## $hisp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.1483  0.0000  1.0000 
## 
## $other
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.02149 0.00000 1.00000 
## 
## $asian
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.00000 0.02532 0.00000 1.00000 
## 
## $pov
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2281  0.0000  1.0000 
## 
## $lths
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.147   0.000   1.000 
## 
## $gths
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  1.0000  0.5262  1.0000  1.0000 
## 
## $age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   4.500   6.761   8.750   8.950  11.250  16.000 
## 
## $childnum
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1    1356    2675    2687    4036    5354 
## 
## $n
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19080   19080   19080   19080   19080   19080 
## 
## $nkids
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5354    5354    5354    5354    5354    5354
#Set some initial values
init.rng1<-list(".RNG.seed" = 1234, ".RNG.name" = "base::Mersenne-Twister", sd=.1, sdu=.1)
init.rng2<-list(".RNG.seed" = 5678, ".RNG.name" = "base::Mersenne-Twister", sd=.5, sdu=.5)

#Initialize the model
mod1<-jags.model(file=textConnection(model1), data=dat, n.chains=2,inits =list(init.rng1, init.rng2) )
## Warning in jags.model(file = textConnection(model1), data = dat, n.chains
## = 2, : Unused variable "sex" in data
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 216412
## 
## Initializing model
#burn in 
update(mod1, 5000)

#collect samples of the parameters
samps1<-coda.samples(mod1, variable.names=c("b", "sd", "sdu"), n.iter=5000, thin=1)

#Effective sample size for each parameter
effectiveSize(samps1)
##      b[1]      b[2]      b[3]      b[4]      b[5]      b[6]      b[7] 
##  882.7850  720.6274  597.1324  584.2845  576.8167  590.1810  634.0036 
##      b[8]      b[9]        sd       sdu 
## 1926.7616 7152.3877 3930.8941 3587.5588
#Numerical summary of each parameter:
summary(samps1, quantiles =  c(.025, .05, .95, .975))
## 
## Iterations = 6001:11000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 5000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##           Mean       SD  Naive SE Time-series SE
## b[1]  0.035865 0.024942 2.494e-04      8.412e-04
## b[2] -0.593525 0.028035 2.803e-04      1.044e-03
## b[3] -0.152945 0.032954 3.295e-04      1.348e-03
## b[4]  0.020740 0.071589 7.159e-04      2.964e-03
## b[5] -0.176288 0.078025 7.802e-04      3.245e-03
## b[6] -0.309745 0.035888 3.589e-04      1.477e-03
## b[7]  0.410148 0.024353 2.435e-04      9.680e-04
## b[8] -0.093710 0.017976 1.798e-04      4.098e-04
## b[9] -0.005022 0.001436 1.436e-05      1.698e-05
## sd    0.517576 0.003083 3.083e-05      4.917e-05
## sdu   0.757076 0.008551 8.551e-05      1.431e-04
## 
## 2. Quantiles for each variable:
## 
##           2.5%        5%       95%     97.5%
## b[1] -0.012567 -0.005553  0.076729  0.084336
## b[2] -0.648325 -0.639522 -0.546979 -0.538484
## b[3] -0.215880 -0.206543 -0.098854 -0.088803
## b[4] -0.115255 -0.094319  0.139368  0.161513
## b[5] -0.325417 -0.303909 -0.046833 -0.023834
## b[6] -0.379075 -0.369218 -0.249675 -0.238929
## b[7]  0.361830  0.369295  0.450098  0.456452
## b[8] -0.128510 -0.122623 -0.063876 -0.057577
## b[9] -0.007829 -0.007365 -0.002661 -0.002171
## sd    0.511537  0.512429  0.522653  0.523574
## sdu   0.740708  0.743258  0.771640  0.774012
#Gelman-Rubin diagnostic (potential scale reduction factors)
gelman.diag(samps1,  multivariate = F)
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## b[1]       1.00       1.00
## b[2]       1.01       1.03
## b[3]       1.00       1.01
## b[4]       1.00       1.00
## b[5]       1.01       1.03
## b[6]       1.00       1.00
## b[7]       1.00       1.00
## b[8]       1.00       1.00
## b[9]       1.00       1.00
## sd         1.00       1.00
## sdu        1.00       1.00
#DIC
dic.samples(mod1, n.iter = 1000,type = "pD")
## Mean deviance:  29014 
## penalty 4643 
## Penalized deviance: 33657
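Before moving on, a visual check of convergence can complement these numerical diagnostics; coda's plotting method draws traceplots and posterior densities for all monitored parameters (output not shown here):

#traceplots and density plots for both chains of model 1
plot(samps1)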

The second model adds a child-specific linear trajectory for age (random slope) to the child-specific mean (random intercept).
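In equation form, the mean structure now includes both a child-specific intercept and a child-specific age slope:

$$\mu_{ij} = \mathbf{x}_{ij}'\boldsymbol{\beta} + u_{j1}\,\text{age}_{ij} + u_{j2}, \qquad (u_{j1}, u_{j2})' \sim \text{MVN}(\boldsymbol{\mu}_u, \Sigma)$$

Note that $\mathbf{x}_{ij}'\boldsymbol{\beta}$ no longer contains a fixed intercept or a fixed age effect; the means $\boldsymbol{\mu}_u$ of the random effects play that role, and $\rho_{12} = \Sigma_{12}/\sqrt{\Sigma_{11}\Sigma_{22}}$ is the correlation between a child's slope and intercept.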

model2<-"
model{

#Likelihood
  for( i in 1:n)
    {
     math[i]~dnorm(mu[i], tau)
      mu[i]<-b[1]*black[i]+b[2]*hisp[i]+b[3]*asian[i]+b[4]*other[i]+b[5]*lths[i]+b[6]*gths[i]+b[7]*pov[i]+u[childnum[i],1]*age[i]+u[childnum[i],2]
    }

#priors
#MVN Prior for random intercepts and slopes
for (j in 1:nkids)
  {
    u[j, 1:2] ~ dmnorm(meanu[], prec.Sigma[,])
  }
meanu[1]~dnorm(0, .001)
meanu[2]~dnorm(0, .001)
prec.Sigma[1:2, 1:2] ~ dwish(Omega[,], 2)  
Sigma[1:2, 1:2]<-inverse(prec.Sigma[,]) 
rho12<-Sigma[1,2]/ sqrt(Sigma[1,1]* Sigma[2,2])
#Diagonal scale matrix for the Wishart prior on the random-effects precision
for (j in 1:2){ for (k in 1:2){  Omega[j,k] <-equals(j,k)*.1 } }

#regression effects, MVN prior
b[1:7]~dmnorm(meanb[], prec.beta[,])
for(j in 1:7){
meanb[j]~dnorm(0, .0001)
}
prec.beta[1:7,1:7]~dwish(Obeta[,], 7)
for(j in 1:7){ for(k in 1:7){ Obeta[j,k] <- equals(j,k)*.1 } }

#prior on residual precision
tau<-pow(sd, -2)
sd~dunif(0,100)


}
"


#Set some initial values
init.rng1<-list(".RNG.seed" = 1234, ".RNG.name" = "base::Mersenne-Twister", sd=.1)
init.rng2<-list(".RNG.seed" = 5678, ".RNG.name" = "base::Mersenne-Twister", sd=.5)

#initialize the model
mod2<-jags.model(file=textConnection(model2), data=dat, n.chains=2,inits =list(init.rng1, init.rng2) )
## Warning in jags.model(file = textConnection(model2), data = dat, n.chains
## = 2, : Unused variable "sex" in data
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 245122
## 
## Initializing model
#burn in 
update(mod2, 5000)

#collect samples of the parameters
samps2<-coda.samples(mod2, variable.names=c("b", "sd", "Sigma", "rho12"), n.iter=5000, thin=1)

#effective sample size for each parameter
effectiveSize(samps2)
## Sigma[1,1] Sigma[2,1] Sigma[1,2] Sigma[2,2]       b[1]       b[2] 
##   689.7690   862.1431   862.1431  1188.0520   472.0621   405.7380 
##       b[3]       b[4]       b[5]       b[6]       b[7]      rho12 
##   496.4664   462.1116   374.9937   213.7298  1260.9138  1114.6249 
##         sd 
##  2064.9716
#Numerical summary of each parameter:
summary(samps2, quantiles =  c(.025, .05, .95, .975))
## 
## Iterations = 6001:11000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 5000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                Mean        SD  Naive SE Time-series SE
## Sigma[1,1]  0.00579 0.0003092 3.092e-06      1.176e-05
## Sigma[2,1] -0.05194 0.0031380 3.138e-05      1.066e-04
## Sigma[1,2] -0.05194 0.0031380 3.138e-05      1.066e-04
## Sigma[2,2]  1.04687 0.0377018 3.770e-04      1.101e-03
## b[1]       -0.59219 0.0279609 2.796e-04      1.285e-03
## b[2]       -0.15372 0.0337498 3.375e-04      1.687e-03
## b[3]        0.03416 0.0705059 7.051e-04      3.199e-03
## b[4]       -0.16535 0.0776084 7.761e-04      3.616e-03
## b[5]       -0.30248 0.0367289 3.673e-04      1.898e-03
## b[6]        0.40717 0.0248319 2.483e-04      1.726e-03
## b[7]       -0.10730 0.0189664 1.897e-04      5.345e-04
## rho12      -0.66673 0.0154052 1.541e-04      4.633e-04
## sd          0.46588 0.0032848 3.285e-05      7.321e-05
## 
## 2. Quantiles for each variable:
## 
##                 2.5%        5%       95%     97.5%
## Sigma[1,1]  0.005212  0.005299  0.006318  0.006424
## Sigma[2,1] -0.058309 -0.057170 -0.046899 -0.045954
## Sigma[1,2] -0.058309 -0.057170 -0.046899 -0.045954
## Sigma[2,2]  0.974739  0.985370  1.109618  1.122984
## b[1]       -0.646077 -0.638113 -0.546435 -0.537822
## b[2]       -0.219594 -0.208227 -0.097651 -0.086651
## b[3]       -0.106093 -0.083803  0.148285  0.170299
## b[4]       -0.315406 -0.291578 -0.037502 -0.012409
## b[5]       -0.373772 -0.363276 -0.242731 -0.230909
## b[6]        0.359955  0.366694  0.450266  0.458566
## b[7]       -0.144226 -0.138809 -0.076306 -0.070219
## rho12      -0.696095 -0.691641 -0.640746 -0.635212
## sd          0.459525  0.460565  0.471271  0.472399
#Gelman-Rubin diagnostic
gelman.diag(samps2,  multivariate = F)
## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## Sigma[1,1]       1.01       1.06
## Sigma[2,1]       1.01       1.04
## Sigma[1,2]       1.01       1.04
## Sigma[2,2]       1.01       1.02
## b[1]             1.01       1.04
## b[2]             1.00       1.00
## b[3]             1.00       1.01
## b[4]             1.00       1.01
## b[5]             1.00       1.02
## b[6]             1.01       1.04
## b[7]             1.00       1.01
## rho12            1.01       1.03
## sd               1.00       1.01
#DIC
dic.samples(mod2, n.iter=1000, type="pD")
## Mean deviance:  25006 
## penalty 6457 
## Penalized deviance: 31463
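The penalized deviance is noticeably lower for the random-slope model (about 31,463 vs. 33,657 for the random-intercept model), suggesting the child-specific age trajectories are worth the extra parameters. For a more direct comparison, rjags can difference two DIC objects; a minimal sketch (a fresh set of DIC samples, so the exact numbers will differ slightly from those above):

#draw DIC samples for each model and difference them
dic1<-dic.samples(mod1, n.iter=1000, type="pD")
dic2<-dic.samples(mod2, n.iter=1000, type="pD")
diffdic(dic1, dic2) #difference in penalized deviance between the two models (see ?diffdic for the sign convention)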