This example will go through the basics of using Stan by way of the brms
library, for estimation of linear and generalized linear mixed models.
We will use the ECLS-K 2011 data for our example, and use height for age/sex z-score as a continous outcome, and short for age status outcome (height for age z < -1) as a dichotomous outcome.
There is an overview on using brms
for fitting various models. You can find these here. The package rstanarm
is very similar and better documented, and can be found here, with numerous tutorials on that page.
Both packages serve as front-ends to the Stan library for MCMC. People new to Stan can often be put off by its syntax and model construction. Both of these packages allow us to use R syntax that we are accustomed to from functions like glmer()
and lmer()
to fit models. We can also see the Stan code from the model that is generated, so we can learn how Stan works inside.
First we load our data and recode some variables:
load("~/Google Drive/dem7903_App_Hier/data/eclsk_11.Rdata")
names(eclsk11)<-tolower(names(eclsk11))
library (car)
library(brms)
library(pscl)
#get out only the variables I'm going to use for this example
myvars<-c("childid", "x_chsex_r", "x1locale", "x_raceth_r", "x2povty", "x12par1ed_i","p1curmar", "x1htotal", "x1mscalk1", "x2mscalk1", "x3mscalk1", "x4mscalk1", "p1hscale", "p2hscale", "p4hscale","x2fsstat2","x4fsstat2", "x12sesl","x4sesl_i", "p2parct1", "p2parct2", "s1_id", "p2safepl","x2krceth","p1o2near", "x_distpov" , "w1c0", "w1p0", "w2p0", "w1c0str", "w1p0str","w4c4p_40","w4c4p_4str", "w4c4p_4psu", "w1c0psu", "w1p0psu", "x1height","x2height","x3height","x4height", "x1kage_r","x2kage_r","x3age","x4age")
#subset the data
eclsk.sub<-eclsk11[,myvars]
rm(eclsk11); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1682168 89.9 2637877 140.9 2164898 115.7
## Vcells 2999464 22.9 219469355 1674.5 182880303 1395.3
#Longitudinal variables
#recode our outcomes, the first is the child's math standardized test score in Kindergarten
eclsk.sub$math1<-ifelse(eclsk.sub$x1mscalk1<0, NA, eclsk.sub$x1mscalk1)
eclsk.sub$math2<-ifelse(eclsk.sub$x2mscalk1<0, NA, eclsk.sub$x2mscalk1)
#eclsk.sub$math3<-ifelse(eclsk.sub$x3mscalk1<0, NA, eclsk.sub$x3mscalk1)
eclsk.sub$math4<-ifelse(eclsk.sub$x4mscalk1<0, NA, eclsk.sub$x4mscalk1)
#Second outcome is child's height for age, continuous outcome
eclsk.sub$height1<-ifelse(eclsk.sub$x1height<=-7, NA, eclsk.sub$x1height)
eclsk.sub$height2<-ifelse(eclsk.sub$x2height<=-7, NA, eclsk.sub$x2height)
#eclsk.sub$height3<-ifelse(eclsk.sub$x3height<=-7, NA, eclsk.sub$x3height)
eclsk.sub$height4<-ifelse(eclsk.sub$x4height<=-7, NA, eclsk.sub$x4height)
#Age at each wave
eclsk.sub$age_yrs1<-ifelse(eclsk.sub$x1kage_r<0, NA, eclsk.sub$x1kage_r/12)
eclsk.sub$age_yrs2<-ifelse(eclsk.sub$x2kage_r<0, NA, eclsk.sub$x2kage_r/12)
#eclsk.sub$age_yrs3<-ifelse(eclsk.sub$x3age<0, NA, eclsk.sub$x3age/12)
eclsk.sub$age_yrs4<-ifelse(eclsk.sub$x4age<0, NA, eclsk.sub$x4age/12)
eclsk.sub<- eclsk.sub[is.na(eclsk.sub$age_yrs1)==F, ]
#Height for age z score standardized by sex and age
eclsk.sub$height_z1<-ave(eclsk.sub$height1, as.factor(paste(round(eclsk.sub$age_yrs1, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z2<-ave(eclsk.sub$height2, as.factor(paste(round(eclsk.sub$age_yrs2, 1.5), eclsk.sub$male)), FUN=scale)
#eclsk.sub$height_z3<-ave(eclsk.sub$height3, as.factor(paste(round(eclsk.sub$age_yrs3, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z4<-ave(eclsk.sub$height4, as.factor(paste(round(eclsk.sub$age_yrs4, 1.5), eclsk.sub$male)), FUN=scale)
#Household food insecurity, dichotomous outcome
#This outcome is only present at two waves
eclsk.sub$foodinsec1<-recode(eclsk.sub$x2fsstat2, recodes="2:3=1; 1=0; else=NA")
eclsk.sub$foodinsec2<-recode(eclsk.sub$x4fsstat2, recodes="2:3=1; 1=0; else=NA")
#Child health assessment Excellent to poor , ordinal outcome
eclsk.sub$chhealth1<-ifelse(eclsk.sub$p1hscale<0, NA, eclsk.sub$p1hscale)
eclsk.sub$chhealth2<-ifelse(eclsk.sub$p2hscale<0, NA, eclsk.sub$p2hscale)
eclsk.sub$chhealth4<-ifelse(eclsk.sub$p4hscale<0, NA, eclsk.sub$p4hscale)
#SES
eclsk.sub$hhses1<-ifelse(eclsk.sub$x12sesl==-9, NA, scale(eclsk.sub$x12sesl))
eclsk.sub$hhses4<-ifelse(eclsk.sub$x4sesl_i==-9, NA, scale(eclsk.sub$x4sesl_i))
#Non time varying variables
#First we recode some Child characteristics
#Child's sex: recode as male =1
eclsk.sub$male<-recode(eclsk.sub$x_chsex_r, recodes="1=1; 2=0; -9=NA")
#Recode race with white, non Hispanic as reference using dummy vars
eclsk.sub$hisp<-recode (eclsk.sub$x_raceth_r, recodes="3:4=1;-9=NA; else=0")
eclsk.sub$black<-recode (eclsk.sub$x_raceth_r, recodes="2=1;-9=NA; else=0")
eclsk.sub$asian<-recode (eclsk.sub$x_raceth_r, recodes="5=1;-9=NA; else=0")
eclsk.sub$nahn<-recode (eclsk.sub$x_raceth_r, recodes="6:7=1;-9=NA; else=0")
eclsk.sub$other<-recode (eclsk.sub$x_raceth_r, recodes="8=1;-9=NA; else=0")
#Then we recode some parent/mother characteristics
#Mother's education, recode as 2 dummys with HS = reference
eclsk.sub$lths<-recode(eclsk.sub$x12par1ed_i, recodes = "0:2=1; 3:8=0; else = NA")
eclsk.sub$gths<-recode(eclsk.sub$x12par1ed_i, recodes = "1:3=0; 4:8=1; else =NA")
#marital status, recode as 2 dummys, ref= married
eclsk.sub$single<-recode(eclsk.sub$p1curmar, recodes="4=1; -7:-9=NA; else=0")
eclsk.sub$notmar<-recode(eclsk.sub$p1curmar, recodes="2:3=1; -7:-9=NA; else=0")
#Then we do some household level variables
#Urban school location = 1
eclsk.sub$urban<-recode(eclsk.sub$x1locale, recodes = "1:3=1; 4=0; -1:-9=NA")
#poverty level in poverty = 1
eclsk.sub$pov<-recode(eclsk.sub$x2povty , recodes ="1:2=1; 3=0; -9=NA")
#Household size
eclsk.sub$hhsize<-eclsk.sub$x1htotal
#school % minority student body
eclsk.sub$minorsch<-ifelse(eclsk.sub$x2krceth <0, NA, eclsk.sub$x2krceth/10)
#Unsafe neighborhood
eclsk.sub$unsafe<-recode(eclsk.sub$p2safepl , recodes = "1:2='unsafe'; 3='safe'; else=NA", as.factor.result = T)
#school district poverty
eclsk.sub$dist_pov<-ifelse(eclsk.sub$x_distpov==-9, NA, scale(eclsk.sub$x_distpov))
#mean centered weights for wave 1
eclsk.sub$wts1<-eclsk.sub$w1c0/mean(eclsk.sub$w1c0, na.rm=T)
eclsk.sub$wtsp1<-eclsk.sub$w1p0/mean(eclsk.sub$w1p0, na.rm=T)
This is an example of how to use survey design weights with linear mixed models. Here is an example of using weights that sum to the strata-specific sample size. First, I make an id that is the strata, then I follow the first method described by Carle (2009).
eclsk.sub$sampleid<-eclsk.sub$w1c0str
#within each sampling unit, sum the weights
wts<-tapply(eclsk.sub$w1c0,eclsk.sub$sampleid,sum)
#make a data frame from this
wts<-data.frame(id=names(unlist(wts)), wt=unlist(wts))
#get the unique sampling location ids'
t1<-as.data.frame(table(eclsk.sub$sampleid))
#put all of this into a data set
wts2<-data.frame(ids=wts$id, sumwt=wts$wt, jn=t1$Freq)
head(wts2)
## ids sumwt jn
## 1 1 63224.28 293
## 2 2 15747.17 67
## 3 3 23367.05 114
## 4 4 28346.22 129
## 5 5 36348.00 121
## 6 6 34575.17 169
#merge all of this back to the original data file
eclsk.sub<-merge(eclsk.sub, wts2, by.x="sampleid", by.y="ids", all.x=T)
eclsk.sub$swts<-eclsk.sub$w1c0*(eclsk.sub$jn/eclsk.sub$sumwt)
#compare standardized weights to old person weights
hist(eclsk.sub$swts); summary(eclsk.sub$swts)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0561 0.7025 0.9494 1.0000 1.1910 4.1870 28
hist(eclsk.sub$w1c0); summary(eclsk.sub$w1c0)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 172.5 240.7 255.6 302.8 958.8
The First model will fit a model that considers the individual and family level variables and a random intercept only. One may state a reserach question such as:
“Do children living in households in poverty have lower height for age/sex z-scores than children living in households that are not in poverty, net of other demographic and socioeconomic factors?”
So the hypothesis of interest here involves us testing whether the effect of household poverty shows a \(\beta\) coefficient different from zero.
If we were to write the symbolic form of this model it would be:
\[y_{ij} = \beta_{0j} + \sum {\beta_k x_{ik}} + e_{ij}\] \[\beta_{0j} = \beta_0 + u_j\] \[e_{ij} \sim N(0, \sigma^2_e)\] \[u_j \sim N(0, \sigma^2_u)\]
In terms of priors, we need them for each fixed effect \(\beta\) and for \(\sigma^2\). We can a Normal prior for \(\beta\) parameters, since they can be both positive or negative, but the null hypothesis is that they are 0. Here we will use half-cauchy priors for the residual variance and the between-intercept variance. and priors:
\(\beta \sim N(0, 10)\)
\(\sigma_e \sim \text {half-Cauchy(5)}\)
\(\sigma_u \sim \text {half-Cauchy(5)}\)
prior<-c(set_prior("normal(0,10)", class="b"),#prior for the beta's
set_prior("cauchy(0,5)", class="sigma"),#prior for the residual std. deviation
set_prior("cauchy(0,5)", class="sd", group="s1_id"))#prior for the random intercept std. deviation
fit.1<-brm(height_z1|weights(swts)~male+hisp+black+asian+nahn+other+lths+gths+single+notmar+urban+hhsize+pov+(1|s1_id)+(1|w1c0str:w1c0psu), data=eclsk.sub, family=gaussian(),
prior = prior,
warmup=500, #burnin for 500 interations for each chain = 1000 burnin
iter=1000, chains=2, #2*1000 =2000 - 1000 burnin = 1000 total iterations
cores=2,seed = 1115, #number of computer cores, 1 per chain is good.
save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_lmm_stan.txt")
## Compiling the C++ model
print(fit.1, digits=3)
## Family: gaussian (identity)
## Formula: height_z1 | weights(swts) ~ male + hisp + black + asian + nahn + other + lths + gths + single + notmar + urban + hhsize + pov + (1 | s1_id) + (1 | w1c0str:w1c0psu)
## Data: eclsk.sub (Number of observations: 10665)
## Samples: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 1000
## WAIC: Not computed
##
## Random Effects:
## ~s1_id (Number of levels: 853)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.152 0.016 0.119 0.184 181 1.007
##
## ~w1c0str:w1c0psu (Number of levels: 234)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.111 0.02 0.069 0.149 141 1.023
##
## Fixed Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.050 0.047 -0.038 0.141 1000 0.999
## male 0.155 0.019 0.118 0.194 1000 1.008
## hisp -0.046 0.029 -0.099 0.012 679 1.004
## black 0.378 0.034 0.313 0.440 1000 1.000
## asian -0.271 0.047 -0.358 -0.183 1000 0.999
## nahn 0.286 0.096 0.108 0.471 1000 1.001
## other 0.079 0.044 -0.002 0.167 1000 1.001
## lths 0.007 0.038 -0.070 0.079 973 0.999
## gths 0.046 0.026 -0.007 0.096 966 0.999
## single -0.048 0.030 -0.106 0.015 1000 0.999
## notmar -0.018 0.031 -0.078 0.043 1000 0.998
## urban 0.002 0.008 -0.014 0.017 773 1.000
## hhsize -0.027 0.008 -0.040 -0.011 1000 0.998
## pov -0.087 0.023 -0.131 -0.041 1000 1.003
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma(height_z1) 0.956 0.007 0.942 0.969 1000 0.998
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
rstan_gg_options(fill = "skyblue", color = "skyblue4", pt_color = "red")
stanplot(fit.1,pars=c("sigma_height_z1","sd_s1_id_Intercept"), type="dens" )+ggtitle("Posterior Density for Variances")
stanplot(fit.1,pars="sd_s1_id_Intercept", type="trace" )+ggtitle("Traceplot for Between school Intercept Variance")
stanplot(fit.1, pars = "^b", type = "plot", ci_level = 0.95)+ggtitle("95% Credible Intervals for fixed effects")
## ci_level: 0.95 (95% intervals)
## outer_level: 0.95 (95% intervals)
#We can do a hypothesis test for the poverty parameter
h0pov<-hypothesis(fit.1, "pov<0", class = "b")
h0pov
## Hypothesis Tests for class b:
## Estimate Est.Error l-95% CI u-95% CI Evid.Ratio
## pov < 0 -0.09 0.02 -Inf -0.05 999 *
## ---
## '*': The expected value under the hypothesis lies outside the 95% CI.
In terms of our research question, we have evidence that the poverty effect is negative, with a credible interval that does not contain 0, so we can conclude that our research hypothesis has support.
The next model will fit a model that considers a binary response of childhood growth stunting. . A research question corresponding to this could be:
“Do children living in households in poverty have have higher odds of growth stunting than children living in households that are not in poverty, net of other demographic and socioeconomic factors?”
So the hypothesis of interest here involves us testing whether the effec of household poverty shows a \(\beta\) coefficient different from zero.
If we were to write the symbolic form of this model it would be the logistic regression model: \[ln \frac {p_{ij}}{1-p{ij}} = \beta_{0j} + \sum {\beta_k x_{ik}} \] \[\beta_{0j} = \beta_0 + u_j\] \[u_j \sim N(0, \sigma^2_u)\]
In terms of priors, we need them for each fixed effect \(\beta\) and for \(\sigma^2_u\). We can a Normal prior for \(\beta\) parameters, since they can be both positive or negative, but the null hypothesis is that they are 0. Here we will use half-cauchy priors for the between-intercept variance, and there is no prior for the residual variance, becuase in the logistic model, this is the constant \(\frac{\pi^3}{3}\).
\(\beta \sim N(0, 10)\)
\(\sigma_u \sim \text {half-Cauchy(5)}\)
eclsk.sub$stunt<-ifelse(eclsk.sub$height_z1 < -1, 1,0)
#bayesian model
prior<-c(set_prior("normal(0,1)", class="b"),
set_prior("cauchy(0,5)", class="sd", group="s1_id"))
#no residual sigma for the binomial
fit.b<-brm(stunt|weights(swts)~male+hisp+black+asian+nahn+other+lths+gths+single+notmar+urban+hhsize+pov+(1|s1_id)+(1|w1c0str:w1c0psu), data=eclsk.sub, family=bernoulli(),
prior = prior,
warmup=500,
iter=1000, chains=2,
cores=2,seed = 1115,
save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_glmm_b_stan.txt")
print(fit.b, digits=3)
## Family: bernoulli (logit)
## Formula: stunt | weights(swts) ~ male + hisp + black + asian + nahn + other + lths + gths + single + notmar + urban + hhsize + pov + (1 | s1_id) + (1 | w1c0str:w1c0psu)
## Data: eclsk.sub (Number of observations: 10665)
## Samples: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 1000
## WAIC: Not computed
##
## Random Effects:
## ~s1_id (Number of levels: 853)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.222 0.068 0.07 0.346 176 1.011
##
## ~w1c0str:w1c0psu (Number of levels: 234)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.171 0.058 0.049 0.279 162 1.015
##
## Fixed Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept -1.873 0.123 -2.109 -1.641 1000 0.999
## male -0.346 0.053 -0.446 -0.247 1000 1.002
## hisp 0.056 0.079 -0.104 0.217 897 1.001
## black -0.746 0.113 -0.978 -0.537 1000 1.001
## asian 0.616 0.121 0.370 0.845 1000 0.998
## nahn -0.121 0.269 -0.685 0.377 1000 1.000
## other 0.110 0.129 -0.143 0.357 1000 0.999
## lths 0.024 0.097 -0.160 0.217 1000 1.002
## gths -0.104 0.072 -0.243 0.034 1000 0.999
## single -0.058 0.092 -0.242 0.131 1000 0.998
## notmar -0.176 0.092 -0.357 0.001 1000 0.999
## urban -0.024 0.019 -0.062 0.012 1000 1.000
## hhsize 0.070 0.020 0.031 0.108 1000 0.998
## pov 0.191 0.067 0.057 0.322 1000 0.999
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
rstan_gg_options(fill = "skyblue", color = "skyblue4", pt_color = "red")
stanplot(fit.b,pars="sd_s1_id_Intercept", type="dens" )+ggtitle("Posterior Density for Between school Intercept Variance")
stanplot(fit.b,pars="sd_s1_id_Intercept", type="trace" )+ggtitle("Traceplot for Between school Intercept Variance")
stanplot(fit.b, pars = "^b", type = "plot", ci_level = 0.95)+ggtitle("95% Credible Intervals for fixed effects")
h0pov<-hypothesis(fit.b, "pov>0")
h0pov
## Hypothesis Tests for class b:
## Estimate Est.Error l-95% CI u-95% CI Evid.Ratio
## pov > 0 0.19 0.07 0.08 Inf Inf *
## ---
## '*': The expected value under the hypothesis lies outside the 95% CI.
samps<-fit.b$fit
#Bayesian p value for the poverty effect, combining the samples from chains 1 and 2
mean(c(samps@sim$samples[[1]]$b_pov, samps@sim$samples[[2]]$b_pov) >0)
## [1] 0.9935
#Credible interval for the odds ratio
or_pov<-exp(c(samps@sim$samples[[1]]$b_pov, samps@sim$samples[[2]]$b_pov))
quantile(or_pov, p=c(0, .025, .05, .5, .95, .975, 1))
## 0% 2.5% 5% 50% 95% 97.5% 100%
## 0.4160436 1.0443560 1.0825622 1.2098391 1.3640105 1.4007221 2.3214061
In terms of our research question, we have evidence that the poverty effect is positive, with a credible interval that does not contain 0, so we can conclude that our research hypothesis has support.
The second model considers both random intercepts and random slopes, in this case, i’m only considering a random slope for poverty status. This would imply the proposition:
“Do children living in poverty face a disadvantage in terms of their math test score equally in every school?”
If we were to write the symbolic form of this model it would be: \[y_{ij} = \beta_{0j} + \sum {\beta_k x_{ik}} + \beta_{j \ pov} \ pov_i + e_{ij}\] \[\beta_{0j} = \beta_0 + u_{1j}\] \[\beta_{j \ pov} = \beta_{pov} + u_{2j}\] \[\mathbf{u} \text{~} \mathbf{MVN (0, \Sigma )}\] \[\mathbf{\Sigma = \begin{bmatrix} \sigma_1&\sigma_{12} \\ \sigma_{21}&\sigma_{2} \end{bmatrix}}\]
the term \(\beta_{j \ pov} \ pov_i\) is the effect of poverty for the \(j^{th}\) school. Since we have two random effects in the model at the school level, it’s easier to model them with a correlation matrix, called a LKJ prior.
prior<-c(set_prior("normal(0,10)", class="b"),#prior for the beta's
set_prior("cauchy(0,5)", class="sigma"),#prior for the residual std. deviation
set_prior("cauchy(0,5)", class="sd", group="s1_id"),#prior for the random intercept std. deviation
set_prior("lkj(2)", class = "cor", group="s1_id"))
fit.2d<-brm(height_z1|weights(swts)~male+hisp+black+asian+nahn+other+lths+gths+single+notmar+urban+hhsize+hhses1+pov+(pov|s1_id)+(1|w1c0str:w1c0psu), data=eclsk.sub, family=gaussian(),
prior = prior,
warmup=500, #burnin for 500 interations for each chain = 1000 burnin
iter=1000, chains=2, #2*1000 =2000 - 1000 burnin = 1000 total iterations
cores=2,seed = 1115, #number of computer cores, 1 per chain is good.
save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_lmm_2d_stan.txt"
)
print(fit.2d, digits=3)
## Family: gaussian (identity)
## Formula: height_z1 | weights(swts) ~ male + hisp + black + asian + nahn + other + lths + gths + single + notmar + urban + hhsize + hhses1 + pov + (pov | s1_id) + (1 | w1c0str:w1c0psu)
## Data: eclsk.sub (Number of observations: 10665)
## Samples: 2 chains, each with iter = 1000; warmup = 500; thin = 1;
## total post-warmup samples = 1000
## WAIC: Not computed
##
## Random Effects:
## ~s1_id (Number of levels: 853)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.173 0.021 0.131 0.215 274 1.010
## sd(pov) 0.206 0.039 0.122 0.274 114 1.003
## cor(Intercept,pov) -0.523 0.154 -0.755 -0.155 108 1.002
##
## ~w1c0str:w1c0psu (Number of levels: 234)
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sd(Intercept) 0.104 0.02 0.064 0.143 155 0.999
##
## Fixed Effects:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept 0.048 0.045 -0.038 0.133 1000 1.000
## male 0.155 0.020 0.118 0.194 1000 1.000
## hisp -0.045 0.029 -0.103 0.011 1000 0.998
## black 0.382 0.034 0.317 0.446 1000 0.999
## asian -0.279 0.049 -0.372 -0.187 1000 0.999
## nahn 0.287 0.095 0.098 0.481 1000 1.001
## other 0.079 0.045 -0.005 0.163 1000 0.999
## lths 0.017 0.038 -0.051 0.086 1000 0.999
## gths 0.025 0.029 -0.030 0.083 1000 1.000
## single -0.041 0.030 -0.100 0.016 1000 1.005
## notmar -0.014 0.029 -0.073 0.042 1000 0.998
## urban 0.002 0.008 -0.014 0.018 1000 0.999
## hhsize -0.027 0.007 -0.042 -0.014 1000 0.999
## hhses1 0.034 0.019 -0.005 0.073 1000 1.001
## pov -0.063 0.026 -0.113 -0.014 1000 0.999
##
## Family Specific Parameters:
## Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma(height_z1) 0.951 0.007 0.939 0.965 1000 0.999
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
rstan_gg_options(fill = "skyblue", color = "skyblue4", pt_color = "red")
stanplot(fit.2d,pars="sigma_height_z1", type="dens" )+ggtitle("Posterior Density for Residual Variance")
stanplot(fit.2d,pars=c("sigma_height_z1","sd_s1_id_Intercept","sd_s1_id_pov"), type="dens" )+ggtitle("Posterior Density for Variances")
stanplot(fit.2d,pars="sd_s1_id_Intercept", type="trace" )+ggtitle("Traceplot for Between school Intercept Variance")
stanplot(fit.2d,pars="sd_s1_id_pov", type="trace" )+ggtitle("Traceplot for Between school poverty effect Variance")
stanplot(fit.2d,pars="cor_s1_id_Intercept_pov", type="dens" )+ggtitle("Posterior Density for correlation between school random slope and Intercept")
stanplot(fit.2d,pars="cor_s1_id_Intercept_pov", type="trace" )+ggtitle("Traceplot for correlation between school random slope and Intercept")
stanplot(fit.2d, pars = "^b", type = "plot", ci_level = 0.95)+ggtitle("95% Credible Intervals for fixed effects")