Bayesian Data Analysis 1

This example will go through the basics of using Stan by way of the brms library, for estimation of simple linear and generalized linear models. You must install brms first, using install.packages("brms").

We will use the ECLS-K 2011 data for our example, and use height for age/sex z-score as a continous outcome, and short for age status outcome (height for age z < -1) as a dichotomous outcome.

There is an overview on using brms for fitting various models. You can find these here. The package rstanarm is very similar and better documented, and can be found here, with numerous tutorials on that page.

Both packages serve as front-ends to the Stan library for MCMC. People new to Stan can often be put off by its syntax and model construction. Both of these packages allow us to use R syntax that we are accustomed to from functions like glm() and lmer() to fit models. We can also see the Stan code from the model that is generated, so we can learn how Stan works inside.

First we load our data and recode some variables:

load("~/Google Drive/dem7903_App_Hier/data/eclsk_11.Rdata")
names(eclsk11)<-tolower(names(eclsk11))
library (car)
#get out only the variables I'm going to use for this example

myvars<-c("childid", "x_chsex_r", "x1locale", "x_raceth_r", "x2povty", "x12par1ed_i","p1curmar", "x1htotal", "x1mscalk1", "x2mscalk1", "x3mscalk1", "x4mscalk1", "p1hscale", "p2hscale", "p4hscale","x2fsstat2","x4fsstat2", "x12sesl","x4sesl_i", "p2parct1", "p2parct2", "s1_id", "p2safepl","x2krceth","p1o2near", "x_distpov" , "w1c0", "w1p0", "w2p0", "w1c0str", "w1p0str","w4c4p_40","w4c4p_4str", "w4c4p_4psu",  "w1c0psu", "w1p0psu", "x1height","x2height","x3height","x4height", "x1kage_r","x2kage_r","x3age","x4age")

#subset the data
eclsk.sub<-eclsk11[,myvars]

rm(eclsk11); gc()

##           used (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells 1528965 81.7    2164898  115.7   1770749   94.6
## Vcells 2870395 21.9  219469625 1674.5 182791308 1394.6

#Longitudinal variables
#recode our outcomes, the  first is the child's math standardized test score  in Kindergarten
eclsk.sub$math1<-ifelse(eclsk.sub$x1mscalk1<0, NA, eclsk.sub$x1mscalk1)
eclsk.sub$math2<-ifelse(eclsk.sub$x2mscalk1<0, NA, eclsk.sub$x2mscalk1)
#eclsk.sub$math3<-ifelse(eclsk.sub$x3mscalk1<0, NA, eclsk.sub$x3mscalk1)
eclsk.sub$math4<-ifelse(eclsk.sub$x4mscalk1<0, NA, eclsk.sub$x4mscalk1)

#Second outcome is child's height for age, continuous outcome
eclsk.sub$height1<-ifelse(eclsk.sub$x1height<=-7, NA, eclsk.sub$x1height)
eclsk.sub$height2<-ifelse(eclsk.sub$x2height<=-7, NA, eclsk.sub$x2height)
#eclsk.sub$height3<-ifelse(eclsk.sub$x3height<=-7, NA, eclsk.sub$x3height)
eclsk.sub$height4<-ifelse(eclsk.sub$x4height<=-7, NA, eclsk.sub$x4height)

#Age at each wave
eclsk.sub$age_yrs1<-ifelse(eclsk.sub$x1kage_r<0, NA, eclsk.sub$x1kage_r/12)
eclsk.sub$age_yrs2<-ifelse(eclsk.sub$x2kage_r<0, NA, eclsk.sub$x2kage_r/12)
#eclsk.sub$age_yrs3<-ifelse(eclsk.sub$x3age<0, NA, eclsk.sub$x3age/12)
eclsk.sub$age_yrs4<-ifelse(eclsk.sub$x4age<0, NA, eclsk.sub$x4age/12)

eclsk.sub<- eclsk.sub[is.na(eclsk.sub$age_yrs1)==F, ]

#Height for age z score standardized by sex and age
eclsk.sub$height_z1<-ave(eclsk.sub$height1, as.factor(paste(round(eclsk.sub$age_yrs1, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z2<-ave(eclsk.sub$height2, as.factor(paste(round(eclsk.sub$age_yrs2, 1.5), eclsk.sub$male)), FUN=scale)
#eclsk.sub$height_z3<-ave(eclsk.sub$height3, as.factor(paste(round(eclsk.sub$age_yrs3, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z4<-ave(eclsk.sub$height4, as.factor(paste(round(eclsk.sub$age_yrs4, 1.5), eclsk.sub$male)), FUN=scale)


#Household food insecurity, dichotomous outcome
#This outcome is only present at two waves
eclsk.sub$foodinsec1<-recode(eclsk.sub$x2fsstat2, recodes="2:3=1; 1=0; else=NA")
eclsk.sub$foodinsec2<-recode(eclsk.sub$x4fsstat2, recodes="2:3=1; 1=0; else=NA")


#Child health assessment Excellent to poor , ordinal outcome
eclsk.sub$chhealth1<-ifelse(eclsk.sub$p1hscale<0, NA, eclsk.sub$p1hscale)
eclsk.sub$chhealth2<-ifelse(eclsk.sub$p2hscale<0, NA, eclsk.sub$p2hscale)
eclsk.sub$chhealth4<-ifelse(eclsk.sub$p4hscale<0, NA, eclsk.sub$p4hscale)

#SES
eclsk.sub$hhses1<-ifelse(eclsk.sub$x12sesl==-9, NA, scale(eclsk.sub$x12sesl))
eclsk.sub$hhses4<-ifelse(eclsk.sub$x4sesl_i==-9, NA, scale(eclsk.sub$x4sesl_i))

#Non time varying variables
#First we recode some Child characteristics
#Child's sex: recode as male =1
eclsk.sub$male<-recode(eclsk.sub$x_chsex_r, recodes="1=1; 2=0; -9=NA")

#Recode race with white, non Hispanic as reference using dummy vars
eclsk.sub$hisp<-recode (eclsk.sub$x_raceth_r, recodes="3:4=1;-9=NA; else=0")
eclsk.sub$black<-recode (eclsk.sub$x_raceth_r, recodes="2=1;-9=NA; else=0")
eclsk.sub$asian<-recode (eclsk.sub$x_raceth_r, recodes="5=1;-9=NA; else=0")
eclsk.sub$nahn<-recode (eclsk.sub$x_raceth_r, recodes="6:7=1;-9=NA; else=0")
eclsk.sub$other<-recode (eclsk.sub$x_raceth_r, recodes="8=1;-9=NA; else=0")


#Then we recode some parent/mother characteristics
#Mother's education, recode as 2 dummys with HS = reference
eclsk.sub$lths<-recode(eclsk.sub$x12par1ed_i, recodes = "0:2=1; 3:8=0; else = NA")
eclsk.sub$gths<-recode(eclsk.sub$x12par1ed_i, recodes = "1:3=0; 4:8=1; else =NA") 

#marital status, recode as 2 dummys, ref= married
eclsk.sub$single<-recode(eclsk.sub$p1curmar, recodes="4=1; -7:-9=NA; else=0")
eclsk.sub$notmar<-recode(eclsk.sub$p1curmar, recodes="2:3=1; -7:-9=NA; else=0")


#Then we do some household level variables

#Urban school location = 1
eclsk.sub$urban<-recode(eclsk.sub$x1locale, recodes = "1:3=1; 4=0; -1:-9=NA")

#poverty level in poverty = 1
eclsk.sub$pov<-recode(eclsk.sub$x2povty , recodes ="1:2=1; 3=0; -9=NA")

#Household size
eclsk.sub$hhsize<-eclsk.sub$x1htotal

#school % minority student body
eclsk.sub$minorsch<-ifelse(eclsk.sub$x2krceth <0, NA, eclsk.sub$x2krceth/10)

#Unsafe neighborhood
eclsk.sub$unsafe<-recode(eclsk.sub$p2safepl , recodes = "1:2='unsafe'; 3='safe'; else=NA", as.factor.result = T)

#school district poverty
eclsk.sub$dist_pov<-ifelse(eclsk.sub$x_distpov==-9, NA, scale(eclsk.sub$x_distpov))


#mean centered weights for wave 1
eclsk.sub$wts1<-eclsk.sub$w1c0/mean(eclsk.sub$w1c0, na.rm=T)
eclsk.sub$wtsp1<-eclsk.sub$w1p0/mean(eclsk.sub$w1p0, na.rm=T)

Linear Regression Example

Here is a simple linear regression model for bmi using a single predictor variable. If we were to write the full model specification, it would be:

\(y \sim N(X' \beta, \sigma^2 )\)

In terms of priors, we need them for each \(\beta\) and for \(\sigma^2\). It is common to use a Normal prior for \(\beta\) parameters, since they can be both positive or negative, but the null hypothesis is that they are 0. For example a high variance prior can be contrasted with a low variance prior. Here’s a plot of this:

So, a Normal prior with a \(\sigma\) of 10, is pretty vague, but a \(\sigma\) of 100, is really vague.

Likewise, for a variance parameter, we have a few options. In the past, inverse Gamma priors were used for variances, but Gelman (2006) suggests using a half-Cauchy prior. Here are some comparisons:

#Cauchy's
x <- seq(.0000001, 10, length=100)
hx <- dcauchy(x, scale=1)

degf <- c(.1, .5,1, 10)
colors <- c("red", "blue", "darkgreen", "gold", "black")
labels <- c("scale=.1", "scale=.5", "scale=1", "scale=10")

plot(x, hx, type="l", lty=2, xlab="x value", ylim=c(0, .5),
  ylab="Density", main="Comparison of Half-Cauchy Distributions")

for (i in 1:4){ 
  lines(x, dcauchy(x,0,degf[i]), lwd=2, col=colors[i])
}

legend("topright", inset=.05, title="Distributions",
  labels, lwd=2, lty=c(1, 1, 1, 1, 2), col=colors)

#Gamma's
library(pscl)
x <- seq(0.0000001, 10, length=100)
hx <- densigamma(x, alpha = 1, beta = 1 )

degf <- c(.5, .1 ,.01, .001)
colors <- c("red", "blue", "darkgreen", "gold", "black")
labels <- c("b=.5", "b=.1", "b=.01", "b=.001")

plot(x, hx, type="l", lty=2, xlab="x value", ylim=c(0, 1),
  ylab="Density", main=c("Comparison of Inverse Gamma","Distributions with a = .5"))

for (i in 1:4){ 
  lines(x, densigamma(x,.5,degf[i]), lwd=2, col=colors[i])
}

legend("topright", inset=.05, title="Distributions",
  labels, lwd=2, lty=c(1, 1, 1, 1, 2), col=colors)

So, we see there is quite a variety of prior densities that we can get here. The big question is: “Does it matter much?”. Below I will specify a simple multiple regression model, again, with a Normal likelihood:

\(y \sim N(X' \beta, \sigma^2 )\)

and priors:

\(\beta \sim N(0, 1)\)

\(\sigma \sim inverse gamma(.5, .5)\)

There a loads of ways to do this, but I like doing it this way.

library(brms)

## Loading required package: rstan

## Loading required package: ggplot2

## rstan (Version 2.9.0-3, packaged: 2016-02-11 15:54:41 UTC, GitRev: 05c3d0058b6a)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## rstan_options(auto_write = TRUE)
## options(mc.cores = parallel::detectCores())

## Loading 'brms' package (version 0.8.0). Useful instructions 
## can be found by typing help('brms'). A more detailed introduction 
## to the package is available through vignette('brms').

#here's the model fit via ML
fit.lm<-glm(height_z1~chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, weights=wts1)
summary(fit.lm)

## 
## Call:
## glm(formula = height_z1 ~ chhealth1 + foodinsec1 + hhsize + hhses1, 
##     data = eclsk.sub, weights = wts1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -4.7320  -0.6395  -0.0359   0.6297   5.2765  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.259994   0.040351   6.443 1.22e-10 ***
## chhealth1   -0.042392   0.012701  -3.338 0.000848 ***
## foodinsec1  -0.062773   0.031132  -2.016 0.043790 *  
## hhsize      -0.038886   0.007398  -5.257 1.50e-07 ***
## hhses1       0.030607   0.012419   2.465 0.013734 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.9797623)
## 
##     Null deviance: 10069  on 10216  degrees of freedom
## Residual deviance: 10005  on 10212  degrees of freedom
##   (5558 observations deleted due to missingness)
## AIC: 29819
## 
## Number of Fisher Scoring iterations: 2

prior<-c(set_prior("normal(0,1)", class="b"),#prior for the beta's
         set_prior("inv_gamma(.5,.5)", class="sigma"))#prior for the residual std. deviation
fit.1<-brm(height_z1|weights(wts1)~chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, family=gaussian(),
              prior = prior, 
              warmup=500, #burnin for 500 interations for each chain = 1000 burnin
              iter=1000, chains=2, #2*2000 = 4000 - 1000 burnin = 3000 total iterations
              cores=2,seed = 1115 #number of computer cores, 1 per chain is good.
              )

## Compiling the C++ model

samps<-fit.1$fit
plot(samps)

## ci_level: 0.8 (80% intervals)

## outer_level: 0.95 (95% intervals)

stan_trace(samps, "sigma_height_z1")

stan_dens(samps)

stan_ac(samps)

round(summary(samps, probs=c(.025, .975))$summary, 3)

##                       mean se_mean    sd       2.5%      97.5%   n_eff
## b_Intercept          0.260   0.001 0.040      0.184      0.336 775.656
## b_chhealth1         -0.042   0.000 0.013     -0.068     -0.016 703.803
## b_foodinsec1        -0.062   0.001 0.030     -0.118     -0.003 615.184
## b_hhsize            -0.039   0.000 0.007     -0.054     -0.025 806.890
## b_hhses1             0.031   0.000 0.012      0.009      0.054 636.289
## sigma_height_z1      0.986   0.000 0.007      0.974      1.000 832.620
## lp__            -14472.882   0.090 1.737 -14477.255 -14470.593 371.634
##                  Rhat
## b_Intercept     0.999
## b_chhealth1     1.000
## b_foodinsec1    1.001
## b_hhsize        0.998
## b_hhses1        1.001
## sigma_height_z1 1.002
## lp__            1.004

#bayesian model half cauchy prior on residual variance
prior<-c(set_prior("normal(0,1)", class="b"),#prior for the beta's
         set_prior("cauchy(0,5)", class="sigma"))#prior for the residual std. deviation
fit.2<-brm(height_z1|weights(wts1)~chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, family=gaussian(),
              prior = prior, 
              warmup=500, #burnin for 500 interations for each chain = 1000 burnin
              iter=1000, chains=2, #2*2000 = 4000 - 1000 burnin = 3000 total iterations
              cores=2,seed = 1115, #number of computer cores, 1 per chain is good.
              save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_lm_b_stan.txt")

## Compiling the C++ model

samps2<-fit.2$fit
plot(samps2)

## ci_level: 0.8 (80% intervals)
## outer_level: 0.95 (95% intervals)

stan_trace(samps2, "sigma_height_z1")

stan_dens(samps2)

stan_ac(samps2)

round(summary(samps2, probs=c(.1, .9))$summary, 3)

##                       mean se_mean    sd        10%        90%    n_eff
## b_Intercept          0.260   0.001 0.040      0.208      0.308  818.715
## b_chhealth1         -0.042   0.000 0.012     -0.058     -0.026  808.283
## b_foodinsec1        -0.063   0.001 0.029     -0.100     -0.028  640.464
## b_hhsize            -0.039   0.000 0.007     -0.048     -0.029  546.993
## b_hhses1             0.031   0.000 0.012      0.015      0.046  637.431
## sigma_height_z1      0.986   0.000 0.007      0.977      0.995 1000.000
## lp__            -14472.431   0.085 1.724 -14474.845 -14470.556  409.488
##                  Rhat
## b_Intercept     1.000
## b_chhealth1     0.999
## b_foodinsec1    0.998
## b_hhsize        1.000
## b_hhses1        1.001
## sigma_height_z1 1.001
## lp__            1.003

In this case, the parameters are invariant to the choice of prior. But we have lots of data, and this should not be surprising, because, if we remember, in situations where data are rich, the posterior is dominated by the data.

#bayesian model uniform prior on residual variance
prior<-c(set_prior("normal(0,10)", class="b"),#prior for the beta's
         set_prior("uniform(0,10)", class="sigma"))#prior for the residual std. deviation
fit.lm.b2<-brm(height_z1|weights(wts1)~chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, family=gaussian(),
              prior = prior, 
              warmup=500, #burnin for 500 interations for each chain = 1000 burnin
              iter=1000, chains=2, #2*2000 = 4000 - 1000 burnin = 3000 total iterations
              cores=2,seed = 1115, #number of computer cores, 1 per chain is good.
              save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_lm_b_stan.txt")

## Compiling the C++ model

summary(fit.lm.b2)

##  Family: gaussian (identity) 
## Formula: height_z1 | weights(wts1) ~ chhealth1 + foodinsec1 + hhsize + hhses1 
##    Data: eclsk.sub (Number of observations: 10217) 
## Samples: 2 chains, each with iter = 1000; warmup = 500; thin = 1; 
##          total post-warmup samples = 1000
##    WAIC: Not computed
##  
## Fixed Effects: 
##            Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept      0.26      0.04     0.18     0.34        793    1
## chhealth1     -0.04      0.01    -0.07    -0.02        671    1
## foodinsec1    -0.06      0.03    -0.11     0.00        662    1
## hhsize        -0.04      0.01    -0.05    -0.02        856    1
## hhses1         0.03      0.01     0.01     0.05        688    1
## 
## Family Specific Parameters: 
##                  Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## sigma(height_z1)     0.99      0.01     0.97        1        983    1
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(fit.lm.b2)

In the Bayesian context, it’s easier to fit models that assume other types of distributions for our outcome. For example, we can use a Student-t distribution witn \(\nu\) degrees of freedom for a continuous outcome, instead of a normal distribution. This is often called a type of “robust regression” becuase the tails of the t distribution are fatter than those of a normal, so the variance in the residuals can be greater. In the case below, our model would be specified as:

\[y \sim t(X' \beta, \sigma^2 , \nu)\]

and priors:

\(\beta \sim N(0, 1)\)

\(\sigma \sim half cauchy(5)\)

\(\nu \sim gamma(1, 1.5)\)

#Student's T regression model
prior<-c(set_prior("normal(0,1)", class="b"),#prior for the beta's
         set_prior("cauchy(0,5)", class="sigma"),#prior for the residual std. deviation
         set_prior("gamma(1,1.5)", class="nu")) #prior for the t distribution d.f.

fit.lm.b.t<-brm(height_z1|weights(wts1)~chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, family=student(),
              prior = prior, 
              warmup=500, 
              iter=2000, chains=2, 
              cores=2,seed = 1115,
              save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_lm_b_t_stan.txt")

## Compiling the C++ model

samps.t<-fit.lm.b.t$fit 
plot(samps.t)

## ci_level: 0.8 (80% intervals)

## outer_level: 0.95 (95% intervals)

stan_trace(samps.t)

stan_dens(samps.t)

stan_ac(samps.t)

round(summary(samps.t, probs=c(.1, .9))$summary, 3)

##                       mean se_mean    sd        10%        90%    n_eff
## b_Intercept          0.262   0.001 0.041      0.209      0.315 2600.035
## b_chhealth1         -0.042   0.000 0.013     -0.058     -0.025 2154.815
## b_foodinsec1        -0.068   0.001 0.031     -0.109     -0.027 2544.313
## b_hhsize            -0.040   0.000 0.007     -0.050     -0.031 2734.090
## b_hhses1             0.032   0.000 0.012      0.016      0.048 1926.282
## sigma_height_z1      0.949   0.000 0.008      0.938      0.959 1949.933
## nu                  23.157   0.062 2.597     19.954     26.517 1735.425
## lp__            -14530.284   0.052 1.825 -14532.745 -14528.241 1234.987
##                  Rhat
## b_Intercept     1.001
## b_chhealth1     1.000
## b_foodinsec1    1.003
## b_hhsize        1.000
## b_hhses1        1.001
## sigma_height_z1 0.999
## nu              0.999
## lp__            1.000

Comparing Bayesian models

For comparing Bayesian models, we do not reply on traditional hypothesis testing procedure, such as the F test or likelihood ratio test. Typically some form of infomation criteria is used. Since the inventors of Stan also have put a lot of thought into these ideas, we use the “Watanabe-Akaike Information Criterion” or WAIC (also here ) to compare models.

The WAIC is a recently developed alternative to the DIC that uses the log of the posterior predictive density, meaning that the log of the total density evaluated at the estimates of the parameters, and penalizes it using a penalty term, to represent the model complexity. As with AIC, smaller values of WAIC indicate better fitting models. According to Gelman and colleagues 2014 pg 174, the “WAIC has the desirable property of averaging over the posterior distribution rather than conditioning on a point estimate”

We can compare multiple models (fit to the same data) using the WAIC() function in brms:

WAIC(fit.2, fit.lm.b.t, compare = T)

##                        WAIC     SE
## fit.2              28952.16 211.58
## fit.lm.b.t         29004.87 213.12
## fit.2 - fit.lm.b.t   -52.71   7.52

Which certainly looks like the Normal model is prefered here. We can also evaluate the model fit to the sensitivity of the prior from the normal model:

WAIC(fit.1, fit.2, compare = T)

##                   WAIC     SE
## fit.1         28952.18 211.51
## fit.2         28952.16 211.58
## fit.1 - fit.2     0.02   0.14

Which looks like it doesn’t matter, which I think we knew already.

Logistic Regression Example

Next, we consider a similar simple logistic regression model:

#here's the model fit via ML
eclsk.sub$stunt<-ifelse(eclsk.sub$height_z1 < -1, 1,0)
fit.glm<-glm(stunt~male+chhealth1+foodinsec1+hhsize, data=eclsk.sub, weights=wts1, family="binomial")
summary(fit.glm)$coef

##                Estimate Std. Error    z value     Pr(>|z|)
## (Intercept) -2.17589091 0.11122154 -19.563575 3.161166e-85
## male        -0.34890228 0.05531126  -6.307979 2.827030e-10
## chhealth1    0.10043047 0.03396133   2.957200 3.104462e-03
## foodinsec1   0.12441126 0.08131325   1.530024 1.260107e-01
## hhsize       0.09823615 0.01979993   4.961440 6.997234e-07

confint(fit.glm)

##                   2.5 %     97.5 %
## (Intercept) -2.39399460 -1.9579447
## male        -0.45749684 -0.2406478
## chhealth1    0.03343667  0.1665915
## foodinsec1  -0.03693065  0.2819474
## hhsize       0.05923246  0.1368661

#bayesian model
prior<-c(set_prior("normal(0,1)", class="b"))
#no residual sigma for the binomial
fit.glm.b<-brm(stunt|weights(wts1)~male+chhealth1+foodinsec1+hhsize, data=eclsk.sub, family=bernoulli(),
              prior = prior, 
              warmup=500, 
              iter=1000, chains=2, 
              cores=2,seed = 1115,
              save_model="~/Google Drive/dem7903_App_Hier/code/code16/fit_glm_b_stan.txt")
summary(fit.glm.b)

##  Family: bernoulli (logit) 
## Formula: stunt | weights(wts1) ~ male + chhealth1 + foodinsec1 + hhsize 
##    Data: eclsk.sub (Number of observations: 10217) 
## Samples: 2 chains, each with iter = 1000; warmup = 500; thin = 1; 
##          total post-warmup samples = 1000
##    WAIC: Not computed
##  
## Fixed Effects: 
##            Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept     -2.18      0.11    -2.40    -1.97        563 1.00
## male          -0.35      0.05    -0.46    -0.25        855 1.00
## chhealth1      0.10      0.03     0.03     0.17       1000 1.00
## foodinsec1     0.12      0.08    -0.04     0.27        561 1.01
## hhsize         0.10      0.02     0.06     0.14        514 1.00
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(fit.glm.b)

Ordinal Logit Regression Model

#Outcome is ordered categories of height for age/sex z-score
eclsk.sub$stuntcat<-recode(eclsk.sub$height_z1, recodes = "lo:-1=4; -1:0=3; 0:1=2; 1:2=1;2:hi=0", as.factor.result = T)
eclsk.sub$stuntcat<- as.ordered(eclsk.sub$stuntcat )

#Fit the same model via MLE
library(MASS)
fit.glm.o<-polr (stuntcat~male+chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, weights=wts1)

## Warning: non-integer #successes in a binomial glm!

summary(fit.glm.o)

## 
## Re-fitting to get Hessian

## Call:
## polr(formula = stuntcat ~ male + chhealth1 + foodinsec1 + hhsize + 
##     hhses1, data = eclsk.sub, weights = wts1)
## 
## Coefficients:
##               Value Std. Error t value
## male       -0.29193    0.03585  -8.143
## chhealth1   0.07027    0.02329   3.016
## foodinsec1  0.13606    0.05697   2.388
## hhsize      0.07284    0.01362   5.347
## hhses1     -0.05476    0.02266  -2.417
## 
## Intercepts:
##       Value    Std. Error t value 
## 0|1    -3.2495   0.0943   -34.4473
## 1|2    -1.3441   0.0782   -17.1972
## 2|3     0.2467   0.0766     3.2221
## 3|4     2.0167   0.0796    25.3243
## 4|NaN   6.4163   0.2206    29.0869
## 
## Residual Deviance: 28884.09 
## AIC: 28904.09 
## (5539 observations deleted due to missingness)

confint(fit.glm.o)

## Waiting for profiling to be done...
## 
## Re-fitting to get Hessian

##                  2.5 %      97.5 %
## male       -0.36222905 -0.22169962
## chhealth1   0.02461350  0.11592755
## foodinsec1  0.02444042  0.24776748
## hhsize      0.04615611  0.09955769
## hhses1     -0.09915601 -0.01034772

#bayesian model
prior<-c(set_prior("normal(0,1)", class="b"))
fit.glm.b.ord<-brm(stuntcat|weights(wts1)~male+chhealth1+foodinsec1+hhsize+hhses1, data=eclsk.sub, family=cumulative(),
              prior = prior, 
              warmup=500, 
              iter=1000, chains=2, 
              cores=2,seed = 1115,
              save.model="~/Google Drive/dem7903_App_Hier/code/code16/fit_glm_ord_stan.txt")

## Warning: Argument 'save.model' is deprecated. Please use argument
## 'save_model' instead.

## Compiling the C++ model

summary(fit.glm.b.ord)

##  Family: cumulative (logit) 
## Formula: stuntcat | weights(wts1) ~ male + chhealth1 + foodinsec1 + hhsize + hhses1 
##    Data: eclsk.sub (Number of observations: 10236) 
## Samples: 2 chains, each with iter = 1000; warmup = 500; thin = 1; 
##          total post-warmup samples = 1000
##    WAIC: Not computed
##  
## Fixed Effects: 
##              Estimate Est.Error l-95% CI u-95% CI Eff.Sample Rhat
## Intercept[1]    -3.86      0.10    -4.04    -3.67        611    1
## Intercept[2]    -1.95      0.08    -2.11    -1.79       1000    1
## Intercept[3]    -0.36      0.08    -0.51    -0.21        836    1
## Intercept[4]     1.41      0.08     1.25     1.57       1000    1
## Intercept[5]     5.84      0.22     5.38     6.29       1000    1
## male            -0.29      0.04    -0.36    -0.22       1000    1
## chhealth1        0.07      0.02     0.03     0.12       1000    1
## foodinsec1       0.13      0.06     0.02     0.25       1000    1
## hhsize           0.07      0.01     0.04     0.10        990    1
## hhses1          -0.06      0.02    -0.10    -0.01       1000    1
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

plot(fit.glm.b.ord)