Measurement Error Models

In this example, I will illustrate the use of the Berkson measurement error model on data from the BRFSS from Texas.

Traditionally, we only assume error in our outcome variables: e.g. $y_i ~ /alpha + /beta *x_i + e_i$

But often, both y and x have errors in their measurement. This leads to a joint model for both x and y. Two basic forms of this include the Berkson and classical error models.

The Berkson model represents the situation where the observed predictors are less variable than their actual underlying true covariates, such as if you have a covariate that is recorded in intervals (income quartiles), vs the true continuous predictor (income in dollars). The former is less variable that the latter.

The classical model uses a latent variable specificaiton for the true underlying covariate. This is equivalent to stating that a variable x is an imperfect measure of the true covariate z.

We will use the BRFSS data for the state of Texas for our example, and use BMI as a continous outcome, and obesity status outcome (BMI >= 30) as a dichotomous outcome. To examine the Berkson model, I will use a categorized measure of poverty at the county level. To examine the classical error model, I will use the ACS estimates of the errors in the poverty rate estimates directly.

First we load our data and recode some variables:

library(rjags)

## Loading required package: coda
## Loading required package: lattice
## Linked to JAGS 3.4.0
## Loaded modules: basemod,bugs

library(dplyr)

## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(car)
library(lme4)

## Loading required package: Matrix
## Loading required package: Rcpp

load("~/Google Drive/dem7903_App_Hier/data/brfss_11.Rdata")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)
brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")
brfss_11$obese<-ifelse(brfss_11$bmi5/100 >=30, 1,0)
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=F)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=F)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=F)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=F)
#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)
brfss_11$agez<-scale(brfss_11$age, center=T, scale=T)
brfss_11$bmiz<-scale(brfss_11$bmi5/100, center=T, scale=T)
brfss_11$lowinc<-recode(brfss_11$incomg, recodes = "1:3=1; 4:5=0; else=NA")


#Read in ACS poverty rates at the county level
#poverty is measured with error, and we know the error in this case
acsecon<-read.csv("~/Google Drive/dem7903_App_Hier/data/aff_download/ACS_10_5YR_DP03_with_ann.csv")
acsecon$povrate<-acsecon[, "HC03_VC156"]
acsecon$poverr<-acsecon[, "HC04_VC156"]

#the next way I do it is the classical way, where you have a predictor that is given in ranges, not actual values.
#So I make grouped poverty rates using quantiles
quantile(acsecon$povrate, p=c(0, .25, .5, .75, 1))

##   0%  25%  50%  75% 100% 
##  0.0  7.4 10.4 14.1 44.9

acsecon$pov_cut<-recode(acsecon$povrate, recodes = "0:7.4=1; 7.41:10.4=2; 10.41:14.1=3; 14.11:44.9=4")

acsecon$cofips<-substr(acsecon$GEO.id, 10,14)
acsecon<-acsecon[, c("cofips", "povrate", "poverr", "pov_cut")]
head(acsecon)

##   cofips povrate poverr pov_cut
## 1  01001     7.5    1.4       2
## 2  01003     9.1    1.0       2
## 3  01005    19.9    2.9       4
## 4  01007     9.4    2.3       2
## 5  01009    10.0    1.5       2
## 6  01011    22.6    5.6       4

#and merge the data back to the brfss data
merged<-merge(x=brfss_11, y=acsecon, by.x="cofips", by.y="cofips", all.x=T)

## Warning in merge.data.frame(x = brfss_11, y = acsecon, by.x = "cofips", :
## column names 'cholchk', 'mrace' are duplicated in the result

Next, I subset the data to select the observations from Texas.

tx<-subset(merged, subset = state=="48"& is.na(bmiz)==F& is.na(black)==F& is.na(lths)==F& is.na(lowinc)==F)
nwncos<-table(tx$cofips)
nwncos #Number of people within counties

## 
## 48029 48133 48157 48201 48303 48329 48423 48439 48453 
##   863   498   772  1185   603   421   452   483   859

tx$conum<-rep(1:length(unique(tx$cofips)), nwncos[nwncos!=0])
length(unique(tx$conum)) #Number of counties

## [1] 9

Model specifications

The first model will ignore the errors in the predictor.

model1<-"
model{

  for (i in 1:n){
      bmi[i]~dnorm(mu[i], tau)
      mu[i]<-b[1]*black[i]+b[2]*hisp[i]+b[3]*other[i]+b[4]*lths[i]+b[5]*coll[i]+u[cofips[i]]
  }
for (j in 1:ncos)
  {
    u[j]~dnorm(muu[j], tau_u)
    muu[j]<-u0+gam*(pov[j]-mean(pov[]))
  }
#priors
u0~dnorm(0, .01)
for(j in 1:5) { b[j]~dnorm(0, .01)}
gam~dnorm(0, .01)
tau<-pow(sd, -2)
sd~dunif(0,100)

tau_u<-pow(sd_u, -2)
sd_u~dunif(0,100)

}

"

#get initial values from a linear model
b<-as.numeric(coef(lm(bmiz~black+hispanic+other+lths+coll+scale(povrate, center=T), tx)))
b

## [1]  0.02931728  0.50613065  0.17991108 -0.15825431  0.10769865 -0.06863772
## [7]  0.03491620

#make the data
dat<-list(bmi=as.numeric(tx$bmiz), obese=tx$obese, black=tx$black, hisp=tx$hispanic, other=tx$other, lths=tx$lths, coll=tx$coll, age=as.numeric(tx$agez), lowinc=tx$lowinc, pov=as.numeric(tapply(tx$povrate, tx$cofips, mean)), n=length(tx$bmiz),cofips=tx$conum, ncos=length(unique(tx$cofips)))

init.rng1<-list(".RNG.seed" = 1234, ".RNG.name" = "base::Mersenne-Twister", u0=b[1], b=b[2:6], gam=b[7])
init.rng2<-list(".RNG.seed" = 5678, ".RNG.name" = "base::Mersenne-Twister", u0=b[1], b=b[2:6], gam=b[7])

#start the model
mod1<-jags.model(file=textConnection(model1), data=dat,inits =list(init.rng1, init.rng2) , n.chains=2)

## Warning in jags.model(file = textConnection(model1), data = dat, inits =
## list(init.rng1, : Unused variable "obese" in data

## Warning in jags.model(file = textConnection(model1), data = dat, inits =
## list(init.rng1, : Unused variable "age" in data

## Warning in jags.model(file = textConnection(model1), data = dat, inits =
## list(init.rng1, : Unused variable "lowinc" in data

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 43133
## 
## Initializing model

#next, we update the model, this is the "burn in" period
update(mod1, 20000)

#sample samples from each chain, thinning every 25th iteration
samps<-coda.samples(mod1, variable.names=c("u0", "gam", "b", "sd", "sd_u"), n.iter=2000, n.thin=25)
effectiveSize(samps)

##      b[1]      b[2]      b[3]      b[4]      b[5]       gam        sd 
## 1968.8843 1233.3564 3138.9558 1033.6617  225.0964  975.5451 2567.3602 
##      sd_u        u0 
##  175.0144  202.3266

gelman.diag(samps, multivariate = F)

## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## b[1]       1.00       1.00
## b[2]       1.00       1.00
## b[3]       1.00       1.02
## b[4]       1.00       1.00
## b[5]       1.00       1.01
## gam        1.00       1.00
## sd         1.00       1.00
## sd_u       1.01       1.03
## u0         1.00       1.01

#Numerical summary of each parameter, here I also include the 90% credible interval:
summary(samps, quantiles =  c(.025, .975))

## 
## Iterations = 21001:23000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 2000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##          Mean       SD  Naive SE Time-series SE
## b[1]  0.50987 0.045039 0.0007121      0.0010216
## b[2]  0.17939 0.038017 0.0006011      0.0011476
## b[3] -0.15389 0.055694 0.0008806      0.0009948
## b[4]  0.11227 0.051677 0.0008171      0.0016181
## b[5] -0.06265 0.032617 0.0005157      0.0021903
## gam   0.01329 0.007990 0.0001263      0.0002623
## sd    1.00169 0.009091 0.0001437      0.0001794
## sd_u  0.04018 0.027738 0.0004386      0.0021597
## u0    0.02518 0.034242 0.0005414      0.0024216
## 
## 2. Quantiles for each variable:
## 
##            2.5%      97.5%
## b[1]  0.4230474  0.5977690
## b[2]  0.1062045  0.2558031
## b[3] -0.2640193 -0.0454144
## b[4]  0.0139422  0.2165754
## b[5] -0.1275151 -0.0004197
## gam  -0.0030158  0.0295003
## sd    0.9842130  1.0198156
## sd_u  0.0004914  0.1027521
## u0   -0.0383918  0.0944713

dic.samples(mod1, n.iter = 1000, type = "pD")

## Mean deviance:  17434 
## penalty 11.9 
## Penalized deviance: 17445

Berkson Model

Here, I have error in the poverty rate because the rate has been measured in an interval fashion, instead of a continuous fashion. One thing you may encounter in this type of situation is where you have a calibration estimate for the intervaled data, such as a regression of the true value on the intervaled value, so you know the parameters from such an equation. I estimate these calibration values below using a linear model of the intervaled estimate on the true estimate.

model2<-"
model{

  for (i in 1:n){
      bmi[i]~dnorm(mu[i], tau)
      mu[i]<-b[1]*black[i]+b[2]*hisp[i]+b[3]*other[i]+b[4]*lths[i]+b[5]*coll[i]+u[cofips[i]]
      
  }
for (j in 1:ncos)
  {
     u[j]~dnorm(muu[j], tau_u)
    muu[j]<-u0+gam*(z[j])
    z[j]~dnorm(muz[j], tauz)
    muz[j]<-alphaz+bz*povcut[j]
  }
#priors
for(j in 1:5) { b[j]~dnorm(0, .01)}
gam~dnorm(0,.01)
u0~dnorm(0, .0001)

tau<-pow(sd, -2)
sd~dunif(0,100)

tau_u<-pow(sd_u, -2)
sd_u~dunif(0,100)

}
"
#get calibration values for error model. 
summary(lm(povrate~pov_cut, tx))

## 
## Call:
## lm(formula = povrate ~ pov_cut, data = tx)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5502 -0.3620  0.1557  0.8439  1.0498 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.36798    0.04104   82.06   <2e-16 ***
## pov_cut      3.09407    0.01467  210.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9128 on 6134 degrees of freedom
## Multiple R-squared:  0.8788, Adjusted R-squared:  0.8788 
## F-statistic: 4.447e+04 on 1 and 6134 DF,  p-value: < 2.2e-16

#Make the data
dat<-list(bmi=as.numeric(tx$bmiz), obese=tx$obese, black=tx$black, hisp=tx$hispanic, other=tx$other, lths=tx$lths, coll=tx$coll, age=as.numeric(tx$agez), lowinc=tx$lowinc, povcut=tapply(tx$pov_cut, tx$cofips, median ), n=length(tx$bmiz),cofips=tx$conum, ncos=length(unique(tx$cofips)), alphaz=3.367, bz=3.094, tauz=1/(.9128^2))

#get initial values

b<-as.numeric(fixef(lmer(bmiz~black+hispanic+other+lths+coll+pov_cut+(1|cofips), tx)))
b

## [1] -0.04088886  0.50858872  0.17811710 -0.15601629  0.11105274 -0.06681006
## [7]  0.02686786

VarCorr(lmer(bmiz~black+hispanic+other+lths+coll+pov_cut+(1|cofips), tx))

##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.046201
##  Residual             1.001549

#initialize
init.rng1<-list( b=b[2:6], u0=b[1], gam=b[7], sd=1.001549, sd_u=.046201)
init.rng2<-list( b=b[2:6], u0=b[1], gam=b[7], sd=1.001549, sd_u=.046201)

#Start the model
mod2<-jags.model(file=textConnection(model2), data=dat,inits =list(init.rng1, init.rng2) , n.chains=2)

## Warning in jags.model(file = textConnection(model2), data = dat, inits =
## list(init.rng1, : Unused variable "obese" in data

## Warning in jags.model(file = textConnection(model2), data = dat, inits =
## list(init.rng1, : Unused variable "age" in data

## Warning in jags.model(file = textConnection(model2), data = dat, inits =
## list(init.rng1, : Unused variable "lowinc" in data

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 43143
## 
## Initializing model

#next, we update the model, this is the "burn in" period
update(mod2, 30000)

#sample samples from each chain, thinning every 25th iteration
samps2<-coda.samples(mod2, variable.names=c("u0", "b","gam", "sd", "sd_u", "z", "sd", "sd_u"), n.iter=2000, n.thin=25)

## Warning in jags.samples(model, variable.names, n.iter, thin, type = "trace", : Failed to set trace monitor for sd
## Monitor already exists and cannot be duplicated

## Warning in jags.samples(model, variable.names, n.iter, thin, type = "trace", : Failed to set trace monitor for sd_u
## Monitor already exists and cannot be duplicated

effectiveSize(samps2)

##       b[1]       b[2]       b[3]       b[4]       b[5]        gam 
## 2138.60942 1748.75166 2911.11641  803.02335  249.60109   68.92078 
##         sd       sd_u         u0       z[1]       z[2]       z[3] 
## 2405.04009  261.94953   68.54170 2681.46889 2990.04280 2309.37881 
##       z[4]       z[5]       z[6]       z[7]       z[8]       z[9] 
## 3124.52492 2670.47419 2536.43120 1820.33461 2411.54651 1517.70422

gelman.diag(samps2, multivariate = F)

## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## b[1]       1.00       1.00
## b[2]       1.00       1.02
## b[3]       1.00       1.00
## b[4]       1.00       1.00
## b[5]       1.00       1.01
## gam        1.04       1.06
## sd         1.00       1.00
## sd_u       1.00       1.00
## u0         1.03       1.06
## z[1]       1.00       1.00
## z[2]       1.00       1.00
## z[3]       1.00       1.01
## z[4]       1.00       1.01
## z[5]       1.00       1.01
## z[6]       1.00       1.00
## z[7]       1.00       1.01
## z[8]       1.00       1.00
## z[9]       1.00       1.01

#Numerical summary of each parameter, here I also include the 90% credible interval:
summary(samps2, quantiles =  c(.025, .975))

## 
## Iterations = 31001:33000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 2000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##          Mean       SD  Naive SE Time-series SE
## b[1]  0.51106 0.044334 0.0007010      0.0009696
## b[2]  0.17915 0.037841 0.0005983      0.0009042
## b[3] -0.15364 0.054971 0.0008692      0.0010195
## b[4]  0.11343 0.053182 0.0008409      0.0018869
## b[5] -0.06245 0.033981 0.0005373      0.0021496
## gam   0.01189 0.010112 0.0001599      0.0013474
## sd    1.00162 0.008988 0.0001421      0.0001833
## sd_u  0.05399 0.032977 0.0005214      0.0020772
## u0   -0.11208 0.122702 0.0019401      0.0157495
## z[1] 12.79380 0.902000 0.0142619      0.0188566
## z[2] 15.68188 0.884428 0.0139840      0.0162108
## z[3]  6.44646 0.902590 0.0142712      0.0191741
## z[4] 12.58502 0.885850 0.0140065      0.0158862
## z[5] 12.72573 0.912685 0.0144308      0.0184077
## z[6]  9.68164 0.921951 0.0145773      0.0188387
## z[7] 12.50257 0.922895 0.0145923      0.0225793
## z[8]  9.75881 0.914454 0.0144588      0.0187690
## z[9] 12.34640 0.946765 0.0149697      0.0252258
## 
## 2. Quantiles for each variable:
## 
##           2.5%    97.5%
## b[1]  0.424325  0.59770
## b[2]  0.108111  0.25275
## b[3] -0.262130 -0.04670
## b[4]  0.006049  0.21667
## b[5] -0.130353  0.00225
## gam  -0.008807  0.03153
## sd    0.984054  1.01956
## sd_u  0.006818  0.13507
## u0   -0.356820  0.14689
## z[1] 11.062171 14.55954
## z[2] 13.979474 17.37629
## z[3]  4.691435  8.21881
## z[4] 10.832859 14.38868
## z[5] 10.959678 14.46606
## z[6]  7.846071 11.49392
## z[7] 10.724212 14.33990
## z[8]  7.968003 11.56446
## z[9] 10.485059 14.25753

dic.samples(mod2, n.iter = 1000, type = "pD")

## Mean deviance:  17434 
## penalty 12.48 
## Penalized deviance: 17446

classical Model

Here the poverty rate is treated as a latent variable with known error (from the ACS margin of error). Alternatively, you could put a prior on the latent variable variance.

model3<-"
model{

  for (i in 1:n){
      bmi[i]~dnorm(mu[i], tau)
      mu[i]<-b[1]*black[i]+b[2]*hisp[i]+b[3]*other[i]+b[4]*lths[i]+b[5]*coll[i]+u[cofips[i]]
          
  }

for (j in 1:ncos)
  {
    u[j]~dnorm(muu[j], tau_u)
    muu[j]<-u0+gam*(povtrue[j]-mean(povtrue[]))
    povrate[j]~dnorm(povtrue[j],taupov[j] )
    povtrue[j]~dnorm(0, taup)T(0,100)
    taupov[j]<-pow(poverr[j],-2)
  }
#priors
u0~dnorm(0, .01)
for(j in 1:5) { b[j]~dnorm(0, .01)}
gam~dnorm(0, .01)

tau<-pow(sd, -2)
sd~dunif(0,100)

tau_u<-pow(sd_u, -2)
sd_u~dunif(0,100)

taup~dgamma(.01, .01)
}
"

dat<-list(bmi=as.numeric(tx$bmiz), obese=tx$obese, black=tx$black, hisp=tx$hispanic, other=tx$other, lths=tx$lths, coll=tx$coll, age=as.numeric(tx$agez), lowinc=tx$lowinc, povrate=as.numeric(tapply(tx$povrate, tx$cofips, mean)), poverr=tapply(tx$poverr/1.645, tx$cofips, mean), n=length(tx$bmiz),cofips=tx$conum, ncos=length(unique(tx$cofips)))

#get initial values

b<-as.numeric(fixef(lmer(bmiz~black+hispanic+other+lths+coll+scale(povrate, center=T)+(1|cofips), tx)))
b

## [1]  0.03040590  0.50906581  0.17779706 -0.15478146  0.11036624 -0.06592581
## [7]  0.03446866

VarCorr(lmer(bmiz~black+hispanic+other+lths+coll+scale(povrate, center=T)+(1|cofips), tx))

##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.035714
##  Residual             1.001545

init.rng1<-list( u0=b[1], b=b[2:6], gam=b[7])
init.rng2<-list( u0=b[1], b=b[2:6], gam=b[7])
mod3<-jags.model(file=textConnection(model3), data=dat,inits =list(init.rng1, init.rng2) , n.chains=2)

## Warning in jags.model(file = textConnection(model3), data = dat, inits =
## list(init.rng1, : Unused variable "obese" in data

## Warning in jags.model(file = textConnection(model3), data = dat, inits =
## list(init.rng1, : Unused variable "age" in data

## Warning in jags.model(file = textConnection(model3), data = dat, inits =
## list(init.rng1, : Unused variable "lowinc" in data

## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 43158
## 
## Initializing model

#next, we update the model, this is the "burn in" period
update(mod3, 20000)

#sample 1000 samples from each chain, thinning every 25th iteration
samps3<-coda.samples(mod3, variable.names=c("u0", "b","gam", "sd", "sd_u", "povtrue", "povrate"), n.iter=2000, n.thin=25)
effectiveSize(samps3)

##       b[1]       b[2]       b[3]       b[4]       b[5]        gam 
##  2437.5915  1568.5121  3013.7831  1065.8596   341.2738  1425.7628 
## povrate[1] povrate[2] povrate[3] povrate[4] povrate[5] povrate[6] 
##     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000 
## povrate[7] povrate[8] povrate[9] povtrue[1] povtrue[2] povtrue[3] 
##     0.0000     0.0000     0.0000  3872.4346  2049.5267  3807.1865 
## povtrue[4] povtrue[5] povtrue[6] povtrue[7] povtrue[8] povtrue[9] 
##  4000.0000  3556.0354  2703.2562  2918.1094  4000.0000  3761.9943 
##         sd       sd_u         u0 
##  2547.5197   380.4240   342.8834

gelman.diag(samps3, multivariate = F)

## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## b[1]             1.00       1.00
## b[2]             1.00       1.00
## b[3]             1.00       1.00
## b[4]             1.00       1.00
## b[5]             1.00       1.01
## gam              1.00       1.00
## povrate[1]        NaN        NaN
## povrate[2]        NaN        NaN
## povrate[3]        NaN        NaN
## povrate[4]        NaN        NaN
## povrate[5]        NaN        NaN
## povrate[6]        NaN        NaN
## povrate[7]        NaN        NaN
## povrate[8]        NaN        NaN
## povrate[9]        NaN        NaN
## povtrue[1]       1.00       1.00
## povtrue[2]       1.00       1.00
## povtrue[3]       1.00       1.00
## povtrue[4]       1.00       1.00
## povtrue[5]       1.00       1.00
## povtrue[6]       1.00       1.00
## povtrue[7]       1.00       1.01
## povtrue[8]       1.00       1.00
## povtrue[9]       1.00       1.00
## sd               1.00       1.00
## sd_u             1.01       1.06
## u0               1.00       1.00

#Numerical summary of each parameter:
summary(samps3, quantiles =  c(.025, .975))

## 
## Iterations = 21001:23000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 2000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                Mean       SD  Naive SE Time-series SE
## b[1]        0.50856 0.045210 0.0007148      0.0009170
## b[2]        0.17741 0.037301 0.0005898      0.0009550
## b[3]       -0.15491 0.055716 0.0008810      0.0010226
## b[4]        0.10834 0.050594 0.0008000      0.0015581
## b[5]       -0.06702 0.031530 0.0004985      0.0017093
## gam         0.01372 0.008967 0.0001418      0.0002375
## povrate[1] 13.20000 0.000000 0.0000000      0.0000000
## povrate[2] 15.90000 0.000000 0.0000000      0.0000000
## povrate[3]  6.10000 0.000000 0.0000000      0.0000000
## povrate[4] 13.70000 0.000000 0.0000000      0.0000000
## povrate[5] 12.30000 0.000000 0.0000000      0.0000000
## povrate[6] 10.10000 0.000000 0.0000000      0.0000000
## povrate[7] 11.30000 0.000000 0.0000000      0.0000000
## povrate[8] 10.40000 0.000000 0.0000000      0.0000000
## povrate[9] 11.10000 0.000000 0.0000000      0.0000000
## povtrue[1] 13.20419 0.183786 0.0029059      0.0029572
## povtrue[2] 15.20978 1.828042 0.0289039      0.0406905
## povtrue[3]  6.10252 0.300538 0.0047519      0.0048759
## povtrue[4] 13.68942 0.183155 0.0028959      0.0028962
## povtrue[5] 12.33436 0.532059 0.0084126      0.0089287
## povtrue[6] 10.20148 0.907504 0.0143489      0.0175023
## povtrue[7] 11.21962 0.588596 0.0093065      0.0109008
## povtrue[8] 10.40920 0.178102 0.0028160      0.0028163
## povtrue[9] 11.05568 0.300233 0.0047471      0.0049099
## sd          1.00186 0.009315 0.0001473      0.0001846
## sd_u        0.04662 0.028250 0.0004467      0.0014430
## u0          0.03107 0.034948 0.0005526      0.0018998
## 
## 2. Quantiles for each variable:
## 
##                 2.5%     97.5%
## b[1]        0.416773  0.594974
## b[2]        0.102011  0.249291
## b[3]       -0.263098 -0.044232
## b[4]        0.009332  0.207569
## b[5]       -0.128957 -0.006447
## gam        -0.003974  0.031573
## povrate[1] 13.200000 13.200000
## povrate[2] 15.900000 15.900000
## povrate[3]  6.100000  6.100000
## povrate[4] 13.700000 13.700000
## povrate[5] 12.300000 12.300000
## povrate[6] 10.100000 10.100000
## povrate[7] 11.300000 11.300000
## povrate[8] 10.400000 10.400000
## povrate[9] 11.100000 11.100000
## povtrue[1] 12.838419 13.566852
## povtrue[2] 11.574008 18.802500
## povtrue[3]  5.511780  6.692156
## povtrue[4] 13.331411 14.040259
## povtrue[5] 11.274370 13.354635
## povtrue[6]  8.401609 11.953778
## povtrue[7] 10.083676 12.359108
## povtrue[8] 10.061355 10.756515
## povtrue[9] 10.486926 11.637176
## sd          0.984061  1.020340
## sd_u        0.008032  0.116552
## u0         -0.035262  0.100764

dat$povrate

## [1] 13.2 15.9  6.1 13.7 12.3 10.1 11.3 10.4 11.1

dat$poverr

##     48029     48133     48157     48201     48303     48329     48423 
## 0.1823708 2.0060790 0.3039514 0.1823708 0.5471125 0.9118541 0.6079027 
##     48439     48453 
## 0.1823708 0.3039514

dic.samples(mod3, n.iter = 1000, type = "pD")

## Mean deviance:  17444 
## penalty 20.8 
## Penalized deviance: 17464

In this case, the first model, without any measurement error fits best, followed by the berkson model, then the classical model, using the DIC.

Measurement Error Models

coreysparks

November 5, 2014

Model specifications

Berkson Model

classical Model