# IntroduÃ§Ã£o

InferÃªncia sob o Modelo Tobit utilizando os pacotes Bugs, Jags, Stan e Nimble

Tamanho amostral = $$100$$

$$x_i \sim unif(1,5;3)$$

Garantindo 5% de censura amostral

A amostra gerada tem a seguinte distribuiÃ§Ã£o:

$$y_i \sim N(\beta_0+\beta_1\times x_i, \tau)$$

Com as seguintes prioris:

$$\beta_0 \sim N(0;0,01)$$

$$\beta_1 \sim N(0;0,01)$$

$$\tau \sim Ga(1;0,1)$$

$$E[\tau] = 10$$ e $$V[\tau]=100$$

Para efeito de simulaÃ§Ã£o, foram padronizadas as seguintes definiÃ§Ãµes:

SimulaÃ§Ãµes = 10000

burn = 1000

Tamanho amostral a posteriori = 9000

EspaÃ§amento = 1

# Bugs

########################################################################################
#############                          BUGS                                #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
table(y.cens<0)

FALSE
95 
sink("regression.txt")
cat("
model
{
# priors
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
# likelihood
for(i in 1:N)
{
mu[i] <- beta0 + beta1*x[i]
y[i] ~ dnorm(mu[i], tau)C(0,)}
}
")
sink()
my.data <- list(y=y.cens,N=100,x=x)
params <- c("beta0","beta1","tau")
inits <- function(){list (beta0=1,beta1=1,tau=2)}
res <- bugs(data=my.data, inits=inits, parameters.to.save=params,
n.iter=10000, n.chains=1, n.burnin=1000,n.thin = 1,
model.file="regression.txt",DIC=FALSE)
res$summary  mean sd 2.5% 25% 50% 75% 97.5% beta0 -1.924597 0.4815395 -2.8820000 -2.24325 -1.922 -1.60200 -0.991790 beta1 2.056745 0.2125253 1.6469750 1.91300 2.054 2.19800 2.480025 tau 1.265164 0.1818779 0.9342975 1.13775 1.257 1.38025 1.645000 par(mfrow=c(1,3)) plot(res$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(res$sims.matrix[,2],type="l",main=expression(beta[1])) abline(h=2,col="red") plot(res$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(res$sims.matrix[,1]),main=expression(beta[0])) abline(v=-2,col="red") plot(density(res$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(res$sims.matrix[,3]),main=expression(tau)) abline(v=1,col="red") # Jags ######################################################################################## ############# JAGS ############# ######################################################################################## set.seed(20) x <- runif(100,1.5,3) y <- rnorm(100,-2+2*x) y.cens <- ifelse(y>0,y,NA) y.ind <- as.numeric(y>=0) N = length(y) model_code= 'model { for (i in 1:N){ mu[i] <- beta0 + beta1*x[i] y.ind[i] ~ dinterval(y.censored[i], 0) y.censored[i] ~ dnorm(mu[i], tau)} beta0 ~ dnorm(0,0.01) beta1 ~ dnorm(0,0.01) tau ~ dgamma(1,0.1) }' tobit.inits <- function() { list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1)))) } ?jags m <- jags(model.file=textConnection(model_code), parameters.to.save = c("beta0", "beta1","tau"), n.iter=10000,n.burnin=1000,n.chains = 1,n.thin = 1, data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x), inits=tobit.inits,DIC=FALSE) module glm loaded module dic loaded Compiling model graph Resolving undeclared variables Allocating nodes Graph information: Observed stochastic nodes: 195 Unobserved stochastic nodes: 8 Total graph size: 508 Initializing model | | | 0% | |* | 2% | |** | 4% | |*** | 7% | |**** | 9% | |****** | 11% | |******* | 13% | |******** | 16% | |********* | 18% | |********** | 20% | |*********** | 22% | |************ | 24% | |************* | 27% | |************** | 29% | |**************** | 31% | |***************** | 33% | |****************** | 36% | |******************* | 38% | |******************** | 40% | |********************* | 42% | |********************** | 44% | |*********************** | 47% | |************************ | 49% | |************************** | 51% | |*************************** | 53% | |**************************** | 56% | |***************************** | 58% | |****************************** | 60% | |******************************* | 62% | |******************************** | 64% | |********************************* | 67% | |********************************** | 69% | |************************************ | 71% | |************************************* | 73% | |************************************** | 76% | |*************************************** | 78% | |**************************************** | 80% | |***************************************** | 82% | |****************************************** | 84% | |******************************************* | 87% | |******************************************** | 89% | |********************************************** | 91% | |*********************************************** | 93% | |************************************************ | 96% | |************************************************* | 98% | |**************************************************| 100% m$BUGSoutput$summary  mean sd 2.5% 25% 50% 75% 97.5% beta0 -2.688581 0.4871077 -3.6641946 -3.0117060 -2.688885 -2.370395 -1.741422 beta1 2.360142 0.2163471 1.9399078 2.2164496 2.362275 2.503432 2.793005 tau 1.108847 0.1639959 0.8147208 0.9933266 1.101411 1.213167 1.448729 par(mfrow=c(1,3)) plot(m$BUGSoutput$sims.matrix[,1],type="l",main=expression(beta[0])) abline(h=-2,col="red") plot(m$BUGSoutput$sims.matrix[,2],type="l",main=expression(beta[1])) abline(h=2,col="red") plot(m$BUGSoutput$sims.matrix[,3],type="l",main=expression(tau)) abline(h=1,col="red") par(mfrow=c(1,3)) plot(density(m$BUGSoutput$sims.matrix[,1]),main=expression(beta[0])) abline(v=-2,col="red") plot(density(m$BUGSoutput$sims.matrix[,2]),main=expression(beta[1])) abline(v=2,col="red") plot(density(m$BUGSoutput$sims.matrix[,3]),main=expression(tau)) abline(v=1,col="red") # Stan modelo normal ######################################################################################## ############# STAN ############# ######################################################################################## set.seed(20) y = rnorm(1000,3,2) y.censored <- ifelse(y>=0, y, 0) y.ind <- as.numeric(y>=0) N = length(y) model = "data { int<lower=0> N_obs; int<lower=0> N_cens; real y_obs[N_obs]; real<lower=min(y_obs)> U; } parameters { real<lower=U> y_cens[N_cens]; real mu; real<lower=0> sigma2; } transformed parameters { real<lower=0> tau; tau = 1 / sigma2; } model { y_obs ~ normal(mu, sqrt(sigma2)); y_cens ~ normal(0, 10); mu ~ normal(0,10); sigma2 ~ gamma(1,0.1); }" data <- list(y_obs=y.censored, y_cens=y, N_obs = length(y.censored), N_cens = length(y), U = 0) fit <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)  SAMPLING FOR MODEL 'e14823c6ba9dc00bd93917471a3cce2c' NOW (CHAIN 1). Chain 1: Chain 1: Gradient evaluation took 0 seconds Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds. Chain 1: Adjust your expectations accordingly! Chain 1: Chain 1: Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup) Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup) Chain 1: Iteration: 1001 / 10000 [ 10%] (Sampling) Chain 1: Iteration: 2000 / 10000 [ 20%] (Sampling) Chain 1: Iteration: 3000 / 10000 [ 30%] (Sampling) Chain 1: Iteration: 4000 / 10000 [ 40%] (Sampling) Chain 1: Iteration: 5000 / 10000 [ 50%] (Sampling) Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling) Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling) Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling) Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling) Chain 1: Iteration: 10000 / 10000 [100%] (Sampling) Chain 1: Chain 1: Elapsed Time: 4.77 seconds (Warm-up) Chain 1: 30.399 seconds (Sampling) Chain 1: 35.169 seconds (Total) Chain 1:  summary(fit,c("mu","tau")) $summary
mean      se_mean         sd      2.5%       25%       50%       75%    97.5%    n_eff      Rhat
mu  3.0300011 0.0004999933 0.05991698 2.9136245 2.9892271 3.0303034 3.0703253 3.145489 14360.56 1.0000211
tau 0.2701379 0.0001188452 0.01223221 0.2463536 0.2620211 0.2698582 0.2783503 0.294574 10593.67 0.9999091

$c_summary , , chains = chain:1 stats parameter mean sd 2.5% 25% 50% 75% 97.5% mu 3.0300011 0.05991698 2.9136245 2.9892271 3.0303034 3.0703253 3.145489 tau 0.2701379 0.01223221 0.2463536 0.2620211 0.2698582 0.2783503 0.294574 fitd = extract(fit) par(mfrow=c(1,2)) plot(fitd$mu,type="l",main=expression(mu))
abline(h=3,col="red")
plot(fitd$tau,type="l",main=expression(tau)) abline(h=0.25,col="red") par(mfrow=c(1,2)) plot(density(fitd$mu,type="l"),main=expression(beta[0]))
abline(v=3,col="red")
plot(density(fitd$tau,type="l"),main=expression(beta[1])) abline(v=0.25,col="red") # Stan tobit ######################################################################################## ############# STAN ############# ######################################################################################## set.seed(20) x <- runif(100,1.5,3) y <- rnorm(100,-2+2*x) y.cens <- ifelse(y>0,y,0) y.ind <- as.numeric(y>=0) N = length(y) model = "data { int<lower=0> N_obs; int<lower=0> N_cens; vector[N_obs] y_obs; vector[N_obs] x; real<lower=min(y_obs)> U; } parameters { real<lower=U> y_cens[N_cens]; real beta0; real beta1; real<lower=0> sigma2; } transformed parameters { real<lower=0> tau; tau = 1 / sigma2; } model { y_obs ~ normal(beta0+beta1*x, sqrt(sigma2)); y_cens ~ normal(0, 10); beta0 ~ normal(0,10); beta1 ~ normal(0,10); sigma2 ~ gamma(1,0.1); } generated quantities { vector[N_obs] y_rep; for(n in 1:N_obs) { y_rep[n] = normal_rng(beta0 + beta1 * x[n], sqrt(sigma2)); } } " data <- list(y_obs=y.cens, y_cens=y, N_obs = length(y.cens), N_cens = length(y), U = 0,x=x) fit2 <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)  SAMPLING FOR MODEL 'f7a6ef25f375eddfb9bc1460ee975394' NOW (CHAIN 1). Chain 1: Chain 1: Gradient evaluation took 0 seconds Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds. Chain 1: Adjust your expectations accordingly! Chain 1: Chain 1: Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup) Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup) Chain 1: Iteration: 1001 / 10000 [ 10%] (Sampling) Chain 1: Iteration: 2000 / 10000 [ 20%] (Sampling) Chain 1: Iteration: 3000 / 10000 [ 30%] (Sampling) Chain 1: Iteration: 4000 / 10000 [ 40%] (Sampling) Chain 1: Iteration: 5000 / 10000 [ 50%] (Sampling) Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling) Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling) Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling) Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling) Chain 1: Iteration: 10000 / 10000 [100%] (Sampling) Chain 1: Chain 1: Elapsed Time: 0.815 seconds (Warm-up) Chain 1: 5.686 seconds (Sampling) Chain 1: 6.501 seconds (Total) Chain 1:  summary(fit2,c("beta0","beta1","sigma2")) $summary
mean     se_mean        sd      2.5%        25%        50%       75%     97.5%     n_eff      Rhat
beta0  -2.5029309 0.005461745 0.4608263 -3.402576 -2.8167268 -2.5013279 -2.189749 -1.583587  7118.880 0.9999093
beta1   2.2875233 0.002451611 0.2052878  1.886756  2.1474523  2.2848043  2.428126  2.691587  7011.696 0.9999060
sigma2  0.8801829 0.001066457 0.1311793  0.665012  0.7881697  0.8669909  0.957052  1.175921 15130.167 1.0001735

$c_summary , , chains = chain:1 stats parameter mean sd 2.5% 25% 50% 75% 97.5% beta0 -2.5029309 0.4608263 -3.402576 -2.8167268 -2.5013279 -2.189749 -1.583587 beta1 2.2875233 0.2052878 1.886756 2.1474523 2.2848043 2.428126 2.691587 sigma2 0.8801829 0.1311793 0.665012 0.7881697 0.8669909 0.957052 1.175921 fitd = extract(fit2) par(mfrow=c(1,3)) plot(fitd$beta0,type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(fitd$beta1,type="l",main=expression(beta[1])) abline(h=2,col="red") plot(fitd$sigma2,type="l",main=expression(sigma^2))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(fitd$beta0,type="l"),main=expression(beta[0])) abline(v=-2,col="red") plot(density(fitd$beta1,type="l"),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(fitd$sigma2,type="l"),main=expression(sigma^2)) abline(v=1,col="red") # Nimble 1 ######################################################################################## ############# NIMBLE ############# ######################################################################################## set.seed(20) x <- runif(100,1.5,3) y <- rnorm(100,-2+2*x) y.cens <- ifelse(y>0,y,NA) y.ind <- as.numeric(y>=0) N = length(y) simpleCode1 <- nimbleCode({ for (i in 1:N){ mu[i] <- beta0 + beta1*x[i] y.ind[i] ~ dinterval(y.censored[i], 0) y.censored[i] ~ dnorm(mu[i], tau)} beta0 ~ dnorm(0,0.01) beta1 ~ dnorm(0,0.01) tau ~ dgamma(1,0.1) }) tobit.inits <- function() { list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1)))) } simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N)) defining model... BUGSmodel: found the same variable(s) in both 'data' and 'constants'; using variable(s) from 'data'. building model... setting data and initial values... data not used in model: Nrunning calculate on model (any error reports that follow may simply reflect missing values in model variables) ... checking model sizes and dimensions... This model is not fully initialized. This is not an error. To see which variables are not initialized, use model$initializeInfo(). For more information on model initialization, see help(modelInitialization).
model building finished.
mcmc.out <- nimbleMCMC(code = simpleModel1,
monitors=c("beta0", "beta1","tau"),
nchains = 1,
niter = 10000,
nburnin = 1000,
summary = TRUE,
progressBar = TRUE)
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compiler details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
mcmc.out$summary  Mean Median St.Dev. 95%CI_low 95%CI_upp beta0 -2.680071 -2.663973 0.4857003 -3.6917210 -1.775284 beta1 2.356633 2.348672 0.2151124 1.9515371 2.801466 tau 1.111285 1.101014 0.1627670 0.8139987 1.446730 par(mfrow=c(1,3)) plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1])) abline(h=2,col="red") plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(mcmc.out$samples[,1]),main=expression(beta[0])) abline(v=-2,col="red") plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau)) abline(v=1,col="red") geweke.diag(mcmc.out$samples)

Fraction in 1st window = 0.1
Fraction in 2nd window = 0.5

beta0   beta1     tau
0.1693 -0.1391  0.4951 
raftery.diag(mcmc.out$samples)  Quantile (q) = 0.025 Accuracy (r) = +/- 0.005 Probability (s) = 0.95 Burn-in Total Lower bound Dependence (M) (N) (Nmin) factor (I) beta0 44 46372 3746 12.40 beta1 48 51728 3746 13.80 tau 2 3973 3746 1.06  # Nimble 2 ######################################################################################## ############# NIMBLE ############# ######################################################################################## set.seed(20) x <- runif(100,1.5,3) y <- rnorm(100,-2+2*x) y.cens <- ifelse(y>0,y,NA) y.ind <- as.numeric(y>=0) N = length(y) simpleCode1 <- nimbleCode({ for (i in 1:N){ mu[i] <- beta0 + beta1*x[i] y.ind[i] ~ dinterval(y.censored[i], 0) y.censored[i] ~ dnorm(mu[i], tau)} beta0 ~ dnorm(0,0.01) beta1 ~ dnorm(0,0.01) tau ~ dgamma(1,0.1) }) tobit.inits <- function() { list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1)))) } simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N)) defining model... BUGSmodel: found the same variable(s) in both 'data' and 'constants'; using variable(s) from 'data'. building model... setting data and initial values... data not used in model: Nrunning calculate on model (any error reports that follow may simply reflect missing values in model variables) ... checking model sizes and dimensions... This model is not fully initialized. This is not an error. To see which variables are not initialized, use model$initializeInfo(). For more information on model initialization, see help(modelInitialization).
model building finished.
mcmc.out <- nimbleMCMC(code = simpleModel1,
monitors=c("beta0", "beta1","tau"),
nchains = 1,
niter = 100000,
nburnin = 1000,
summary = TRUE,
progressBar = TRUE)
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compiler details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
mcmc.out$summary  Mean Median St.Dev. 95%CI_low 95%CI_upp beta0 -2.692343 -2.693460 0.4915015 -3.6561635 -1.705984 beta1 2.361458 2.362215 0.2180700 1.9235208 2.790032 tau 1.110457 1.102130 0.1630927 0.8148995 1.454054 par(mfrow=c(1,3)) plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1])) abline(h=2,col="red") plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(mcmc.out$samples[,1]),main=expression(beta[0])) abline(v=-2,col="red") plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau)) abline(v=1,col="red") geweke.diag(mcmc.out$samples)

Fraction in 1st window = 0.1
Fraction in 2nd window = 0.5

beta0   beta1     tau
0.4908 -0.4564  0.3993 
raftery.diag(mcmc.out$samples)  Quantile (q) = 0.025 Accuracy (r) = +/- 0.005 Probability (s) = 0.95 Burn-in Total Lower bound Dependence (M) (N) (Nmin) factor (I) beta0 54 65151 3746 17.40 beta1 77 83050 3746 22.20 tau 2 3927 3746 1.05  # Laplace ######################################################################################## ############# LAPLACE ############# ######################################################################################## set.seed(20) x <- runif(100,1.5,3) y <- rnorm(100,-2+2*x) y.cens <- ifelse(y>0,y,NA) y.ind <- as.numeric(y>=0) N = length(y) d = ifelse(y < 0, 0, 1) model <- function(p,y,d,x) { log_lik <- sum(pnorm(0, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T)*(1-d)) + sum(d * dnorm(y, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T)) log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["tau"], 1, 0.1, log = T) log_post } inits <- c(beta0 = 1,beta1 = 1, tau = 2) fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE,y = y,d = d,x=x,method = "SANN") param_mean <- fit$par
param_cov_mat <- solve(-fit\$hessian)
round(param_mean, 2)
beta0 beta1   tau
-2.65  2.34  1.12 
qnorm(c(0.025,0.975),param_mean[1],sqrt(param_cov_mat[1,1]))
[1] -3.579736 -1.712075
qnorm(c(0.025,0.975),param_mean[2],sqrt(param_cov_mat[2,2]))
[1] 1.926429 2.755371
qnorm(c(0.025,0.975),param_mean[3],sqrt(param_cov_mat[3,3]))
[1] 0.7986063 1.4431002