Inferência sob o Modelo Tobit utilizando os pacotes Bugs, Jags, Stan e Nimble
Tamanho amostral = \(100\)
\(x_i \sim unif(1,5;3)\)
Garantindo 5% de censura amostral
A amostra gerada tem a seguinte distribuição:
\(y_i \sim N(\beta_0+\beta_1\times x_i, \tau)\)
Com as seguintes prioris:
\(\beta_0 \sim N(0;0,01)\)
\(\beta_1 \sim N(0;0,01)\)
\(\tau \sim Ga(1;0,1)\)
\(E[\tau] = 10\) e \(V[\tau]=100\)
Para efeito de simulação, foram padronizadas as seguintes definições:
Simulações = 10000
burn = 1000
Tamanho amostral a posteriori = 9000
Cadeias = 1
Espaçamento = 1
########################################################################################
############# BUGS #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
table(y.cens<0)
FALSE
95
sink("regression.txt")
cat("
model
{
# priors
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
# likelihood
for(i in 1:N)
{
mu[i] <- beta0 + beta1*x[i]
y[i] ~ dnorm(mu[i], tau)C(0,)}
}
")
sink()
my.data <- list(y=y.cens,N=100,x=x)
params <- c("beta0","beta1","tau")
inits <- function(){list (beta0=1,beta1=1,tau=2)}
res <- bugs(data=my.data, inits=inits, parameters.to.save=params,
n.iter=10000, n.chains=1, n.burnin=1000,n.thin = 1,
model.file="regression.txt",DIC=FALSE)
res$summary
mean sd 2.5% 25% 50% 75% 97.5%
beta0 -1.924597 0.4815395 -2.8820000 -2.24325 -1.922 -1.60200 -0.991790
beta1 2.056745 0.2125253 1.6469750 1.91300 2.054 2.19800 2.480025
tau 1.265164 0.1818779 0.9342975 1.13775 1.257 1.38025 1.645000
par(mfrow=c(1,3))
plot(res$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(res$sims.matrix[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(res$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))
plot(density(res$sims.matrix[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(res$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(res$sims.matrix[,3]),main=expression(tau))
abline(v=1,col="red")
########################################################################################
############# JAGS #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
model_code= 'model {
for (i in 1:N){
mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
}'
tobit.inits <- function() {
list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}
?jags
m <- jags(model.file=textConnection(model_code),
parameters.to.save = c("beta0", "beta1","tau"),
n.iter=10000,n.burnin=1000,n.chains = 1,n.thin = 1,
data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),
inits=tobit.inits,DIC=FALSE)
module glm loaded
module dic loaded
Compiling model graph
Resolving undeclared variables
Allocating nodes
Graph information:
Observed stochastic nodes: 195
Unobserved stochastic nodes: 8
Total graph size: 508
Initializing model
|
| | 0%
|
|* | 2%
|
|** | 4%
|
|*** | 7%
|
|**** | 9%
|
|****** | 11%
|
|******* | 13%
|
|******** | 16%
|
|********* | 18%
|
|********** | 20%
|
|*********** | 22%
|
|************ | 24%
|
|************* | 27%
|
|************** | 29%
|
|**************** | 31%
|
|***************** | 33%
|
|****************** | 36%
|
|******************* | 38%
|
|******************** | 40%
|
|********************* | 42%
|
|********************** | 44%
|
|*********************** | 47%
|
|************************ | 49%
|
|************************** | 51%
|
|*************************** | 53%
|
|**************************** | 56%
|
|***************************** | 58%
|
|****************************** | 60%
|
|******************************* | 62%
|
|******************************** | 64%
|
|********************************* | 67%
|
|********************************** | 69%
|
|************************************ | 71%
|
|************************************* | 73%
|
|************************************** | 76%
|
|*************************************** | 78%
|
|**************************************** | 80%
|
|***************************************** | 82%
|
|****************************************** | 84%
|
|******************************************* | 87%
|
|******************************************** | 89%
|
|********************************************** | 91%
|
|*********************************************** | 93%
|
|************************************************ | 96%
|
|************************************************* | 98%
|
|**************************************************| 100%
m$BUGSoutput$summary
mean sd 2.5% 25% 50% 75% 97.5%
beta0 -2.688581 0.4871077 -3.6641946 -3.0117060 -2.688885 -2.370395 -1.741422
beta1 2.360142 0.2163471 1.9399078 2.2164496 2.362275 2.503432 2.793005
tau 1.108847 0.1639959 0.8147208 0.9933266 1.101411 1.213167 1.448729
par(mfrow=c(1,3))
plot(m$BUGSoutput$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(m$BUGSoutput$sims.matrix[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(m$BUGSoutput$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))
plot(density(m$BUGSoutput$sims.matrix[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(m$BUGSoutput$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(m$BUGSoutput$sims.matrix[,3]),main=expression(tau))
abline(v=1,col="red")
########################################################################################
############# STAN #############
########################################################################################
set.seed(20)
y = rnorm(1000,3,2)
y.censored <- ifelse(y>=0, y, 0)
y.ind <- as.numeric(y>=0)
N = length(y)
model = "data {
int<lower=0> N_obs;
int<lower=0> N_cens;
real y_obs[N_obs];
real<lower=min(y_obs)> U;
}
parameters {
real<lower=U> y_cens[N_cens];
real mu;
real<lower=0> sigma2;
}
transformed parameters {
real<lower=0> tau;
tau = 1 / sigma2;
}
model {
y_obs ~ normal(mu, sqrt(sigma2));
y_cens ~ normal(0, 10);
mu ~ normal(0,10);
sigma2 ~ gamma(1,0.1);
}"
data <- list(y_obs=y.censored, y_cens=y, N_obs = length(y.censored), N_cens = length(y), U = 0)
fit <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)
SAMPLING FOR MODEL 'e14823c6ba9dc00bd93917471a3cce2c' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
Chain 1: Iteration: 1001 / 10000 [ 10%] (Sampling)
Chain 1: Iteration: 2000 / 10000 [ 20%] (Sampling)
Chain 1: Iteration: 3000 / 10000 [ 30%] (Sampling)
Chain 1: Iteration: 4000 / 10000 [ 40%] (Sampling)
Chain 1: Iteration: 5000 / 10000 [ 50%] (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 4.77 seconds (Warm-up)
Chain 1: 30.399 seconds (Sampling)
Chain 1: 35.169 seconds (Total)
Chain 1:
summary(fit,c("mu","tau"))
$summary
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu 3.0300011 0.0004999933 0.05991698 2.9136245 2.9892271 3.0303034 3.0703253 3.145489 14360.56 1.0000211
tau 0.2701379 0.0001188452 0.01223221 0.2463536 0.2620211 0.2698582 0.2783503 0.294574 10593.67 0.9999091
$c_summary
, , chains = chain:1
stats
parameter mean sd 2.5% 25% 50% 75% 97.5%
mu 3.0300011 0.05991698 2.9136245 2.9892271 3.0303034 3.0703253 3.145489
tau 0.2701379 0.01223221 0.2463536 0.2620211 0.2698582 0.2783503 0.294574
fitd = extract(fit)
par(mfrow=c(1,2))
plot(fitd$mu,type="l",main=expression(mu))
abline(h=3,col="red")
plot(fitd$tau,type="l",main=expression(tau))
abline(h=0.25,col="red")
par(mfrow=c(1,2))
plot(density(fitd$mu,type="l"),main=expression(beta[0]))
abline(v=3,col="red")
plot(density(fitd$tau,type="l"),main=expression(beta[1]))
abline(v=0.25,col="red")
########################################################################################
############# STAN #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,0)
y.ind <- as.numeric(y>=0)
N = length(y)
model = "data {
int<lower=0> N_obs;
int<lower=0> N_cens;
vector[N_obs] y_obs;
vector[N_obs] x;
real<lower=min(y_obs)> U;
}
parameters {
real<lower=U> y_cens[N_cens];
real beta0;
real beta1;
real<lower=0> sigma2;
}
transformed parameters {
real<lower=0> tau;
tau = 1 / sigma2;
}
model {
y_obs ~ normal(beta0+beta1*x, sqrt(sigma2));
y_cens ~ normal(0, 10);
beta0 ~ normal(0,10);
beta1 ~ normal(0,10);
sigma2 ~ gamma(1,0.1);
}
generated quantities {
vector[N_obs] y_rep;
for(n in 1:N_obs) {
y_rep[n] = normal_rng(beta0 + beta1 * x[n], sqrt(sigma2));
}
}
"
data <- list(y_obs=y.cens, y_cens=y, N_obs = length(y.cens), N_cens = length(y), U = 0,x=x)
fit2 <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)
SAMPLING FOR MODEL 'f7a6ef25f375eddfb9bc1460ee975394' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
Chain 1: Iteration: 1001 / 10000 [ 10%] (Sampling)
Chain 1: Iteration: 2000 / 10000 [ 20%] (Sampling)
Chain 1: Iteration: 3000 / 10000 [ 30%] (Sampling)
Chain 1: Iteration: 4000 / 10000 [ 40%] (Sampling)
Chain 1: Iteration: 5000 / 10000 [ 50%] (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 0.815 seconds (Warm-up)
Chain 1: 5.686 seconds (Sampling)
Chain 1: 6.501 seconds (Total)
Chain 1:
summary(fit2,c("beta0","beta1","sigma2"))
$summary
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
beta0 -2.5029309 0.005461745 0.4608263 -3.402576 -2.8167268 -2.5013279 -2.189749 -1.583587 7118.880 0.9999093
beta1 2.2875233 0.002451611 0.2052878 1.886756 2.1474523 2.2848043 2.428126 2.691587 7011.696 0.9999060
sigma2 0.8801829 0.001066457 0.1311793 0.665012 0.7881697 0.8669909 0.957052 1.175921 15130.167 1.0001735
$c_summary
, , chains = chain:1
stats
parameter mean sd 2.5% 25% 50% 75% 97.5%
beta0 -2.5029309 0.4608263 -3.402576 -2.8167268 -2.5013279 -2.189749 -1.583587
beta1 2.2875233 0.2052878 1.886756 2.1474523 2.2848043 2.428126 2.691587
sigma2 0.8801829 0.1311793 0.665012 0.7881697 0.8669909 0.957052 1.175921
fitd = extract(fit2)
par(mfrow=c(1,3))
plot(fitd$beta0,type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(fitd$beta1,type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(fitd$sigma2,type="l",main=expression(sigma^2))
abline(h=1,col="red")
par(mfrow=c(1,3))
plot(density(fitd$beta0,type="l"),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(fitd$beta1,type="l"),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(fitd$sigma2,type="l"),main=expression(sigma^2))
abline(v=1,col="red")
########################################################################################
############# NIMBLE #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
simpleCode1 <- nimbleCode({
for (i in 1:N){
mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
})
tobit.inits <- function() {
list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}
simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N))
defining model...
BUGSmodel: found the same variable(s) in both 'data' and 'constants'; using variable(s) from 'data'.
building model...
setting data and initial values...
data not used in model: Nrunning calculate on model (any error reports that follow may simply reflect missing values in model variables) ...
checking model sizes and dimensions... This model is not fully initialized. This is not an error. To see which variables are not initialized, use model$initializeInfo(). For more information on model initialization, see help(modelInitialization).
model building finished.
mcmc.out <- nimbleMCMC(code = simpleModel1,
monitors=c("beta0", "beta1","tau"),
nchains = 1,
niter = 10000,
nburnin = 1000,
summary = TRUE,
progressBar = TRUE)
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compiler details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
mcmc.out$summary
Mean Median St.Dev. 95%CI_low 95%CI_upp
beta0 -2.680071 -2.663973 0.4857003 -3.6917210 -1.775284
beta1 2.356633 2.348672 0.2151124 1.9515371 2.801466
tau 1.111285 1.101014 0.1627670 0.8139987 1.446730
par(mfrow=c(1,3))
plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))
plot(density(mcmc.out$samples[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau))
abline(v=1,col="red")
geweke.diag(mcmc.out$samples)
Fraction in 1st window = 0.1
Fraction in 2nd window = 0.5
beta0 beta1 tau
0.1693 -0.1391 0.4951
raftery.diag(mcmc.out$samples)
Quantile (q) = 0.025
Accuracy (r) = +/- 0.005
Probability (s) = 0.95
Burn-in Total Lower bound Dependence
(M) (N) (Nmin) factor (I)
beta0 44 46372 3746 12.40
beta1 48 51728 3746 13.80
tau 2 3973 3746 1.06
########################################################################################
############# NIMBLE #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
simpleCode1 <- nimbleCode({
for (i in 1:N){
mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
})
tobit.inits <- function() {
list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}
simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N))
defining model...
BUGSmodel: found the same variable(s) in both 'data' and 'constants'; using variable(s) from 'data'.
building model...
setting data and initial values...
data not used in model: Nrunning calculate on model (any error reports that follow may simply reflect missing values in model variables) ...
checking model sizes and dimensions... This model is not fully initialized. This is not an error. To see which variables are not initialized, use model$initializeInfo(). For more information on model initialization, see help(modelInitialization).
model building finished.
mcmc.out <- nimbleMCMC(code = simpleModel1,
monitors=c("beta0", "beta1","tau"),
nchains = 1,
niter = 100000,
nburnin = 1000,
summary = TRUE,
progressBar = TRUE)
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compiler details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
mcmc.out$summary
Mean Median St.Dev. 95%CI_low 95%CI_upp
beta0 -2.692343 -2.693460 0.4915015 -3.6561635 -1.705984
beta1 2.361458 2.362215 0.2180700 1.9235208 2.790032
tau 1.110457 1.102130 0.1630927 0.8148995 1.454054
par(mfrow=c(1,3))
plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))
plot(density(mcmc.out$samples[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau))
abline(v=1,col="red")
geweke.diag(mcmc.out$samples)
Fraction in 1st window = 0.1
Fraction in 2nd window = 0.5
beta0 beta1 tau
0.4908 -0.4564 0.3993
raftery.diag(mcmc.out$samples)
Quantile (q) = 0.025
Accuracy (r) = +/- 0.005
Probability (s) = 0.95
Burn-in Total Lower bound Dependence
(M) (N) (Nmin) factor (I)
beta0 54 65151 3746 17.40
beta1 77 83050 3746 22.20
tau 2 3927 3746 1.05
########################################################################################
############# LAPLACE #############
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
d = ifelse(y < 0, 0, 1)
model <- function(p,y,d,x) {
log_lik <- sum(pnorm(0, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T)*(1-d)) + sum(d * dnorm(y, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T))
log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["tau"], 1, 0.1, log = T)
log_post
}
inits <- c(beta0 = 1,beta1 = 1, tau = 2)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE,y = y,d = d,x=x,method = "SANN")
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
beta0 beta1 tau
-2.65 2.34 1.12
qnorm(c(0.025,0.975),param_mean[1],sqrt(param_cov_mat[1,1]))
[1] -3.579736 -1.712075
qnorm(c(0.025,0.975),param_mean[2],sqrt(param_cov_mat[2,2]))
[1] 1.926429 2.755371
qnorm(c(0.025,0.975),param_mean[3],sqrt(param_cov_mat[3,3]))
[1] 0.7986063 1.4431002