Introdução

Inferência sob o Modelo Tobit utilizando os pacotes Bugs, Jags, Stan e Nimble

Tamanho amostral = \(100\)

\(x_i \sim unif(1,5;3)\)

Garantindo 5% de censura amostral

A amostra gerada tem a seguinte distribuição:

\(y_i \sim N(\beta_0+\beta_1\times x_i, \tau)\)

Com as seguintes prioris:

\(\beta_0 \sim N(0;0,01)\)

\(\beta_1 \sim N(0;0,01)\)

\(\tau \sim Ga(1;0,1)\)

\(E[\tau] = 10\) e \(V[\tau]=100\)

Para efeito de simulação, foram padronizadas as seguintes definições:

Simulações = 10000

burn = 1000

Tamanho amostral a posteriori = 9000

Cadeias = 1

Espaçamento = 1

Bugs

########################################################################################
#############                          BUGS                                ############# 
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
table(y.cens<0)

FALSE 
   95 
sink("regression.txt")
cat(" 
model
{
# priors
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
# likelihood
for(i in 1:N)
{
mu[i] <- beta0 + beta1*x[i]
y[i] ~ dnorm(mu[i], tau)C(0,)}
}
    ")
sink()
my.data <- list(y=y.cens,N=100,x=x)
params <- c("beta0","beta1","tau")
inits <- function(){list (beta0=1,beta1=1,tau=2)}
res <- bugs(data=my.data, inits=inits, parameters.to.save=params,
            n.iter=10000, n.chains=1, n.burnin=1000,n.thin = 1,
            model.file="regression.txt",DIC=FALSE)
res$summary
           mean        sd       2.5%      25%    50%      75%     97.5%
beta0 -1.924597 0.4815395 -2.8820000 -2.24325 -1.922 -1.60200 -0.991790
beta1  2.056745 0.2125253  1.6469750  1.91300  2.054  2.19800  2.480025
tau    1.265164 0.1818779  0.9342975  1.13775  1.257  1.38025  1.645000
par(mfrow=c(1,3))
plot(res$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(res$sims.matrix[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(res$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(res$sims.matrix[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(res$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(res$sims.matrix[,3]),main=expression(tau))
abline(v=1,col="red")

Jags

########################################################################################
#############                          JAGS                                ############# 
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
model_code=  'model {
for (i in 1:N){
mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
}'
tobit.inits <- function() {
  list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}
?jags
m <- jags(model.file=textConnection(model_code),
          parameters.to.save = c("beta0", "beta1","tau"),
          n.iter=10000,n.burnin=1000,n.chains = 1,n.thin = 1,
          data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),
          inits=tobit.inits,DIC=FALSE)
module glm loaded
module dic loaded
Compiling model graph
   Resolving undeclared variables
   Allocating nodes
Graph information:
   Observed stochastic nodes: 195
   Unobserved stochastic nodes: 8
   Total graph size: 508

Initializing model


  |                                                        
  |                                                  |   0%
  |                                                        
  |*                                                 |   2%
  |                                                        
  |**                                                |   4%
  |                                                        
  |***                                               |   7%
  |                                                        
  |****                                              |   9%
  |                                                        
  |******                                            |  11%
  |                                                        
  |*******                                           |  13%
  |                                                        
  |********                                          |  16%
  |                                                        
  |*********                                         |  18%
  |                                                        
  |**********                                        |  20%
  |                                                        
  |***********                                       |  22%
  |                                                        
  |************                                      |  24%
  |                                                        
  |*************                                     |  27%
  |                                                        
  |**************                                    |  29%
  |                                                        
  |****************                                  |  31%
  |                                                        
  |*****************                                 |  33%
  |                                                        
  |******************                                |  36%
  |                                                        
  |*******************                               |  38%
  |                                                        
  |********************                              |  40%
  |                                                        
  |*********************                             |  42%
  |                                                        
  |**********************                            |  44%
  |                                                        
  |***********************                           |  47%
  |                                                        
  |************************                          |  49%
  |                                                        
  |**************************                        |  51%
  |                                                        
  |***************************                       |  53%
  |                                                        
  |****************************                      |  56%
  |                                                        
  |*****************************                     |  58%
  |                                                        
  |******************************                    |  60%
  |                                                        
  |*******************************                   |  62%
  |                                                        
  |********************************                  |  64%
  |                                                        
  |*********************************                 |  67%
  |                                                        
  |**********************************                |  69%
  |                                                        
  |************************************              |  71%
  |                                                        
  |*************************************             |  73%
  |                                                        
  |**************************************            |  76%
  |                                                        
  |***************************************           |  78%
  |                                                        
  |****************************************          |  80%
  |                                                        
  |*****************************************         |  82%
  |                                                        
  |******************************************        |  84%
  |                                                        
  |*******************************************       |  87%
  |                                                        
  |********************************************      |  89%
  |                                                        
  |**********************************************    |  91%
  |                                                        
  |***********************************************   |  93%
  |                                                        
  |************************************************  |  96%
  |                                                        
  |************************************************* |  98%
  |                                                        
  |**************************************************| 100%
m$BUGSoutput$summary
           mean        sd       2.5%        25%       50%       75%     97.5%
beta0 -2.688581 0.4871077 -3.6641946 -3.0117060 -2.688885 -2.370395 -1.741422
beta1  2.360142 0.2163471  1.9399078  2.2164496  2.362275  2.503432  2.793005
tau    1.108847 0.1639959  0.8147208  0.9933266  1.101411  1.213167  1.448729
par(mfrow=c(1,3))
plot(m$BUGSoutput$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(m$BUGSoutput$sims.matrix[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(m$BUGSoutput$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(m$BUGSoutput$sims.matrix[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(m$BUGSoutput$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(m$BUGSoutput$sims.matrix[,3]),main=expression(tau))
abline(v=1,col="red")

Stan modelo normal

########################################################################################
#############                          STAN                                ############# 
########################################################################################
set.seed(20)
y = rnorm(1000,3,2)
y.censored <- ifelse(y>=0, y, 0)
y.ind <- as.numeric(y>=0)
N = length(y)
model = "data {
int<lower=0> N_obs;
int<lower=0> N_cens;
real y_obs[N_obs];
real<lower=min(y_obs)> U;
}
parameters {
real<lower=U> y_cens[N_cens];
real mu;
real<lower=0> sigma2;
}
transformed parameters {
real<lower=0> tau;
tau = 1 / sigma2; 
}
model {
y_obs ~ normal(mu, sqrt(sigma2));
y_cens ~ normal(0, 10);
mu ~ normal(0,10);
sigma2 ~ gamma(1,0.1);
}"
data <- list(y_obs=y.censored, y_cens=y, N_obs = length(y.censored), N_cens = length(y), U = 0)
fit <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)

SAMPLING FOR MODEL 'e14823c6ba9dc00bd93917471a3cce2c' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 1001 / 10000 [ 10%]  (Sampling)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Sampling)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Sampling)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Sampling)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 4.77 seconds (Warm-up)
Chain 1:                30.399 seconds (Sampling)
Chain 1:                35.169 seconds (Total)
Chain 1: 
summary(fit,c("mu","tau"))
$summary
         mean      se_mean         sd      2.5%       25%       50%       75%    97.5%    n_eff      Rhat
mu  3.0300011 0.0004999933 0.05991698 2.9136245 2.9892271 3.0303034 3.0703253 3.145489 14360.56 1.0000211
tau 0.2701379 0.0001188452 0.01223221 0.2463536 0.2620211 0.2698582 0.2783503 0.294574 10593.67 0.9999091

$c_summary
, , chains = chain:1

         stats
parameter      mean         sd      2.5%       25%       50%       75%    97.5%
      mu  3.0300011 0.05991698 2.9136245 2.9892271 3.0303034 3.0703253 3.145489
      tau 0.2701379 0.01223221 0.2463536 0.2620211 0.2698582 0.2783503 0.294574
fitd = extract(fit)
par(mfrow=c(1,2))
plot(fitd$mu,type="l",main=expression(mu))
abline(h=3,col="red")
plot(fitd$tau,type="l",main=expression(tau))
abline(h=0.25,col="red")
par(mfrow=c(1,2))

plot(density(fitd$mu,type="l"),main=expression(beta[0]))
abline(v=3,col="red")
plot(density(fitd$tau,type="l"),main=expression(beta[1]))
abline(v=0.25,col="red")

Stan tobit

########################################################################################
#############                          STAN                                ############# 
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,0)
y.ind <- as.numeric(y>=0)
N = length(y)
model = "data {
int<lower=0> N_obs;
int<lower=0> N_cens;
vector[N_obs] y_obs;
vector[N_obs] x;
real<lower=min(y_obs)> U;
}
parameters {
real<lower=U> y_cens[N_cens];
real beta0;
real beta1;
real<lower=0> sigma2;
}
transformed parameters {
real<lower=0> tau;
tau = 1 / sigma2; 
}
model {
y_obs ~ normal(beta0+beta1*x, sqrt(sigma2));
y_cens ~ normal(0, 10);
beta0 ~ normal(0,10);
beta1 ~ normal(0,10);
sigma2 ~ gamma(1,0.1);
}
generated quantities {
vector[N_obs] y_rep;
for(n in 1:N_obs) {
y_rep[n] = normal_rng(beta0 + beta1 * x[n], sqrt(sigma2));
}
}
"
data <- list(y_obs=y.cens, y_cens=y, N_obs = length(y.cens), N_cens = length(y), U = 0,x=x)
fit2 <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)

SAMPLING FOR MODEL 'f7a6ef25f375eddfb9bc1460ee975394' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
Chain 1: Iteration: 1001 / 10000 [ 10%]  (Sampling)
Chain 1: Iteration: 2000 / 10000 [ 20%]  (Sampling)
Chain 1: Iteration: 3000 / 10000 [ 30%]  (Sampling)
Chain 1: Iteration: 4000 / 10000 [ 40%]  (Sampling)
Chain 1: Iteration: 5000 / 10000 [ 50%]  (Sampling)
Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 0.815 seconds (Warm-up)
Chain 1:                5.686 seconds (Sampling)
Chain 1:                6.501 seconds (Total)
Chain 1: 
summary(fit2,c("beta0","beta1","sigma2"))
$summary
             mean     se_mean        sd      2.5%        25%        50%       75%     97.5%     n_eff      Rhat
beta0  -2.5029309 0.005461745 0.4608263 -3.402576 -2.8167268 -2.5013279 -2.189749 -1.583587  7118.880 0.9999093
beta1   2.2875233 0.002451611 0.2052878  1.886756  2.1474523  2.2848043  2.428126  2.691587  7011.696 0.9999060
sigma2  0.8801829 0.001066457 0.1311793  0.665012  0.7881697  0.8669909  0.957052  1.175921 15130.167 1.0001735

$c_summary
, , chains = chain:1

         stats
parameter       mean        sd      2.5%        25%        50%       75%     97.5%
   beta0  -2.5029309 0.4608263 -3.402576 -2.8167268 -2.5013279 -2.189749 -1.583587
   beta1   2.2875233 0.2052878  1.886756  2.1474523  2.2848043  2.428126  2.691587
   sigma2  0.8801829 0.1311793  0.665012  0.7881697  0.8669909  0.957052  1.175921
fitd = extract(fit2)
par(mfrow=c(1,3))
plot(fitd$beta0,type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(fitd$beta1,type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(fitd$sigma2,type="l",main=expression(sigma^2))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(fitd$beta0,type="l"),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(fitd$beta1,type="l"),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(fitd$sigma2,type="l"),main=expression(sigma^2))
abline(v=1,col="red")

Nimble 1

########################################################################################
#############                          NIMBLE                              ############# 
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
simpleCode1 <- nimbleCode({
for (i in 1:N){
mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
})
 
tobit.inits <- function() {
  list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}
simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N))
defining model...
BUGSmodel: found the same variable(s) in both 'data' and 'constants'; using variable(s) from 'data'.
building model...
setting data and initial values...
data not used in model: Nrunning calculate on model (any error reports that follow may simply reflect missing values in model variables) ... 
checking model sizes and dimensions... This model is not fully initialized. This is not an error. To see which variables are not initialized, use model$initializeInfo(). For more information on model initialization, see help(modelInitialization).
model building finished.
mcmc.out <- nimbleMCMC(code = simpleModel1, 
                       monitors=c("beta0", "beta1","tau"),
                       nchains = 1, 
                       niter = 10000,
                       nburnin = 1000,
                       summary = TRUE, 
                       progressBar = TRUE)
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compiler details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
mcmc.out$summary
           Mean    Median   St.Dev.  95%CI_low 95%CI_upp
beta0 -2.680071 -2.663973 0.4857003 -3.6917210 -1.775284
beta1  2.356633  2.348672 0.2151124  1.9515371  2.801466
tau    1.111285  1.101014 0.1627670  0.8139987  1.446730
par(mfrow=c(1,3))
plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(mcmc.out$samples[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau))
abline(v=1,col="red")

geweke.diag(mcmc.out$samples)

Fraction in 1st window = 0.1
Fraction in 2nd window = 0.5 

  beta0   beta1     tau 
 0.1693 -0.1391  0.4951 
raftery.diag(mcmc.out$samples)

Quantile (q) = 0.025
Accuracy (r) = +/- 0.005
Probability (s) = 0.95 
                                             
       Burn-in  Total Lower bound  Dependence
       (M)      (N)   (Nmin)       factor (I)
 beta0 44       46372 3746         12.40     
 beta1 48       51728 3746         13.80     
 tau   2        3973  3746          1.06     

Nimble 2

########################################################################################
#############                          NIMBLE                              ############# 
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
simpleCode1 <- nimbleCode({
for (i in 1:N){
mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)
})
 
tobit.inits <- function() {
  list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}
simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N))
defining model...
BUGSmodel: found the same variable(s) in both 'data' and 'constants'; using variable(s) from 'data'.
building model...
setting data and initial values...
data not used in model: Nrunning calculate on model (any error reports that follow may simply reflect missing values in model variables) ... 
checking model sizes and dimensions... This model is not fully initialized. This is not an error. To see which variables are not initialized, use model$initializeInfo(). For more information on model initialization, see help(modelInitialization).
model building finished.
mcmc.out <- nimbleMCMC(code = simpleModel1, 
                       monitors=c("beta0", "beta1","tau"),
                       nchains = 1, 
                       niter = 100000,
                       nburnin = 1000,
                       summary = TRUE, 
                       progressBar = TRUE)
compiling... this may take a minute. Use 'showCompilerOutput = TRUE' to see C++ compiler details.
compilation finished.
running chain 1...
|-------------|-------------|-------------|-------------|
|-------------------------------------------------------|
mcmc.out$summary
           Mean    Median   St.Dev.  95%CI_low 95%CI_upp
beta0 -2.692343 -2.693460 0.4915015 -3.6561635 -1.705984
beta1  2.361458  2.362215 0.2180700  1.9235208  2.790032
tau    1.110457  1.102130 0.1630927  0.8148995  1.454054
par(mfrow=c(1,3))
plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")
par(mfrow=c(1,3))

plot(density(mcmc.out$samples[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau))
abline(v=1,col="red")

geweke.diag(mcmc.out$samples)

Fraction in 1st window = 0.1
Fraction in 2nd window = 0.5 

  beta0   beta1     tau 
 0.4908 -0.4564  0.3993 
raftery.diag(mcmc.out$samples)

Quantile (q) = 0.025
Accuracy (r) = +/- 0.005
Probability (s) = 0.95 
                                             
       Burn-in  Total Lower bound  Dependence
       (M)      (N)   (Nmin)       factor (I)
 beta0 54       65151 3746         17.40     
 beta1 77       83050 3746         22.20     
 tau   2        3927  3746          1.05     

Laplace

########################################################################################
#############                          LAPLACE                             ############# 
########################################################################################
set.seed(20)
x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)
d = ifelse(y < 0, 0, 1)
model <- function(p,y,d,x) {
  log_lik <- sum(pnorm(0, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T)*(1-d)) + sum(d * dnorm(y, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T))
  log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["tau"], 1, 0.1, log = T)
  log_post
}
inits <- c(beta0 = 1,beta1 = 1, tau = 2)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE,y = y,d = d,x=x,method = "SANN")
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
beta0 beta1   tau 
-2.65  2.34  1.12 
qnorm(c(0.025,0.975),param_mean[1],sqrt(param_cov_mat[1,1]))
[1] -3.579736 -1.712075
qnorm(c(0.025,0.975),param_mean[2],sqrt(param_cov_mat[2,2]))
[1] 1.926429 2.755371
qnorm(c(0.025,0.975),param_mean[3],sqrt(param_cov_mat[3,3]))
[1] 0.7986063 1.4431002
---
title: "Tobit"
output: html_notebook
---


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library("R2OpenBUGS")
library("R2jags")
library("dplyr")
library("rstan")
library("nimble")
library("HDInterval")
library("mvtnorm")
library("LaplacesDemon")
```

#Introdução

Inferência sob o Modelo Tobit utilizando os pacotes Bugs, Jags, Stan e Nimble

Tamanho amostral = $100$

$x_i \sim unif(1,5;3)$

Garantindo 5% de censura amostral


A amostra gerada tem a seguinte distribuição:

$y_i \sim N(\beta_0+\beta_1\times x_i, \tau)$


Com as seguintes prioris:

$\beta_0 \sim N(0;0,01)$

$\beta_1 \sim N(0;0,01)$

$\tau \sim Ga(1;0,1)$

$E[\tau] = 10$ e $V[\tau]=100$


Para efeito de simulação, foram padronizadas as seguintes definições:

Simulações = 10000

_burn_ = 1000

Tamanho amostral a posteriori = 9000

Cadeias = 1

Espaçamento = 1
 

#Bugs

```{r}

########################################################################################
#############                          BUGS                                ############# 
########################################################################################

set.seed(20)

x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
table(y.cens<0)

sink("regression.txt")
cat(" 
model
{
# priors
beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)

# likelihood
for(i in 1:N)
{
mu[i] <- beta0 + beta1*x[i]
y[i] ~ dnorm(mu[i], tau)C(0,)}
}
    ")
sink()

my.data <- list(y=y.cens,N=100,x=x)

params <- c("beta0","beta1","tau")

inits <- function(){list (beta0=1,beta1=1,tau=2)}

res <- bugs(data=my.data, inits=inits, parameters.to.save=params,
            n.iter=10000, n.chains=1, n.burnin=1000,n.thin = 1,
            model.file="regression.txt",DIC=FALSE)
res$summary

par(mfrow=c(1,3))
plot(res$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(res$sims.matrix[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(res$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")


par(mfrow=c(1,3))
plot(density(res$sims.matrix[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(res$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(res$sims.matrix[,3]),main=expression(tau))
abline(v=1,col="red")

```



#Jags
```{r}

########################################################################################
#############                          JAGS                                ############# 
########################################################################################

set.seed(20)

x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)


model_code=  'model {
for (i in 1:N){

mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}

beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)

}'

tobit.inits <- function() {
  list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}

?jags

m <- jags(model.file=textConnection(model_code),
          parameters.to.save = c("beta0", "beta1","tau"),
          n.iter=10000,n.burnin=1000,n.chains = 1,n.thin = 1,
          data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),
          inits=tobit.inits,DIC=FALSE)

m$BUGSoutput$summary

par(mfrow=c(1,3))
plot(m$BUGSoutput$sims.matrix[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(m$BUGSoutput$sims.matrix[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(m$BUGSoutput$sims.matrix[,3],type="l",main=expression(tau))
abline(h=1,col="red")

par(mfrow=c(1,3))
plot(density(m$BUGSoutput$sims.matrix[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(m$BUGSoutput$sims.matrix[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(m$BUGSoutput$sims.matrix[,3]),main=expression(tau))
abline(v=1,col="red")


```


#Stan modelo normal

```{r,warning=FALSE}
########################################################################################
#############                          STAN                                ############# 
########################################################################################

set.seed(20)
y = rnorm(1000,3,2)
y.censored <- ifelse(y>=0, y, 0)
y.ind <- as.numeric(y>=0)
N = length(y)


model = "data {
int<lower=0> N_obs;
int<lower=0> N_cens;
real y_obs[N_obs];
real<lower=min(y_obs)> U;
}
parameters {
real<lower=U> y_cens[N_cens];
real mu;
real<lower=0> sigma2;
}
transformed parameters {
real<lower=0> tau;
tau = 1 / sigma2; 
}
model {
y_obs ~ normal(mu, sqrt(sigma2));
y_cens ~ normal(0, 10);
mu ~ normal(0,10);
sigma2 ~ gamma(1,0.1);
}"

data <- list(y_obs=y.censored, y_cens=y, N_obs = length(y.censored), N_cens = length(y), U = 0)
fit <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)
summary(fit,c("mu","tau"))

fitd = extract(fit)

par(mfrow=c(1,2))
plot(fitd$mu,type="l",main=expression(mu))
abline(h=3,col="red")
plot(fitd$tau,type="l",main=expression(tau))
abline(h=0.25,col="red")

par(mfrow=c(1,2))
plot(density(fitd$mu,type="l"),main=expression(beta[0]))
abline(v=3,col="red")
plot(density(fitd$tau,type="l"),main=expression(beta[1]))
abline(v=0.25,col="red")

```




#Stan tobit

```{r,warning=FALSE}
########################################################################################
#############                          STAN                                ############# 
########################################################################################

set.seed(20)

x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,0)
y.ind <- as.numeric(y>=0)
N = length(y)


model = "data {
int<lower=0> N_obs;
int<lower=0> N_cens;
vector[N_obs] y_obs;
vector[N_obs] x;
real<lower=min(y_obs)> U;
}
parameters {
real<lower=U> y_cens[N_cens];
real beta0;
real beta1;
real<lower=0> sigma2;
}
transformed parameters {
real<lower=0> tau;
tau = 1 / sigma2; 
}
model {
y_obs ~ normal(beta0+beta1*x, sqrt(sigma2));
y_cens ~ normal(0, 10);
beta0 ~ normal(0,10);
beta1 ~ normal(0,10);
sigma2 ~ gamma(1,0.1);
}
generated quantities {
vector[N_obs] y_rep;
for(n in 1:N_obs) {
y_rep[n] = normal_rng(beta0 + beta1 * x[n], sqrt(sigma2));
}
}
"

data <- list(y_obs=y.cens, y_cens=y, N_obs = length(y.cens), N_cens = length(y), U = 0,x=x)
fit2 <- stan(model_code = model,data = data, iter = 10000, warmup = 1000, chains = 1)
summary(fit2,c("beta0","beta1","sigma2"))

fitd = extract(fit2)

par(mfrow=c(1,3))
plot(fitd$beta0,type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(fitd$beta1,type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(fitd$sigma2,type="l",main=expression(sigma^2))
abline(h=1,col="red")

par(mfrow=c(1,3))
plot(density(fitd$beta0,type="l"),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(fitd$beta1,type="l"),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(fitd$sigma2,type="l"),main=expression(sigma^2))
abline(v=1,col="red")
```


#Nimble 1
```{r}
########################################################################################
#############                          NIMBLE                              ############# 
########################################################################################

set.seed(20)

x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)

simpleCode1 <- nimbleCode({
for (i in 1:N){

mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}

beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)

})
 
tobit.inits <- function() {
  list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}

simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N))

mcmc.out <- nimbleMCMC(code = simpleModel1, 
                       monitors=c("beta0", "beta1","tau"),
                       nchains = 1, 
                       niter = 10000,
                       nburnin = 1000,
                       summary = TRUE, 
                       progressBar = TRUE)

mcmc.out$summary

par(mfrow=c(1,3))
plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")

par(mfrow=c(1,3))
plot(density(mcmc.out$samples[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau))
abline(v=1,col="red")

geweke.diag(mcmc.out$samples)
raftery.diag(mcmc.out$samples)

```


#Nimble 2

```{r}
########################################################################################
#############                          NIMBLE                              ############# 
########################################################################################

set.seed(20)

x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)

simpleCode1 <- nimbleCode({
for (i in 1:N){

mu[i] <- beta0 + beta1*x[i]
y.ind[i] ~ dinterval(y.censored[i], 0)
y.censored[i] ~ dnorm(mu[i], tau)}

beta0 ~ dnorm(0,0.01)
beta1 ~ dnorm(0,0.01)
tau ~ dgamma(1,0.1)

})
 
tobit.inits <- function() {
  list(beta0=1, beta1=1,tau=2,y.censored=ifelse(y.ind, NA, -abs(rnorm(N, 0, 1))))
}

simpleModel1 <- nimbleModel(simpleCode1,data=list(y.censored=y.cens, y.ind=y.ind, N=N,x=x),constants = list(N = N))

mcmc.out <- nimbleMCMC(code = simpleModel1, 
                       monitors=c("beta0", "beta1","tau"),
                       nchains = 1, 
                       niter = 100000,
                       nburnin = 1000,
                       summary = TRUE, 
                       progressBar = TRUE)

mcmc.out$summary

par(mfrow=c(1,3))
plot(mcmc.out$samples[,1],type="l",main=expression(beta[0]))
abline(h=-2,col="red")
plot(mcmc.out$samples[,2],type="l",main=expression(beta[1]))
abline(h=2,col="red")
plot(mcmc.out$samples[,3],type="l",main=expression(tau))
abline(h=1,col="red")

par(mfrow=c(1,3))
plot(density(mcmc.out$samples[,1]),main=expression(beta[0]))
abline(v=-2,col="red")
plot(density(mcmc.out$samples[,2]),main=expression(beta[1]))
abline(v=2,col="red")
plot(density(mcmc.out$samples[,3]),main=expression(tau))
abline(v=1,col="red")

geweke.diag(mcmc.out$samples)
raftery.diag(mcmc.out$samples)

```



#Laplace
```{r}
########################################################################################
#############                          LAPLACE                             ############# 
########################################################################################


set.seed(20)

x <- runif(100,1.5,3)
y <- rnorm(100,-2+2*x)
y.cens <- ifelse(y>0,y,NA)
y.ind <- as.numeric(y>=0)
N = length(y)

d = ifelse(y < 0, 0, 1)

model <- function(p,y,d,x) {
  log_lik <- sum(pnorm(0, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T)*(1-d)) + sum(d * dnorm(y, p["beta0"] + p["beta1"]*x, 1/sqrt(p["tau"]), log = T))
  log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["tau"], 1, 0.1, log = T)
  log_post
}

inits <- c(beta0 = 1,beta1 = 1, tau = 2)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE,y = y,d = d,x=x,method = "SANN")
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)


qnorm(c(0.025,0.975),param_mean[1],sqrt(param_cov_mat[1,1]))
qnorm(c(0.025,0.975),param_mean[2],sqrt(param_cov_mat[2,2]))
qnorm(c(0.025,0.975),param_mean[3],sqrt(param_cov_mat[3,3]))

```


