O documento a seguir é referente a implementação do algoritmo do Gibbs para resolução de problemáticas inseridas no contexto de inferência bayesiana. Em particular, para a utilização de métodos via Monte Carlo Markov Chain
Ao leitor, segue um dicionário de variáveis para orientar a leitura.
DICIONÁRIO DE VARIÁVEIS
Verossimilhança de
\[ p\left( \textbf{Y}|\phi_0,\phi_1,\tau\right) = p\left(y_t|y_1, \ldots, y_{t-1},\phi_0,\phi_1,\tau\right) p\left(y_1,\ldots, y_{t-1}|\phi_0,\phi_1,\tau \right) \] \[ =p\left(y_t|y_{t-1},\phi_0,\phi_1,\tau\right) p\left(y_{t-1}|y_{t-2},\phi_0,\phi_1,\tau\right)\ldots p\left( y_2|\phi_0,\phi_1,\tau \right) \] \[ =\prod_{i=1}^t p\left( y_i|y_{i - 1}, \phi_0,\phi_1,\tau \right) \]
Condicional completa para \(\phi_0\):
\[ p\left( \phi_0|\textbf{Y},\phi_1,\tau \right) \propto p\left( \phi_0, \phi_1, \tau, \textbf{Y} \right) \propto p\left( \textbf{Y}| \phi_0, \phi_1, \tau \right) p\left( \phi_0 \right) p\left( \phi_1 \right) p\left( \tau \right) \] \[ \propto \prod_{i = 2}^t p\left( y_i|y_{i - 1},\phi_0,\phi_1,\tau \right) p\left(\phi_0\right) p\left(\phi_1\right) p\left(\tau\right) \] \[ \propto \exp\left\{-\frac{\tau}{2}\sum_{i = 2}^t \left( \left( y_i - \phi_0 \right) - \phi_1y_{i-1} \right)^2 \right\} \exp \left\{ -\frac{\tau_0}{2} \left( \phi_0 - \mu_0 \right)^2 \right\} \] \[ \propto \exp\left\{ \frac{-\tau}{2} \sum_{i = 2}^t \left( \left( y_i - \phi_1y_{i - 1} \right)^2 - 2\left( y_i - \phi_1y_{i - 1} \right)\phi_0 + \phi_0^2 \right) -\frac{\tau_0}{2}\left( \phi_0^2 - 2\phi_0\mu_0 \right) \right\} \] \[ \propto \exp \left\{ \frac{-\tau}{2} \left( -2\phi_0\sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1} \right) +t\phi_0^2 \right) -\frac{\tau_0}{2}\left( \phi_0^2 - 2\phi_0\mu_0 \right) \right\} \] \[ \propto \exp\left\{ -\left(\frac{t\tau}{2} + \frac{\tau_0}{2} \right) \left(\phi_0^2 - 2\left( \sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1}\right)\frac{\tau}{2} +\frac{\mu_0\tau_0}{2} \right) \left(\frac{2}{t\tau + \tau_0} \right) \right) \right\} \] \[ \propto \exp\left\{ -\left( \frac{t\tau + \tau_0}{2} \right) \left(\phi_0^2 - 2\phi_0 \left( \frac{\sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1} \right)\tau + \mu_0\tau_0 }{t\tau + \tau_0} \right) \right) \right\} \] Logo \[ \left(\phi_0|\textbf{Y},\phi_1,\tau \right) \sim N\left(\frac{\sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1} \right) \tau + \mu_0\tau_0}{t\tau + \tau_0},\left( n\tau + \tau_0 \right)^{-1}\right) \]
Condicional completa para \(\phi_1\):
\[ p\left( \phi_1|\textbf{Y},\phi_0,\tau \right) \propto p\left( \textbf{Y},\phi_0\phi_1,\tau \right) \propto p\left(\textbf{Y}|\phi_0,\phi_1,\tau\right) p\left(\phi_0\right)p\left(\phi_1\right)p\left(\tau\right) \] \[ \propto \prod_{i = 1}^t p\left(y_i|y_{i -1}, \phi_0, \phi_1 \tau \right) p\left(\phi_1\right) \] \[ \propto \exp \left\{ -\frac{\tau}{2} \sum_{i = 2}^t\left( \left(y_i-\phi_0 \right)-\phi_1y_{i-1} \right)^2 - \frac{\tau_1}{2}\left(\phi_1-\mu_1 \right)^2 \right\} \] \[ \propto \exp\left\{ \frac{\tau}{2}\left( \sum_{i = 2}^t \left(y_i - \phi_0 \right)^2 - 2\sum_{i = 2}^t \left( y_i - \phi_0 \right)\phi_1y_{i-1} + \phi_1\sum_{i = 2}^ty_{i-1}^2 \right) - \frac{\tau_1}{2}\left( \phi_1^2 - 2\phi_1\mu_1\right) \right\} \] \[ \propto\exp\left\{ -\frac{1}{2}\left( \tau\sum_{i = 2}^ty_{i-1}^2 + \tau_1 \right)\left( \phi_1^2 - 2\phi_1 \left( \frac{1}{\tau\sum_{i = 2}^ty_{i-1}^2 + \tau_1} \right) \left( \sum_{i = 2}^t\left( y_i - \phi_0 \right)y_{i-1}\tau + \tau_1\mu_1 \right) \right) \right\} \] \[ \propto\exp\left\{ -\frac{1}{2}\left( \tau\sum_{i = 2}^ty_{i-1}^2+\tau_1 \right) \left( \phi_1 - \frac{\sum_{i = 2}^t \left( y_i - \phi_0 \right)y_{i-1}\tau + \tau_1\mu_1}{\tau\sum_{i = 2}^ty_{i-1}^2 + \tau_1} \right)^2 \right\} \] Logo \[ \left(\phi_1|\textbf{Y},\phi_0,\tau\right) \sim N\left( \frac{\Sigma_{i =2 }^t\left( y_i - \phi_0 \right)y_{i-1}\tau}{\tau\Sigma_{i=2}^ty_{i-1}^2 + \tau_1}, \left( \tau\Sigma_{i =2 }^ty_{i-1}^2 + \tau_1 \right) \right) \]
Condicional completa para \(\tau\): \[
p\left(\tau|\textbf{Y},\phi_0,\phi_1\right) \propto
p\left(\textbf{Y}|\phi_0,\phi_1,\tau \right)
p\left(\phi_0\right)p\left(\phi_1\right)p\left(\tau\right)
\] \[
\propto \prod_{i = 2}^t \tau^{\frac{1}{2}}\exp\left\{
-\frac{\tau}{2}\left(y_t - \phi_0 - \phi_1y_{t-1}
\right)^2
\right\}
\tau^{\alpha - 1}\exp\left\{ -\beta\tau
\right\}
\] \[
\propto \tau^{\frac{t}{2}} \exp\left\{
-\frac{\tau\sum_{i = 2}^t\left(y_i-\phi_0-\phi_1y_{i-1}
\right)^2}{2}
\right\}
\tau^{\alpha - 1} \exp\left\{
-\beta\tau
\right\}
\] Logo \[
\left(\tau|\textbf{Y},\phi_0,\phi_1\right)\sim
N\left(\frac{n}{2} + \alpha,
\frac{\sum_{i = 2}^t\left( y_i - \phi_0 -\phi_1y_{i-1}
\right)^2}{2}+\beta
\right)
\]
Gerando uma amostra aleatória para teste
rm(list=ls())
phi_verdadeiro=2
beta0_verdadeiro=1
beta1_verdadeiro=1
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(1/phi_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+eDefinindo
phi=5
beta0=-5
beta1=-5
a=2
b=2
N=10000
s2=10000
s2_0=10000
s2_1=10000
mu0=1
mu1=1Iterador para a amostragem via Gibbs Sampling
for (i in 2:N) {
alpha=a+n/2
beta=b+sum((y-beta0[i-1]-beta1[i-1]*x)^2)/2
phi[i]=rgamma(1,alpha,beta)
media_beta0 = ((s2_0*(sum(y)-(beta1[i-1]*sum(x))))+(phi[i]*mu0))/((n*s2_0)+phi[i])
dp_beta0 = sqrt((phi[i]*s2_0)/((n*s2_0)+phi[i]))
beta0[i] = rnorm(1,media_beta0,dp_beta0)
media_beta1 = ((s2_1*(sum(x*y)-(beta0[i]*sum(x))))+(phi[i]*mu1))/((s2_1*sum(x^2))+phi[i])
dp_beta1 = sqrt((phi[i]*s2_1)/((s2_1*sum(x^2))+phi[i]))
beta1[i] = rnorm(1,media_beta1,dp_beta1)
}
Gráfico de convergência da cadeia resultando, com todas as observações geradas
plot(beta0,type = "l",main=expression(beta[0]),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=beta0_verdadeiro,col="red")plot(beta1,type = "l",main=expression(beta[0]),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=beta1_verdadeiro,col="red")plot(phi,type = "l",main=expression(tau),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=phi_verdadeiro,col="red")
Gerando duas cadeias, com pontos inicias diferentes, para verificar a convergência
phib=0;beta0b=5;beta1b=5
for (i in 2:N) {
alpha=a+n/2
beta=b+sum((y-beta0b[i-1]-beta1b[i-1]*x)^2)/2
phib[i]=rgamma(1,alpha,beta)
media_beta0 = ((s2_0*(sum(y)-(beta1b[i-1]*sum(x))))+(phib[i]*mu0))/((n*s2_0)+phib[i])
dp_beta0 = sqrt((phib[i]*s2_0)/((n*s2_0)+phib[i]))
beta0b[i] = rnorm(1,media_beta0,dp_beta0)
media_beta1 = ((s2_1*(sum(x*y)-(beta0b[i]*sum(x))))+(phib[i]*mu1))/((s2_1*sum(x^2))+phib[i])
dp_beta1 = sqrt((phib[i]*s2_1)/((s2_1*sum(x^2))+phib[i]))
beta1b[i] = rnorm(1,media_beta1,dp_beta1)
}Verificando visualmente a convergência múltipla
plot(beta0,type="l",col="lightsalmon",main=expression(beta[0]),ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
lines(beta0b,type="l",col="purple3")
abline(h=beta0_verdadeiro,col="red")plot(beta1,type="l",col="lightsalmon",main=expression(beta[1]),ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
lines(beta1b,type="l",col="purple3")
abline(h=beta1_verdadeiro,col="red")plot(phi,type="l",col="lightsalmon",main=expression(phi),ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
lines(phib,type="l",col="purple3")
abline(h=phi_verdadeiro,col="red")
raftery.diag(as.mcmc(beta0))##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## 1 3726 3746 0.995
raftery.diag(as.mcmc(beta1))##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## 2 3577 3746 0.955
raftery.diag(as.mcmc(phi))##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## 2 3802 3746 1.01
geweke.diag(as.mcmc(beta0))##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## var1
## 0.1083
geweke.diag(as.mcmc(beta1))##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## var1
## -1.669
geweke.diag(as.mcmc(phi))##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## var1
## 0.5987
Correlograma
Acf(beta0,main=expression(beta[0]),lag.max=200,cex.main=3)Acf(beta1,main=expression(beta[1]),cex.main=3,lag.max=200)Acf(phi,main=expression(phi),cex.main=3,lag.max=200)
Representações numéricas
autocorr(as.mcmc(beta0))## , , 1
##
## [,1]
## Lag 0 1.000000000
## Lag 1 0.018219540
## Lag 5 0.007222551
## Lag 10 0.004252414
## Lag 50 -0.004590715
autocorr(as.mcmc(beta1))## , , 1
##
## [,1]
## Lag 0 1.000000000
## Lag 1 -0.005255897
## Lag 5 -0.001784056
## Lag 10 -0.013111996
## Lag 50 0.006106261
autocorr(as.mcmc(phi))## , , 1
##
## [,1]
## Lag 0 1.000000000
## Lag 1 -0.057375572
## Lag 5 -0.012878896
## Lag 10 -0.009066621
## Lag 50 -0.012297549
Removendo burnin
#Retirando o burnin
burnbeta0=burnin(beta0,method="Geweke")+10
burnbeta1=burnin(beta1,metho="Geweke")+10
burnphi=burnin(phi,metho="Geweke")+10
Beta0 = beta0[burnbeta0:length(beta0)]
Beta1 = beta1[burnbeta1:length(beta1)]
Phi = phi[burnphi:length(phi)]Gráfico das cadeias com o burnin removido
plot(Beta0, type = "l",main=expression(beta[0]),ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)plot(Beta1, type = "l",main=expression(beta[1]),ylab="",xlab="",cex.main=3 ,cex.lab=2, cex.axis=2)plot(Phi, type = "l",main=expression(phi),ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
hist(Beta0,main=expression(beta[0]),ylab="",xlab="",col='lavender',cex.main=3,cex.lab=2, cex.axis=2)hist(Beta1,main=expression(beta[1]),ylab="",xlab="",col='lavender',cex.main=3,cex.lab=2, cex.axis=2)hist(Phi,main=expression(phi),ylab="",xlab="",col='lavender',cex.main=3,cex.lab=2, cex.axis=2)densplot(as.mcmc(Beta0),main=expression(phi[0]),xlab="",ylab="",cex.main=3,cex.lab=2, cex.axis=2)densplot(as.mcmc(Beta1),main=expression(phi[1]),xlab="",ylab="",cex.main=3,cex.lab=2, cex.axis=2)densplot(as.mcmc(Phi),main=expression(tau),xlab="",ylab="",cex.main=3,cex.lab=2, cex.axis=2)
Prioris
gamma <- function(x){
a = 2
b = 2
return(dgamma(x,a,b))
}
curve(gamma,from = 0, to = 5)normal <- function(x){
return(dnorm(x,mean = 0, sd = sqrt(10000)))
}
curve(normal,from = -450, to = 450)
Priori com Posteriori
plot(density(Beta0),xlim=c(-450,450),ylim=c(0,0.013),lty=1,
col="blue",main=expression(beta[0]),xlab="",ylab="",lwd=2 ,cex.main=3,cex.lab=2, cex.axis=2)
curve(normal,from = -450, to = 450,add=TRUE,lty=2,col="red",lwd=2)
legend("topleft",legend=c("Priori","Posteriori"),lty=c(2,1),
cex = 1.5,col=c("red","blue"),lwd=2,bty = "n")plot(density(Beta1),xlim=c(-450,450),ylim=c(0,0.013),
lty=1,col="blue",main=expression(beta[1]),xlab="",ylab="",
lwd=2,cex.main=3,cex.lab=2, cex.axis=2)
curve(normal,from = -450, to = 450,add=TRUE,lty=2,col="red",lwd=2)
legend("topleft",legend=c("Priori","Posteriori"),lty=c(2,1),
cex = 1.5,col=c("red","blue"),lwd=2, bty = "n")plot(density(Phi),xlim=c(0,5),ylim=c(0,1.5),lty=1,
col="blue",main=expression(phi),xlab="",ylab="",lwd=2,cex.main=3
,cex.lab=2, cex.axis=2)
curve(gamma,from = 0, to = 5, add=TRUE,lty=2,col="red",lwd=2)
legend("topleft",legend=c("Priori","Posteriori"),lty=c(2,1)
,cex = 1.5,col=c("red","blue"),lwd=2,bty = "n")
preditiva <- NULL
for(i in 2:N){preditiva[i] <- beta0[i] + beta1[i]*x[n] + rnorm(1,0,sqrt(phi[1]^(-1)))}
hist(preditiva[100:length(preditiva)], main = NULL,
xlab = NULL, ylab = NULL, freq = F, cex.main = 3, cex.lab = 2, cex.axis = 2,col = "lavender",prob=TRUE)
lines(density(preditiva[100:length(preditiva)]),lty="longdash",lwd=2,col="firebrick3")est1=mean(Beta0)
est2=mean(Beta1)
est3=mean(Phi)
est4=median(Beta0)
est5=median(Beta1)
est6=median(Phi)
obj1 = density(Beta0)
est7=obj1$x[which.max(obj1$y)]
obj2 = density(Beta1)
est8=obj2$x[which.max(obj2$y)]
obj3 = density(Phi)
est9=obj3$x[which.max(obj3$y)]
media = c(est1,est2,est3)
mediana = c(est4,est5,est6)
moda = c(est7,est8,est9)
estima = cbind(media,mediana,moda)
rownames(estima) = c("Beta 0", "Beta 1", "Phi")
estima## media mediana moda
## Beta 0 0.8883537 0.8883062 0.8812917
## Beta 1 1.0272279 1.0279592 1.0316570
## Phi 1.7040812 1.6917554 1.6635090