O documento a seguir é referente a implementação do algoritmo do Metropolis Hastings para resolução de problemáticas inseridas no contexto de inferência bayesiana. Em particular, para a utilização de métodos via Monte Carlo Markov Chain
Ao leitor, segue um dicionário de variáveis para orientar a leitura.
DICIONÁRIO DE VARIÁVEIS
Verossimilhança de
\[ p\left( \textbf{Y}|\phi_0,\phi_1,\tau\right) = p\left(y_t|y_1, \ldots, y_{t-1},\phi_0,\phi_1,\tau\right) p\left(y_1,\ldots, y_{t-1}|\phi_0,\phi_1,\tau \right) \] \[ =p\left(y_t|y_{t-1},\phi_0,\phi_1,\tau\right) p\left(y_{t-1}|y_{t-2},\phi_0,\phi_1,\tau\right)\ldots p\left( y_2|\phi_0,\phi_1,\tau \right) \] \[ =\prod_{i=1}^t p\left( y_i|y_{i - 1}, \phi_0,\phi_1,\tau \right) \]
Condicional completa para \(\phi_0\):
\[ p\left( \phi_0|\textbf{Y},\phi_1,\tau \right) \propto p\left( \phi_0, \phi_1, \tau, \textbf{Y} \right) \propto p\left( \textbf{Y}| \phi_0, \phi_1, \tau \right) p\left( \phi_0 \right) p\left( \phi_1 \right) p\left( \tau \right) \] \[ \propto \prod_{i = 2}^t p\left( y_i|y_{i - 1},\phi_0,\phi_1,\tau \right) p\left(\phi_0\right) p\left(\phi_1\right) p\left(\tau\right) \] \[ \propto \exp\left\{-\frac{\tau}{2}\sum_{i = 2}^t \left( \left( y_i - \phi_0 \right) - \phi_1y_{i-1} \right)^2 \right\} \exp \left\{ -\frac{\tau_0}{2} \left( \phi_0 - \mu_0 \right)^2 \right\} \] \[ \propto \exp\left\{ \frac{-\tau}{2} \sum_{i = 2}^t \left( \left( y_i - \phi_1y_{i - 1} \right)^2 - 2\left( y_i - \phi_1y_{i - 1} \right)\phi_0 + \phi_0^2 \right) -\frac{\tau_0}{2}\left( \phi_0^2 - 2\phi_0\mu_0 \right) \right\} \] \[ \propto \exp \left\{ \frac{-\tau}{2} \left( -2\phi_0\sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1} \right) +t\phi_0^2 \right) -\frac{\tau_0}{2}\left( \phi_0^2 - 2\phi_0\mu_0 \right) \right\} \] \[ \propto \exp\left\{ -\left(\frac{t\tau}{2} + \frac{\tau_0}{2} \right) \left(\phi_0^2 - 2\left( \sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1}\right)\frac{\tau}{2} +\frac{\mu_0\tau_0}{2} \right) \left(\frac{2}{t\tau + \tau_0} \right) \right) \right\} \] \[ \propto \exp\left\{ -\left( \frac{t\tau + \tau_0}{2} \right) \left(\phi_0^2 - 2\phi_0 \left( \frac{\sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1} \right)\tau + \mu_0\tau_0 }{t\tau + \tau_0} \right) \right) \right\} \] Logo \[ \left(\phi_0|\textbf{Y},\phi_1,\tau \right) \sim N\left(\frac{\sum_{i = 2}^t \left( y_i - \phi_1y_{i - 1} \right) \tau + \mu_0\tau_0}{t\tau + \tau_0},\left( n\tau + \tau_0 \right)^{-1}\right) \]
Condicional completa para \(\phi_1\):
\[ p\left( \phi_1|\textbf{Y},\phi_0,\tau \right) \propto p\left( \textbf{Y},\phi_0\phi_1,\tau \right) \propto p\left(\textbf{Y}|\phi_0,\phi_1,\tau\right) p\left(\phi_0\right)p\left(\phi_1\right)p\left(\tau\right) \] \[ \propto \prod_{i = 1}^t p\left(y_i|y_{i -1}, \phi_0, \phi_1 \tau \right) p\left(\phi_1\right) \] \[ \propto \exp \left\{ -\frac{\tau}{2} \sum_{i = 2}^t\left( \left(y_i-\phi_0 \right)-\phi_1y_{i-1} \right)^2 - \frac{\tau_1}{2}\left(\phi_1-\mu_1 \right)^2 \right\} \] \[ \propto \exp\left\{ \frac{\tau}{2}\left( \sum_{i = 2}^t \left(y_i - \phi_0 \right)^2 - 2\sum_{i = 2}^t \left( y_i - \phi_0 \right)\phi_1y_{i-1} + \phi_1\sum_{i = 2}^ty_{i-1}^2 \right) - \frac{\tau_1}{2}\left( \phi_1^2 - 2\phi_1\mu_1\right) \right\} \] \[ \propto\exp\left\{ -\frac{1}{2}\left( \tau\sum_{i = 2}^ty_{i-1}^2 + \tau_1 \right)\left( \phi_1^2 - 2\phi_1 \left( \frac{1}{\tau\sum_{i = 2}^ty_{i-1}^2 + \tau_1} \right) \left( \sum_{i = 2}^t\left( y_i - \phi_0 \right)y_{i-1}\tau + \tau_1\mu_1 \right) \right) \right\} \] \[ \propto\exp\left\{ -\frac{1}{2}\left( \tau\sum_{i = 2}^ty_{i-1}^2+\tau_1 \right) \left( \phi_1 - \frac{\sum_{i = 2}^t \left( y_i - \phi_0 \right)y_{i-1}\tau + \tau_1\mu_1}{\tau\sum_{i = 2}^ty_{i-1}^2 + \tau_1} \right)^2 \right\} \] Logo \[ \left(\phi_1|\textbf{Y},\phi_0,\tau\right) \sim N\left( \frac{\Sigma_{i =2 }^t\left( y_i - \phi_0 \right)y_{i-1}\tau}{\tau\Sigma_{i=2}^ty_{i-1}^2 + \tau_1}, \left( \tau\Sigma_{i =2 }^ty_{i-1}^2 + \tau_1 \right) \right) \]
Condicional completa para \(\tau\): \[
p\left(\tau|\textbf{Y},\phi_0,\phi_1\right) \propto
p\left(\textbf{Y}|\phi_0,\phi_1,\tau \right)
p\left(\phi_0\right)p\left(\phi_1\right)p\left(\tau\right)
\] \[
\propto \prod_{i = 2}^t \tau^{\frac{1}{2}}\exp\left\{
-\frac{\tau}{2}\left(y_t - \phi_0 - \phi_1y_{t-1}
\right)^2
\right\}
\tau^{\alpha - 1}\exp\left\{ -\beta\tau
\right\}
\] \[
\propto \tau^{\frac{t}{2}} \exp\left\{
-\frac{\tau\sum_{i = 2}^t\left(y_i-\phi_0-\phi_1y_{i-1}
\right)^2}{2}
\right\}
\tau^{\alpha - 1} \exp\left\{
-\beta\tau
\right\}
\] Logo \[
\left(\tau|\textbf{Y},\phi_0,\phi_1\right)\sim
N\left(\frac{n}{2} + \alpha,
\frac{\sum_{i = 2}^t\left( y_i - \phi_0 -\phi_1y_{i-1}
\right)^2}{2}+\beta
\right)
\]
Gerando uma amostra aleatória para teste
rm(list=ls())
sigma2_verdadeiro=2
beta0_verdadeiro=1
beta1_verdadeiro=1
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+eDefinindo
N = 15000
n = length(y)
taxa = 0
mu0 = 0
mu1 = 0
s2_0 = 1000
s2_1 = 1000
beta0 = 5
beta1 = 5
sigma2 = 5
delta = 0.5
a = 0.01
b = 0.001Iterador para a amostragem via Metropolis Hastings
for (i in 2:N){
media_beta0 = ((s2_0*(sum(y)-(beta1[i-1]*sum(x))))+(sigma2[i-1]*mu0))/((n*s2_0)+sigma2[i-1])
dp_beta0 = sqrt((sigma2[i-1]*s2_0)/((n*s2_0)+sigma2[i-1]))
beta0[i] = rnorm(1,media_beta0,dp_beta0)
media_beta1 = ((s2_1*(sum(x*y)-(beta0[i]*sum(x))))+(sigma2[i-1]*mu1))/((s2_1*sum(x^2))+sigma2[i-1])
dp_beta1 = sqrt((sigma2[i-1]*s2_1)/((s2_1*sum(x^2))+sigma2[i-1]))
beta1[i] = rnorm(1,media_beta1,dp_beta1)
log_sigma2_p = rnorm(1,log(sigma2[i-1]),delta)
sigma2_p = exp(log_sigma2_p)
u = runif(1)
log_num = -(((n/2)+a+1)*log(sigma2_p)) - ((1/(2*sigma2_p))*(sum((y-beta0[i]-(beta1[i]*x))^2)+(2*b))) - log(sigma2[i-1])
log_den = -(((n/2)+a+1)*log(sigma2[i-1])) - ((1/(2*sigma2[i-1]))*(sum((y-beta0[i]-(beta1[i]*x))^2)+(2*b))) - log(sigma2_p)
prob = log_num - log_den
alfa = min(0, prob)
if (log(u) < alfa) {sigma2[i] = sigma2_p;taxa = taxa + 1} else {sigma2[i] = sigma2[i - 1]}
}
aceita = taxa/N
aceita## [1] 0.3293333
Gráfico de convergência da cadeia resultando, com todas as observações geradas
plot(beta0,type = "l",main=expression(beta[0]),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=beta0_verdadeiro,col="red")plot(beta1,type = "l",main=expression(beta[0]),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=beta1_verdadeiro,col="red")plot(sigma2,type = "l",main=expression(sigma^2),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=sigma2_verdadeiro,col="red")
Gerando duas cadeias, com pontos inicias diferentes, para verificar a convergência
sigma2b=10;beta0b=10;beta1b=10
for (i in 2:N){
media_beta0 = ((s2_0*(sum(y)-(beta1b[i-1]*sum(x))))+(sigma2b[i-1]*mu0))/((n*s2_0)+sigma2b[i-1])
dp_beta0 = sqrt((sigma2b[i-1]*s2_0)/((n*s2_0)+sigma2b[i-1]))
beta0b[i] = rnorm(1,media_beta0,dp_beta0)
media_beta1 = ((s2_1*(sum(x*y)-(beta0b[i]*sum(x))))+(sigma2b[i-1]*mu1))/((s2_1*sum(x^2))+sigma2b[i-1])
dp_beta1 = sqrt((sigma2b[i-1]*s2_1)/((s2_1*sum(x^2))+sigma2b[i-1]))
beta1b[i] = rnorm(1,media_beta1,dp_beta1)
log_sigma2_p = rnorm(1,log(sigma2b[i-1]),delta)
sigma2_p = exp(log_sigma2_p)
u = runif(1)
log_num = -(((n/2)+a+1)*log(sigma2_p)) - ((1/(2*sigma2_p))*(sum((y-beta0b[i]-(beta1b[i]*x))^2)+(2*b))) - log(sigma2b[i-1])
log_den = -(((n/2)+a+1)*log(sigma2b[i-1])) - ((1/(2*sigma2b[i-1]))*(sum((y-beta0b[i]-(beta1b[i]*x))^2)+(2*b))) - log(sigma2_p)
prob = log_num - log_den
alfa = min(0, prob)
if (log(u) < alfa) {sigma2b[i] = sigma2_p;taxa = taxa + 1} else {sigma2b[i] = sigma2b[i - 1]}
}Verificando visualmente a convergência múltipla
plot(beta0,type="l",col="lightsalmon",main=expression(phi[0]),
ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
lines(beta0b,type="l",col="purple3")
abline(h=beta0_verdadeiro,col="red")plot(beta1,type="l",col="lightsalmon",main=expression(phi[1])
,ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
lines(beta1b,type="l",col="purple3")
abline(h=beta1_verdadeiro,col="red")plot(sigma2,type="l",col="lightsalmon",main=expression(sigma^2),
ylab="",xlab="",cex.main=3,cex.lab=2, cex.axis=2)
lines(sigma2b,type="l",col="purple3")
abline(h=sigma2_verdadeiro,col="red")
raftery.diag(as.mcmc(beta0))##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## 2 3740 3746 0.998
raftery.diag(as.mcmc(beta1))##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## 2 3700 3746 0.988
raftery.diag(as.mcmc(sigma2))##
## Quantile (q) = 0.025
## Accuracy (r) = +/- 0.005
## Probability (s) = 0.95
##
## Burn-in Total Lower bound Dependence
## (M) (N) (Nmin) factor (I)
## 13 14021 3746 3.74
geweke.diag(as.mcmc(beta0))##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## var1
## 0.4643
geweke.diag(as.mcmc(beta1))##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## var1
## 0.9613
geweke.diag(as.mcmc(sigma2))##
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5
##
## var1
## 1.314
Correlograma
Acf(beta0,main=expression(beta[0]),lag.max=200,cex.main=3)Acf(beta1,main=expression(beta[1]),cex.main=3,lag.max=200)Acf(sigma2,main=expression(sigma^2),cex.main=3,lag.max=200)
Representações numéricas
autocorr(as.mcmc(beta0))## , , 1
##
## [,1]
## Lag 0 1.0000000000
## Lag 1 0.0058620384
## Lag 5 -0.0003119376
## Lag 10 0.0130374003
## Lag 50 0.0121531232
autocorr(as.mcmc(beta1))## , , 1
##
## [,1]
## Lag 0 1.000000000
## Lag 1 -0.019516696
## Lag 5 0.004364688
## Lag 10 -0.002859988
## Lag 50 -0.005974105
autocorr(as.mcmc(sigma2))## , , 1
##
## [,1]
## Lag 0 1.00000000
## Lag 1 0.67666353
## Lag 5 0.16436172
## Lag 10 0.04959981
## Lag 50 -0.01412447
Removendo burnin
burnbeta0=burnin(beta0,method="Geweke")+10
burnbeta1=burnin(beta1,metho="Geweke")+10
burnsigma2=burnin(sigma2,metho="Geweke")+10
Beta0 = beta0[burnbeta0:length(beta0)]
Beta1 = beta1[burnbeta1:length(beta1)]
Sigma2 = sigma2[burnsigma2:length(sigma2)]Gráfico das cadeias com o burnin removido
plot(Beta0, type = "l",main=expression(beta[0]),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=beta0_verdadeiro,col="red")plot(Beta1, type = "l",main=expression(beta[1]),ylab="",
xlab="",cex.main=3 ,cex.lab=2, cex.axis=2)
abline(h=beta1_verdadeiro,col="red")plot(Sigma2, type = "l",main=expression(sigma^2),ylab="",
xlab="",cex.main=3,cex.lab=2, cex.axis=2)
abline(h=sigma2_verdadeiro,col="red")
hist(Beta0,main=expression(beta[0]),ylab="",xlab="",col='lavender'
,cex.main=3,cex.lab=2, cex.axis=2)hist(Beta1,main=expression(beta[1]),ylab="",xlab="",col='lavender'
,cex.main=3,cex.lab=2, cex.axis=2)hist(Sigma2,main=expression(sigma^2),ylab="",xlab="",col='lavender'
,cex.main=3,cex.lab=2, cex.axis=2)densplot(as.mcmc(Beta0),main=expression(beta[0]),xlab="",ylab=""
,cex.main=3,cex.lab=2, cex.axis=2)densplot(as.mcmc(Beta1),main=expression(beta[1]),xlab="",ylab=""
,cex.main=3,cex.lab=2, cex.axis=2)densplot(as.mcmc(Sigma2),main=expression(sigma^2),xlab="",ylab=""
,cex.main=3,cex.lab=2, cex.axis=2)
Prioris
gamma <- function(x){
a = 2
b = 2
return(dgamma(x,a,b))
}
curve(gamma,from = 0, to = 5)normal <- function(x){
return(dnorm(x,mean = 0, sd = sqrt(10000)))
}
curve(normal,from = -450, to = 450)
Priori com Posteriori
plot(density(Beta0),xlim=c(-450,450),ylim=c(0,0.013),lty=1,
col="blue",main=expression(phi[0]),xlab="",ylab="",lwd=2 ,
cex.main=3,cex.lab=2, cex.axis=2)
curve(normal,from = -450, to = 450,add=TRUE,lty=2,col="red",lwd=2)
legend("topleft",legend=c("Priori","Posteriori"),lty=c(2,1),
cex = 1.5,col=c("red","blue"),lwd=2,bty = "n")plot(density(Beta1),xlim=c(-450,450),ylim=c(0,0.013),
lty=1,col="blue",main=expression(phi[1]),xlab="",ylab="",
lwd=2,cex.main=3,cex.lab=2, cex.axis=2)
curve(normal,from = -450, to = 450,add=TRUE,lty=2,col="red",lwd=2)
legend("topleft",legend=c("Priori","Posteriori"),lty=c(2,1),
cex = 1.5,col=c("red","blue"),lwd=2, bty = "n")plot(density(Sigma2),xlim=c(0,25),ylim=c(0,1.5),lty=1,
col="blue",main=expression(sigma^2),xlab="",ylab="",lwd=2,cex.main=3
,cex.lab=2, cex.axis=2)
curve(gamma,from = 0, to = 5, add=TRUE,lty=2,col="red",lwd=2)
legend("topleft",legend=c("Priori","Posteriori"),lty=c(2,1)
,cex = 1.5,col=c("red","blue"),lwd=2,bty = "n")
preditiva <- NULL
for(i in 2:N){preditiva[i] <- beta0[i] + beta1[i]*x[n] + rnorm(1,0,sqrt(sigma2[i]))}
x11()
hist(preditiva[100:length(preditiva)], main = NULL,
xlab = NULL, ylab = NULL, freq = F, cex.main = 3, cex.lab = 2, cex.axis = 2,
col = "lavender",prob=TRUE)
lines(density(preditiva[100:length(preditiva)])
,lty="longdash",lwd=2,col="firebrick3")est1=mean(Beta0)
est2=mean(Beta1)
est3=mean(Sigma2)
est4=median(Beta0)
est5=median(Beta1)
est6=median(Sigma2)
obj1 = density(Beta0)
est7=obj1$x[which.max(obj1$y)]
obj2 = density(Beta1)
est8=obj2$x[which.max(obj2$y)]
obj3 = density(Sigma2)
est9=obj3$x[which.max(obj3$y)]
media = c(est1,est2,est3)
mediana = c(est4,est5,est6)
moda = c(est7,est8,est9)
estima = cbind(media,mediana,moda)
rownames(estima) = c("Beta 0", "Beta 1", "Sigma 2")
estima## media mediana moda
## Beta 0 1.116608 1.1170640 1.1194973
## Beta 1 0.975321 0.9763552 0.9841624
## Sigma 2 1.923885 1.8981617 1.8461009