Reunião PPAG
Reunião PPAG
1 Simulação do número de stirling
Aproximação de stirling:
\(n! \approx \sqrt{2\pi n}(\cfrac{n}{e})^n\)
1.1 Visualização
stirling <- function (n)((2*pi*n)^.5)*(n/exp(1))^n
amostra = c(seq(1,3,0.1))
plot(amostra,stirling(amostra),type="l",pch=19,ylab="Valor aproximado",xlab="Número",col="blue")
lines(amostra,factorial(amostra),amostra,type="l",pch=19,col="red")
legend(1,5.5, legend=c("Aproximação de Stirling", "Fatorial"),
col=c("blue", "red"), lty=1, cex=0.8)
A aproximação confirma-se visualmente, embora não tenhamos ideia numericamente do quanto bem está sendo aproximado. Para tal, utilizou-se o cálculo de um erro quadrático médio para dois tamanhos de amostra diferente.
1.2 Numericamente
set.seed(02042019)
#Criando uma amostra
amostra = seq(1,3,0.01)
#Aplicando as funcoes
st=log(stirling(amostra))
fc=log(factorial(amostra))
#Erro quadratico medio
dif1=mean((st-fc)^2)
#Supondo N grande
amostra = seq(1,100,0.01)
st=log(stirling(amostra))
fc=log(factorial(amostra))
#Erro quadratico medio
dif2=mean((st-fc)^2)
dif1;dif2
## [1] 0.002257498
## [1] 6.841241e-05
1.3 Análise dos erros
set.seed(02042019)
amostra = list()
st = list()
fc = list()
dif = vector()
for (i in 2:100){
amostra[[i]] = seq(1,i,0.01)
st[[i]]=log(stirling(amostra[[i]]))
fc[[i]]=log(factorial(amostra[[i]]))
dif[i]=mean((st[[i]]-fc[[i]])^2)
}
plot(dif, type="l")
Observa-se uma convergência pra zero com comportamento exponencial, indicando o bom resultado da aproximação.
2 Testando o Laplace
2.1 Geração dos dados com sigma baixo, não logaritmado
set.seed(02042019)
sigma2_verdadeiro=0.01
beta0_verdadeiro=-2
beta1_verdadeiro=2
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+e
plot(x,y)
model <- function(p,y,x) {
log_lik <- sum(dnorm(y,p["beta0"]+p["beta1"]*x,sqrt(p["sigma2"]), log = T))
log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["sigma2"],1,0.01, log = T)
log_post
}
inits <- c(beta0 = 1, beta1 = 1, sigma2 = 1)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE, y = y,x = x)
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
## beta0 beta1 sigma2
## -2.00 2.00 0.01
round(param_cov_mat, 3)
## beta0 beta1 sigma2
## beta0 0 0 0
## beta1 0 0 0
## sigma2 0 0 0
samples <- rmvnorm(10000, param_mean, param_cov_mat)
samples <- cbind(samples, pred = rnorm(n = nrow(samples), samples[, "beta0"], samples[,
"beta1"]), samples[, "sigma2"])
samples1 <- mcmc(samples)
beta0 = samples[,1]
beta1 = samples[,2]
sigma2 = samples[,3]
summary(samples1)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0 -1.998871 0.009786 9.786e-05 9.786e-05
## beta1 1.999067 0.008686 8.686e-05 8.904e-05
## sigma2 0.009486 0.001301 1.301e-05 1.298e-05
## pred -2.008954 1.977301 1.977e-02 2.046e-02
## 0.009486 0.001301 1.301e-05 1.298e-05
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0 -2.018407 -2.005458 -1.99878 -1.99221 -1.97995
## beta1 1.981952 1.993126 1.99898 2.00497 2.01580
## sigma2 0.006899 0.008614 0.00947 0.01035 0.01204
## pred -5.891215 -3.344652 -2.02013 -0.67052 1.83859
## 0.006899 0.008614 0.00947 0.01035 0.01204
plot(density(beta0))
abline(v=-2,col="red")
plot(density(beta1))
abline(v=2,col="red")
plot(density(sigma2))
abline(v=0.01,col="red")
Ok
2.2 Geração dos dados com sigma baixo, logaritmado
set.seed(02042019)
sigma2_verdadeiro=0.01
beta0_verdadeiro=-2
beta1_verdadeiro=2
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+e
plot(x,y)
model <- function(p,y,x) {
log_lik <- sum(dnorm(y,p["beta0"]+p["beta1"]*x,sqrt(p["sigma2"]), log = T))
log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dnorm(log(p["sigma2"]),0,10, log = T)
log_post
}
inits <- c(beta0 = 1, beta1 = 1, sigma2 = 1)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE, y = y,x = x)
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
## beta0 beta1 sigma2
## -2.00 2.00 0.01
round(param_cov_mat, 3)
## beta0 beta1 sigma2
## beta0 0 0 0
## beta1 0 0 0
## sigma2 0 0 0
samples <- rmvnorm(10000, param_mean, param_cov_mat)
samples <- cbind(samples, pred = rnorm(n = nrow(samples), samples[, "beta0"], samples[,
"beta1"]), samples[, "sigma2"])
samples1 <- mcmc(samples)
beta0 = samples[,1]
beta1 = samples[,2]
sigma2 = samples[,3]
summary(samples1)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0 -1.998889 0.009790 9.790e-05 9.790e-05
## beta1 1.999121 0.008689 8.689e-05 8.907e-05
## sigma2 0.009493 0.001302 1.302e-05 1.299e-05
## pred -2.008973 1.977354 1.977e-02 2.046e-02
## 0.009493 0.001302 1.302e-05 1.299e-05
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0 -2.018433 -2.00548 -1.998797 -1.99223 -1.97996
## beta1 1.982000 1.99318 1.999033 2.00502 2.01586
## sigma2 0.006904 0.00862 0.009478 0.01036 0.01205
## pred -5.891336 -3.34470 -2.020147 -0.67051 1.83867
## 0.006904 0.00862 0.009478 0.01036 0.01205
plot(density(beta0))
abline(v=-2,col="red")
plot(density(beta1))
abline(v=2,col="red")
plot(density(sigma2))
abline(v=0.01,col="red")
Ok
2.3 Geração dos dados com sigma próximo de zero, não logaritmado
set.seed(02042019)
sigma2_verdadeiro=0.001
beta0_verdadeiro=-2
beta1_verdadeiro=2
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+e
plot(x,y)
model <- function(p,y,x) {
log_lik <- sum(dnorm(y,p["beta0"]+p["beta1"]*x,sqrt(p["sigma2"]), log = T))
log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["sigma2"],1,0.01, log = T)
log_post
}
inits <- c(beta0 = 1, beta1 = 1, sigma2 = 1)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE, y = y,x = x)
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
round(param_cov_mat, 3)
samples <- rmvnorm(10000, param_mean, param_cov_mat)
samples <- cbind(samples, pred = rnorm(n = nrow(samples), samples[, "beta0"], samples[,
"beta1"]), samples[, "sigma2"])
samples1 <- mcmc(samples)
beta0 = samples[,1]
beta1 = samples[,2]
sigma2 = samples[,3]
summary(samples1)
plot(density(beta0))
abline(v=-2,col="red")
plot(density(beta1))
abline(v=2,col="red")
plot(density(sigma2))
abline(v=0.01,col="red")
warnings()
NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedError in optim(inits, model, control = list(fnscale = -1), hessian = TRUE, : non-finite finite-difference value [3]
2.4 Priori limitada para variância
set.seed(02042019)
sigma2_verdadeiro=5
beta0_verdadeiro=-2
beta1_verdadeiro=2
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+e
plot(x,y)
model <- function(p,y,x) {
log_lik <- sum(dnorm(y,p["beta0"]+p["beta1"]*x,sqrt(p["sigma2"]), log = T))
log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dunif(p["sigma2"],1,10, log = T)
log_post
}
inits <- c(beta0 = 1, beta1 = 1, sigma2 = 1)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE, y = y,x = x)
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
## beta0 beta1 sigma2
## -1.97 1.98 4.74
round(param_cov_mat, 3)
## beta0 beta1 sigma2
## beta0 0.047 -0.001 0.00
## beta1 -0.001 0.038 0.00
## sigma2 0.000 0.000 0.45
samples <- rmvnorm(10000, param_mean, param_cov_mat)
samples <- cbind(samples, pred = rnorm(n = nrow(samples), samples[, "beta0"], samples[,
"beta1"]), samples[, "sigma2"])
samples1 <- mcmc(samples)
beta0 = samples[,1]
beta1 = samples[,2]
sigma2 = samples[,3]
summary(samples1)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0 -1.973 0.2188 0.002188 0.002188
## beta1 1.978 0.1942 0.001942 0.001991
## sigma2 4.745 0.6736 0.006736 0.006719
## pred -1.982 1.9748 0.019748 0.020445
## 4.745 0.6736 0.006736 0.006719
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0 -2.410 -2.120 -1.971 -1.8243 -1.550
## beta1 1.595 1.845 1.976 2.1101 2.352
## sigma2 3.406 4.294 4.737 5.1946 6.067
## pred -5.843 -3.319 -1.988 -0.6463 1.865
## 3.406 4.294 4.737 5.1946 6.067
plot(density(beta0))
abline(v=-2,col="red")
plot(density(beta1))
abline(v=2,col="red")
plot(density(sigma2))
abline(v=5,col="red")
Ok
2.5 Priori limitada para tudo
set.seed(02042019)
sigma2_verdadeiro=5
beta0_verdadeiro=-2
beta1_verdadeiro=2
n=100
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+e
plot(x,y)
model <- function(p,y,x) {
log_lik <- sum(dnorm(y,p["beta0"]+p["beta1"]*x,sqrt(p["sigma2"]), log = T))
log_post <- log_lik + dunif(p["beta0"], -100, 100, log = T) + dunif(p["beta1"], -100, 100, log = T) + dunif(p["sigma2"],1,10, log = T)
log_post
}
inits <- c(beta0 = 1, beta1 = 1, sigma2 = 1)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE, y = y,x = x)
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
## beta0 beta1 sigma2
## -1.97 1.98 4.74
round(param_cov_mat, 3)
## beta0 beta1 sigma2
## beta0 0.047 -0.001 0.000
## beta1 -0.001 0.038 0.000
## sigma2 0.000 0.000 0.449
samples <- rmvnorm(10000, param_mean, param_cov_mat)
samples <- cbind(samples, pred = rnorm(n = nrow(samples), samples[, "beta0"], samples[,
"beta1"]), samples[, "sigma2"])
samples1 <- mcmc(samples)
beta0 = samples[,1]
beta1 = samples[,2]
sigma2 = samples[,3]
summary(samples1)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0 -1.975 0.2188 0.002188 0.002188
## beta1 1.980 0.1942 0.001942 0.001991
## sigma2 4.742 0.6727 0.006727 0.006711
## pred -1.983 1.9764 0.019764 0.020462
## 4.742 0.6727 0.006727 0.006711
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0 -2.412 -2.122 -1.973 -1.826 -1.552
## beta1 1.597 1.847 1.978 2.112 2.354
## sigma2 3.404 4.291 4.734 5.191 6.062
## pred -5.847 -3.321 -1.990 -0.647 1.867
## 3.404 4.291 4.734 5.191 6.062
plot(density(beta0))
abline(v=-2,col="red")
plot(density(beta1))
abline(v=2,col="red")
plot(density(sigma2))
abline(v=5,col="red")
Ok
2.6 Visualizaçao assintótica
set.seed(02042019)
sigma2_verdadeiro=5
beta0_verdadeiro=-2
beta1_verdadeiro=2
n=1000000
x=rnorm(n,0,1)
e=rnorm(n,0,sqrt(sigma2_verdadeiro))
y=beta0_verdadeiro+beta1_verdadeiro*x+e
plot(x,y)
model <- function(p,y,x) {
log_lik <- sum(dnorm(y,p["beta0"]+p["beta1"]*x,sqrt(p["sigma2"]), log = T))
log_post <- log_lik + dnorm(p["beta0"], 0, 10, log = T) + dnorm(p["beta1"], 0, 10, log = T) + dgamma(p["sigma2"],1,0.01, log = T)
log_post
}
inits <- c(beta0 = 1, beta1 = 1, sigma2 = 1)
fit <- optim(inits, model, control = list(fnscale = -1), hessian = TRUE, y = y,x = x)
param_mean <- fit$par
param_cov_mat <- solve(-fit$hessian)
round(param_mean, 2)
## beta0 beta1 sigma2
## -2.00 2.00 5.01
round(param_cov_mat, 3)
## beta0 beta1 sigma2
## beta0 0 0 0
## beta1 0 0 0
## sigma2 0 0 0
samples <- rmvnorm(1000000, param_mean, param_cov_mat)
samples <- cbind(samples, pred = rnorm(n = nrow(samples), samples[, "beta0"], samples[,
"beta1"]), samples[, "sigma2"])
samples1 <- mcmc(samples)
beta0 = samples[,1]
beta1 = samples[,2]
sigma2 = samples[,3]
summary(samples1)
##
## Iterations = 1:1e+06
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 1e+06
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## beta0 -1.999 0.002234 2.234e-06 2.234e-06
## beta1 2.002 0.002239 2.239e-06 2.239e-06
## sigma2 5.006 0.007079 7.079e-06 7.098e-06
## pred -1.999 2.002280 2.002e-03 1.999e-03
## 5.006 0.007079 7.079e-06 7.098e-06
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## beta0 -2.004 -2.001 -1.999 -1.9979 -1.995
## beta1 1.998 2.001 2.002 2.0036 2.006
## sigma2 4.992 5.001 5.006 5.0104 5.020
## pred -5.916 -3.350 -2.000 -0.6506 1.929
## 4.992 5.001 5.006 5.0104 5.020
plot(density(beta0))
abline(v=-2,col="red")
plot(density(beta1))
abline(v=2,col="red")
plot(density(sigma2))
abline(v=5,col="red")
3 INLA
4 Prioris
inla.list.models("prior")
## Section [prior]
## betacorrelation Beta prior for the correlation
## dirichlet Dirichlet prior
## expression: A generic prior defined using expressions
## flat A constant prior
## gaussian Gaussian prior
## invalid Void prior
## jeffreystdf Jeffreys prior for the doc
## logflat A constant prior for log(theta)
## loggamma Log-Gamma prior
## logiflat A constant prior for log(1/theta)
## logitbeta Logit prior for a probability
## logtgaussian Truncated Gaussian prior
## logtnormal Truncated Normal prior
## minuslogsqrtruncnormal (obsolete)
## mvnorm A multivariate Normal prior
## none No prior
## normal Normal prior
## pc Generic PC prior
## pc.ar PC prior for the AR(p) model
## pc.cor0 PC prior correlation, basemodel cor=0
## pc.cor1 PC prior correlation, basemodel cor=1
## pc.dof PC prior for log(dof-2)
## pc.fgnh PC prior for the Hurst parameter in FGN
## pc.gamma PC prior for a Gamma parameter
## pc.gammacount PC prior for the GammaCount likelihood
## pc.matern PC prior for the Matern SPDE
## pc.mgamma PC prior for a Gamma parameter
## pc.prec PC prior for log(precision)
## pc.range PC prior for the range in the Matern SPDE
## pc.spde.GA (experimental)
## pom #classes-dependent prior for the POM model
## ref.ar Reference prior for the AR(p) model, p<=3
## table: A generic tabulated prior
## wishart1d Wishart prior dim=1
## wishart2d Wishart prior dim=2
## wishart3d Wishart prior dim=3
## wishart4d Wishart prior dim=4
## wishart5d Wishart prior dim=5
5 Modelo com priori definida
y = yt[[1]][[1]]
x = yt[[1]][[2]]
dado=as.data.frame(cbind(y,x))
modelo = inla(y~x,family="gaussian",data=dado,
control.family=list(hyper = list(prec = list(prior="loggamma",param=c(1,0.1)))))
summary(modelo)
5.1 Simulação
modelo = list()
amostra = list()
final = list()
b0 = list()
b1 = list()
tau = list()
media_b0 = vector()
media_b1 = vector()
media_tau = vector()
mediana_b0 = vector()
mediana_b1 = vector()
mediana_tau = vector()
var_b0 = vector()
var_b1 = vector()
var_tau = vector()
eqm_b0 = vector()
eqm_b1 = vector()
eqm_tau = vector()
hdi = list()
tic()
for (i in 1:100){
y = yt[[i]][[1]]
x = yt[[i]][[2]]
dado=as.data.frame(cbind(y,x))
modelo[[i]] = inla(y~x,family="gaussian",data=dado,
control.family=list(hyper = list(prec = list(prior="loggamma",param=c(1,0.1)))))
final[[i]] = modelo[[i]]$marginals.fixed
b0[[i]] = final[[i]][[1]][,1]
b1[[i]] = final[[i]][[2]][,1]
tau[[i]] = modelo[[i]]$marginals.hyperpar[[1]][,1]
media_b0[i] = mean(b0[[i]])
media_b1[i] = mean(b1[[i]])
media_tau[i] = mean(tau[[i]])
mediana_b0[i] = median(b0[[i]])
mediana_b1[i] = median(b1[[i]])
mediana_tau[i] = median(tau[[i]])
var_b0[i] = var(b0[[i]])
var_b1[i] = var(b1[[i]])
var_tau[i] = var(tau[[i]])
eqm_b0[i] = mean((b0[[i]]+2)^2)
eqm_b1[i] = mean((b1[[i]]-2)^2)
eqm_tau[i] = mean((tau[[i]]-.2)^2)
hdi[[i]] = hdi(final[[i]])
}
toc
media=c(mean(media_b0),mean(media_b1),mean(media_tau))
mediana=c(mean(mediana_b0),mean(mediana_b1),mean(mediana_tau))
var=c(mean(var_b0),mean(var_b1),mean(var_tau))
eqm=c(mean((media_b0+2)^2),mean((media_b1-2)^2),mean((media_tau-.2)^2))
medidas=cbind(media,mediana,var,eqm)
rownames(medidas) = c("b0","b1","tau");medidas
int = as.data.frame(t(as.data.frame((hdi))))
write.csv2(int,"C:\\Users\\Rafael\\Desktop\\int.xls")