Com pacotes extras para fazer gráfico
require(rjags)
require(coda)
require(DT)
require(tidyverse)
require(car)
#install.packages("MCMCpack")
require(MCMCpack)
#install.packages("bayesplot")
require(bayesplot)
#install.packages("ggrepel")
require(grid)
Modelo de regressão não linear
Dugongues são animais da mesma ordem dos peixes-bois (ordem Sirenia). Dentre suas principais características, temos: narinas no topo da cabeça, lábio superior voltado para baixo e nadadeira dividida em duas partes (como a das baleias e golfinhos). Os Dugongues são encontrados na costa leste da Àfrica, Ìndia, Indonésia, Malásia e Austrália.
Leitura dos dados & representação gráfica
dugongues=read.table("exemplo3_dugongues.txt",sep=";",head=TRUE)
datatable(dugongues)
dugongues %>%
ggplot(aes(x=x,y=y)) +
geom_point()
Ajuste do modelo não linear pelo método de máxima verossimilhança, utilizando a função optim do R - minimiza a função log-verossimilhança e realiza a estimação numérica (não analítica). O algoritmo utilizado é o “BFGS”
O algoritmo Broyden – Fletcher – Goldfarb – Shanno (BFGS) é um método iterativo para resolver problemas de otimização não linear irrestrita.
O método BFGS pertence aos métodos quasi-Newton, uma classe de técnicas de otimização que busca um ponto estacionário de uma função (de preferência duas vezes continuamente diferenciável).
“https://stat.ethz.ch/R-manual/R-devel/library/stats/html/optim.html”> Saiba mais sobre optim no R ;
Houve NaNs produzidos que podem ser investigados (não foram exibidos aqui porém se os alunos “rodarem” irão enxergar;
geralmente ocorrem quando ao longo das iterações os valores dos parâmetros assumem valores nulos ou negativos, quando não são por natureza;
No entanto o método apresentou convergência (código igual a zero).
x=dugongues$x
y=dugongues$y
logL=function(par){
alpha=par[1]
beta=par[2]
gamma=par[3]
sigma=par[4]
media = alpha-beta*gamma^x
L = prod(dnorm(y,media,sigma))
(-1)*log(L)
}
nlm_model = optim(logL,par=c(1,1,0.5,5),method="BFGS")
nlm_model
## $par
## [1] 2.67067170 0.97575073 0.87382246 0.09115797
##
## $value
## [1] -26.36358
##
## $counts
## function gradient
## 115 29
##
## $convergence
## [1] 0
##
## $message
## NULL
dados=list("n"=nrow(dugongues),"x"=dugongues$x,"y"=dugongues$y)
iniciais=list(
list(alpha=1,beta=1,gamma=0.4,tau=0.5),
list(alpha=2,beta=2,gamma=0.8,tau=1)
)
cat("
model{
for (i in 1 : n) {
y[i] ~ dnorm(alpha-beta*pow(gamma,x[i]), tau)
y_pred[i] ~ dnorm(alpha-beta*pow(gamma,x[i]), tau)
}
alpha ~ dnorm(0,0.000001)
beta ~ dnorm(0,0.000001)
gamma ~ dunif(0,100000)
tau ~ dgamma(0.001,0.001)
sigma2 = 1/tau
y_new ~ dnorm(alpha-beta*pow(gamma,26), tau)
}
",file="modelo5.txt")
modelo=jags.model(file="modelo5.txt",data=dados,inits=iniciais,n.chains=2, n.adapt=1000)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 27
## Unobserved stochastic nodes: 32
## Total graph size: 157
##
## Initializing model
algumas vêm direto dos parâmetros que foram amostrados, e outras são funções dos parâmetros
params = c("alpha","beta","gamma","sigma2","y_pred","y_new")
samps = coda.samples( modelo, params, n.iter=20000,thin=5)
summary(samps)
##
## Iterations = 1005:21000
## Thinning interval = 5
## Number of chains = 2
## Sample size per chain = 4000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## alpha 2.66905 0.076238 8.524e-04 2.247e-03
## beta 0.98610 0.078423 8.768e-04 1.162e-03
## gamma 0.86675 0.031622 3.535e-04 8.952e-04
## sigma2 0.01039 0.003322 3.714e-05 3.803e-05
## y_new 2.63610 0.112967 1.263e-03 1.652e-03
## y_pred[1] 1.81379 0.114807 1.284e-03 1.405e-03
## y_pred[2] 1.87146 0.111825 1.250e-03 1.226e-03
## y_pred[3] 1.87263 0.112309 1.256e-03 1.281e-03
## y_pred[4] 1.87176 0.111379 1.245e-03 1.274e-03
## y_pred[5] 1.97681 0.106444 1.190e-03 1.190e-03
## y_pred[6] 2.10801 0.105900 1.184e-03 1.236e-03
## y_pred[7] 2.18002 0.106754 1.194e-03 1.258e-03
## y_pred[8] 2.17964 0.105112 1.175e-03 1.280e-03
## y_pred[9] 2.29545 0.105802 1.183e-03 1.316e-03
## y_pred[10] 2.34259 0.107266 1.199e-03 1.317e-03
## y_pred[11] 2.36660 0.106075 1.186e-03 1.269e-03
## y_pred[12] 2.38244 0.107247 1.199e-03 1.236e-03
## y_pred[13] 2.39989 0.107107 1.197e-03 1.264e-03
## y_pred[14] 2.40256 0.106395 1.190e-03 1.265e-03
## y_pred[15] 2.41615 0.105083 1.175e-03 1.175e-03
## y_pred[16] 2.47740 0.106429 1.190e-03 1.190e-03
## y_pred[17] 2.47613 0.104468 1.168e-03 1.238e-03
## y_pred[18] 2.50068 0.103788 1.160e-03 1.167e-03
## y_pred[19] 2.50168 0.104850 1.172e-03 1.172e-03
## y_pred[20] 2.52794 0.106863 1.195e-03 1.211e-03
## y_pred[21] 2.54554 0.103202 1.154e-03 1.154e-03
## y_pred[22] 2.54582 0.104939 1.173e-03 1.198e-03
## y_pred[23] 2.56271 0.105595 1.181e-03 1.213e-03
## y_pred[24] 2.56518 0.107275 1.199e-03 1.217e-03
## y_pred[25] 2.61620 0.109354 1.223e-03 1.576e-03
## y_pred[26] 2.64407 0.115437 1.291e-03 1.852e-03
## y_pred[27] 2.65293 0.119204 1.333e-03 1.964e-03
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## alpha 2.541126 2.617451 2.660260 2.71159 2.84057
## beta 0.834457 0.933846 0.985906 1.03645 1.14119
## gamma 0.796005 0.849272 0.869506 0.88873 0.91895
## sigma2 0.005799 0.008044 0.009756 0.01204 0.01855
## y_new 2.413170 2.560977 2.635987 2.70924 2.85956
## y_pred[1] 1.582180 1.739997 1.814019 1.88953 2.03965
## y_pred[2] 1.648568 1.798455 1.871022 1.94336 2.09485
## y_pred[3] 1.647470 1.799390 1.873385 1.94710 2.09015
## y_pred[4] 1.653194 1.799120 1.870454 1.94600 2.09379
## y_pred[5] 1.770372 1.904834 1.976139 2.04585 2.19221
## y_pred[6] 1.900096 2.038531 2.108388 2.17775 2.31997
## y_pred[7] 1.970404 2.108103 2.181861 2.24948 2.39186
## y_pred[8] 1.974748 2.109269 2.178782 2.24784 2.38733
## y_pred[9] 2.086417 2.226156 2.294200 2.36507 2.50807
## y_pred[10] 2.138210 2.271010 2.341300 2.41236 2.55924
## y_pred[11] 2.156869 2.297296 2.364671 2.43711 2.57867
## y_pred[12] 2.176644 2.309458 2.381255 2.45448 2.59057
## y_pred[13] 2.192867 2.329990 2.401958 2.46945 2.61452
## y_pred[14] 2.195830 2.331512 2.403114 2.47255 2.61169
## y_pred[15] 2.210517 2.346975 2.415569 2.48333 2.62520
## y_pred[16] 2.262529 2.407772 2.477760 2.54842 2.68702
## y_pred[17] 2.269294 2.407251 2.475940 2.54469 2.68433
## y_pred[18] 2.294095 2.433272 2.500102 2.56859 2.70831
## y_pred[19] 2.293115 2.434960 2.502900 2.57029 2.70492
## y_pred[20] 2.315500 2.458529 2.527755 2.59750 2.74063
## y_pred[21] 2.342404 2.476371 2.546332 2.61397 2.74809
## y_pred[22] 2.336350 2.476823 2.545924 2.61606 2.75414
## y_pred[23] 2.354702 2.495437 2.560956 2.63030 2.77924
## y_pred[24] 2.353785 2.495732 2.564179 2.63471 2.77862
## y_pred[25] 2.398933 2.544484 2.615988 2.68650 2.82786
## y_pred[26] 2.418265 2.565895 2.642569 2.72093 2.87212
## y_pred[27] 2.419747 2.572313 2.653857 2.73186 2.88802
utilizando a função as.mcmc.list do pacote coda
samps2=rbind(as.mcmc.list(samps)[[1]],as.mcmc.list(samps)[[2]])
summary(samps2)
## alpha beta gamma sigma2
## Min. :2.431 Min. :0.6516 Min. :0.6562 Min. :0.003567
## 1st Qu.:2.617 1st Qu.:0.9338 1st Qu.:0.8493 1st Qu.:0.008044
## Median :2.660 Median :0.9859 Median :0.8695 Median :0.009756
## Mean :2.669 Mean :0.9861 Mean :0.8667 Mean :0.010389
## 3rd Qu.:2.712 3rd Qu.:1.0365 3rd Qu.:0.8887 3rd Qu.:0.012043
## Max. :3.161 Max. :1.3854 Max. :0.9500 Max. :0.039919
## y_new y_pred[1] y_pred[2] y_pred[3]
## Min. :2.222 Min. :1.329 Min. :1.391 Min. :1.352
## 1st Qu.:2.561 1st Qu.:1.740 1st Qu.:1.798 1st Qu.:1.799
## Median :2.636 Median :1.814 Median :1.871 Median :1.873
## Mean :2.636 Mean :1.814 Mean :1.871 Mean :1.873
## 3rd Qu.:2.709 3rd Qu.:1.890 3rd Qu.:1.943 3rd Qu.:1.947
## Max. :3.188 Max. :2.301 Max. :2.295 Max. :2.388
## y_pred[4] y_pred[5] y_pred[6] y_pred[7]
## Min. :1.370 Min. :1.584 Min. :1.615 Min. :1.710
## 1st Qu.:1.799 1st Qu.:1.905 1st Qu.:2.039 1st Qu.:2.108
## Median :1.870 Median :1.976 Median :2.108 Median :2.182
## Mean :1.872 Mean :1.977 Mean :2.108 Mean :2.180
## 3rd Qu.:1.946 3rd Qu.:2.046 3rd Qu.:2.178 3rd Qu.:2.249
## Max. :2.346 Max. :2.430 Max. :2.592 Max. :2.683
## y_pred[8] y_pred[9] y_pred[10] y_pred[11]
## Min. :1.723 Min. :1.817 Min. :1.908 Min. :1.876
## 1st Qu.:2.109 1st Qu.:2.226 1st Qu.:2.271 1st Qu.:2.297
## Median :2.179 Median :2.294 Median :2.341 Median :2.365
## Mean :2.180 Mean :2.295 Mean :2.343 Mean :2.367
## 3rd Qu.:2.248 3rd Qu.:2.365 3rd Qu.:2.412 3rd Qu.:2.437
## Max. :2.578 Max. :2.751 Max. :2.804 Max. :2.808
## y_pred[12] y_pred[13] y_pred[14] y_pred[15]
## Min. :1.883 Min. :1.831 Min. :1.897 Min. :1.822
## 1st Qu.:2.309 1st Qu.:2.330 1st Qu.:2.332 1st Qu.:2.347
## Median :2.381 Median :2.402 Median :2.403 Median :2.416
## Mean :2.382 Mean :2.400 Mean :2.403 Mean :2.416
## 3rd Qu.:2.454 3rd Qu.:2.469 3rd Qu.:2.473 3rd Qu.:2.483
## Max. :2.799 Max. :2.910 Max. :2.869 Max. :2.896
## y_pred[16] y_pred[17] y_pred[18] y_pred[19]
## Min. :1.976 Min. :2.012 Min. :2.042 Min. :2.040
## 1st Qu.:2.408 1st Qu.:2.407 1st Qu.:2.433 1st Qu.:2.435
## Median :2.478 Median :2.476 Median :2.500 Median :2.503
## Mean :2.477 Mean :2.476 Mean :2.501 Mean :2.502
## 3rd Qu.:2.548 3rd Qu.:2.545 3rd Qu.:2.569 3rd Qu.:2.570
## Max. :2.863 Max. :3.001 Max. :3.032 Max. :2.945
## y_pred[20] y_pred[21] y_pred[22] y_pred[23]
## Min. :2.076 Min. :2.126 Min. :2.043 Min. :2.119
## 1st Qu.:2.459 1st Qu.:2.476 1st Qu.:2.477 1st Qu.:2.495
## Median :2.528 Median :2.546 Median :2.546 Median :2.561
## Mean :2.528 Mean :2.546 Mean :2.546 Mean :2.563
## 3rd Qu.:2.598 3rd Qu.:2.614 3rd Qu.:2.616 3rd Qu.:2.630
## Max. :3.214 Max. :3.037 Max. :2.990 Max. :3.020
## y_pred[24] y_pred[25] y_pred[26] y_pred[27]
## Min. :2.106 Min. :2.013 Min. :2.185 Min. :2.196
## 1st Qu.:2.496 1st Qu.:2.544 1st Qu.:2.566 1st Qu.:2.572
## Median :2.564 Median :2.616 Median :2.643 Median :2.654
## Mean :2.565 Mean :2.616 Mean :2.644 Mean :2.653
## 3rd Qu.:2.635 3rd Qu.:2.686 3rd Qu.:2.721 3rd Qu.:2.732
## Max. :2.982 Max. :3.179 Max. :3.221 Max. :3.160
summary(samps2[,1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.431 2.617 2.660 2.669 2.712 3.161
Diagnóstico de Gelman & Rubin: através do pacote coda
Referências na literatura:
[1] Gelman, A and Rubin, DB (1992) Inference from iterative simulation using multiple sequences, Statistical Science, 7, 457-511.
[2] Brooks, SP. and Gelman, A. (1998) General methods for monitoring convergence of iterative simulations. Journal of Computational and Graphical Statistics, 7, 434-455.
gelman.diag(samps, confidence = 0.95, transform=FALSE)
## Potential scale reduction factors:
##
## Point est. Upper C.I.
## alpha 1 1.00
## beta 1 1.00
## gamma 1 1.01
## sigma2 1 1.00
## y_new 1 1.00
## y_pred[1] 1 1.00
## y_pred[2] 1 1.00
## y_pred[3] 1 1.00
## y_pred[4] 1 1.00
## y_pred[5] 1 1.00
## y_pred[6] 1 1.00
## y_pred[7] 1 1.00
## y_pred[8] 1 1.00
## y_pred[9] 1 1.00
## y_pred[10] 1 1.00
## y_pred[11] 1 1.01
## y_pred[12] 1 1.00
## y_pred[13] 1 1.00
## y_pred[14] 1 1.00
## y_pred[15] 1 1.00
## y_pred[16] 1 1.00
## y_pred[17] 1 1.00
## y_pred[18] 1 1.01
## y_pred[19] 1 1.00
## y_pred[20] 1 1.00
## y_pred[21] 1 1.00
## y_pred[22] 1 1.00
## y_pred[23] 1 1.00
## y_pred[24] 1 1.00
## y_pred[25] 1 1.00
## y_pred[26] 1 1.00
## y_pred[27] 1 1.00
##
## Multivariate psrf
##
## 1.01
gelman.plot(samps)
Traços e densidades através do pacote bayesplot
mcmc_trace(samps)
mcmc_dens(samps)
Recorrendo ao plot para soltar os traços e densidades:
plot(samps)
Gráficos de autocorrelação do pacote coda
autocorr.plot(samps[[1]],col=1)
autocorr.plot(samps[[2]],col=2)
Gráfico de dispersão com observados & esperados:
sumarios=colMeans(samps2)
alpha=round(sumarios[1],4)
beta=round(sumarios[2],4)
gamma=round(sumarios[3],4)
preditos=as.numeric(sumarios[6:32])
temp = dugongues %>%
mutate(preditos=preditos)
datatable(temp)
#para fazer anotação no gráfico
grob <- grobTree(textGrob(paste0("curva de regressão não linear: f(x)=",alpha,"-",beta,"*",gamma,"^","x"), x=0.2, y=0.1, hjust=0,
gp=gpar(col="red", fontsize=13, fontface="italic")))
temp %>%
ggplot(aes(x=x,y=preditos)) +
geom_line()+
geom_point(aes(x=x,y=y),color="blue")+
annotation_custom(grob)
Conclusões: Com a inferência Bayesiana e o pacote Rjags, é possível estabelecer as quantidades relacionadas ao tamanho dos dugongues em função da idade. Verificou-se convergência do algoritmo.
Pendências: