# Load packages
library(rstan)
## Loading required package: StanHeaders
##
## rstan version 2.32.6 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
library(bayesplot)
## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(patchwork)
library(bayesrules)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ tidyr::extract() masks rstan::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(modeest)
library(broom)
library(broom.mixed)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Global de los datos, escribir directorio de la base de datos
data_Spotify <- read.csv("C:/Users/sebau/OneDrive/Documentos/Inferencia 2/2024/Parcial Octubre/Seba GS-audience-timeline.csv")
nrow(data_Spotify)
## [1] 1017
# primeras 6 filas de la base de datos
head(data_Spotify)
## date listeners streams followers
## 1 2022-01-01 11 13 30
## 2 2022-01-02 11 14 30
## 3 2022-01-03 10 11 30
## 4 2022-01-04 11 11 30
## 5 2022-01-05 4 4 30
## 6 2022-01-06 13 13 30
# ultimas 6 filas de la base de datos
tail(data_Spotify)
## date listeners streams followers
## 1012 2024-10-08 21 21 53
## 1013 2024-10-09 315 458 61
## 1014 2024-10-10 358 534 66
## 1015 2024-10-11 21 24 68
## 1016 2024-10-12 693 1022 87
## 1017 2024-10-13 25 27 87
# Observar los streams en el tiempo
plot(data_Spotify$streams,col="blue", main="Streams diarios obtenidos en Spotify entre 2022-01-01 y 2024-10-13", xlab="Días", ylab="Streams", type="l")

# Seleccionamos una observación cada 20 días
date_each20days <- c()
streams_each20days <- c()
umbral <- nrow(data_Spotify)
i <- 288 # A partir de ahi (fecha 2022-10-15) hacemos las observaciones cada 20 días
j <- 1
while(i <= umbral) {
date_each20days[j]=data_Spotify$date[i]
streams_each20days[j]=data_Spotify$streams[i]
i <- i + 20
j <- j + 1
}
data_each20days <- data.frame(date=date_each20days,streams=streams_each20days)
# Número de observaciones
nrow(data_each20days)
## [1] 37
# MODELO DE PROBABILIDAD A PRIORI
# Datos entre el 2022-10-15 y 2023-09-20 (18 datos)
data_each20days_prior <- data_each20days[1:18,]
data_each20days_prior
## date streams
## 1 2022-10-15 118
## 2 2022-11-04 76
## 3 2022-11-24 80
## 4 2022-12-14 55
## 5 2023-01-03 59
## 6 2023-01-23 69
## 7 2023-02-12 77
## 8 2023-03-04 86
## 9 2023-03-24 99
## 10 2023-04-13 103
## 11 2023-05-03 71
## 12 2023-05-23 91
## 13 2023-06-12 184
## 14 2023-07-02 125
## 15 2023-07-22 92
## 16 2023-08-11 99
## 17 2023-08-31 84
## 18 2023-09-20 38
# Visualización de los datos
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Histograma y densidad de los datos
hist(x = data_each20days_prior$stream,xlab = "streams",col="azure2", main="Densidad de streams diarios obtenidos en Spotify entre 2022-10-15 y 2023-09-20",prob = TRUE,breaks = 10)
lines(density(data_each20days_prior$streams), # density plot
lwd = 2, # thickness of line
col = "black")

# Ajuste mediante el modelo Gamma (prioir model)
# Densidad modelo a priori Gamma(13.5, 0.15)
curve(dgamma(x, 13.5, 0.15),0,210, # density plot
xlab = "Streams",
ylab = "Density",
main = "Modelo a priori de streams",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.017))
# Densidad streams
lines(density(data_each20days_prior$streams), # density plot
lwd = 4, # thickness of line
col = "black")
# Moda del modelo a priori
mode <- gammaMode(13.5, 0.15)
segments(x0 = mode, y0 = 0, x1 = mode,
y1 = dgamma(mode, 13.5, 0.15),
col = "blue", lwd = 4)
# Media dell modelo a priori
mean <- 13.5/0.15
segments(x0 = mean, y0 = 0, x1 = mean,
y1 = dgamma(mean,13.5, 0.15),
col = "green", lwd = 4)
# Legend
legend(x ="topright", # Ubicación de la leyenda
c("Densidad de número promedio diario de streams","Ajuste de modelo de probabilidad a Priori","Moda del modelo a priori","Media del modelo a priori"),
col = c("black","orange","blue","green"),
lwd = c(2,2,2,2),
bty = "n")

# Summaries prior model
summarize_gamma(shape = 13.5, rate = 0.15)
## mean mode var sd
## 1 90 83.33333 600 24.4949
# Summary de los 18 datos observados para construir el modelo a priori
summary(data_each20days_prior$streams)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 38.00 72.25 85.00 89.22 99.00 184.00
# MODELO DE PROBABILIDAD A POSTERIORI
# Realizamos 2 nuevas observaciones del 2023-10-10 y 2023-10-30
data_each20days_posterior1 <- data_each20days[19:20,]
data_each20days_posterior1
## date streams
## 19 2023-10-10 202
## 20 2023-10-30 339
# Vector y1 de las 2 observaciones para construir modelo a posteriori
y1 <-data_each20days_posterior1$streams
length(y1)
## [1] 2
sum_y1 <- sum(y1)
sum_y1
## [1] 541
# Summary de las 2 observaciones
summary(y1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 202.0 236.2 270.5 270.5 304.8 339.0
# Analisis con bayesrules package
# Modelo posteriori de lambda dadas las 2 nuevas observaciones
# Summarize
summarize_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 541, n = 2)
## model shape rate mean mode var sd
## 1 prior 13.5 0.15 90.000 83.33333 600.0000 24.49490
## 2 posterior 554.5 2.15 257.907 257.44186 119.9567 10.95248
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot_gamma_poisson
plot_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 541, n = 2)

# Realizamos 17 nuevas observaciones entre el 2023-11-19 y 2024-10-04
data_each20days_posterior2 <- data_each20days[21:37,]
data_each20days_posterior2
## date streams
## 21 2023-11-19 14
## 22 2023-12-09 12
## 23 2023-12-29 13
## 24 2024-01-18 13
## 25 2024-02-07 10
## 26 2024-02-27 13
## 27 2024-03-18 4
## 28 2024-04-07 6
## 29 2024-04-27 4
## 30 2024-05-17 7
## 31 2024-06-06 3
## 32 2024-06-26 21
## 33 2024-07-16 27
## 34 2024-08-05 12
## 35 2024-08-25 12
## 36 2024-09-14 13
## 37 2024-10-04 18
# Vector y2 de 17 nuevas observaciones para construir nuevo modelo a posteriori
y2 <-data_each20days_posterior2$streams
length(y2)
## [1] 17
sum_y2 <- sum(y2)
sum_y2
## [1] 202
# Summary de las 17 observaciones
summary(y2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 7.00 12.00 11.88 13.00 27.00
# Summarize
summarize_gamma_poisson(shape = 554.5, rate = 2.15, sum_y = 202, n = 17)
## model shape rate mean mode var sd
## 1 prior 554.5 2.15 257.90698 257.4419 119.956733 10.952476
## 2 posterior 756.5 19.15 39.50392 39.4517 2.062868 1.436269
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot_gamma_poisson
plot_gamma_poisson(shape = 554.5, rate = 2.15, sum_y = 202, n = 17)

# Realizando las 19 observaciones juntas obtenemos el mismo resultado de modelo a posteriori
# Vector y3 de las 5 observaciones para construir modelo a posteriori
y3 <-c(y1,y2)
length(y3)
## [1] 19
sum_y3 <- sum_y1+sum_y2
sum_y3
## [1] 743
# Observamos distribución con box plots de ambos conjuntos de datos (para el modelo priori y posteriori)
observaciones<- c(1:37)
prior_post <- c()
prior_post <- c(rep("1: Prior data",times=18),rep("2: Posterior data",times=19))
data_prior_post <- data.frame("observaciones"=observaciones,"streams"=streams_each20days, "Grupo"=prior_post)
# Observarlos streams (por grupo priori y posteriori) tomados cada 20 días (entre 2022-10-15 y 2024-10-04)
par(mfrow = c(1, 1)) #ventana para un solo gráfico
ggplot(data_prior_post)+
labs(title ="Observaciones cada 20 días de streams diarios obtenidos en Spotify entre 2022-10-15 y 2024-10-04")+
geom_line(aes(observaciones ,streams,colour=Grupo)) +
geom_point(aes(observaciones ,streams,colour=Grupo))

# visualizar conjunto de boxplot
boxplot(streams ~ Grupo, data = data_prior_post,main="Box plots por grupo de datos",
col = c("bisque2", "azure2"))

# Summarize
summarize_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 743, n = 19)
## model shape rate mean mode var sd
## 1 prior 13.5 0.15 90.00000 83.33333 600.000000 24.494897
## 2 posterior 756.5 19.15 39.50392 39.45170 2.062868 1.436269
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot_gamma_poisson
plot_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 743, n = 19)

# Estimación a posteriori
par(mfrow = c(1, 3)) #ventana para tres gráficos
# Intervalo de credibilidad a posteriori para λ al 50%
qgamma(c(0.25, 0.75),756.5,19.15)
## [1] 38.52591 40.46295
dgamma_post<-curve(dgamma(x, 756.5,19.15),34,45, # density plot
xlab = "λ",
ylab = "Density",
main = "IC posterior al 50% para λ",
lwd = 4, # thickness of line
col = "cyan4",
ylim = c(0,0.3))
inf<-38.52591
sup<-40.46295
# Índices inferior y superior del eje X
l <- min(which(dgamma_post$x >= inf))
h <- max(which(dgamma_post$x < sup))
polygon(c(dgamma_post$x[c(l, l:h, h)]),
c(0, dgamma_post$y[l:h], 0),lwd = 1,
col = "cyan4",border = 0)
# Median (posterior model)
median <- qgamma(0.5,756.5,19.15)
segments(x0 = median, y0 = 0, x1 = median,
y1 = dgamma(median,756.5,19.15),
col = "white", lwd = 2)
# Intervalo de credibilidad a posteriori para λ al 95%
# 0.025th & 0.975th quantiles
qgamma(c(0.025, 0.975),756.5,19.15)
## [1] 36.73866 42.36808
dgamma_post<-curve(dgamma(x,756.5,19.15),34,45, # density plot
xlab = "λ",
ylab = "Density",
main = "IC posterior al 95% para λ",
lwd = 4, # thickness of line
col = "cyan4",
ylim = c(0,0.3))
inf<-36.73866
sup<-42.36808
# Índices inferior y superior del eje X
l <- min(which(dgamma_post$x >= inf))
h <- max(which(dgamma_post$x < sup))
polygon(c(dgamma_post$x[c(l, l:h, h)]),
c(0, dgamma_post$y[l:h], 0),lwd = 1,
col = "cyan4",border = 0)
# Mediana (posterior model)
median <- qgamma(0.5,756.5,19.15)
segments(x0 = median, y0 = 0, x1 = median,
y1 = dgamma(median,756.5,19.15),
col = "white", lwd = 2)
# Intervalo de credibilidad a posteriori para λ al 99%
# 0.005th & 0.995th quantiles
qgamma(c(0.005, 0.995),756.5,19.15)
## [1] 35.90245 43.30152
dgamma_post<-curve(dgamma(x, 756.5,19.15),34,45, # density plot
xlab = "λ",
ylab = "Density",
main = "IC posterior al 99% para λ",
lwd = 4, # thickness of line
col = "cyan4",
ylim = c(0,0.3))
inf<-35.90245
sup<-43.30152
# Índices inferior y superior del eje X
l <- min(which(dgamma_post$x >= inf))
h <- max(which(dgamma_post$x < sup))
polygon(c(dgamma_post$x[c(l, l:h, h)]),
c(0, dgamma_post$y[l:h], 0),lwd = 1,
col = "cyan4",border = 0)
# Median (posterior model)
median <- qgamma(0.5,756.5,19.15)
segments(x0 = median, y0 = 0, x1 = median,
y1 = dgamma(median,756.5,19.15),
col = "white", lwd = 2)

# CONTRASTE DE HIPÓTESIS UNILATERAL
# A PRIORI
# Probabilidad a priori de λ < 42
prior_prob <- pgamma(42, 13.5, 0.15)
prior_prob
## [1] 0.00842958
# Prior odds
prior_odds <- prior_prob / (1 - prior_prob)
prior_odds
## [1] 0.008501242
# A POSTERIORI
# Probabilidad a posteriori de λ < 42
post_prob <- pgamma(42,756.5,19.15)
post_prob
## [1] 0.9567944
# Posterior odds
post_odds <- post_prob / (1 - post_prob)
post_odds
## [1] 22.14517
# Bayes factor
BF <- post_odds / prior_odds
BF
## [1] 2604.934
par(mfrow = c(1, 2)) #ventana para 2 gráficoS
# visualización de las probabilidades en densidades a priori y a posteriori
dgamma_prior<-curve(dgamma(x, 13.5, 0.15),0,210, # density plot
xlab = "λ",
ylab = "Density",
main = "Probabilidad a priori de λ < 42",
lwd = 4, # thickness of line
col = "cyan4",
ylim = c(0,0.017))
sup<-42
polygon(c(dgamma_prior$x[dgamma_prior$x <= sup], sup),
c(dgamma_prior$y[dgamma_prior$x <= sup ], 0),
col = "cyan4",
border = 0)
dgamma_post<-curve(dgamma(x, 756.5,19.15),34,45, # density plot
xlab = "λ",
ylab = "Density",
main = "Probabilidad a posteriori de λ < 42",
lwd = 4, # thickness of line
col = "cyan4",
ylim = c(0,0.30))
sup<-42
polygon(c(dgamma_post$x[dgamma_post$x <= sup], sup),
c(dgamma_post$y[dgamma_post$x <= sup ], 0),
col = "cyan4",
border = 0)

# PREDICCIÓN A POSTERIORI
# lambda = Mode posterior (valor más plausible)
gammaMode(756.5, 19.15)
## [1] 39.4517
predict_l1<- c()
y_l1<- c()
for(x in 15:70){
predict_l1[x]=dpois(x,lambda=39.4517)*dgamma(39.4517,shape=756.5,rate=19.15)
y_l1[x]=x
}
data_predict_l1=data.frame("Observaciones"=y_l1,"posterior_predictive_model1"=predict_l1)
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot posterior predictive model con λ=39.4517
plot1 <- ggplot(data_predict_l1, aes(x = Observaciones, y = posterior_predictive_model1)) +
geom_point() + labs(title = "Mode: λ=39.4517",x = "y’", y = "f(y’| λ)f( λ|y)") +
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model1))
# lambda = 0.25th quantil
qgamma(0.25,756.5,19.15)
## [1] 38.52591
predict_l2<- c()
y_l2<- c()
for(x in 15:70){
predict_l2[x]=dpois(x,lambda=38.52591)*dgamma(38.52591,shape=756.5,rate=19.15)
y_l2[x]=x
}
data_predict_l2=data.frame("Observaciones"=y_l2,"posterior_predictive_model2"=predict_l2)
# Plot posterior predictive model con λ=38.52591
plot2 <- ggplot(data_predict_l2, aes(x = Observaciones, y = posterior_predictive_model2)) +
geom_point() + labs(title = "q 025: λ=38.52591",x = "y’", y = "f(y’| λ)f( λ|y)") +
coord_cartesian(ylim = c(0, 0.016))+
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model2))
# lambda = 0.75th quantil
qgamma(0.75,756.5,19.15)
## [1] 40.46295
predict_l3<- c()
y_l3<- c()
for(x in 15:70){
predict_l3[x]=dpois(x,lambda=40.46295)*dgamma(40.46295,shape=756.5,rate=19.15)
y_l3[x]=x
}
data_predict_l3=data.frame("Observaciones"=y_l3,"posterior_predictive_model3"=predict_l3)
# Plot posterior predictive model con λ=40.46295
plot3 <- ggplot(data_predict_l3, aes(x = Observaciones, y = posterior_predictive_model3)) +
geom_point() + labs(title = "q 0.75: λ=40.46295",x = "y’", y = "f(y’| λ)f( λ|y)") +
coord_cartesian(ylim = c(0, 0.016))+
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model3))
# lambda = 0.025th quantil
qgamma(0.025,756.5,19.15)
## [1] 36.73866
predict_l4<- c()
y_l4<- c()
for(x in 15:70){
predict_l4[x]=dpois(x,lambda=36.73866)*dgamma(36.73866,shape=756.5,rate=19.15)
y_l4[x]=x
}
data_predict_l4=data.frame("Observaciones"=y_l4,"posterior_predictive_model4"=predict_l4)
# Plot posterior predictive model con λ=36.73866
plot4 <- ggplot(data_predict_l4, aes(x = Observaciones, y = posterior_predictive_model4)) +
geom_point() + labs(title = "q 0.025: λ=36.73866",x = "y’", y = "f(y’| λ)f( λ|y)") +
coord_cartesian(ylim = c(0, 0.016))+
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model4))
# lambda = 0.975th quantil
qgamma(0.975,756.5,19.15)
## [1] 42.36808
predict_l5<- c()
y_l5<- c()
for(x in 15:70){
predict_l5[x]=dpois(x,lambda=42.36808)*dgamma(42.36808,shape=756.5,rate=19.15)
y_l5[x]=x
}
data_predict_l5=data.frame("Observaciones"=y_l5,"posterior_predictive_model5"=predict_l5)
# Plot posterior predictive model con λ=42.36808
plot5 <- ggplot(data_predict_l5, aes(x = Observaciones, y = posterior_predictive_model5)) +
geom_point() + labs(title = "q 0.975: λ=42.36808",x = "y’", y = "f(y’| λ)f( λ|y)") +
coord_cartesian(ylim = c(0, 0.016))+
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model5))
# lambda = 0.005th quantil
qgamma(0.005,756.5,19.15)
## [1] 35.90245
predict_l6<- c()
y_l6<- c()
for(x in 15:70){
predict_l6[x]=dpois(x,lambda=35.90245)*dgamma(35.90245,shape=756.5,rate=19.15)
y_l6[x]=x
}
data_predict_l6=data.frame("Observaciones"=y_l6,"posterior_predictive_model6"=predict_l6)
# Plot posterior predictive model con λ=35.90245
plot6 <- ggplot(data_predict_l6, aes(x = Observaciones, y = posterior_predictive_model6)) +
geom_point() + labs(title = "q 0.005: λ=35.90245",x = "y’", y = "f(y’| λ)f( λ|y)") +
coord_cartesian(ylim = c(0, 0.016))+
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model6))
# lambda = 0.995th quantil
qgamma(0.995,756.5,19.15)
## [1] 43.30152
predict_l7<- c()
y_l7<- c()
for(x in 15:70){
predict_l7[x]=dpois(x,lambda= 43.30152)*dgamma(43.30152,shape=756.5,rate=19.15)
y_l7[x]=x
}
data_predict_l7=data.frame("Observaciones"=y_l7,"posterior_predictive_model7"=predict_l7)
# Plot posterior predictive model con λ= 43.30152
plot7 <- ggplot(data_predict_l7, aes(x = Observaciones, y = posterior_predictive_model7)) +
geom_point() + labs(title = "q 0.995: λ= 43.30152",x = "y’", y = "f(y’| λ)f( λ|y)") +
coord_cartesian(ylim = c(0, 0.016))+
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model7))
#Mostrar todos los plot de modelo predictivo a posteriori
layout <- matrix(c(1, 1,
2, 3,
4, 5,
6, 7),
nrow = 4, ncol = 2,
byrow = TRUE)
grid.arrange(plot1,plot2,plot3, plot4, plot5, plot6, plot7,
layout_matrix = layout)
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Expect <- 0
predict_l <- c()
y_<- c()
for(x in 15:70){
predict_l[x] <- integrate(function(lambda) dpois(x,lambda)*dgamma(x=lambda,shape=756.5,rate=19.15), lower = 0, upper = 70)$value
y_[x] <- x
Expect=predict_l[x]*y_[x]+Expect
}
data_predict_l=data.frame("Observaciones"=y_,"posterior_predictive_model"=predict_l)
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Función de densidad predictiva a posteriori
ggplot(data_predict_l, aes(x = Observaciones, y = posterior_predictive_model)) +
geom_point() + labs(title = "Función de densidad predictiva a posteriori",x = "y’", y = "f(y’| y)") +
geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model))
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Expect
## [1] 39.50339
#------------------------------------------------------------------------------------------------------
# MCMC with RStan
# STEP 1: DEFINE the model
Stream_GP_Model <- "
data {
int<lower = 0> Y[19];
}
parameters {
real<lower = 0> lambda;
}
model {
Y ~ poisson(lambda);
lambda ~ gamma(13.5, 0.15);
}
"
# STEP 2: SIMULATE the posterior
Stream_GP_Sim <- stan(model_code = Stream_GP_Model, data = list(Y = y3),
chains = 4, iter = 5000*2, seed = 8411)
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 2e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 0.047 seconds (Warm-up)
## Chain 1: 0.046 seconds (Sampling)
## Chain 1: 0.093 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 0.051 seconds (Warm-up)
## Chain 2: 0.045 seconds (Sampling)
## Chain 2: 0.096 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 0.048 seconds (Warm-up)
## Chain 3: 0.049 seconds (Sampling)
## Chain 3: 0.097 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 10000 [ 0%] (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%] (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%] (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%] (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%] (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%] (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%] (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%] (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%] (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%] (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%] (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 0.05 seconds (Warm-up)
## Chain 4: 0.048 seconds (Sampling)
## Chain 4: 0.098 seconds (Total)
## Chain 4:
as.array(Stream_GP_Sim, pars = "lambda") %>%
head()
## , , parameters = lambda
##
## chains
## iterations chain:1 chain:2 chain:3 chain:4
## [1,] 39.58967 38.77052 39.69134 39.64941
## [2,] 39.38215 40.10760 40.64402 39.75229
## [3,] 39.28168 42.65304 38.81953 39.81724
## [4,] 38.39951 41.57187 38.91430 39.24981
## [5,] 38.45596 42.35008 38.92253 40.07892
## [6,] 40.50505 41.13895 39.34416 39.31661
# Summary
summary(Stream_GP_Sim)
## $summary
## mean se_mean sd 2.5% 25% 50%
## lambda 39.52098 0.016997214 1.4239384 36.78887 38.55602 39.50685
## lp__ 2024.20540 0.007769028 0.6947926 2022.26197 2024.05288 2024.47436
## 75% 97.5% n_eff Rhat
## lambda 40.46938 42.3887 7018.219 1.000692
## lp__ 2024.64803 2024.6960 7997.918 1.000245
##
## $c_summary
## , , chains = chain:1
##
## stats
## parameter mean sd 2.5% 25% 50% 75%
## lambda 39.49634 1.4052490 36.82277 38.54649 39.46433 40.42725
## lp__ 2024.21807 0.6702789 2022.31815 2024.05589 2024.47861 2024.64771
## stats
## parameter 97.5%
## lambda 42.32226
## lp__ 2024.69606
##
## , , chains = chain:2
##
## stats
## parameter mean sd 2.5% 25% 50% 75%
## lambda 39.54668 1.4194493 36.78689 38.57343 39.55285 40.49938
## lp__ 2024.20801 0.6902842 2022.34648 2024.07050 2024.47222 2024.64202
## stats
## parameter 97.5%
## lambda 42.37336
## lp__ 2024.69602
##
## , , chains = chain:3
##
## stats
## parameter mean sd 2.5% 25% 50% 75%
## lambda 39.4656 1.4374325 36.7687 38.48009 39.45604 40.40563
## lp__ 2024.1952 0.7129319 2022.2476 2024.02661 2024.47503 2024.64926
## stats
## parameter 97.5%
## lambda 42.38421
## lp__ 2024.69592
##
## , , chains = chain:4
##
## stats
## parameter mean sd 2.5% 25% 50% 75%
## lambda 39.57531 1.4312783 36.80853 38.61366 39.56248 40.52277
## lp__ 2024.20032 0.7049118 2022.16163 2024.04622 2024.47269 2024.65256
## stats
## parameter 97.5%
## lambda 42.431
## lp__ 2024.696
# Diagnóstico visual para las cadenas de Markov
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Gráfico de Trazas para las 4 cadenas de Markov simuladas
color_scheme_set("mix-blue-green")
trace <- mcmc_trace(Stream_GP_Sim, pars = "lambda", size = 0.1) +
labs (x='Iteration',y='lambda value')
# densidades de valores de las 4 cadenas de Markov simuladas
mcmc_dens <- mcmc_dens_overlay(Stream_GP_Sim, pars = "lambda")
# violin plot: densidades de valores de las 4 cadenas de Markov simuladas (con los cuantiles: 0.05, 0.5 y 0.95)
color_scheme_set("teal")
mcmc_violin <- mcmc_violin(Stream_GP_Sim, pars = "lambda",probs = c(0.05, 0.5, 0.95))
# Mostrar los 3 gráficos
trace+mcmc_dens+mcmc_violin

# Gráficos de autocorrelación (de linea y de barra)
color_scheme_set("brightblue")
mcmc_acf<-mcmc_acf(Stream_GP_Sim, pars = "lambda")
mcmc_acf_bar<-(p <- mcmc_acf_bar(Stream_GP_Sim, pars = "lambda"))
mcmc_acf + mcmc_acf_bar

# Diagnóstico numérico para las cadenas de Markov
rhat(Stream_GP_Sim, pars = "lambda")
## [1] 1.000692
neff_ratio(Stream_GP_Sim, pars = "lambda")
## [1] 0.3509109
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Histograma y densidad de valores de las 4 cadenas de Markov simuladas
h<-mcmc_hist(Stream_GP_Sim, pars = "lambda") +
labs (x='lambda',y='count',
title='Histograma de valores MCMC')
d<-mcmc_dens(Stream_GP_Sim, pars = "lambda") +
labs (x='lambda',y='density',
title='Densidad de valores MCMC')
h+d
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Densidad de valores MCMC vs densidad del modelo a posteriori conocido
color_scheme_set("teal")
mcmc_dens(Stream_GP_Sim, pars = "lambda") +
labs (x='lambda',y='density',
title='Densdad de valores MCMC vs Densidad de modelo a Posteriori') +
stat_function (
fun=dgamma,
args=list(shape=756.5 ,rate=19.15),
lwd = 2,
color="black"
)

# Aproximación de la media y los intervalos de credibilidad a posteriori para λ
# Intervalos de credibilidad a posteriori al 50%
tidy(Stream_GP_Sim, conf.int = TRUE, conf.level = 0.5)
## # A tibble: 1 × 5
## term estimate std.error conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 lambda 39.5 1.42 38.6 40.5
# Shade in the middle 50% interval
mcmc_areas_50<-mcmc_areas(Stream_GP_Sim, pars = "lambda", prob = 0.5)+
labs (title='IC posterior MCMC al 50% para λ')
# Intervalos de credibilidad a posteriori al 95%
tidy(Stream_GP_Sim, conf.int = TRUE, conf.level = 0.95)
## # A tibble: 1 × 5
## term estimate std.error conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 lambda 39.5 1.42 36.8 42.4
# Shade in the middle 95% interval
mcmc_areas_95<-mcmc_areas(Stream_GP_Sim, pars = "lambda", prob = 0.95)+
labs (title='IC posterior MCMC al 95% para λ')
# Intervalos de credibilidad a posteriori al 99%
tidy(Stream_GP_Sim, conf.int = TRUE, conf.level = 0.99)
## # A tibble: 1 × 5
## term estimate std.error conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 lambda 39.5 1.42 36.0 43.4
# Shade in the middle 95% interval
mcmc_areas_99<-mcmc_areas(Stream_GP_Sim, pars = "lambda", prob = 0.99)+
labs (title='IC posterior MCMC al 99% para λ')
mcmc_areas_50+mcmc_areas_95+mcmc_areas_99

# Ordenar las 4 cadenas de Markov 1 data frame
Stream_chains_df <- as.data.frame(Stream_GP_Sim, pars = "lp__", include = FALSE)
dim(Stream_chains_df)
## [1] 20000 1
# Resumen de medidas a posteriori aproximadas por MCMC para λ
Stream_chains_df %>%
summarize(post_mean = mean(lambda),
post_median = median(lambda),
post_mode = sample_mode(lambda),
lower_50 = quantile(lambda, 0.25),
upper_50 = quantile(lambda, 0.75),
lower_95 = quantile(lambda, 0.025),
upper_95 = quantile(lambda, 0.975),
lower_99 = quantile(lambda, 0.005),
upper_99 = quantile(lambda, 0.995))
## post_mean post_median post_mode lower_50 upper_50 lower_95 upper_95 lower_99
## 1 39.52098 39.50685 39.33384 38.55602 40.46938 36.78887 42.3887 35.99579
## upper_99
## 1 43.3592
# Aproximación P(λ<42|y) con MCMC
Stream_chains_df %>%
mutate(exceeds = lambda < 42) %>%
tabyl(exceeds)
## exceeds n percent
## FALSE 872 0.0436
## TRUE 19128 0.9564
# Predicción a posteriori con MCMC
# Set the seed
set.seed(13)
# Predecir un valor de Y' por cada valor λ en la cadena
Stream_chains_df <- Stream_chains_df %>%
mutate(y_predict = rpois(length(lambda) , lambda))
# Chequear
Stream_chains_df %>%
head(5)
## lambda y_predict
## 1 39.58967 43
## 2 39.38215 37
## 3 39.28168 24
## 4 38.39951 45
## 5 38.45596 41
mean(Stream_chains_df$y_predict)
## [1] 39.59695
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Histograma predictivo a posteriori por MCMC
mcmc_hist<-mcmc_hist(Stream_chains_df, pars = "y_predict") +
labs (x='y_predict',y='density',
title='Histograma predictivo a posteriori por MCMC')
# Densidad predictiva a posteriori por MCMC
mcmc_dens<-mcmc_dens(Stream_chains_df, pars = "y_predict") +
labs (x='y_predict',y='density',
title='Densidad predictiva a posteriori por MCMC')
mcmc_hist+mcmc_dens
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Resumen de medidas de predicción a posteriori aproximadas por MCMC para λ
Stream_chains_df %>%
summarize(mean = mean(y_predict),
median = median(y_predict),
mode = sample_mode(y_predict),
lower_50 = quantile(y_predict, 0.25),
upper_50 = quantile(y_predict, 0.75),
lower_95 = quantile(y_predict, 0.025),
upper_95 = quantile(y_predict, 0.975),
lower_99 = quantile(y_predict, 0.005),
upper_99 = quantile(y_predict, 0.995))
## mean median mode lower_50 upper_50 lower_95 upper_95 lower_99
## 1 39.59695 39 38.40575 35 44 27 53 24
## upper_99
## 1 57
#------------------------------------------------------------------------------------------------------
# Algoritmo Metropolis-Hastings (con 9 observaciones de las 19)
# usamos las 9 primeras observaciones
y3_1 <- y3[1:9]
y3_1
## [1] 202 339 14 12 13 13 10 13 4
sum(y3_1)
## [1] 620
# Análisis con Bayesrules
# Summarize
summarize_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 620, n = 9)
## model shape rate mean mode var sd
## 1 prior 13.5 0.15 90.00000 83.33333 600.000000 24.494897
## 2 posterior 633.5 9.15 69.23497 69.12568 7.566664 2.750757
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot_gamma_poisson
plot_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 620, n = 9)

# Curvas de modelos propuestos para λ′|λ (exponenciales)
par(mfrow = c(2, 3))
curve(dexp(x,0.004),0,100, # density plot
xlab = "Streams",
ylab = "Density",
main = "Proposal:λ′|λ~Exp(0.004)",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.1))
curve(dexp(x,0.008),0,100, # density plot
xlab = "Streams",
ylab = "Density",
main = "Proposal:λ′|λ~Exp(0.008)",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.1))
curve(dexp(x,0.01),0,100, # density plot
xlab = "Streams",
ylab = "Density",
main = "Proposal:λ′|λ~Exp(0.01)",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.1))
curve(dexp(x,0.02),0,100, # density plot
xlab = "Streams",
ylab = "Density",
main = "Proposal:λ′|λ~Exp(0.02)",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.1))
curve(dexp(x,0.06),0,100, # density plot
xlab = "Streams",
ylab = "Density",
main = "Proposal:λ′|λ~Exp(0.06)",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.1))
curve(dexp(x,0.1),0,100, # density plot
xlab = "Streams",
ylab = "Density",
main = "Proposal:λ′|λ~Exp(0.1)",
lwd = 4, # thickness of line
col = "orange",
ylim = c(0,0.1))

# Algorithmo
one_iteration <- function(b, current){
# STEP 1: Propose the next chain location
proposal <- rexp(1,b)
# STEP 2: Decide whether or not to go there
k<-1
proposal_plaus <- dgamma(proposal, 13.5, 0.15)
current_plaus <- dgamma(current, 13.5, 0.15)
while(k <= length(y3_1)){
proposal_plaus <- proposal_plaus * dpois(y3_1[k],lambda = proposal)
current_plaus <- current_plaus * dpois(y3_1[k],lambda = current)
k=k+1
}
proposal_q <- dexp(proposal,b)
current_q <- dexp(current,b)
alpha <- min(1, proposal_plaus / current_plaus * current_q / proposal_q)
next_stop <- sample(c(proposal, current),
size = 1, prob = c(alpha,1-alpha))
return(data.frame(proposal, alpha, next_stop))
}
gammapois_tour <- function(N,b){
# 1. Start the chain at location 0.5
current <- 70
# 2. Initialize the simulation
lambda <- rep(0, N)
# 3. Simulate N Markov chain stops
for(i in 1:N){
# Simulate one iteration
sim <- one_iteration(b = b, current = current)
# Record next location
lambda[i] <- sim$next_stop
# Reset the current location
current <- sim$next_stop
}
# 4. Return the chain locations
return(data.frame(iteration = c(1:N), lambda))
}
# Gráficos de trazas según modelos de propuesta para λ′|λ
par(mfrow = c(1, 1)) #ventana para un solo gráfico
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.004)
t1<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) +
geom_line()+
labs(
title ="Proposal:λ′|λ~Exp(0.004)")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.008)
t2<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) +
geom_line()+
labs(
title ="Proposal:λ′|λ~Exp(0.008)")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.01)
t3<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) +
geom_line()+
labs(
title ="Proposal:λ′|λ~Exp(0.01)")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.02)
t4<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) +
geom_line()+
labs(
title ="Proposal:λ′|λ~Exp(0.02)")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.06)
t5<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) +
geom_line()+
labs(
title ="Proposal:λ′|λ~Exp(0.06)")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.1)
t6<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) +
geom_line()+
labs(
title ="Proposal:λ′|λ~Exp(0.1)")
t1+t2+t3+t4+t5+t6

par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Estimaciones de densidades según modelos de propuesta para λ′|λ
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.004)
h1<-ggplot(gammapois_sim, aes(x = lambda)) +
geom_histogram(aes(y = ..density..), color = "white") +
labs(
title ="Proposal:λ′|λ~Exp(0.004)")+
stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.008)
h2<-ggplot(gammapois_sim, aes(x = lambda)) +
geom_histogram(aes(y = ..density..), color = "white") +
labs(
title ="Proposal:λ′|λ~Exp(0.008)")+
stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.01)
h3<-ggplot(gammapois_sim, aes(x = lambda)) +
geom_histogram(aes(y = ..density..), color = "white") +
labs(
title ="Proposal:λ′|λ~Exp(0.01)")+
stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.02)
h4<-ggplot(gammapois_sim, aes(x = lambda)) +
geom_histogram(aes(y = ..density..), color = "white")+
labs(
title ="Proposal:λ′|λ~Exp(0.04)")+
stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.06)
h5<-ggplot(gammapois_sim, aes(x = lambda)) +
geom_histogram(aes(y = ..density..), color = "white")+
labs(
title ="Proposal:λ′|λ~Exp(0.06)")+
stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.1)
h6<-ggplot(gammapois_sim, aes(x = lambda)) +
geom_histogram(aes(y = ..density..), color = "white")+
labs(
title ="Proposal:λ′|λ~Exp(0.1)")+
stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")
h1+h2+h3+h4+h5+h6
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Nos quedamos con proposal model:λ′|λ~Exp(0.008)
set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.008)
# Chequear data.frame de simulación
head(gammapois_sim,20)
## iteration lambda
## 1 1 70.00000
## 2 2 70.00000
## 3 3 70.00000
## 4 4 70.00000
## 5 5 70.00000
## 6 6 70.00000
## 7 7 70.00000
## 8 8 70.00000
## 9 9 70.82041
## 10 10 70.82041
## 11 11 70.82041
## 12 12 70.82041
## 13 13 70.82041
## 14 14 70.82041
## 15 15 70.82041
## 16 16 70.82041
## 17 17 70.82041
## 18 18 70.82041
## 19 19 70.82041
## 20 20 65.82388
# Resumen de medidas a posteriori aproximadas por Metropolis-Hastings para λ
gammapois_sim %>%
summarize(mean = mean(gammapois_sim$lambda),
median = median(gammapois_sim$lambda),
mode = sample_mode(gammapois_sim$lambda),
lower_50 = quantile(gammapois_sim$lambda, 0.25),
upper_50 = quantile(gammapois_sim$lambda, 0.75),
lower_95 = quantile(gammapois_sim$lambda, 0.025),
upper_95 = quantile(gammapois_sim$lambda, 0.975),
lower_99 = quantile(gammapois_sim$lambda, 0.005),
upper_99 = quantile(gammapois_sim$lambda, 0.995))
## mean median mode lower_50 upper_50 lower_95 upper_95 lower_99
## 1 69.25382 69.12313 69.12911 67.30771 70.97914 64.44451 74.92324 62.42392
## upper_99
## 1 76.57016
# Resumen de medidas del modelo exato a posteriori
summarize_gamma(shape = 633.5, rate = 9.15)
## mean mode var sd
## 1 69.23497 69.12568 7.566664 2.750757
# Mediana a posteriori
qgamma(0.5,633.5,9.15)
## [1] 69.19855
# Intervalo de credibilidad a posteriori para λ al 50%
qgamma(c(0.25, 0.75),633.5,9.15)
## [1] 67.36029 71.06994
# Intervalo de credibilidad a posteriori para λ al 95%
qgamma(c(0.025, 0.975),633.5,9.15)
## [1] 63.94784 74.72911
# Intervalo de credibilidad a posteriori para λ al 99%
qgamma(c(0.005, 0.995),633.5,9.15)
## [1] 62.35485 76.52558
# Predicción a posteriori con M-H
# Set the seed
set.seed(13)
# Predecir un valor de Y' por cada valor λ de la simulación M-H
gammapois_sim <- gammapois_sim %>%
mutate(y_predict = rpois(length(lambda) , lambda))
# Chequear
gammapois_sim %>%
head(5)
## iteration lambda y_predict
## 1 1 70 74
## 2 2 70 67
## 3 3 70 50
## 4 4 70 79
## 5 5 70 73
mean(gammapois_sim$y_predict)
## [1] 69.2812
par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Histograma y densidad predictiva a posteriori por M-H
hist(x = gammapois_sim$y_predict,xlab = "y_predict",col="paleturquoise3", main="Histograma y densidad predictiva a posteriori por M-H",prob = TRUE,breaks = 30)
lines(density(gammapois_sim$y_predict), # density plot
lwd = 3, # thickness of line
col = "black")
