Parcial 18/10/2024

# Load packages
library(rstan)

## Loading required package: StanHeaders

## 
## rstan version 2.32.6 (Stan version 2.32.2)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)

## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file

library(bayesplot)

## This is bayesplot version 1.11.1

## - Online documentation and vignettes at mc-stan.org/bayesplot

## - bayesplot theme set to bayesplot::theme_default()

##    * Does _not_ affect other ggplot2 plots

##    * See ?bayesplot_theme_set for details on theme setting

library(ggplot2)
library(ggpubr)
library(gridExtra)
library(patchwork)
library(bayesrules)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ tidyr::extract() masks rstan::extract()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)
library(modeest)
library(broom)
library(broom.mixed) 
library(janitor)

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

# Global de los datos, escribir directorio de la base de datos
data_Spotify <- read.csv("C:/Users/sebau/OneDrive/Documentos/Inferencia 2/2024/Parcial Octubre/Seba GS-audience-timeline.csv")

nrow(data_Spotify)

## [1] 1017

# primeras 6 filas de la base de datos
head(data_Spotify)

##         date listeners streams followers
## 1 2022-01-01        11      13        30
## 2 2022-01-02        11      14        30
## 3 2022-01-03        10      11        30
## 4 2022-01-04        11      11        30
## 5 2022-01-05         4       4        30
## 6 2022-01-06        13      13        30

# ultimas 6 filas de la base de datos
tail(data_Spotify)

##            date listeners streams followers
## 1012 2024-10-08        21      21        53
## 1013 2024-10-09       315     458        61
## 1014 2024-10-10       358     534        66
## 1015 2024-10-11        21      24        68
## 1016 2024-10-12       693    1022        87
## 1017 2024-10-13        25      27        87

# Observar los streams en el tiempo
plot(data_Spotify$streams,col="blue", main="Streams diarios obtenidos en Spotify entre 2022-01-01 y 2024-10-13", xlab="Días", ylab="Streams", type="l")

# Seleccionamos una observación cada 20 días
date_each20days <- c()
streams_each20days <- c()
umbral <- nrow(data_Spotify)
i <- 288 # A partir de ahi (fecha 2022-10-15) hacemos las observaciones cada 20 días
j <- 1
while(i <= umbral) {
  date_each20days[j]=data_Spotify$date[i]
  streams_each20days[j]=data_Spotify$streams[i]
  i <- i + 20
  j <- j + 1
}

data_each20days <- data.frame(date=date_each20days,streams=streams_each20days)
# Número de observaciones
nrow(data_each20days)

## [1] 37

# MODELO DE PROBABILIDAD A PRIORI


# Datos entre el 2022-10-15 y 2023-09-20  (18 datos)
data_each20days_prior <- data_each20days[1:18,]
data_each20days_prior

##          date streams
## 1  2022-10-15     118
## 2  2022-11-04      76
## 3  2022-11-24      80
## 4  2022-12-14      55
## 5  2023-01-03      59
## 6  2023-01-23      69
## 7  2023-02-12      77
## 8  2023-03-04      86
## 9  2023-03-24      99
## 10 2023-04-13     103
## 11 2023-05-03      71
## 12 2023-05-23      91
## 13 2023-06-12     184
## 14 2023-07-02     125
## 15 2023-07-22      92
## 16 2023-08-11      99
## 17 2023-08-31      84
## 18 2023-09-20      38

# Visualización de los datos

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Histograma y densidad de los datos
hist(x = data_each20days_prior$stream,xlab = "streams",col="azure2", main="Densidad de streams diarios obtenidos en Spotify entre 2022-10-15 y 2023-09-20",prob = TRUE,breaks = 10)
lines(density(data_each20days_prior$streams), # density plot
      lwd = 2, # thickness of line
      col = "black")

# Ajuste mediante el modelo Gamma (prioir model)

# Densidad modelo a priori Gamma(13.5, 0.15)
curve(dgamma(x, 13.5, 0.15),0,210, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Modelo a priori de streams", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.017))

# Densidad streams
lines(density(data_each20days_prior$streams), # density plot
      lwd = 4, # thickness of line
      col = "black")

# Moda del modelo a priori
mode <- gammaMode(13.5, 0.15)
segments(x0 = mode, y0 = 0, x1 = mode, 
         y1 = dgamma(mode,  13.5, 0.15), 
         col = "blue", lwd = 4)


# Media dell modelo a priori
mean <- 13.5/0.15
segments(x0 = mean, y0 = 0, x1 = mean, 
         y1 = dgamma(mean,13.5, 0.15), 
         col = "green", lwd = 4)

# Legend
legend(x ="topright", # Ubicación de la leyenda
       c("Densidad de número promedio diario de streams","Ajuste de modelo de probabilidad a Priori","Moda del modelo a priori","Media del modelo a priori"),
       col = c("black","orange","blue","green"),
       lwd = c(2,2,2,2),
       bty = "n")

# Summaries prior model 
summarize_gamma(shape = 13.5, rate = 0.15)

##   mean     mode var      sd
## 1   90 83.33333 600 24.4949

# Summary de los 18 datos observados para construir el modelo a priori
summary(data_each20days_prior$streams)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   38.00   72.25   85.00   89.22   99.00  184.00

# MODELO DE PROBABILIDAD A POSTERIORI


# Realizamos 2 nuevas observaciones del 2023-10-10 y 2023-10-30
data_each20days_posterior1 <- data_each20days[19:20,]
data_each20days_posterior1

##          date streams
## 19 2023-10-10     202
## 20 2023-10-30     339

# Vector y1 de las 2 observaciones para construir modelo a posteriori
y1 <-data_each20days_posterior1$streams
length(y1)

## [1] 2

sum_y1 <- sum(y1)
sum_y1

## [1] 541

# Summary de las 2 observaciones
summary(y1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   202.0   236.2   270.5   270.5   304.8   339.0

# Analisis con bayesrules package


# Modelo posteriori de lambda dadas las 2 nuevas observaciones

# Summarize
summarize_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 541, n = 2)

##       model shape rate    mean      mode      var       sd
## 1     prior  13.5 0.15  90.000  83.33333 600.0000 24.49490
## 2 posterior 554.5 2.15 257.907 257.44186 119.9567 10.95248

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Plot_gamma_poisson
plot_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 541, n = 2)

# Realizamos 17 nuevas observaciones entre el 2023-11-19 y 2024-10-04
data_each20days_posterior2 <- data_each20days[21:37,]
data_each20days_posterior2

##          date streams
## 21 2023-11-19      14
## 22 2023-12-09      12
## 23 2023-12-29      13
## 24 2024-01-18      13
## 25 2024-02-07      10
## 26 2024-02-27      13
## 27 2024-03-18       4
## 28 2024-04-07       6
## 29 2024-04-27       4
## 30 2024-05-17       7
## 31 2024-06-06       3
## 32 2024-06-26      21
## 33 2024-07-16      27
## 34 2024-08-05      12
## 35 2024-08-25      12
## 36 2024-09-14      13
## 37 2024-10-04      18

# Vector y2 de 17 nuevas observaciones para construir nuevo modelo a posteriori
y2 <-data_each20days_posterior2$streams
length(y2)

## [1] 17

sum_y2 <- sum(y2)
sum_y2

## [1] 202

# Summary de las 17 observaciones
summary(y2)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    3.00    7.00   12.00   11.88   13.00   27.00

# Summarize
summarize_gamma_poisson(shape = 554.5, rate = 2.15, sum_y = 202, n = 17)

##       model shape  rate      mean     mode        var        sd
## 1     prior 554.5  2.15 257.90698 257.4419 119.956733 10.952476
## 2 posterior 756.5 19.15  39.50392  39.4517   2.062868  1.436269

par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot_gamma_poisson
plot_gamma_poisson(shape = 554.5, rate = 2.15, sum_y = 202, n = 17)

# Realizando las 19 observaciones juntas obtenemos el mismo resultado de modelo a posteriori

# Vector y3 de las 5 observaciones para construir modelo a posteriori
y3 <-c(y1,y2)
length(y3)

## [1] 19

sum_y3 <- sum_y1+sum_y2
sum_y3

## [1] 743

# Observamos distribución con box plots de ambos conjuntos de datos (para el modelo priori y posteriori)
observaciones<- c(1:37)
prior_post <- c()
prior_post <- c(rep("1: Prior data",times=18),rep("2: Posterior data",times=19))
data_prior_post <- data.frame("observaciones"=observaciones,"streams"=streams_each20days, "Grupo"=prior_post)

# Observarlos streams (por grupo priori y posteriori) tomados cada 20 días (entre 2022-10-15 y 2024-10-04)
par(mfrow = c(1, 1)) #ventana para un solo gráfico

ggplot(data_prior_post)+ 
  labs(title ="Observaciones cada 20 días de streams diarios obtenidos en Spotify entre 2022-10-15 y 2024-10-04")+
  geom_line(aes(observaciones ,streams,colour=Grupo)) +
  geom_point(aes(observaciones ,streams,colour=Grupo))

# visualizar conjunto de boxplot
boxplot(streams ~ Grupo, data = data_prior_post,main="Box plots por grupo de datos",
        col = c("bisque2", "azure2"))

# Summarize
summarize_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 743, n = 19)

##       model shape  rate     mean     mode        var        sd
## 1     prior  13.5  0.15 90.00000 83.33333 600.000000 24.494897
## 2 posterior 756.5 19.15 39.50392 39.45170   2.062868  1.436269

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Plot_gamma_poisson
plot_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 743, n = 19)

# Estimación a posteriori

par(mfrow = c(1, 3)) #ventana para tres gráficos

# Intervalo de credibilidad a posteriori para λ al 50%

qgamma(c(0.25, 0.75),756.5,19.15)

## [1] 38.52591 40.46295

dgamma_post<-curve(dgamma(x, 756.5,19.15),34,45, # density plot
                   xlab = "λ",
                   ylab = "Density",
                   main = "IC posterior al 50% para λ", 
                   lwd = 4, # thickness of line
                   col = "cyan4",
                   ylim = c(0,0.3))

inf<-38.52591
sup<-40.46295
# Índices inferior y superior del eje X
l <- min(which(dgamma_post$x >= inf))
h <- max(which(dgamma_post$x < sup))

polygon(c(dgamma_post$x[c(l, l:h, h)]),
        c(0, dgamma_post$y[l:h], 0),lwd = 1,
        col = "cyan4",border = 0)


# Median (posterior model)
median <- qgamma(0.5,756.5,19.15)
segments(x0 = median, y0 = 0, x1 = median, 
         y1 = dgamma(median,756.5,19.15), 
         col = "white", lwd = 2)


# Intervalo de credibilidad a posteriori para λ al 95%
# 0.025th & 0.975th quantiles 
qgamma(c(0.025, 0.975),756.5,19.15)

## [1] 36.73866 42.36808

dgamma_post<-curve(dgamma(x,756.5,19.15),34,45, # density plot
                   xlab = "λ",
                   ylab = "Density",
                   main = "IC posterior al 95% para λ", 
                   lwd = 4, # thickness of line
                   col = "cyan4",
                   ylim = c(0,0.3))

inf<-36.73866
sup<-42.36808
# Índices inferior y superior del eje X
l <- min(which(dgamma_post$x >= inf))
h <- max(which(dgamma_post$x < sup))

polygon(c(dgamma_post$x[c(l, l:h, h)]),
        c(0, dgamma_post$y[l:h], 0),lwd = 1,
        col = "cyan4",border = 0)


# Mediana (posterior model)
median <- qgamma(0.5,756.5,19.15)
segments(x0 = median, y0 = 0, x1 = median, 
         y1 = dgamma(median,756.5,19.15), 
         col = "white", lwd = 2)


# Intervalo de credibilidad a posteriori para λ al 99%
# 0.005th & 0.995th quantiles
qgamma(c(0.005, 0.995),756.5,19.15)

## [1] 35.90245 43.30152

dgamma_post<-curve(dgamma(x, 756.5,19.15),34,45, # density plot
                   xlab = "λ",
                   ylab = "Density",
                   main = "IC posterior al 99% para λ", 
                   lwd = 4, # thickness of line
                   col = "cyan4",
                   ylim = c(0,0.3))

inf<-35.90245
sup<-43.30152
# Índices inferior y superior del eje X
l <- min(which(dgamma_post$x >= inf))
h <- max(which(dgamma_post$x < sup))

polygon(c(dgamma_post$x[c(l, l:h, h)]),
        c(0, dgamma_post$y[l:h], 0),lwd = 1,
        col = "cyan4",border = 0)


# Median (posterior model)
median <- qgamma(0.5,756.5,19.15)
segments(x0 = median, y0 = 0, x1 = median, 
         y1 = dgamma(median,756.5,19.15), 
         col = "white", lwd = 2)

# CONTRASTE DE HIPÓTESIS UNILATERAL

# A PRIORI

# Probabilidad a priori de λ < 42 
prior_prob <- pgamma(42, 13.5, 0.15)
prior_prob

## [1] 0.00842958

# Prior odds
prior_odds <- prior_prob / (1 - prior_prob)
prior_odds

## [1] 0.008501242

# A POSTERIORI

# Probabilidad a posteriori de λ < 42
post_prob <- pgamma(42,756.5,19.15)
post_prob

## [1] 0.9567944

# Posterior odds
post_odds <- post_prob / (1 - post_prob)
post_odds

## [1] 22.14517

# Bayes factor
BF <- post_odds / prior_odds
BF

## [1] 2604.934

par(mfrow = c(1, 2)) #ventana para 2 gráficoS

# visualización de las probabilidades en densidades a priori y a posteriori

dgamma_prior<-curve(dgamma(x, 13.5, 0.15),0,210, # density plot
                    xlab = "λ",
                    ylab = "Density",
                    main = "Probabilidad a priori de λ < 42", 
                    lwd = 4, # thickness of line
                    col = "cyan4",
                    ylim = c(0,0.017))
sup<-42

polygon(c(dgamma_prior$x[dgamma_prior$x <= sup], sup),
        c(dgamma_prior$y[dgamma_prior$x <= sup ], 0),
        col = "cyan4",
        border = 0)


dgamma_post<-curve(dgamma(x, 756.5,19.15),34,45, # density plot
                   xlab = "λ",
                   ylab = "Density",
                   main = "Probabilidad a posteriori de λ < 42", 
                   lwd = 4, # thickness of line
                   col = "cyan4",
                   ylim = c(0,0.30))
sup<-42

polygon(c(dgamma_post$x[dgamma_post$x <= sup], sup),
        c(dgamma_post$y[dgamma_post$x <= sup ], 0),
        col = "cyan4",
        border = 0)

# PREDICCIÓN A POSTERIORI


# lambda = Mode posterior (valor más plausible)
gammaMode(756.5, 19.15)

## [1] 39.4517

predict_l1<- c()
y_l1<- c()
for(x in 15:70){    
  predict_l1[x]=dpois(x,lambda=39.4517)*dgamma(39.4517,shape=756.5,rate=19.15)
  y_l1[x]=x
}
data_predict_l1=data.frame("Observaciones"=y_l1,"posterior_predictive_model1"=predict_l1)

par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Plot posterior predictive model con λ=39.4517
plot1 <- ggplot(data_predict_l1, aes(x = Observaciones, y = posterior_predictive_model1)) + 
  geom_point() + labs(title = "Mode: λ=39.4517",x = "y’", y = "f(y’| λ)f( λ|y)") +
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model1))


# lambda = 0.25th quantil
qgamma(0.25,756.5,19.15)

## [1] 38.52591

predict_l2<- c()
y_l2<- c()
for(x in 15:70){    
  predict_l2[x]=dpois(x,lambda=38.52591)*dgamma(38.52591,shape=756.5,rate=19.15)
  y_l2[x]=x
}
data_predict_l2=data.frame("Observaciones"=y_l2,"posterior_predictive_model2"=predict_l2)
# Plot posterior predictive model con λ=38.52591
plot2 <- ggplot(data_predict_l2, aes(x = Observaciones, y = posterior_predictive_model2)) + 
  geom_point() + labs(title = "q 025: λ=38.52591",x = "y’", y = "f(y’| λ)f( λ|y)") +
  coord_cartesian(ylim = c(0, 0.016))+
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model2))


# lambda = 0.75th quantil
qgamma(0.75,756.5,19.15)

## [1] 40.46295

predict_l3<- c()
y_l3<- c()
for(x in 15:70){    
  predict_l3[x]=dpois(x,lambda=40.46295)*dgamma(40.46295,shape=756.5,rate=19.15)
  y_l3[x]=x
}
data_predict_l3=data.frame("Observaciones"=y_l3,"posterior_predictive_model3"=predict_l3)
# Plot posterior predictive model con λ=40.46295
plot3 <- ggplot(data_predict_l3, aes(x = Observaciones, y = posterior_predictive_model3)) + 
  geom_point() + labs(title = "q 0.75: λ=40.46295",x = "y’", y = "f(y’| λ)f( λ|y)") +
  coord_cartesian(ylim = c(0, 0.016))+
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model3))


# lambda = 0.025th quantil
qgamma(0.025,756.5,19.15)

## [1] 36.73866

predict_l4<- c()
y_l4<- c()
for(x in 15:70){    
  predict_l4[x]=dpois(x,lambda=36.73866)*dgamma(36.73866,shape=756.5,rate=19.15)
  y_l4[x]=x
}
data_predict_l4=data.frame("Observaciones"=y_l4,"posterior_predictive_model4"=predict_l4)
# Plot posterior predictive model con λ=36.73866
plot4 <- ggplot(data_predict_l4, aes(x = Observaciones, y = posterior_predictive_model4)) + 
  geom_point() + labs(title = "q 0.025: λ=36.73866",x = "y’", y = "f(y’| λ)f( λ|y)") +
  coord_cartesian(ylim = c(0, 0.016))+
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model4))


# lambda = 0.975th quantil
qgamma(0.975,756.5,19.15)

## [1] 42.36808

predict_l5<- c()
y_l5<- c()
for(x in 15:70){    
  predict_l5[x]=dpois(x,lambda=42.36808)*dgamma(42.36808,shape=756.5,rate=19.15)
  y_l5[x]=x
}
data_predict_l5=data.frame("Observaciones"=y_l5,"posterior_predictive_model5"=predict_l5)
# Plot posterior predictive model con λ=42.36808
plot5 <- ggplot(data_predict_l5, aes(x = Observaciones, y = posterior_predictive_model5)) + 
  geom_point() + labs(title = "q 0.975: λ=42.36808",x = "y’", y = "f(y’| λ)f( λ|y)") +
  coord_cartesian(ylim = c(0, 0.016))+
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model5))

# lambda = 0.005th quantil
qgamma(0.005,756.5,19.15)

## [1] 35.90245

predict_l6<- c()
y_l6<- c()
for(x in 15:70){    
  predict_l6[x]=dpois(x,lambda=35.90245)*dgamma(35.90245,shape=756.5,rate=19.15)
  y_l6[x]=x
}
data_predict_l6=data.frame("Observaciones"=y_l6,"posterior_predictive_model6"=predict_l6)
# Plot posterior predictive model con λ=35.90245
plot6 <- ggplot(data_predict_l6, aes(x = Observaciones, y = posterior_predictive_model6)) + 
  geom_point() + labs(title = "q 0.005: λ=35.90245",x = "y’", y = "f(y’| λ)f( λ|y)") +
  coord_cartesian(ylim = c(0, 0.016))+
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model6))


# lambda = 0.995th quantil
qgamma(0.995,756.5,19.15)

## [1] 43.30152

predict_l7<- c()
y_l7<- c()
for(x in 15:70){    
  predict_l7[x]=dpois(x,lambda= 43.30152)*dgamma(43.30152,shape=756.5,rate=19.15)
  y_l7[x]=x
}
data_predict_l7=data.frame("Observaciones"=y_l7,"posterior_predictive_model7"=predict_l7)
# Plot posterior predictive model con λ= 43.30152
plot7 <- ggplot(data_predict_l7, aes(x = Observaciones, y = posterior_predictive_model7)) + 
  geom_point() + labs(title = "q 0.995: λ= 43.30152",x = "y’", y = "f(y’| λ)f( λ|y)") +
  coord_cartesian(ylim = c(0, 0.016))+
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model7))

#Mostrar todos los plot de modelo predictivo a posteriori
layout <- matrix(c(1, 1,  
                   2, 3,
                   4, 5,
                   6, 7), 
                 nrow = 4, ncol = 2,
                 byrow = TRUE)

grid.arrange(plot1,plot2,plot3, plot4, plot5, plot6, plot7,
             layout_matrix = layout)

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Expect <- 0
predict_l <- c()
y_<- c()
for(x in 15:70){ 
  predict_l[x] <- integrate(function(lambda) dpois(x,lambda)*dgamma(x=lambda,shape=756.5,rate=19.15), lower = 0, upper = 70)$value
  y_[x] <- x
  Expect=predict_l[x]*y_[x]+Expect
}
data_predict_l=data.frame("Observaciones"=y_,"posterior_predictive_model"=predict_l)

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Función de densidad predictiva a posteriori
ggplot(data_predict_l, aes(x = Observaciones, y = posterior_predictive_model)) + 
  geom_point() + labs(title = "Función de densidad predictiva a posteriori",x = "y’", y = "f(y’| y)") +
  geom_segment(aes(x = Observaciones , xend =Observaciones, y = 0, yend = posterior_predictive_model))

## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 14 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Expect

## [1] 39.50339

#------------------------------------------------------------------------------------------------------

# MCMC with RStan


# STEP 1: DEFINE the model
Stream_GP_Model <- "
  data {
    int<lower = 0> Y[19];
  }
  parameters {
    real<lower = 0> lambda;
  }
  model {
    Y ~ poisson(lambda);
    lambda ~ gamma(13.5, 0.15);
  }
"

# STEP 2: SIMULATE the posterior
Stream_GP_Sim <- stan(model_code = Stream_GP_Model, data = list(Y = y3), 
                      chains = 4, iter = 5000*2, seed = 8411)

## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 2e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 1: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 1: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 1: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 1: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 1: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 1: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 1: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 1: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 1: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 1: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 1: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.047 seconds (Warm-up)
## Chain 1:                0.046 seconds (Sampling)
## Chain 1:                0.093 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 4e-06 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 2: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 2: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 2: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 2: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 2: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 2: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 2: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 2: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 2: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 2: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 2: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.051 seconds (Warm-up)
## Chain 2:                0.045 seconds (Sampling)
## Chain 2:                0.096 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 4e-06 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 3: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 3: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 3: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 3: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 3: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 3: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 3: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 3: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 3: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 3: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 3: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.048 seconds (Warm-up)
## Chain 3:                0.049 seconds (Sampling)
## Chain 3:                0.097 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 4e-06 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 10000 [  0%]  (Warmup)
## Chain 4: Iteration: 1000 / 10000 [ 10%]  (Warmup)
## Chain 4: Iteration: 2000 / 10000 [ 20%]  (Warmup)
## Chain 4: Iteration: 3000 / 10000 [ 30%]  (Warmup)
## Chain 4: Iteration: 4000 / 10000 [ 40%]  (Warmup)
## Chain 4: Iteration: 5000 / 10000 [ 50%]  (Warmup)
## Chain 4: Iteration: 5001 / 10000 [ 50%]  (Sampling)
## Chain 4: Iteration: 6000 / 10000 [ 60%]  (Sampling)
## Chain 4: Iteration: 7000 / 10000 [ 70%]  (Sampling)
## Chain 4: Iteration: 8000 / 10000 [ 80%]  (Sampling)
## Chain 4: Iteration: 9000 / 10000 [ 90%]  (Sampling)
## Chain 4: Iteration: 10000 / 10000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.05 seconds (Warm-up)
## Chain 4:                0.048 seconds (Sampling)
## Chain 4:                0.098 seconds (Total)
## Chain 4:

as.array(Stream_GP_Sim, pars = "lambda") %>% 
  head()

## , , parameters = lambda
## 
##           chains
## iterations  chain:1  chain:2  chain:3  chain:4
##       [1,] 39.58967 38.77052 39.69134 39.64941
##       [2,] 39.38215 40.10760 40.64402 39.75229
##       [3,] 39.28168 42.65304 38.81953 39.81724
##       [4,] 38.39951 41.57187 38.91430 39.24981
##       [5,] 38.45596 42.35008 38.92253 40.07892
##       [6,] 40.50505 41.13895 39.34416 39.31661

# Summary
summary(Stream_GP_Sim)

## $summary
##              mean     se_mean        sd       2.5%        25%        50%
## lambda   39.52098 0.016997214 1.4239384   36.78887   38.55602   39.50685
## lp__   2024.20540 0.007769028 0.6947926 2022.26197 2024.05288 2024.47436
##               75%     97.5%    n_eff     Rhat
## lambda   40.46938   42.3887 7018.219 1.000692
## lp__   2024.64803 2024.6960 7997.918 1.000245
## 
## $c_summary
## , , chains = chain:1
## 
##          stats
## parameter       mean        sd       2.5%        25%        50%        75%
##    lambda   39.49634 1.4052490   36.82277   38.54649   39.46433   40.42725
##    lp__   2024.21807 0.6702789 2022.31815 2024.05589 2024.47861 2024.64771
##          stats
## parameter      97.5%
##    lambda   42.32226
##    lp__   2024.69606
## 
## , , chains = chain:2
## 
##          stats
## parameter       mean        sd       2.5%        25%        50%        75%
##    lambda   39.54668 1.4194493   36.78689   38.57343   39.55285   40.49938
##    lp__   2024.20801 0.6902842 2022.34648 2024.07050 2024.47222 2024.64202
##          stats
## parameter      97.5%
##    lambda   42.37336
##    lp__   2024.69602
## 
## , , chains = chain:3
## 
##          stats
## parameter      mean        sd      2.5%        25%        50%        75%
##    lambda   39.4656 1.4374325   36.7687   38.48009   39.45604   40.40563
##    lp__   2024.1952 0.7129319 2022.2476 2024.02661 2024.47503 2024.64926
##          stats
## parameter      97.5%
##    lambda   42.38421
##    lp__   2024.69592
## 
## , , chains = chain:4
## 
##          stats
## parameter       mean        sd       2.5%        25%        50%        75%
##    lambda   39.57531 1.4312783   36.80853   38.61366   39.56248   40.52277
##    lp__   2024.20032 0.7049118 2022.16163 2024.04622 2024.47269 2024.65256
##          stats
## parameter    97.5%
##    lambda   42.431
##    lp__   2024.696

# Diagnóstico visual para las cadenas de Markov

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Gráfico de Trazas para las  4 cadenas de Markov simuladas
color_scheme_set("mix-blue-green")
trace <- mcmc_trace(Stream_GP_Sim, pars = "lambda", size = 0.1) +
  labs (x='Iteration',y='lambda value')

# densidades de valores de las 4 cadenas de Markov simuladas
mcmc_dens <- mcmc_dens_overlay(Stream_GP_Sim, pars = "lambda")

# violin plot: densidades de valores de las 4 cadenas de Markov simuladas (con los cuantiles: 0.05, 0.5 y 0.95)
color_scheme_set("teal")
mcmc_violin <- mcmc_violin(Stream_GP_Sim, pars = "lambda",probs = c(0.05, 0.5, 0.95))

# Mostrar los 3 gráficos
trace+mcmc_dens+mcmc_violin

# Gráficos de autocorrelación (de linea y de barra)
color_scheme_set("brightblue")
mcmc_acf<-mcmc_acf(Stream_GP_Sim, pars = "lambda")

mcmc_acf_bar<-(p <- mcmc_acf_bar(Stream_GP_Sim, pars = "lambda"))
mcmc_acf + mcmc_acf_bar

# Diagnóstico numérico para las cadenas de Markov

rhat(Stream_GP_Sim, pars = "lambda")

## [1] 1.000692

neff_ratio(Stream_GP_Sim, pars = "lambda")

## [1] 0.3509109

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Histograma y densidad de valores de las 4 cadenas de Markov simuladas

h<-mcmc_hist(Stream_GP_Sim, pars = "lambda")  +
  labs (x='lambda',y='count',
        title='Histograma de valores MCMC')
d<-mcmc_dens(Stream_GP_Sim, pars = "lambda") + 
  labs (x='lambda',y='density',
        title='Densidad de valores MCMC') 
h+d

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#  Densidad de valores MCMC vs densidad del modelo a posteriori conocido
color_scheme_set("teal")
mcmc_dens(Stream_GP_Sim, pars = "lambda") + 
  labs (x='lambda',y='density',
        title='Densdad de valores MCMC vs Densidad de modelo a Posteriori') +
  stat_function (
    fun=dgamma,
    args=list(shape=756.5 ,rate=19.15),
    lwd = 2,
    color="black"
  )

# Aproximación de la media  y los intervalos de credibilidad a posteriori para  λ


# Intervalos de credibilidad a posteriori al 50%
tidy(Stream_GP_Sim, conf.int = TRUE, conf.level = 0.5)

## # A tibble: 1 × 5
##   term   estimate std.error conf.low conf.high
##   <chr>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 lambda     39.5      1.42     38.6      40.5

# Shade in the middle 50% interval
mcmc_areas_50<-mcmc_areas(Stream_GP_Sim, pars = "lambda", prob = 0.5)+
  labs (title='IC posterior MCMC al 50% para λ')

# Intervalos de credibilidad a posteriori al 95%
tidy(Stream_GP_Sim, conf.int = TRUE, conf.level = 0.95)

## # A tibble: 1 × 5
##   term   estimate std.error conf.low conf.high
##   <chr>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 lambda     39.5      1.42     36.8      42.4

# Shade in the middle 95% interval
mcmc_areas_95<-mcmc_areas(Stream_GP_Sim, pars = "lambda", prob = 0.95)+
  labs (title='IC posterior MCMC al 95% para λ')

# Intervalos de credibilidad a posteriori al 99%
tidy(Stream_GP_Sim, conf.int = TRUE, conf.level = 0.99)

## # A tibble: 1 × 5
##   term   estimate std.error conf.low conf.high
##   <chr>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 lambda     39.5      1.42     36.0      43.4

# Shade in the middle 95% interval
mcmc_areas_99<-mcmc_areas(Stream_GP_Sim, pars = "lambda", prob = 0.99)+
  labs (title='IC posterior MCMC al 99% para λ')

mcmc_areas_50+mcmc_areas_95+mcmc_areas_99

# Ordenar las 4 cadenas de Markov 1 data frame
Stream_chains_df <- as.data.frame(Stream_GP_Sim, pars = "lp__", include = FALSE)
dim(Stream_chains_df)

## [1] 20000     1

# Resumen de medidas a posteriori aproximadas por MCMC para λ
Stream_chains_df %>% 
  summarize(post_mean = mean(lambda), 
            post_median = median(lambda),
            post_mode = sample_mode(lambda),
            lower_50 = quantile(lambda, 0.25),
            upper_50 = quantile(lambda, 0.75),
            lower_95 = quantile(lambda, 0.025),
            upper_95 = quantile(lambda, 0.975),
            lower_99 = quantile(lambda, 0.005),
            upper_99 = quantile(lambda, 0.995))

##   post_mean post_median post_mode lower_50 upper_50 lower_95 upper_95 lower_99
## 1  39.52098    39.50685  39.33384 38.55602 40.46938 36.78887  42.3887 35.99579
##   upper_99
## 1  43.3592

# Aproximación P(λ<42|y) con MCMC
Stream_chains_df %>% 
  mutate(exceeds = lambda < 42) %>% 
  tabyl(exceeds)

##  exceeds     n percent
##    FALSE   872  0.0436
##     TRUE 19128  0.9564

# Predicción a posteriori con MCMC

# Set the seed
set.seed(13)

# Predecir un valor de Y' por cada valor λ en la cadena
Stream_chains_df <- Stream_chains_df %>% 
  mutate(y_predict = rpois(length(lambda) , lambda))

# Chequear
Stream_chains_df %>% 
  head(5)

##     lambda y_predict
## 1 39.58967        43
## 2 39.38215        37
## 3 39.28168        24
## 4 38.39951        45
## 5 38.45596        41

mean(Stream_chains_df$y_predict)

## [1] 39.59695

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Histograma predictivo a posteriori por MCMC
mcmc_hist<-mcmc_hist(Stream_chains_df, pars = "y_predict") + 
  labs (x='y_predict',y='density',
        title='Histograma predictivo a posteriori por MCMC') 

# Densidad predictiva a posteriori por MCMC
mcmc_dens<-mcmc_dens(Stream_chains_df, pars = "y_predict") + 
  labs (x='y_predict',y='density',
        title='Densidad predictiva a posteriori por MCMC') 

mcmc_hist+mcmc_dens

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Resumen de medidas de predicción a posteriori aproximadas por MCMC para λ

Stream_chains_df %>% 
  summarize(mean = mean(y_predict), 
            median = median(y_predict),
            mode = sample_mode(y_predict),
            lower_50 = quantile(y_predict, 0.25),
            upper_50 = quantile(y_predict, 0.75),
            lower_95 = quantile(y_predict, 0.025),
            upper_95 = quantile(y_predict, 0.975),
            lower_99 = quantile(y_predict, 0.005),
            upper_99 = quantile(y_predict, 0.995))

##       mean median     mode lower_50 upper_50 lower_95 upper_95 lower_99
## 1 39.59695     39 38.40575       35       44       27       53       24
##   upper_99
## 1       57

#------------------------------------------------------------------------------------------------------

# Algoritmo Metropolis-Hastings (con 9 observaciones de las 19)

# usamos las 9 primeras observaciones 
y3_1 <- y3[1:9]
y3_1

## [1] 202 339  14  12  13  13  10  13   4

sum(y3_1)

## [1] 620

# Análisis con Bayesrules 
# Summarize
summarize_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 620, n = 9)

##       model shape rate     mean     mode        var        sd
## 1     prior  13.5 0.15 90.00000 83.33333 600.000000 24.494897
## 2 posterior 633.5 9.15 69.23497 69.12568   7.566664  2.750757

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Plot_gamma_poisson
plot_gamma_poisson(shape = 13.5, rate = 0.15, sum_y = 620, n = 9)

# Curvas de modelos propuestos para λ′|λ (exponenciales)
par(mfrow = c(2, 3))

curve(dexp(x,0.004),0,100, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Proposal:λ′|λ~Exp(0.004)", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.1))

curve(dexp(x,0.008),0,100, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Proposal:λ′|λ~Exp(0.008)", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.1))

curve(dexp(x,0.01),0,100, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Proposal:λ′|λ~Exp(0.01)", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.1))

curve(dexp(x,0.02),0,100, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Proposal:λ′|λ~Exp(0.02)", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.1))

curve(dexp(x,0.06),0,100, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Proposal:λ′|λ~Exp(0.06)", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.1))

curve(dexp(x,0.1),0,100, # density plot
      xlab = "Streams",
      ylab = "Density",
      main = "Proposal:λ′|λ~Exp(0.1)", 
      lwd = 4, # thickness of line
      col = "orange",
      ylim = c(0,0.1))

# Algorithmo

one_iteration <- function(b, current){
  # STEP 1: Propose the next chain location
  proposal <- rexp(1,b)
  # STEP 2: Decide whether or not to go there
  k<-1
  proposal_plaus <- dgamma(proposal, 13.5, 0.15)
  current_plaus <- dgamma(current, 13.5, 0.15)
  while(k <= length(y3_1)){    
    proposal_plaus <- proposal_plaus * dpois(y3_1[k],lambda = proposal) 
    current_plaus <- current_plaus * dpois(y3_1[k],lambda = current)
    k=k+1
  }
  proposal_q     <- dexp(proposal,b)
  current_q      <- dexp(current,b)
  alpha <- min(1, proposal_plaus / current_plaus * current_q / proposal_q)
  next_stop <- sample(c(proposal, current), 
                      size = 1, prob = c(alpha,1-alpha))
  return(data.frame(proposal, alpha, next_stop))
}



gammapois_tour <- function(N,b){
  # 1. Start the chain at location 0.5
  current <- 70
  
  # 2. Initialize the simulation
  lambda <- rep(0, N)
  
  # 3. Simulate N Markov chain stops
  for(i in 1:N){    
    # Simulate one iteration
    sim <- one_iteration(b = b, current = current)
    
    # Record next location
    lambda[i] <- sim$next_stop
    
    # Reset the current location
    current <- sim$next_stop
  }
  
  # 4. Return the chain locations
  return(data.frame(iteration = c(1:N), lambda))
}

# Gráficos de trazas según modelos de propuesta para λ′|λ

par(mfrow = c(1, 1)) #ventana para un solo gráfico

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.004)
t1<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) + 
  geom_line()+ 
  labs(
    title ="Proposal:λ′|λ~Exp(0.004)")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.008)
t2<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) + 
  geom_line()+ 
  labs(
    title ="Proposal:λ′|λ~Exp(0.008)")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.01)
t3<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) + 
  geom_line()+ 
  labs(
    title ="Proposal:λ′|λ~Exp(0.01)")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.02)
t4<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) + 
  geom_line()+ 
  labs(
    title ="Proposal:λ′|λ~Exp(0.02)")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.06)
t5<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) + 
  geom_line()+ 
  labs(
    title ="Proposal:λ′|λ~Exp(0.06)")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.1)
t6<-ggplot(gammapois_sim , aes(x = iteration, y = lambda)) + 
  geom_line()+ 
  labs(
    title ="Proposal:λ′|λ~Exp(0.1)")

t1+t2+t3+t4+t5+t6

par(mfrow = c(1, 1)) #ventana para un solo gráfico
# Estimaciones de densidades según modelos de propuesta para λ′|λ

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.004)
h1<-ggplot(gammapois_sim, aes(x = lambda)) + 
  geom_histogram(aes(y = ..density..), color = "white") + 
  labs(
    title ="Proposal:λ′|λ~Exp(0.004)")+
  stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.008)
h2<-ggplot(gammapois_sim, aes(x = lambda)) + 
  geom_histogram(aes(y = ..density..), color = "white") + 
  labs(
    title ="Proposal:λ′|λ~Exp(0.008)")+
  stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.01)
h3<-ggplot(gammapois_sim, aes(x = lambda)) + 
  geom_histogram(aes(y = ..density..), color = "white") + 
  labs(
    title ="Proposal:λ′|λ~Exp(0.01)")+
  stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.02)
h4<-ggplot(gammapois_sim, aes(x = lambda)) + 
  geom_histogram(aes(y = ..density..), color = "white")+
  labs(
    title ="Proposal:λ′|λ~Exp(0.04)")+
  stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.06)
h5<-ggplot(gammapois_sim, aes(x = lambda)) + 
  geom_histogram(aes(y = ..density..), color = "white")+
  labs(
    title ="Proposal:λ′|λ~Exp(0.06)")+
  stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.1)
h6<-ggplot(gammapois_sim, aes(x = lambda)) + 
  geom_histogram(aes(y = ..density..), color = "white")+
  labs(
    title ="Proposal:λ′|λ~Exp(0.1)")+
  stat_function(fun = dgamma, args = list(633.5,9.15),lwd=1.5, color = "blue")


h1+h2+h3+h4+h5+h6

## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Nos quedamos con proposal model:λ′|λ~Exp(0.008)

set.seed(131184)
gammapois_sim <- gammapois_tour(N = 20000, b = 0.008)

# Chequear data.frame de simulación
head(gammapois_sim,20)

##    iteration   lambda
## 1          1 70.00000
## 2          2 70.00000
## 3          3 70.00000
## 4          4 70.00000
## 5          5 70.00000
## 6          6 70.00000
## 7          7 70.00000
## 8          8 70.00000
## 9          9 70.82041
## 10        10 70.82041
## 11        11 70.82041
## 12        12 70.82041
## 13        13 70.82041
## 14        14 70.82041
## 15        15 70.82041
## 16        16 70.82041
## 17        17 70.82041
## 18        18 70.82041
## 19        19 70.82041
## 20        20 65.82388

# Resumen de medidas a posteriori aproximadas por Metropolis-Hastings para λ

gammapois_sim %>% 
  summarize(mean = mean(gammapois_sim$lambda), 
            median = median(gammapois_sim$lambda),
            mode = sample_mode(gammapois_sim$lambda),
            lower_50 = quantile(gammapois_sim$lambda, 0.25),
            upper_50 = quantile(gammapois_sim$lambda, 0.75),
            lower_95 = quantile(gammapois_sim$lambda, 0.025),
            upper_95 = quantile(gammapois_sim$lambda, 0.975),
            lower_99 = quantile(gammapois_sim$lambda, 0.005),
            upper_99 = quantile(gammapois_sim$lambda, 0.995))

##       mean   median     mode lower_50 upper_50 lower_95 upper_95 lower_99
## 1 69.25382 69.12313 69.12911 67.30771 70.97914 64.44451 74.92324 62.42392
##   upper_99
## 1 76.57016

# Resumen de medidas del modelo exato a posteriori
summarize_gamma(shape = 633.5, rate = 9.15)

##       mean     mode      var       sd
## 1 69.23497 69.12568 7.566664 2.750757

# Mediana a posteriori
qgamma(0.5,633.5,9.15)

## [1] 69.19855

# Intervalo de credibilidad a posteriori para λ al 50%
qgamma(c(0.25, 0.75),633.5,9.15)

## [1] 67.36029 71.06994

# Intervalo de credibilidad a posteriori para λ al 95%
qgamma(c(0.025, 0.975),633.5,9.15)

## [1] 63.94784 74.72911

# Intervalo de credibilidad a posteriori para λ al 99%
qgamma(c(0.005, 0.995),633.5,9.15)

## [1] 62.35485 76.52558

# Predicción a posteriori con M-H

# Set the seed
set.seed(13)

# Predecir un valor de Y' por cada valor λ de la simulación M-H
gammapois_sim <- gammapois_sim %>% 
  mutate(y_predict = rpois(length(lambda) , lambda))

# Chequear
gammapois_sim %>% 
  head(5)

##   iteration lambda y_predict
## 1         1     70        74
## 2         2     70        67
## 3         3     70        50
## 4         4     70        79
## 5         5     70        73

mean(gammapois_sim$y_predict)

## [1] 69.2812

par(mfrow = c(1, 1)) #ventana para un solo gráfico

# Histograma y densidad predictiva a posteriori por M-H
hist(x = gammapois_sim$y_predict,xlab = "y_predict",col="paleturquoise3", main="Histograma y densidad predictiva a posteriori por M-H",prob = TRUE,breaks = 30)
lines(density(gammapois_sim$y_predict), # density plot
      lwd = 3, # thickness of line
      col = "black")

Parcial 18/10/2024

Sebastián Grimberg