library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(brms)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.5.1
## Loading 'brms' package (version 2.22.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:stats':
## 
##     ar
library(bayesplot)
## This is bayesplot version 1.13.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting
## 
## Attaching package: 'bayesplot'
## The following object is masked from 'package:brms':
## 
##     rhat
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# ==== 1. Load data ====

ebpp <- read.csv("C:/DATA/EBPP.csv", header = TRUE, fileEncoding = "latin1")
# Tạo các biến mới
ebpp <- ebpp %>%
  mutate(id = factor(id),
         Se = (TP + 1) / (TP + FN + 1),
         Sp = (TN + 1) / (TN + FP + 1))

id_study <- ebpp %>%
  select(id, Study) %>%
  distinct()
# ==== 2. Fit Bayesian model for Sensitivity ====

fit_Se <- brm(
  formula = TP | trials(TP + FN) ~ 1 + (1 | id),
  data = ebpp,
  family = binomial(),
  prior = c(prior(normal(0, 5), class = "Intercept"),
            prior(exponential(1), class = sd)),
  chains = 4,
  iter = 4000,
  warmup = 1000,
  cores = 4,
  seed = 2025
)
## Compiling Stan program...
## Start sampling
fit_Se
##  Family: binomial 
##   Links: mu = logit 
## Formula: TP | trials(TP + FN) ~ 1 + (1 | id) 
##    Data: ebpp (Number of observations: 42) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~id (Number of levels: 42) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.38      0.18     1.07     1.76 1.00     3012     4529
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     2.12      0.23     1.67     2.57 1.00     2118     3519
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# ==== 3. Fit Bayesian model for Specificity ====

fit_Sp <- brm(
  formula = TN | trials(TN + FP) ~ 1 + (1 | id),
  data = ebpp,
  family = binomial(),
  prior = c(prior(normal(0, 5), class = "Intercept"),
            prior(exponential(1), class = sd)),
  chains = 4,
  iter = 4000,
  warmup = 1000,
  cores = 4,
  seed = 2025
)
## Compiling Stan program...
## Start sampling
fit_Sp
##  Family: binomial 
##   Links: mu = logit 
## Formula: TN | trials(TN + FP) ~ 1 + (1 | id) 
##    Data: ebpp (Number of observations: 42) 
##   Draws: 4 chains, each with iter = 4000; warmup = 1000; thin = 1;
##          total post-warmup draws = 12000
## 
## Multilevel Hyperparameters:
## ~id (Number of levels: 42) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.23      0.15     0.97     1.58 1.00     1810     3079
## 
## Regression Coefficients:
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     1.71      0.19     1.33     2.10 1.01     1018     2231
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# ==== 4. Posterior summaries ====
#plogis(fixef(fit_Se)["Intercept", "Estimate"]) # Trung bình độ nhạy
#plogis(fixef(fit_Sp)["Intercept", "Estimate"]) # Trung bình độ đặc hiệu
# ==== 5. Posterior distributions ====
draws_Se <- plogis(as_draws_df(fit_Se)$b_Intercept)
draws_Sp <- plogis(as_draws_df(fit_Sp)$b_Intercept)

p1 <- mcmc_areas(as_draws_df(fit_Se), pars = "b_Intercept") +
  ggtitle("Posterior logit(Sensitivity)")

p2 <- mcmc_areas(as_draws_df(fit_Sp), pars = "b_Intercept") +
  ggtitle("Posterior logit(Specificity)")

grid.arrange(p1, p2, ncol = 2)

# ==== 6. Histograms of posterior draws ==== 

hist(draws_Se, breaks = 40, col = "skyblue",
     main = "Posterior of Sensitivity",
     xlab = "Sensitivity")

hist(draws_Sp, breaks = 40, col = "salmon",
     main = "Posterior of Specificity",
     xlab = "Specificity")

# ==== 7. Forest plot - Sensitivity ====

mu_se <- fixef(fit_Se)["Intercept", "Estimate"]
post_Se <- as.data.frame(ranef(fit_Se)$id[, , "Intercept"]) %>%
  rename(Estimate = Estimate, Q2.5 = Q2.5, Q97.5 = Q97.5)

id_names <- rownames(ranef(fit_Se)$id)

forest_data_Se <- data.frame(id = id_names,
                             logit_mean = mu_se + post_Se$Estimate,
                             logit_lower = mu_se + post_Se$Q2.5,
                             logit_upper = mu_se + post_Se$Q97.5) %>%
  mutate(Se_mean = plogis(logit_mean),
         Se_lower = plogis(logit_lower),
         Se_upper = plogis(logit_upper)) %>%
  left_join(id_study, by = "id") %>%
  mutate(Study = ifelse(is.na(Study), id, Study))

ggplot(forest_data_Se, aes(x = Se_mean, y = reorder(Study, Se_mean))) +
  geom_point() +
  geom_errorbarh(aes(xmin = Se_lower, xmax = Se_upper), height = 0.2) +
  geom_vline(xintercept = plogis(mu_se), linetype = "dashed", color = "red") +
  labs(x = "Độ nhạy hậu định", y = "Nghiên cứu", title = "Forest plot - Sensitivity") +
  theme_minimal()

# ==== 8. Forest plot - Specificity ====

mu_sp <- fixef(fit_Sp)["Intercept", "Estimate"]
post_Sp <- as.data.frame(ranef(fit_Sp)$id[, , "Intercept"]) %>%
  rename(Estimate = Estimate, Q2.5 = Q2.5, Q97.5 = Q97.5)

id_names <- rownames(ranef(fit_Sp)$id)

forest_data_Sp <- data.frame(id = id_names,
                             logit_mean = mu_sp + post_Sp$Estimate,
                             logit_lower = mu_sp + post_Sp$Q2.5,
                             logit_upper = mu_sp + post_Sp$Q97.5) %>%
  mutate(Sp_mean = plogis(logit_mean),
         Sp_lower = plogis(logit_lower),
         Sp_upper = plogis(logit_upper)) %>%
  left_join(id_study, by = "id") %>%
  mutate(Study = ifelse(is.na(Study), id, Study))

ggplot(forest_data_Sp, aes(x = Sp_mean, y = reorder(Study, Sp_mean))) +
  geom_point() +
  geom_errorbarh(aes(xmin = Sp_lower, xmax = Sp_upper), height = 0.2) +
  geom_vline(xintercept = plogis(mu_sp), linetype = "dashed", color = "blue") +
  labs(x = "Độ đặc hiệu hậu định",
       y = "Nghiên cứu",
       title = "Forest plot - Specificity") +
  theme_minimal()

## het ##