Define the simulation function

run_sim1 <- function(studyspecific_probs, n_obs_per_study, seed = 1) {
  # dev-only block: example arguments for stepping through the function
  # interactively; never executes because the condition is FALSE
  if (FALSE) {
    studyspecific_probs <- seq(0.1, 0.5, by = 0.05)
    n_obs_per_study <- 20
    seed <- 1
  }
  
  require(dplyr)
  set.seed(seed)
  
  n_studies <- length(studyspecific_probs)
  n_obs_total <- n_studies*n_obs_per_study
  
  df1 <- data.frame(row_id = 1:n_obs_total) %>%
    mutate(
      study_id = rep(1:n_studies, each = n_obs_per_study),
      # scheme 1: each study keeps its own fixed probability; every individual
      # in that study gets a Bernoulli draw with that study's probability
      prob1 = rep(studyspecific_probs, each = n_obs_per_study),
      draw1 = rbinom(n = n_obs_total, size = 1, prob = prob1),
      # scheme 2: each individual's probability is resampled (with replacement)
      # from the same set of study-specific probabilities, ignoring study
      # membership, and then used for that individual's Bernoulli draw
      prob2 = sample(x = studyspecific_probs, size = n_obs_total, replace = TRUE),
      draw2 = rbinom(n = n_obs_total, size = 1, prob = prob2)
    )
  
  # back-transformed mean of the study-specific probabilities on the logit scale
  binom_mean <- plogis(mean(qlogis(studyspecific_probs)))
  # binomial standard error for a prevalence estimate based on n_obs_total draws
  binom_se <- sqrt(binom_mean * (1 - binom_mean) / n_obs_total)
  
  df2 <- df1 %>%
    group_by(study_id) %>%
    summarize(
      prev1 = sum(draw1) / n(),
      prev2 = sum(draw2) / n()
    )
  
  metrics1 <- with(df2, list(
    mean1 = mean(prev1), sd1 = sd(prev1),
    mean2 = mean(prev2), sd2 = sd(prev2)
  ))
  
  out <- list(df1=df1, df2=df2, metrics1=metrics1, binom_se=binom_se)
  return(out)
  
}
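
A quick sanity check on a single run (a sketch; the object name check1 is just illustrative, and the arguments match the full loop below):

check1 <- run_sim1(
  studyspecific_probs = seq(0.1, 0.5, by = 0.05),
  n_obs_per_study = 20, seed = 1
)
str(check1$metrics1)  # mean1/sd1: study-specific p; mean2/sd2: resampled p
check1$binom_se       # theoretical binomial SE, for comparison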

Run the sim on random realizations

n_iterations <- 100
results1 <- lapply(1:n_iterations, function(i) {
  
  run_sim1(
    studyspecific_probs = seq(0.1, 0.5, by = 0.05), 
    n_obs_per_study = 20, seed = i
  )
  
})
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
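
Each element of results1 holds one realization; a quick structural check (a sketch):

length(results1)      # 100, one element per iteration
names(results1[[1]])  # "df1" "df2" "metrics1" "binom_se"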

Show the data; first iteration only

Rows unique by individual

library(DT)
DT::datatable(data = results1[[1]]$df1)

Rows unique by study (after calculating prevalence)

DT::datatable(data = results1[[1]]$df2)

Plot the results

library(tidyr)

# one row of summary metrics per iteration, reshaped to long format
# (columns iter, metric, value) for plotting
df_results <- bind_rows(lapply(results1, function(x) {
  as.data.frame(append(x$metrics1, list(binom_se = x$binom_se)))
})) %>%
  mutate(iter = row_number()) %>%
  pivot_longer(cols = -iter, names_to = "metric")

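Before plotting, the metrics can be averaged across iterations to compare the two sampling schemes against the theoretical binomial SE (a sketch):

df_results %>%
  group_by(metric) %>%
  summarize(mean_value = mean(value))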

library(ggplot2)
df_plot1 <- df_results %>% filter(metric %in% c("sd1", "sd2"))
ggplot(data = df_plot1) +
  geom_violin(mapping = aes(x = metric, y = value)) +
  ylim(c(0, NA)) +
  ggtitle("Standard deviation of prevalence estimates (100 iterations)") +
  facet_wrap(~metric)