Using the current settings, the following are simulated:
library(tidyverse)
simulation <- function(rr,n_herds,prevalence_of_exposure,sample_size_case,sample_size_control){
herd <- tibble(
id = seq(1:n_herds),
exposure = rbinom(n_herds,1,prevalence_of_exposure),
unexposed_dead_risk = rnorm(n_herds,mean = 0.3,sd=0.04),
exposed_dead_risk = unexposed_dead_risk*rr,
risk = ifelse(exposure==1,exposed_dead_risk,unexposed_dead_risk),
dead = rbinom(n_herds,1,risk)
) %>% select(id,exposure,dead)
tab1 <- herd %>% summarise(
exp1_outcome_1 = sum(exposure==1 & dead==1),
exp0_outcome_1 = sum(exposure==0 & dead==1),
exp1_outcome_0 = sum(exposure==1 & dead==0),
exp0_outcome_0 = sum(exposure==0 & dead==0),
total_exp1 = exp1_outcome_1 + exp1_outcome_0,
total_exp0 = exp0_outcome_1 + exp0_outcome_0,
true_risk_ratio = round((exp1_outcome_1 / total_exp1) / (exp0_outcome_1 / total_exp0),2),
true_odds_ratio = round((exp1_outcome_1 / exp1_outcome_0) / (exp0_outcome_1 / exp0_outcome_0),2)
) %>% select(true_risk_ratio,true_odds_ratio)
cases <- herd %>% filter(dead==1) %>% slice_sample(n = sample_size_case)
cases$case = 1
controls_all <- herd %>% slice_sample(n = sample_size_control)
controls_all$case = 0
case_control_all <- rbind(cases,controls_all)
tab2 <- case_control_all %>% summarise(
exp1_outcome_1 = sum(exposure==1 & case==1),
exp0_outcome_1 = sum(exposure==0 & case==1),
exp1_outcome_0 = sum(exposure==1 & case==0),
exp0_outcome_0 = sum(exposure==0 & case==0),
total_exp1 = exp1_outcome_1 + exp1_outcome_0,
total_exp0 = exp0_outcome_1 + exp0_outcome_0,
cross_product_random_selection_controls = round((exp1_outcome_1 / exp1_outcome_0) / (exp0_outcome_1 / exp0_outcome_0),2)
) %>% select(cross_product_random_selection_controls)
controls_dead0 <- herd %>% filter(dead==0) %>% slice_sample(n = sample_size_control)
controls_dead0$case = 0
case_control_dead0 <- rbind(cases,controls_dead0)
tab3 <- case_control_dead0 %>% summarise(
exp1_outcome_1 = sum(exposure==1 & case==1),
exp0_outcome_1 = sum(exposure==0 & case==1),
exp1_outcome_0 = sum(exposure==1 & case==0),
exp0_outcome_0 = sum(exposure==0 & case==0),
total_exp1 = exp1_outcome_1 + exp1_outcome_0,
total_exp0 = exp0_outcome_1 + exp0_outcome_0,
cross_product_non_cases_as_controls = round((exp1_outcome_1 / exp1_outcome_0) / (exp0_outcome_1 / exp0_outcome_0),2)
) %>% select(cross_product_non_cases_as_controls)
cbind(tab1,tab2,tab3) %>% as.data.frame()
}
In this iteration, the exposure of interest increases the risk of death by 2 times (rr = 2). We simulate 10000 herds where the prevalence of exposure is 0.4. The baseline risk of death is 0.3 on average.
After the 10000 herds are simulated (i.e. a source cohort), then we conduct a case-control study by randomly selecting cases (200 herds where death occurred) and then another 400 controls. There are two ways we select controls. The first way is to take a random selection of non-cases (i.e. herds where death = 0). The second way is to take a random sample of herds, ignoring their outcome status completely (death = 0 or 1).
Finally the cross-product (aka exosure odds ratio) is calculated for each types of case-control studies.
simulation(rr=2,
n_herds = 10000,
prevalence_of_exposure = 0.4,
sample_size_case = 200,
sample_size_control = 400) %>% t
## [,1]
## true_risk_ratio 2.09
## true_odds_ratio 3.75
## cross_product_random_selection_controls 1.93
## cross_product_non_cases_as_controls 3.51
simulation_output <- replicate(1000,simulation(rr=2,
n_herds = 100000,
prevalence_of_exposure = 0.4,
sample_size_case = 200,
sample_size_control = 400),
simplify = T
) %>% as.data.frame %>% t %>% as.data.frame()
simulation_output$true_risk_ratio <- simulation_output$true_risk_ratio %>% as.numeric
simulation_output$true_odds_ratio <- simulation_output$true_odds_ratio %>% as.numeric
simulation_output$cross_product_random_selection_controls <- simulation_output$cross_product_random_selection_controls %>% as.numeric
simulation_output$cross_product_non_cases_as_controls <- simulation_output$cross_product_non_cases_as_controls %>% as.numeric
simulation_output %>% summarise(
average_true_rr = round(mean(true_risk_ratio,na.rm=T),2),
controls_selected_at_random = round(mean(cross_product_random_selection_controls),2),
average_true_or = round(mean(true_odds_ratio,na.rm=T),2),
controls_are_non_cases = round(mean(cross_product_non_cases_as_controls),2)
) %>% t
## [,1]
## average_true_rr 2.00
## controls_selected_at_random 2.02
## average_true_or 3.50
## controls_are_non_cases 3.56