# Fix the RNG seed so every run draws the same outcomes
set.seed(123)

# Phase 1: Written Exam -- 650 applicants, Pass with probability 3/4
phase_1 <- data.frame(
  ID = seq_len(650),
  Phase1 = sample(c("Pass", "Fail"), 650, replace = TRUE, prob = c(3/4, 1/4))
)

# Phase 2: only Phase 1 passers continue; Pass with probability 9/10
phase_2 <- phase_1 %>%
  filter(Phase1 == "Pass") %>%
  mutate(
    Phase2 = sample(
      c("Pass", "Fail"), n(),
      replace = TRUE, prob = c(9/10, 1/10)
    )
  )

# Phase 3a: only Phase 2 passers continue; 4/10 are disqualified
phase_3a <- phase_2 %>%
  filter(Phase2 == "Pass") %>%
  mutate(
    Phase3a = sample(
      c("Pass", "Disqualified"), n(),
      replace = TRUE, prob = c(6/10, 4/10)
    )
  )

# Phase 3b: only Phase 3a passers continue; Pass with probability 2/3
phase_3b <- phase_3a %>%
  filter(Phase3a == "Pass") %>%
  mutate(
    Phase3b = sample(
      c("Pass", "Fail"), n(),
      replace = TRUE, prob = c(2/3, 1/3)
    )
  )

# Phase 4: only Phase 3b passers continue; Pass with probability 4/5
phase_4 <- phase_3b %>%
  filter(Phase3b == "Pass") %>%
  mutate(
    Phase4 = sample(
      c("Pass", "Fail"), n(),
      replace = TRUE, prob = c(4/5, 1/5)
    )
  )

# Phase 5: only Phase 4 passers are accepted; each one graduates, resigns,
# or is terminated with probabilities 3/5, 1/5 and 1/5
phase_5 <- phase_4 %>%
  filter(Phase4 == "Pass") %>%
  mutate(
    Phase5 = sample(
      c("Graduate", "Resign", "Termination"), n(),
      replace = TRUE, prob = c(3/5, 1/5, 1/5)
    )
  )

# Keep only the applicants who graduated
graduates <- filter(phase_5, Phase5 == "Graduate")
# Ensure 8-12% of all applicants graduate. The rate is measured against the
# full applicant pool (phase_1), not as a raw head count of graduates.
n_applicants <- nrow(phase_1)

# TRUE when `n_grad` graduates fall outside 8-12% of the applicants.
# Comparing 100 * count against pct * total keeps the boundaries exact
# (no floating-point division).
grad_count_off_target <- function(n_grad) {
  100 * n_grad < 8 * n_applicants || 100 * n_grad > 12 * n_applicants
}

# Redraw Phase 5 with a graduation probability centred on a 10% overall rate;
# the remaining probability is split evenly between the other two outcomes.
# max(..., 1) guards the division when nobody reached Phase 5.
p_graduate <- min(1, 0.10 * n_applicants / max(nrow(phase_5), 1))
p_other <- (1 - p_graduate) / 2

# Cap the number of redraws to avoid an infinite loop
max_iterations <- 1000
iteration <- 0
while (grad_count_off_target(nrow(graduates)) && iteration < max_iterations) {
  phase_5$Phase5 <- sample(
    c("Graduate", "Resign", "Termination"),
    nrow(phase_5),
    replace = TRUE,
    prob = c(p_graduate, p_other, p_other)
  )
  graduates <- phase_5 %>% filter(Phase5 == "Graduate")
  iteration <- iteration + 1
}

# Warn only if the final draw is still off target, so a success on the very
# last permitted iteration does not raise a spurious warning.
if (grad_count_off_target(nrow(graduates))) {
  warning("Reached maximum number of iterations to adjust graduates. The result may not be within the 8-12% range.")
}
## Warning: Reached maximum number of iterations to adjust graduates. The result
## may not be within the 8-12% range.
# Combine all data for the final dataset: stack the applicants eliminated at
# each phase with everyone who reached Phase 5. Phases an applicant never
# reached are filled with NA_character_ so every column stays character.
final_data <- bind_rows(
  phase_1 %>%
    filter(Phase1 == "Fail") %>%
    mutate(
      Phase2 = NA_character_, Phase3a = NA_character_,
      Phase3b = NA_character_, Phase4 = NA_character_,
      Phase5 = NA_character_
    ),
  phase_2 %>%
    filter(Phase2 == "Fail") %>%
    mutate(
      Phase3a = NA_character_, Phase3b = NA_character_,
      Phase4 = NA_character_, Phase5 = NA_character_
    ),
  phase_3a %>%
    filter(Phase3a == "Disqualified") %>%
    mutate(
      Phase3b = NA_character_, Phase4 = NA_character_,
      Phase5 = NA_character_
    ),
  phase_3b %>%
    filter(Phase3b == "Fail") %>%
    mutate(Phase4 = NA_character_, Phase5 = NA_character_),
  # Phase4 only takes the values "Pass"/"Fail"; "Resign"/"Termination" are
  # Phase5 outcomes, so filtering on them here dropped every Phase 4 failure.
  phase_4 %>%
    filter(Phase4 == "Fail") %>%
    mutate(Phase5 = NA_character_),
  phase_5
)

# View final data
head(final_data)
## ID Phase1 Phase2 Phase3a Phase3b Phase4 Phase5
## 1 2 Fail <NA> <NA> <NA> <NA> <NA>
## 2 4 Fail <NA> <NA> <NA> <NA> <NA>
## 3 5 Fail <NA> <NA> <NA> <NA> <NA>
## 4 8 Fail <NA> <NA> <NA> <NA> <NA>
## 5 11 Fail <NA> <NA> <NA> <NA> <NA>
## 6 16 Fail <NA> <NA> <NA> <NA> <NA>
## `summarise()` has grouped output by 'Phase'. You can override using the
## `.groups` argument.
## `summarise()` has grouped output by 'Phase'. You can override using the
## `.groups` argument.
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.