SAFD Recruitment Funnel

# Set seed for reproducibility
set.seed(123)

# Simulates a recruitment funnel: every applicant sits phase 1, and only the
# candidates who pass a phase are carried into the next one. Each phase_*
# data frame holds the candidates who REACHED that phase plus their outcome.

# Return the rows of `df` whose `column` equals `value`, with row names reset.
# `%in%` is used instead of `==` so an NA outcome can never select a row.
rows_where <- function(df, column, value) {
  kept <- df[df[[column]] %in% value, , drop = FALSE]
  rownames(kept) <- NULL
  kept
}

# Carry the candidates who passed `passed_col` into the next phase and draw a
# random outcome for each of them into the new column `new_col`.
run_phase <- function(candidates, passed_col, new_col, outcomes, probs) {
  survivors <- rows_where(candidates, passed_col, "Pass")
  survivors[[new_col]] <- sample(
    outcomes, nrow(survivors), replace = TRUE, prob = probs
  )
  survivors
}

# TRUE when `n_grad` out of `n_total` lies within [lo_pct, hi_pct] percent.
pct_in_range <- function(n_grad, n_total, lo_pct, hi_pct) {
  pct <- 100 * n_grad / n_total
  pct >= lo_pct && pct <= hi_pct
}

# Phase 1: Written Exam
n_applicants <- 650L
phase_1 <- data.frame(
  ID = seq_len(n_applicants),
  Phase1 = sample(
    c("Pass", "Fail"), n_applicants, replace = TRUE, prob = c(3 / 4, 1 / 4)
  )
)

# Phases 2-4: only candidates who passed the previous phase move on
phase_2 <- run_phase(
  phase_1, "Phase1", "Phase2", c("Pass", "Fail"), c(9 / 10, 1 / 10)
)
phase_3a <- run_phase(
  phase_2, "Phase2", "Phase3a", c("Pass", "Disqualified"), c(6 / 10, 4 / 10)
)
phase_3b <- run_phase(
  phase_3a, "Phase3a", "Phase3b", c("Pass", "Fail"), c(2 / 3, 1 / 3)
)
phase_4 <- run_phase(
  phase_3b, "Phase3b", "Phase4", c("Pass", "Fail"), c(4 / 5, 1 / 5)
)

# Phase 5: final outcome for candidates accepted after phase 4
phase_5_outcomes <- c("Graduate", "Resign", "Termination")
phase_5 <- run_phase(
  phase_4, "Phase4", "Phase5", phase_5_outcomes, c(3 / 5, 1 / 5, 1 / 5)
)

# Filter graduates
graduates <- rows_where(phase_5, "Phase5", "Graduate")

# Ensure 8-12% graduate, add a limit to iterations to avoid infinite loop.
# The rate is measured against all applicants (assumed denominator of the
# "8-12%" target). The previous check compared the raw head count with 8 and
# 12, which the funnel could practically never satisfy.
min_grad_pct <- 8
max_grad_pct <- 12

# Resample with a graduation probability aimed at the middle of the target
# range; the remaining probability is split evenly between the other outcomes.
n_finalists <- nrow(phase_5)
target_graduates <- n_applicants * (min_grad_pct + max_grad_pct) / 200
grad_prob <- if (n_finalists > 0) {
  min(1, target_graduates / n_finalists)
} else {
  0
}
adjusted_probs <- c(grad_prob, (1 - grad_prob) / 2, (1 - grad_prob) / 2)

max_iterations <- 1000
iteration <- 0
while (
  !pct_in_range(nrow(graduates), n_applicants, min_grad_pct, max_grad_pct) &&
    iteration < max_iterations
) {
  phase_5$Phase5 <- sample(
    phase_5_outcomes, n_finalists, replace = TRUE, prob = adjusted_probs
  )
  graduates <- rows_where(phase_5, "Phase5", "Graduate")
  iteration <- iteration + 1
}

# Warn only when the final draw is still out of range, so a success on the
# last permitted iteration does not raise a false alarm.
if (!pct_in_range(nrow(graduates), n_applicants, min_grad_pct, max_grad_pct)) {
  warning("Reached maximum number of iterations to adjust graduates. The result may not be within the 8-12% range.")
}

## Warning: Reached maximum number of iterations to adjust graduates. The result
## may not be within the 8-12% range.

# Combine all data for final dataset: one row per applicant, taken from the
# phase in which they dropped out (or from phase_5 if they reached the end).
# bind_rows() matches columns by name and fills the phases a candidate never
# reached with NA, so no manual NA padding is needed.
final_data <- bind_rows(
  phase_1 %>% filter(Phase1 == "Fail"),
  phase_2 %>% filter(Phase2 == "Fail"),
  phase_3a %>% filter(Phase3a == "Disqualified"),
  phase_3b %>% filter(Phase3b == "Fail"),
  # Phase4 only takes the values "Pass"/"Fail"; filtering on "Resign" or
  # "Termination" (Phase5 outcomes) dropped every phase-4 failure.
  phase_4 %>% filter(Phase4 == "Fail"),
  phase_5
)


# View final data
head(final_data)

##   ID Phase1 Phase2 Phase3a Phase3b Phase4 Phase5
## 1  2   Fail   <NA>    <NA>    <NA>   <NA>   <NA>
## 2  4   Fail   <NA>    <NA>    <NA>   <NA>   <NA>
## 3  5   Fail   <NA>    <NA>    <NA>   <NA>   <NA>
## 4  8   Fail   <NA>    <NA>    <NA>   <NA>   <NA>
## 5 11   Fail   <NA>    <NA>    <NA>   <NA>   <NA>
## 6 16   Fail   <NA>    <NA>    <NA>   <NA>   <NA>

## `summarise()` has grouped output by 'Phase'. You can override using the
## `.groups` argument.

## `summarise()` has grouped output by 'Phase'. You can override using the
## `.groups` argument.

Note that the `echo = FALSE` parameter was added to the code chunk to prevent the R code that generated the plot from being printed.

SAFD Recruitment Funnel

Veronica Perez, Missy Portugual, Teofilo Reyes

2024-07-13