# Fix the RNG seed so the synthetic sample is reproducible across runs.
set.seed(2026)

# Generate 100 synthetic clients whose columns mirror those of `bank_data`.
# Every column is drawn independently of the others, so relationships between
# columns in the real data are not reproduced here.
nuevos_datos <- local({
  # Single source of truth for the sample size (kept local to avoid adding
  # new names to the global environment).
  n_clientes <- 100
  edades <- 18:95

  # Draw `n_clientes` values uniformly from the levels of `columna` and return
  # them as a factor carrying the complete original level set, so the
  # synthetic column has the same type and levels as its source column.
  muestrear_niveles <- function(columna) {
    niveles <- levels(columna)
    factor(
      sample(niveles, n_clientes, replace = TRUE),
      levels = niveles,
      ordered = is.ordered(columna)
    )
  }

  # Draw `n_clientes` values from a normal distribution parameterised by the
  # mean and standard deviation of `columna`.
  muestrear_normal <- function(columna) {
    rnorm(n_clientes, mean(columna), sd(columna))
  }

  tibble(
    # Integer ages in 18..95, weighted by a normal density fitted to the
    # observed ages.
    age = sample(
      edades, n_clientes, replace = TRUE,
      prob = dnorm(edades, mean = mean(bank_data$age), sd = sd(bank_data$age))
    ),
    job = muestrear_niveles(bank_data$job),
    marital = muestrear_niveles(bank_data$marital),
    education = muestrear_niveles(bank_data$education),
    default = muestrear_niveles(bank_data$default),
    housing = muestrear_niveles(bank_data$housing),
    loan = muestrear_niveles(bank_data$loan),
    contact = muestrear_niveles(bank_data$contact),
    month = muestrear_niveles(bank_data$month),
    day_of_week = muestrear_niveles(bank_data$day_of_week),
    # May come out negative here; clamped to zero after the tibble is built.
    duration = round(muestrear_normal(bank_data$duration)),
    # Hand-picked weights; they do not sum to exactly 1, which is fine because
    # sample() normalises `prob` internally.
    campaign = sample(
      1:10, n_clientes, replace = TRUE,
      prob = c(0.4, 0.3, 0.15, 0.08, 0.04, 0.02, 0.005, 0.003, 0.001, 0.0005)
    ),
    # Resample the observed values so the share of the 999 code matches the
    # data. The previous expression produced exactly one 999 among the 100
    # clients and 99 values in 0..30.
    # NOTE(review): 999 looks like the "not previously contacted" sentinel of
    # the bank-marketing data - confirm.
    pdays = sample(bank_data$pdays, n_clientes, replace = TRUE),
    previous = sample(
      0:5, n_clientes, replace = TRUE,
      prob = c(0.6, 0.25, 0.1, 0.03, 0.015, 0.005)
    ),
    poutcome = muestrear_niveles(bank_data$poutcome),
    emp_var_rate = muestrear_normal(bank_data$emp_var_rate),
    cons_price_idx = muestrear_normal(bank_data$cons_price_idx),
    cons_conf_idx = muestrear_normal(bank_data$cons_conf_idx),
    euribor3m = muestrear_normal(bank_data$euribor3m),
    nr_employed = muestrear_normal(bank_data$nr_employed)
  ) %>%
    # The normal draw can be negative; force durations to be non-negative.
    mutate(duration = pmax(0, duration))
})
# Render the first ten synthetic clients as a striped, hover-highlighted table.
nuevos_datos %>%
  head(10) %>%
  kable(caption = "Primeros 10 Clientes Sintéticos Generados") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))