# ---- Simulate the project-level data set ------------------------------------
# Builds `df_raw`: one row per simulated project (`n` rows) with categorical
# attributes, cost drivers, a derived construction cost and affordability
# measures. The seed is fixed so the simulation is reproducible.
#
# NOTE: the income and land-cost premiums below are keyed on each row's own
# `state` / `urban_rural` value. They were previously keyed on separate,
# independent sample() draws, which made the income premium apply to every row
# and the land premium unrelated to the row's urban/rural label. Removing those
# extra draws changes the random numbers consumed after them, so the values
# generated under this seed differ from runs of the earlier version.
set.seed(4791)
n <- 500

df_raw <- tibble(
  # Zero-padded identifiers: "NIA-0001", "NIA-0002", ...
  project_id = paste0("NIA-", sprintf("%04d", seq_len(n))),
  state = sample(
    c("Lagos", "Abuja", "Kano", "Rivers", "Oyo"), n,
    replace = TRUE, prob = c(0.28, 0.22, 0.18, 0.17, 0.15)
  ),
  urban_rural = sample(
    c("Urban", "Rural"), n,
    replace = TRUE, prob = c(0.65, 0.35)
  ),
  material_type = sample(
    c("Imported-Dominant", "Local-Dominant"), n,
    replace = TRUE, prob = c(0.58, 0.42)
  ),
  # Uniform base plus 80000 for rows whose state is Lagos or Abuja; rounded to
  # the nearest thousand (digits = -3). tibble() evaluates columns in order,
  # so `state` is already available here.
  income_monthly_ngn = round(
    runif(n, 80000, 900000) +
      ifelse(state %in% c("Lagos", "Abuja"), 80000, 0),
    -3
  ),
  # Uniform base plus 5 for rows labelled "Urban" in `urban_rural`.
  land_cost_m = round(
    runif(n, 1.5, 22) +
      ifelse(urban_rural == "Urban", 5, 0),
    2
  ),
  material_cost_idx = round(rnorm(n, 118, 22), 1),
  # Poisson count plus a uniform offset, rounded to a whole number.
  approval_months = round(rpois(n, 7) + runif(n, 0, 6)),
  interest_rate_pct = round(runif(n, 18.5, 31.5), 2)
) |>
  mutate(
    # Temporary additive components; dropped again at the end of the pipeline.
    cost_bump = if_else(material_type == "Imported-Dominant", 18.5, 5.0),
    urban_add = if_else(urban_rural == "Urban", 8.5, 0),
    # Linear combination of the drivers plus Gaussian noise (sd = 3.8).
    construction_cost_m = round(
      10.5 + cost_bump + urban_add +
        land_cost_m * 0.55 + material_cost_idx * 0.085 +
        approval_months * 0.12 + interest_rate_pct * 0.38 +
        rnorm(n, 0, 3.8),
      2
    )
  ) |>
  mutate(
    # Monthly income scaled to a yearly figure and divided by 1e6.
    annual_income_m = income_monthly_ngn * 12 / 1e6,
    cost_income_ratio = construction_cost_m / annual_income_m,
    # "Yes" when the ratio is at most 8; "No" is the first factor level.
    affordable = factor(
      if_else(cost_income_ratio <= 8, "Yes", "No"),
      levels = c("No", "Yes")
    ),
    # 100 minus the scaled ratio, with the scaled ratio capped at 100 so the
    # score cannot exceed the range implied by the cap.
    affordability_score = round(
      100 - pmin(cost_income_ratio / 0.38, 100), 1
    )
  ) |>
  dplyr::select(-cost_bump, -urban_add)
# ---- Inject missing values ---------------------------------------------------
# Re-seed, pick ten distinct row positions, then blank out `approval_months`
# in the first five of them and `material_cost_idx` in the remaining five.
set.seed(812)
mi <- sample(seq_len(n), size = 10)
df_raw[["approval_months"]][head(mi, 5)] <- NA
df_raw[["material_cost_idx"]][tail(mi, 5)] <- NA
# Report the number of rows and columns of the simulated data set.
cat("Dimensions:", dim(df_raw)[1], "x", dim(df_raw)[2], "\n")