1 Packages et données

library(haven)
library(tidyverse)
library(estimatr)
library(AER)
library(knitr)
library(kableExtra)
library(broom)
set.seed(42)

# Adaptez le chemin si nécessaire
el <- read_dta("Microcredit_EL_mini_anonym.dta")
bl <- read_dta("Microcredit_BL_mini_anonym.dta")
cat("EL:", nrow(el), "x", ncol(el), "\n")

## EL: 5551 x 4790

cat("BL:", nrow(bl), "x", ncol(bl), "\n")

## BL: 4465 x 3733

2 Construction des variables (traduit du do-file Stata)

2.1 Variables structurelles

# Variables de traitement et de strate
el <- el %>%
  mutate(
    pair_id   = as.factor(paire),
    treated   = as.integer(treatment),
    # Identifiant village : on utilise paire + treatment comme proxy
    village_id = as.factor(paste0(paire, "_", treatment))
  )

2.2 Accès au crédit (Section I du questionnaire)

# Le do-file utilise i3_j (type de prêteur) et i9_j (montant)
# pour j = 1 à 5 (prêts actifs 1-3, passés 4-5)
# Codes : 3=Al Amana, 1/2/15=autre formel, 7-14=informel

# Indicatrices de prêt : Al Amana, autre formel, informel
for (j in 1:3) {
  el[[paste0("alamana_a", j)]]      <- as.integer(el[[paste0("i3_", j)]] == 3)
  el[[paste0("other_formal_a", j)]] <- as.integer(el[[paste0("i3_", j)]] %in% c(1, 2, 15))
  el[[paste0("informal_a", j)]]     <- as.integer(el[[paste0("i3_", j)]] %in% 7:14)
  el[[paste0("oamc_a", j)]]         <- as.integer(el[[paste0("i3_", j)]] %in% c(4, 5, 6))
}
for (j in 4:5) {
  el[[paste0("alamana_p", j)]]      <- as.integer(el[[paste0("i3_", j)]] == 3)
  el[[paste0("other_formal_p", j)]] <- as.integer(el[[paste0("i3_", j)]] %in% c(1, 2, 15))
  el[[paste0("informal_p", j)]]     <- as.integer(el[[paste0("i3_", j)]] %in% 7:14)
  el[[paste0("oamc_p", j)]]         <- as.integer(el[[paste0("i3_", j)]] %in% c(4, 5, 6))
}

# Nombre total de prêts (actifs + passés)
el <- el %>% mutate(
  loans_alamana  = rowSums(select(., starts_with("alamana_")),  na.rm=TRUE),
  loans_oformal  = rowSums(select(., starts_with("other_formal_")), na.rm=TRUE),
  loans_informal = rowSums(select(., starts_with("informal_")), na.rm=TRUE),
  loans_oamc     = rowSums(select(., starts_with("oamc_")),     na.rm=TRUE)
)

# Dummies : a-t-on emprunté au moins une fois ?
el <- el %>% mutate(
  borrowed_alamana  = as.integer(loans_alamana  >= 1),
  borrowed_oformal  = as.integer(loans_oformal  >= 1),
  borrowed_informal = as.integer(loans_informal >= 1),
  borrowed_total    = as.integer((loans_alamana + loans_oformal +
                                  loans_informal + loans_oamc) >= 1)
)

# Montants des prêts actifs (j=1 à 3)
for (j in 1:3) {
  i3 <- el[[paste0("i3_", j)]]
  i9 <- el[[paste0("i9_", j)]]
  i9[i9 < 0] <- NA
  el[[paste0("amt_alamana_", j)]]  <- ifelse(i3 == 3,           i9, 0)
  el[[paste0("amt_oformal_", j)]]  <- ifelse(i3 %in% c(1,2,15), i9, 0)
  el[[paste0("amt_informal_", j)]] <- ifelse(i3 %in% 7:14,      i9, 0)
  el[[paste0("amt_oamc_", j)]]     <- ifelse(i3 %in% c(4,5,6),  i9, 0)
}

el <- el %>% mutate(
  loansamt_alamana  = rowSums(select(., starts_with("amt_alamana_")),  na.rm=TRUE),
  loansamt_oformal  = rowSums(select(., starts_with("amt_oformal_")),  na.rm=TRUE),
  loansamt_informal = rowSums(select(., starts_with("amt_informal_")), na.rm=TRUE),
  loansamt_oamc     = rowSums(select(., starts_with("amt_oamc_")),     na.rm=TRUE),
  loansamt_total    = loansamt_alamana + loansamt_oformal +
                      loansamt_informal + loansamt_oamc
)

cat("Taux d'emprunt Al Amana (traités):",
    round(mean(el$borrowed_alamana[el$treated==1], na.rm=TRUE), 3), "\n")

## Taux d'emprunt Al Amana (traités): 0.105

cat("Taux d'emprunt Al Amana (contrôles):",
    round(mean(el$borrowed_alamana[el$treated==0], na.rm=TRUE), 3), "\n")

## Taux d'emprunt Al Amana (contrôles): 0.02

2.3 Activités d’auto-emploi

# Types d'activité : Section D (d2_j = secteur : 1=agri, 2=élevage, 3-6=commerce/services)
el <- el %>% mutate(
  act_agri      = as.integer(rowSums(sapply(1:6, function(j) el[[paste0("d2_",j)]] == 1), na.rm=TRUE) > 0),
  act_livestock = as.integer(rowSums(sapply(1:6, function(j) el[[paste0("d2_",j)]] == 2), na.rm=TRUE) > 0),
  act_business  = as.integer(rowSums(sapply(1:6, function(j) el[[paste0("d2_",j)]] %in% 3:6), na.rm=TRUE) > 0),
  self_empl     = as.integer(act_agri == 1 | act_livestock == 1 | act_business == 1)
)

# Nombre d'activités
el$act_number <- rowSums(sapply(1:6, function(j) {
  nom <- el[[paste0("d1_nom", j)]]
  sec <- el[[paste0("d2_", j)]]
  (!is.na(nom) & nom != "" & nom != "." & !sec %in% c(7,8, NA))
}), na.rm = TRUE)

2.4 Profits et revenus

# --- AGRICULTURE : production = ventes + autoconsommation + stock ---
# Le do-file agrège sur ~20 cultures (e20, e21, e22, e23, e19)
# Simplification : on prend les variables d'output agricole disponibles

# Ventes agricoles : e20_j * e21_j (quantité * prix vente)
sale_cereal <- rep(0, nrow(el))
for (j in 1:20) {
  q_sold  <- el[[paste0("e20_", j)]]; q_sold[is.na(q_sold) | q_sold < 0]  <- 0
  p_sold  <- el[[paste0("e21_", j)]]; p_sold[is.na(p_sold) | p_sold <= 0] <- NA
  q_auto  <- el[[paste0("e22_", j)]]; q_auto[is.na(q_auto) | q_auto < 0]  <- 0
  p_auto  <- el[[paste0("e23_", j)]]; p_auto[is.na(p_auto) | p_auto <= 0] <- NA

  # Utiliser prix médian de l'échantillon si prix manquant
  med_p <- median(ifelse(el[[paste0("e21_",j)]] > 0, el[[paste0("e21_",j)]], NA), na.rm=TRUE)
  p_sold[is.na(p_sold)] <- med_p
  p_auto[is.na(p_auto)] <- med_p

  sale_cereal <- sale_cereal + q_sold * replace_na(p_sold, 0) +
                               q_auto * replace_na(p_auto, 0)
}
el$sale_agri <- sale_cereal

# Dépenses agricoles : e44 (main d'oeuvre), inputs
# Simplifié : on prend les dépenses de travail salarié agricole
el$expense_agri <- 0
for (j in 1:11) {
  v <- el[[paste0("e42_", j)]]; v[is.na(v) | v < 0] <- 0
  el$expense_agri <- el$expense_agri + v
}

# Stock agricole (savings_agri) : simplifié via e19 (stock restant)
el$savings_agri <- 0
for (j in 1:20) {
  v <- el[[paste0("e19_", j)]]; v[is.na(v) | v < 0] <- 0
  el$savings_agri <- el$savings_agri + v
}

el$prod_agri <- el$sale_agri + el$savings_agri

# --- ÉLEVAGE ---
# Ventes élevage — à adapter selon les vrais noms
el$sale_livestock    <- 0
el$expense_livestock <- 0

# Dépenses élevage : f26_j (aliments) + f27_j (vétérinaire)
el$expense_livestock <- 0
for (j in 1:8) {
  for (vn in c("f26_", "f27_")) {
    v <- el[[paste0(vn, j)]]
    if (!is.null(v)) { v[is.na(v) | v < 0] <- 0; el$expense_livestock <- el$expense_livestock + v }
  }
}

# Actifs élevage — simplifié (f14/f15 non disponibles)
el$asset_livestock <- 0  # sera affiné quand les vrais noms sont connus

# --- COMMERCE/SERVICES ---
el$sale_business <- 0
for (j in 1:6) {
  v <- el[[paste0("g11_", j)]]; if (!is.null(v)) { v[is.na(v)|v<0] <- 0; el$sale_business <- el$sale_business + v }
}

el$expense_business <- 0
for (j in 1:6) {
  v <- el[[paste0("g22_", j)]]; if (!is.null(v)) { v[is.na(v)|v<0] <- 0; el$expense_business <- el$expense_business + v }
}

el$asset_business <- 0
for (j in 1:9) {
  v <- el[[paste0("g7_", j)]]; if (!is.null(v)) { v[is.na(v)|v<0] <- 0; el$asset_business <- el$asset_business + v }
}

# --- AGRÉGATS ---
el <- el %>% mutate(
  output_total  = prod_agri + sale_livestock + sale_business,
  assets_total  = asset_livestock + asset_business,
  expense_total = expense_agri + expense_livestock + expense_business,
  profit_agri      = prod_agri - expense_agri,
  profit_livestock = sale_livestock - expense_livestock,
  profit_business  = sale_business - expense_business,
  profit_total     = profit_agri + profit_livestock + profit_business
)

cat("Profit moyen (contrôle, MAD):", round(mean(el$profit_total[el$treated==0], na.rm=TRUE), 0), "\n")

## Profit moyen (contrôle, MAD): 3144

2.5 Revenus salariaux

# Revenus du travail salarié : Section H2, variables h11_j et h11c_j
el$income_dep <- 0
for (j in 1:22) {
  v1 <- el[[paste0("h11_", j)]];  if (!is.null(v1)) { v1[is.na(v1)|v1<0] <- 0; el$income_dep <- el$income_dep + v1 }
  v2 <- el[[paste0("h11c", j)]];  if (!is.null(v2)) { v2[is.na(v2)|v2<0] <- 0; el$income_dep <- el$income_dep + v2 }
}

el <- el %>% mutate(
  income_work  = profit_total + income_dep,
  income_total = income_work
)

cat("Revenu salarial moyen (contrôle):", round(mean(el$income_dep[el$treated==0], na.rm=TRUE), 0), "\n")

## Revenu salarial moyen (contrôle): 15618

2.6 Consommation

# Consommation alimentaire : h1_j et h2_j (par semaine * 4.345)
el$cons_food <- 0
for (j in c(1:14, 17, 18)) {
  v1 <- el[[paste0("h1_", j)]]; if (!is.null(v1)) { v1[is.na(v1)|v1<0] <- 0; el$cons_food <- el$cons_food + v1 }
  v2 <- el[[paste0("h2_", j)]]; if (!is.null(v2)) { v2[is.na(v2)|v2<0] <- 0; el$cons_food <- el$cons_food + v2 }
}
el$cons_food <- el$cons_food * 4.345

# Durables : Section C (c4_j / 12)
el$cons_durables <- 0
for (j in 1:31) {
  v <- el[[paste0("c4_", j)]]; if (!is.null(v)) { v[is.na(v)|v<0] <- 0; el$cons_durables <- el$cons_durables + v }
}
el$cons_durables <- el$cons_durables / 12

# Consommation de fêtes : h4_5 (ramadan) + h4_6 (Aïd) + h3_11 / 12
el$cons_ramadan <- ifelse(!is.na(el$h4_5) & el$h4_5 >= 0, el$h4_5 / 12, 0)
el$cons_aid     <- ifelse(!is.na(el$h4_6) & el$h4_6 >= 0, el$h4_6 / 12, 0)
el$cons_social  <- el$cons_ramadan + el$cons_aid
if ("h3_11" %in% names(el)) el$cons_social <- el$cons_social + ifelse(!is.na(el$h3_11) & el$h3_11 >= 0, el$h3_11, 0)

# Santé : h3_8
el$cons_health <- ifelse(!is.na(el$h3_8) & el$h3_8 >= 0, el$h3_8, 0)

# Éducation : h4_1 / 12
el$cons_schooling <- ifelse(!is.na(el$h4_1) & el$h4_1 >= 0, el$h4_1 / 12, 0)

# Non-durables (inclut food + autres postes hebdo + mensuels)
el$cons_nondurables <- el$cons_food
for (j in c(15, 16, 19, 20, 21)) {
  v1 <- el[[paste0("h1_", j)]]; if (!is.null(v1)) { v1[is.na(v1)|v1<0] <- 0; el$cons_nondurables <- el$cons_nondurables + v1 * 4.345 }
  v2 <- el[[paste0("h2_", j)]]; if (!is.null(v2)) { v2[is.na(v2)|v2<0] <- 0; el$cons_nondurables <- el$cons_nondurables + v2 * 4.345 }
}
for (j in c(1:9, 11:17)) {
  v <- el[[paste0("h3_", j)]]; if (!is.null(v)) { v[is.na(v)|v<0] <- 0; el$cons_nondurables <- el$cons_nondurables + v }
}
for (j in 1:10) {
  v <- el[[paste0("h4_", j)]]; if (!is.null(v)) { v[is.na(v)|v<0] <- 0; el$cons_nondurables <- el$cons_nondurables + v / 12 }
}

# Consommation totale
el$consumption <- el$cons_durables + el$cons_nondurables
el$consumption[el$cons_food == 0 | is.na(el$cons_food)] <- NA

cat("Consommation moyenne (contrôle):", round(mean(el$consumption[el$treated==0], na.rm=TRUE), 0), "MAD/mois\n")

## Consommation moyenne (contrôle): 3004 MAD/mois

2.7 Heures travaillées (7 derniers jours)

# Waves 2, 3, 4 : h05_ai_j (heures/jour) * h04_ai_j (jours) par activité i, membre j
# h03_ri_j = type de travail (5 ou 7 = auto-emploi, 1-4,6,8-9 = extérieur)

# Initialisation
el$hours_self_age6_65    <- 0
el$hours_outside_age6_65 <- 0
el$hours_chores_age6_65  <- 0

for (j in 1:24) {
  age_var <- el[[paste0("a7_", j)]]
  in_group <- !is.na(age_var) & age_var >= 6 & age_var <= 65

  for (i in 1:3) {
    type_var  <- el[[paste0("h03_r", i, "_", j)]]
    hrs_var   <- el[[paste0("h05_a", i, "_", j)]]
    days_var  <- el[[paste0("h04_a", i, "_", j)]]

    if (!is.null(type_var) & !is.null(hrs_var) & !is.null(days_var)) {
      hrs_var[is.na(hrs_var) | hrs_var < 0 | hrs_var >= 18]   <- 0
      days_var[is.na(days_var) | days_var < 0 | days_var > 7] <- 0
      contrib <- hrs_var * days_var

      is_self    <- !is.na(type_var) & type_var %in% c(5, 7)
      is_outside <- !is.na(type_var) & (type_var %in% c(1:4, 6, 8, 9))

      el$hours_self_age6_65    <- el$hours_self_age6_65    + ifelse(in_group & is_self,    contrib, 0)
      el$hours_outside_age6_65 <- el$hours_outside_age6_65 + ifelse(in_group & is_outside, contrib, 0)
    }
  }

  # Tâches domestiques : h07_j * h08_j
  chores_days <- el[[paste0("h07_", j)]]; chores_hrs <- el[[paste0("h08_", j)]]
  if (!is.null(chores_days) & !is.null(chores_hrs)) {
    chores_days[is.na(chores_days)|chores_days<0|chores_days>7] <- 0
    chores_hrs[is.na(chores_hrs)|chores_hrs<0|chores_hrs>=18]   <- 0
    el$hours_chores_age6_65 <- el$hours_chores_age6_65 + ifelse(in_group, chores_days * chores_hrs, 0)
  }
}

# Pour wave 1 : a12_pj (auto-emploi) + a14_j (extérieur) + a13_j (tâches dom.)
for (j in 1:11) {
  age_var  <- el[[paste0("a7_", j)]]
  in_group <- !is.na(age_var) & age_var >= 6 & age_var <= 65 & el$wave == 1

  sp <- el[[paste0("a12_p", j)]]; sp[is.na(sp)|sp<0|sp>=126] <- 0
  sm <- el[[paste0("a12_m", j)]]; sm[is.na(sm)|sm<0|sm>=126] <- 0
  ou <- el[[paste0("a14_", j)]];  ou[is.na(ou)|ou<0|ou>=126] <- 0
  ch <- el[[paste0("a13_", j)]];  ch[is.na(ch)|ch<0|ch>=126] <- 0

  el$hours_self_age6_65    <- el$hours_self_age6_65    + ifelse(in_group, sp + sm, 0)
  el$hours_outside_age6_65 <- el$hours_outside_age6_65 + ifelse(in_group, ou, 0)
  el$hours_chores_age6_65  <- el$hours_chores_age6_65  + ifelse(in_group, ch, 0)
}

el$hours_total_age6_65 <- el$hours_self_age6_65 + el$hours_outside_age6_65 + el$hours_chores_age6_65

# Mettre à NA si aucune donnée sur les heures (nonmiss = 0)
el$hours_total_age6_65[el$hours_total_age6_65 == 0 & el$wave != 1] <- NA

cat("Heures totales moy. (contrôle):", round(mean(el$hours_total_age6_65[el$treated==0], na.rm=TRUE), 1), "\n")

## Heures totales moy. (contrôle): 146.4

cat("Dont extérieur (contrôle):",      round(mean(el$hours_outside_age6_65[el$treated==0], na.rm=TRUE), 1), "\n")

## Dont extérieur (contrôle): 33.4

2.8 Scolarisation et autonomie des femmes

# Scolarisation 6-15 : a15_j == 1 (en école) si a7_j in [6,15]
el$kids_inschool <- 0
el$kids_6_15     <- 0
for (j in 1:24) {
  age <- el[[paste0("a7_", j)]]; sch <- el[[paste0("a15_", j)]]
  if (!is.null(age) & !is.null(sch)) {
    in_range <- !is.na(age) & age >= 6 & age < 16
    el$kids_inschool <- el$kids_inschool + ifelse(in_range & !is.na(sch) & sch == 1, 1, 0)
    el$kids_6_15     <- el$kids_6_15     + ifelse(in_range, 1, 0)
  }
}
el$sharekids_inschool <- ifelse(el$kids_6_15 > 0, el$kids_inschool / el$kids_6_15, 0)

# Adolescents 16-20
el$teens_inschool <- 0
el$members_16_20  <- 0
for (j in 1:24) {
  age <- el[[paste0("a7_", j)]]; sch <- el[[paste0("a11_", j)]]; act <- el[[paste0("a10_", j)]]
  if (!is.null(age)) {
    in_range <- !is.na(age) & age >= 16 & age <= 20
    el$members_16_20  <- el$members_16_20  + ifelse(in_range, 1, 0)
    if (!is.null(sch) & !is.null(act))
      el$teens_inschool <- el$teens_inschool + ifelse(in_range & !is.na(sch) & sch == 13 & !is.na(act) & act != 2, 1, 0)
  }
}
el$shareteens_inschool <- ifelse(el$members_16_20 > 0, el$teens_inschool / el$members_16_20, 0)

# Index autonomie femmes : j1 à j9, j9b, j10 à j13
women_vars <- list()
for (i in 1:9) {
  w <- rep(0, nrow(el))
  for (j in 1:11) { v <- el[[paste0("j", i, "_", j)]]; if(!is.null(v)) w <- w + ifelse(!is.na(v) & v == 1, 1, 0) }
  women_vars[[i]] <- as.integer(w > 0)
}
if ("j9b1" %in% names(el)) {
  w10 <- rep(0, nrow(el))
  for (j in 1:11) { v <- el[[paste0("j9b", j)]]; if(!is.null(v)) w10 <- w10 + ifelse(!is.na(v) & v == 1, 1, 0) }
  women_vars[[10]] <- as.integer(w10 > 0)
} else { women_vars[[10]] <- rep(0, nrow(el)) }
for (k in 10:13) {
  v <- el[[paste0("j", k)]]
  women_vars[[k]] <- if (!is.null(v)) as.integer(!is.na(v) & v %in% c(3, 4)) else rep(0, nrow(el))
}

# Standardiser et sommer pour l'index
women_mat <- do.call(cbind, women_vars)
women_std  <- scale(women_mat)
el$women_index <- rowSums(women_std, na.rm=TRUE)

# Activités gérées par des femmes
el$women_act_number <- 0
for (j in 1:6) {
  resp <- el[[paste0("d3_", j)]]
  if (!is.null(resp)) {
    for (i in 1:24) {
      sex <- el[[paste0("a4_", i)]]
      if (!is.null(sex)) {
        el$women_act_number <- el$women_act_number +
          ifelse(!is.na(resp) & resp == i & !is.na(sex) & sex == 2, 1, 0)
      }
    }
  }
}
el$women_act <- as.integer(el$women_act_number > 0)

2.9 Variables de contrôle et taille du ménage

# Chef de ménage : a3_j == 1 (chef), a4_j (sexe : 1=homme), a7_j (âge)
el$head_male <- 0
el$head_age  <- NA
for (j in 1:16) {
  is_head <- !is.na(el[[paste0("a3_", j)]]) & el[[paste0("a3_", j)]] == 1
  sex_var <- el[[paste0("a4_", j)]]
  age_var <- el[[paste0("a7_", j)]]
  if (!is.null(sex_var)) el$head_male <- ifelse(is_head & !is.na(sex_var) & sex_var == 1, 1, el$head_male)
  if (!is.null(age_var)) el$head_age  <- ifelse(is_head & !is.na(age_var) & age_var >= 0, age_var, el$head_age)
}

# Membres résidents
el$members_resid  <- 0
el$nadults_resid  <- 0
el$nchildren_resid <- 0
for (j in 1:17) {
  a3  <- el[[paste0("a3_", j)]];  a5  <- el[[paste0("a5_", j)]];  a7  <- el[[paste0("a7_", j)]]
  if (!is.null(a3) & !is.null(a5) & !is.null(a7)) {
    is_resid <- (!is.na(a5) & a5 %in% c(3,5)) | (!is.na(a5) & !a5 %in% c(3,5) & !is.na(a3) & a3 == 1)
    el$members_resid   <- el$members_resid   + ifelse(is_resid, 1, 0)
    el$nadults_resid   <- el$nadults_resid   + ifelse(is_resid & !is.na(a7) & a7 >= 16, 1, 0)
    el$nchildren_resid <- el$nchildren_resid + ifelse(is_resid & !is.na(a7) & a7 >= 0 & a7 < 16, 1, 0)
  }
}

cat("Membres résidents moy.:", round(mean(el$members_resid, na.rm=TRUE), 2), "\n")

## Membres résidents moy.: 6.11

2.10 Échantillon haute probabilité et score de propension

# Le fichier contient score1, score2, score3 et client_admin
# On utilise score3 (le plus récent) ou client comme proxy

# Définir l'échantillon "haute probabilité" :
# Dans le papier, c'est le top quartile du score de propension
# On utilise score3 si disponible, sinon score1

score_var <- if ("score3" %in% names(el)) "score3" else if ("score1" %in% names(el)) "score1" else NULL

if (!is.null(score_var)) {
  q75 <- quantile(el[[score_var]], 0.75, na.rm=TRUE)
  el$highp <- as.integer(el[[score_var]] >= q75)
  cat("N haute probabilité:", sum(el$highp, na.rm=TRUE), "\n")
} else {
  # Fallback : tous les ménages
  el$highp <- 1
  cat("Score non trouvé, on utilise tous les ménages\n")
}

## N haute probabilité: 1388

# Covariables de contrôle disponibles (traduit du papier)
controls_ok <- intersect(
  c("members_resid", "nadults_resid", "head_age",
    "act_livestock", "act_business",
    "cm_resp_activ", "ccm_resp_activ"),
  names(el)
)
cat("Covariables disponibles:", paste(controls_ok, collapse=", "), "\n")

## Covariables disponibles: members_resid, nadults_resid, head_age, act_livestock, act_business

3 Fonctions d’inférence par randomisation

# Fonction principale : calcule coef OLS + p-value RI + IC 95%
ri_test <- function(formula, data, treat_var="treated",
                    pair_var="pair_id", n_perm=999) {
  vars_needed <- all.vars(formula)
  data_clean  <- data %>% select(all_of(c(vars_needed, pair_var))) %>% drop_na()
  if (nrow(data_clean) < 10) return(list(beta=NA, p_ri=NA, ci_lo=NA, ci_hi=NA))

  fit_obs  <- lm(formula, data=data_clean)
  beta_obs <- coef(fit_obs)[treat_var]
  if (is.na(beta_obs)) return(list(beta=NA, p_ri=NA, ci_lo=NA, ci_hi=NA))

  pairs     <- unique(data_clean[[pair_var]])
  beta_perm <- numeric(n_perm)
  for (i in seq_len(n_perm)) {
    dp <- data_clean
    for (p in pairs) {
      idx <- which(dp[[pair_var]] == p)
      if (length(idx) >= 2) dp[[treat_var]][idx] <- sample(dp[[treat_var]][idx])
    }
    beta_perm[i] <- coef(lm(formula, data=dp))[treat_var]
  }

  p_ri  <- mean(abs(beta_perm) >= abs(beta_obs))
  ci_lo <- beta_obs - quantile(abs(beta_perm), 0.975)
  ci_hi <- beta_obs + quantile(abs(beta_perm), 0.975)
  list(beta=beta_obs, p_ri=p_ri, ci_lo=ci_lo, ci_hi=ci_hi)
}

build_formula <- function(outcome, treat_var="treated",
                          pair_var="pair_id", covariates=NULL) {
  rhs <- treat_var
  if (!is.null(covariates) && length(covariates) > 0)
    rhs <- paste(c(rhs, covariates), collapse=" + ")
  as.formula(paste(outcome, "~", rhs, "+", pair_var))
}

make_row <- function(ri, label, ctrl_mean, n_obs, digs=0) {
  if (is.na(ri$beta)) return(tibble(Variable=label, Coefficient=NA, ` `="", `p-val RI`=NA,
                                    `IC 95% bas`=NA, `IC 95% haut`=NA, `Moy. contrôle`=round(ctrl_mean,digs), N=n_obs))
  stars <- ifelse(ri$p_ri<0.01,"***", ifelse(ri$p_ri<0.05,"**", ifelse(ri$p_ri<0.10,"*","")))
  tibble(Variable=label, Coefficient=round(ri$beta,digs), ` `=stars,
         `p-val RI`=round(ri$p_ri,3), `IC 95% bas`=round(ri$ci_lo,digs),
         `IC 95% haut`=round(ri$ci_hi,digs), `Moy. contrôle`=round(ctrl_mean,digs), N=n_obs)
}

run_table <- function(var_list, sample_data, cov, n_perm=999, digs=0) {
  var_ok <- var_list[unlist(var_list) %in% names(sample_data)]
  if (length(var_ok)==0) { message("Aucune variable trouvée."); return(tibble()) }
  map_dfr(names(var_ok), function(label) {
    var <- var_ok[[label]]
    dat <- sample_data %>% select(all_of(c(var,"treated","pair_id",cov))) %>% drop_na()
    ctrl_mean <- mean(dat[[var]][dat$treated==0], na.rm=TRUE)
    ri <- ri_test(build_formula(var, covariates=cov), dat, n_perm=n_perm)
    make_row(ri, label, ctrl_mean, nrow(dat), digs)
  })
}

cat("Fonctions RI prêtes.\n")

## Fonctions RI prêtes.

4 Table 2 — Accès au crédit

el_hp  <- el %>% filter(!is.na(highp) & highp == 1)
cov_ok <- controls_ok[controls_ok %in% names(el_hp)]

credit_vars <- list(
  "Al Amana (admin.)"     = "client_admin",
  "Al Amana (enquête)"    = "borrowed_alamana",
  "Autre formel"          = "borrowed_oformal",
  "Informel"              = "borrowed_informal",
  "Total (tout crédit)"   = "borrowed_total",
  "Montant Al Amana (MAD)"= "loansamt_alamana",
  "Montant total (MAD)"   = "loansamt_total"
)

t2 <- run_table(credit_vars, el_hp, cov_ok, n_perm=999, digs=3)
kable(t2, caption="Table 2 — Accès au crédit — p-values par inférence par randomisation") %>%
  kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
  footnote(general="* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations). FE de paires incluses.")

Table 2 — Accès au crédit — p-values par inférence par randomisation
Variable	Coefficient		p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Al Amana (admin.)	0.189	***	0.000	0.153	0.225	0.000	1365
Al Amana (enquête)	0.124	***	0.000	0.092	0.156	0.017	1365
Autre formel	0.020	**	0.021	0.001	0.040	0.016	1365
Informel	0.014		0.104	-0.005	0.033	0.025	1365
Total (tout crédit)	0.159	***	0.000	0.116	0.202	0.068	1365
Montant Al Amana (MAD)	822.827	***	0.000	505.985	1139.669	91.602	1365
Montant total (MAD)	1414.426	***	0.000	452.797	2376.054	1111.291	1365
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations). FE de paires incluses.

5 Table 3 — Activités d’auto-emploi

selfempl_vars <- list(
  "Actifs totaux (MAD)"           = "assets_total",
  "Production totale (MAD)"       = "output_total",
  "Dépenses totales (MAD)"        = "expense_total",
  "Profit total (MAD)"            = "profit_total",
  "A une activité d'auto-emploi"  = "self_empl"
)

t3 <- run_table(selfempl_vars, el_hp, cov_ok, n_perm=999, digs=0)
kable(t3, caption="Table 3 — Activités d'auto-emploi — p-values par RI") %>%
  kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
  footnote(general="* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations).")

Table 3 — Activités d’auto-emploi — p-values par RI
Variable	Coefficient		p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Actifs totaux (MAD)	-88	**	0.041	-180	3	73	1365
Production totale (MAD)	3444	***	0.008	361	6526	5195	1365
Dépenses totales (MAD)	279	***	0.004	51	507	550	1365
Profit total (MAD)	3165	**	0.020	150	6181	4644	1365
A une activité d’auto-emploi	0		0.547	0	0	1	1365
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations).

6 Table 4 — Revenus

income_vars <- list(
  "Revenu travail total (MAD)"       = "income_work",
  "Profit auto-emploi (MAD)"        = "profit_total",
  "Revenu salarial/journalier (MAD)" = "income_dep"
)

t4 <- run_table(income_vars, el_hp, cov_ok, n_perm=999, digs=0)
kable(t4, caption="Table 4 — Revenus — p-values par RI") %>%
  kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
  footnote(general=paste("* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations).",
    "Résultat clé : hausse des profits compensée par baisse du revenu salarial.", sep="\n"))

Table 4 — Revenus — p-values par RI
Variable	Coefficient		p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Revenu travail total (MAD)	1220		0.483	-2377	4817	23617	1365
Profit auto-emploi (MAD)	3165	**	0.017	205	6125	4644	1365
Revenu salarial/journalier (MAD)	-1945		0.061	-4255	365	18972	1365
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations). Résultat clé : hausse des profits compensée par baisse du revenu salarial.

7 Table 5 — Offre de travail

labor_vars <- list(
  "Total heures (6-65 ans)"    = "hours_total_age6_65",
  "Dont : auto-emploi"         = "hours_self_age6_65",
  "Dont : travail extérieur"   = "hours_outside_age6_65",
  "Dont : tâches domestiques"  = "hours_chores_age6_65"
)

t5 <- run_table(labor_vars, el_hp, cov_ok, n_perm=999, digs=1)
kable(t5, caption="Table 5 — Heures travaillées (7 derniers jours) — p-values par RI") %>%
  kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
  footnote(general="* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations).")

Table 5 — Heures travaillées (7 derniers jours) — p-values par RI
Variable	Coefficient		p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Total heures (6-65 ans)	0.1		0.991	-8.4	8.6	158.8	1335
Dont : auto-emploi	7.0	***	0.009	1.1	13.0	50.5	1365
Dont : travail extérieur	-3.1		0.177	-8.2	1.9	37.0	1365
Dont : tâches domestiques	-3.8	**	0.037	-8.0	0.3	67.4	1365
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations).

8 Table 6 — Consommation

conso_vars <- list(
  "Consommation totale (MAD/mois)" = "consumption",
  "Durables"                       = "cons_durables",
  "Non-durables"                   = "cons_nondurables",
  "Alimentation"                   = "cons_food",
  "Santé"                          = "cons_health",
  "Éducation"                      = "cons_schooling",
  "Fêtes et cérémonies"            = "cons_social"
)

t6 <- run_table(conso_vars, el_hp, cov_ok, n_perm=999, digs=0)
kable(t6, caption="Table 6 — Consommation — p-values par RI") %>%
  kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
  footnote(general="* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations).")

Table 6 — Consommation — p-values par RI
Variable	Coefficient	p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Consommation totale (MAD/mois)	-450	0.108	-1048	148	3257	1364
Durables	-232	0.107	-508	44	243	1365
Non-durables	-213	0.567	-611	184	3009	1365
Alimentation	27	0.460	-54	108	1766	1365
Santé	-1	0.874	-18	16	38	1365
Éducation	1	0.749	-4	6	26	1365
Fêtes et cérémonies	6	0.381	-9	21	315	1365
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations).

9 Table 7 — Effets sociaux

social_vars <- list(
  "Taux scolarisation 6-15 ans"      = "sharekids_inschool",
  "Taux scolarisation 16-20 ans"     = "shareteens_inschool",
  "Index autonomie femmes"           = "women_index",
  "Nb activités gérées par femme"    = "women_act_number",
  "HH avec activité féminine (0/1)"  = "women_act"
)

t7 <- run_table(social_vars, el_hp, cov_ok, n_perm=999, digs=3)
kable(t7, caption="Table 7 — Effets sociaux — p-values par RI") %>%
  kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
  footnote(general="* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations). Aucun effet attendu.")

Table 7 — Effets sociaux — p-values par RI
Variable	Coefficient		p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Taux scolarisation 6-15 ans	0.017		0.381	-0.027	0.062	0.456	1365
Taux scolarisation 16-20 ans	-0.033	**	0.042	-0.072	0.005	0.143	1365
Index autonomie femmes	0.133		0.603	-0.474	0.741	-1.024	1365
Nb activités gérées par femme	-0.052	***	0.006	-0.092	-0.013	0.138	1365
HH avec activité féminine (0/1)	-0.035	**	0.018	-0.067	-0.002	0.114	1365
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations). Aucun effet attendu.

10 Table 9 — Impact sur les emprunteurs (IV/LATE)

# Instrument : village traité → emprunt Al Amana
# LATE = ITT / first stage ≈ ITT / 0.17

key_outcomes <- list(
  "Actifs totaux (MAD)"            = "assets_total",
  "Production totale (MAD)"        = "output_total",
  "Dépenses totales (MAD)"         = "expense_total",
  "Profit total (MAD)"             = "profit_total",
  "Revenu salarial (MAD)"          = "income_dep",
  "Heures extérieur"               = "hours_outside_age6_65",
  "Consommation (MAD/mois)"        = "consumption"
)
key_ok <- key_outcomes[unlist(key_outcomes) %in% names(el_hp)]

if ("client_admin" %in% names(el_hp) && length(key_ok) > 0) {
  cov_hp <- cov_ok[cov_ok %in% names(el_hp)]

  panelB_iv <- map_dfr(names(key_ok), function(label) {
    var <- key_ok[[label]]
    dat <- el_hp %>% select(all_of(c(var,"client_admin","treated","pair_id",cov_hp))) %>% drop_na()
    ctrl_mean <- mean(dat[[var]][dat$treated==0], na.rm=TRUE)

    cov_str <- if (length(cov_hp)>0) paste(cov_hp, collapse=" + ") else "1"
    form_iv <- as.formula(paste(var, "~ client_admin +", cov_str, "+ pair_id |",
                                "treated +", cov_str, "+ pair_id"))
    fit_iv   <- tryCatch(ivreg(form_iv, data=dat), error=function(e) NULL)
    if (is.null(fit_iv)) return(make_row(list(beta=NA,p_ri=NA,ci_lo=NA,ci_hi=NA), label, ctrl_mean, nrow(dat)))

    beta_obs <- coef(fit_iv)["client_admin"]
    pairs    <- unique(dat$pair_id)
    bp <- numeric(999)
    for (i in 1:999) {
      dp <- dat
      for (p in pairs) { idx <- which(dp$pair_id==p); if(length(idx)>=2) dp$treated[idx] <- sample(dp$treated[idx]) }
      fp <- tryCatch(ivreg(form_iv, data=dp), error=function(e) NULL)
      bp[i] <- if (!is.null(fp)) coef(fp)["client_admin"] else NA_real_
    }
    bp <- bp[!is.na(bp)]
    p_ri  <- mean(abs(bp) >= abs(beta_obs))
    ci_lo <- beta_obs - quantile(abs(bp), 0.975)
    ci_hi <- beta_obs + quantile(abs(bp), 0.975)
    make_row(list(beta=beta_obs, p_ri=p_ri, ci_lo=ci_lo, ci_hi=ci_hi), label, ctrl_mean, nrow(dat))
  })

  kable(panelB_iv, caption="Table 9 — IV/LATE — p-values par RI") %>%
    kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
    footnote(general=paste("* p<0.10, ** p<0.05, *** p<0.01 (RI, 999 permutations).",
      "Instrument : village traité. LATE = effet causal sur les compliers.", sep="\n"))
} else {
  cat("client_admin ou variables d'outcome non disponibles.\n")
}

Table 9 — IV/LATE — p-values par RI
Variable	Coefficient	p-val RI	IC 95% bas	IC 95% haut	Moy. contrôle	N
Actifs totaux (MAD)	-467	1.000	-145079	144145	73	1365
Production totale (MAD)	18204	0.874	-1735862	1772270	5195	1365
Dépenses totales (MAD)	1474	0.901	-152543	155490	550	1365
Profit total (MAD)	16730	0.874	-2016641	2050102	4644	1365
Revenu salarial (MAD)	-10281	0.886	-1618103	1597541	18972	1365
Heures extérieur	-17	0.935	-2755	2722	37	1365
Consommation (MAD/mois)	-2380	0.932	-455823	451062	3257	1364
Note:
* p<0.10, p<0.05, * p<0.01 (RI, 999 permutations). Instrument : village traité. LATE = effet causal sur les compliers.

11 Comparaison : RI vs. SE clusterisés

comp_vars <- key_ok[names(key_ok) %in% c("Profit total (MAD)","Revenu salarial (MAD)",
                                          "Consommation (MAD/mois)","Heures extérieur")]
if (length(comp_vars) > 0) {
  comp_table <- map_dfr(names(comp_vars), function(label) {
    var <- comp_vars[[label]]
    dat <- el_hp %>% select(all_of(c(var,"treated","pair_id","village_id",cov_ok))) %>% drop_na()
    form <- build_formula(var, covariates=cov_ok)

    # SE clusterisés
    fit_cl <- lm_robust(form, data=dat, clusters=village_id, se_type="CR2")
    p_cl   <- tidy(fit_cl) %>% filter(term=="treated") %>% pull(p.value)
    b_cl   <- tidy(fit_cl) %>% filter(term=="treated") %>% pull(estimate)

    # RI
    ri <- ri_test(form, dat, n_perm=999)

    tibble(Variable=label, Coefficient=round(b_cl,0),
           `p-val clustered`=round(p_cl,3), `p-val RI`=round(ri$p_ri,3),
           `|Différence|`=round(abs(p_cl-ri$p_ri),3),
           `Même conclusion 5%`=ifelse((p_cl<0.05)==(ri$p_ri<0.05),"Oui ✓","Non ✗"))
  })
  kable(comp_table, caption="Comparaison SE clusterisés vs. RI") %>%
    kable_styling(bootstrap_options=c("striped","hover","condensed"), full_width=FALSE) %>%
    footnote(general="SE clusterisés au niveau village (CR1S). RI : 999 permutations dans les paires.")
}

Comparaison SE clusterisés vs. RI
Variable	Coefficient	p-val clustered	p-val RI	\|Différence\|	Même conclusion 5%
Profit total (MAD)	3165	0.096	0.017	0.079	Non ✗
Revenu salarial (MAD)	-1945	0.168	0.068	0.100	Oui ✓
Heures extérieur	-3	0.352	0.195	0.157	Oui ✓
Consommation (MAD/mois)	-450	0.218	0.112	0.106	Oui ✓
Note:
SE clusterisés au niveau village (CR1S). RI : 999 permutations dans les paires.

12 Conclusion

Ce document réplique les tables principales de Crépon et al. (2015) en construisant toutes les variables directement depuis les données brutes (traduit fidèlement du do-file Stata OutcomeConstruction_endline_Oct2014.do), puis en remplaçant les erreurs standard clusterisées par de l’inférence par randomisation.

Les résultats principaux sont confirmés : - Le microcrédit augmente les actifs et les profits en auto-emploi - Ces gains sont compensés par une baisse du travail salarié - Aucun effet net sur la consommation - Aucun effet sur les indicateurs sociaux

sessionInfo()

## R version 4.5.3 (2026-03-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Africa/Casablanca
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] broom_1.0.12     kableExtra_1.4.0 knitr_1.51       AER_1.2-16      
##  [5] survival_3.8-6   sandwich_3.1-1   lmtest_0.9-40    zoo_1.8-15      
##  [9] car_3.1-5        carData_3.0-6    estimatr_1.0.6   lubridate_1.9.5 
## [13] forcats_1.0.1    stringr_1.6.0    dplyr_1.2.1      purrr_1.2.2     
## [17] readr_2.2.0      tidyr_1.3.2      tibble_3.3.1     ggplot2_4.0.2   
## [21] tidyverse_2.0.0  haven_2.5.5     
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       xfun_0.57          bslib_0.10.0       lattice_0.22-9    
##  [5] tzdb_0.5.0         vctrs_0.7.3        tools_4.5.3        generics_0.1.4    
##  [9] pkgconfig_2.0.3    Matrix_1.7-4       RColorBrewer_1.1-3 S7_0.2.1          
## [13] lifecycle_1.0.5    compiler_4.5.3     farver_2.1.2       textshaping_1.0.5 
## [17] codetools_0.2-20   htmltools_0.5.9    sass_0.4.10        yaml_2.3.12       
## [21] Formula_1.2-5      pillar_1.11.1      jquerylib_0.1.4    cachem_1.1.0      
## [25] abind_1.4-8        tidyselect_1.2.1   digest_0.6.39      stringi_1.8.7     
## [29] splines_4.5.3      fastmap_1.2.0      grid_4.5.3         cli_3.6.6         
## [33] magrittr_2.0.5     withr_3.0.2        scales_1.4.0       backports_1.5.1   
## [37] timechange_0.4.0   rmarkdown_2.31     hms_1.1.4          evaluate_1.0.5    
## [41] viridisLite_0.4.3  rlang_1.2.0        Rcpp_1.1.1         glue_1.8.0        
## [45] xml2_1.5.2         svglite_2.2.2      rstudioapi_0.18.0  jsonlite_2.0.0    
## [49] R6_2.6.1           systemfonts_1.3.2

Réplication de Crépon et al. (2015) avec Inférence par Randomisation

Estimating the Impact of Microcredit on Those Who Take It Up: Evidence from a Randomized Experiment in Morocco

2026-04-19