—1.Set up—

library(readxl)
library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(stringr)
library(writexl)

DATA_PATH <- "D:/Mes Donnees/CANALLs/AET Scoring/Data/FarMAETool_-_all_versions_-_Français_fr_-_2026-05-14-05-33-42.xlsx"
OUT_PATH  <- "scored_farmae.xlsx"

2.Data upload

df_raw <- read_excel(DATA_PATH, sheet = "FarMAETool")
 
# Clean column names for R:
#   /  → _sl_   (sub-list separator kept distinct)
#   -  → _neg_  (preserves the -1 "other animals" code)
#   all remaining non-alphanumeric → _
#   collapse multiple _ → single _
#   strip leading/trailing _
clean_name <- function(x) {
  x |>
    iconv(to = "ASCII//TRANSLIT") |>          # strip accents
    str_replace_all("/", "_sl_") |>
    str_replace_all("-", "_neg_") |>
    str_replace_all("[^A-Za-z0-9_]", "_") |>
    str_replace_all("_+", "_") |>
    str_remove("^_") |>
    str_remove("_$") |>
    str_to_lower()
}
 
df <- df_raw
names(df) <- clean_name(names(df))
 
# Verify a few key column name translations
# soil_practices/soilpractice1 → soil_practices_sl_soilpractice1
# type_animal/-1               → type_animal_sl__neg_1
# produit_forestier/1          → produit_forestier_sl_1

3.Helper functions

# Return a binary 0/1 column (NA → 0)
bin_col <- function(col_name) {
  if (!col_name %in% names(df)) return(rep(0L, nrow(df)))
  x <- suppressWarnings(as.integer(as.numeric(df[[col_name]])))
  replace(x, is.na(x), 0L)
}

# Score a single-select column via lookup; unrecognised → NA
score_single <- function(col_name, lookup) {
  if (!col_name %in% names(df)) return(rep(NA_real_, nrow(df)))
  as.numeric(lookup[as.character(df[[col_name]])])
}

# Weighted sum of binary columns
# score_map: named numeric vector  (col_name → score)
score_binary <- function(score_map) {
  Reduce("+", Map(function(col, sc) bin_col(col) * sc,
                  names(score_map), unname(score_map)))
}

# Coalesce two vectors: first non-NA wins
coalesce2 <- function(a, b) ifelse(!is.na(a), a, b)

# Normalise to [0,1] using distance-to-target; clamp to [0,1]
norm_dtt <- function(x, max_val, min_val = 0) {
  span <- max_val - min_val
  if (span == 0) return(rep(0, length(x)))
  pmin(pmax((x - min_val) / span, 0), 1)
}

# Shade % → plot score for TR2
shade_score <- function(pct) {
  pct <- suppressWarnings(as.numeric(pct))
  case_when(
    is.na(pct) ~ NA_real_,
    pct <= 10  ~ 0,
    pct <= 20  ~ 0.5,
    pct <= 30  ~ 1,
    pct <= 40  ~ 1.5,
    pct <= 50  ~ 2,
    pct <= 60  ~ 2.5,
    TRUE       ~ 3
  )
}

—4.Farm Dimention scores—

# ── 4. FARM DIMENSION — RAW SCORES ───────────────────────────────────────────

raw <- tibble(
  farmer_id = paste0(df[["_index"]], "_", df[["nom_prenom"]])
)

raw <- tibble(
  farmer_id = paste0(df[["_index"]], "_", df[["nom_prenom"]]),
  zone      = df[["zone"]],
  Type      = case_when(
    df[["zone"]] %in% c("zone1", "Zone de savane")     ~ "Savannah",
    df[["zone"]] %in% c("zone2", "Zone de transition") ~ "Transition",
    df[["zone"]] %in% c("zone3", "Zone de forêt")      ~ "Forest",
    TRUE ~ NA_character_
  )
)

4.1 SOIL MANAGEMENT

# SO1  Soil Fertility Practices (multi-select binary; max = 18)
raw$SO1 <- score_binary(c(
  soil_practices_sl_soilpractice1  =  0,   # None
  soil_practices_sl_soilpractice2  =  2,   # Soil analysis
  soil_practices_sl_soilpractice3  =  2,   # Foliar mineral – cacao
  soil_practices_sl_soilpractice4  =  1,   # Foliar mineral – other
  soil_practices_sl_soilpractice5  =  2,   # Mineral soil – cacao
  soil_practices_sl_soilpractice6  =  1,   # Mineral soil – other
  soil_practices_sl_soilpractice7  =  2,   # Organic – cacao
  soil_practices_sl_soilpractice8  =  1,   # Organic – other
  soil_practices_sl_soilpractice9  =  1,   # Cover crops – other
  soil_practices_sl_soilpractice10 =  2,   # Microbial inoculants – cacao
  soil_practices_sl_soilpractice11 =  2,   # Biostimulants – cacao
  soil_practices_sl_soilpractice12 =  2    # Fallow
))

# SO2  Balanced Fertilisers (single-select; max = 3)
SO2_lookup <- c(
  practicefertilizer1 = 0,   practicefertilizer2 = 0.5,
  practicefertilizer3 = 1,   practicefertilizer4 = 2,
  practicefertilizer5 = 3,   practicefertilizer6 = 3,
  # full-text fallbacks (older survey versions)
  "Aucune fertilisation" = 0,
  "J'utilise 100 % d'engrais minéraux, appliqués en foliaire (par exemple)." = 0.5,
  "J'utilise 100 % d'engrais minéraux appliqués au sol (par exemple)." = 1,
  "J'utilise 100 % d'engrais organiques (par exemple) pour le cacao." = 2,
  "J'utilise principalement une combinaison d'engrais minéraux appliqués au sol et d'engrais organiques." = 3,
  "J'utilise principalement une combinaison d'engrais foliaires minéraux et d'engrais organiques" = 3,
  "J'utilise principalement une combinaison d'engrais minéraux appliqués en foliaire et d'engrais organique" = 3
)
raw$SO2 <- score_single("type_fertilizer", SO2_lookup)

# SO3  Crop & Household Residues (Q1 single-select + Q2 single-select; max = 4)
# [E1] house_residu is single-select in survey (should be multi); max capped at 1
SO3_q1_lookup <- c(
  residu1 = 0, residu2 = 1, residu3 = 2, residu4 = 3,
  "Tous sont brules ou elimines." = 0,
  "La majorite est brulee, mais une partie est utilisee comme compost, paillis ou aliment pour le betail." = 1,
  "La majorite est utilisee comme compost, paillis ou aliment pour le betail, mais une partie est brulee occasionnellement." = 2,
  "Tous sont utilises comme compost, paillis ou aliment pour le betail. Aucune matiere organique n'est brulee, a l'exception des plantes malades." = 3
)
SO3_q2_lookup <- c(
  houseresidu1 = 0, houseresidu2 = 1, houseresidu3 = 1,
  houseresidu4 = 1, houseresidu5 = 1,
  "Je ne récupère aucun résidu." = 0,
  "Utiliser les déchets alimentaires pour nourrir le petit bétail." = 1,
  "Collecter et utiliser l'urine comme engrais." = 1,
  "Collecter et utiliser les excréments comme engrais." = 1,
  "Composter les dechets alimentaires pour l'engrais." = 1
)
raw$SO3 <- replace_na(score_single("crop_residu",  SO3_q1_lookup), 0) +
           replace_na(score_single("house_residu", SO3_q2_lookup), 0)

# SO4  Livestock Effluent (single-select; conditional; max = 3)
SO4_lookup <- c(
  livestock1 = NA_real_,   # no livestock → not scored
  livestock2 = 0,
  livestock3 = 2,
  livestock4 = 3,
  "Pas d'élevage sur l'exploitation" = NA_real_,
  "Les effluents ne sont pas épandus et sont laissés à se décomposer" = 0,
  "Une partie des effluents d'élevage est épandue sans traitement à la surface du sol, mais non incorporée" = 2,
  "Tous les effluents d'élevage sont épandus après compostage (fumier traité) à la surface du sol. Aucun effluent n'est gaspillé." = 3
)
raw$SO4 <- score_single("effluent_livestock", SO4_lookup)

# SO5  Seasonal Tillage (single-select; max = 3)
SO5_lookup <- c(
  soil1 = 0, soil2 = 1.5, soil3 = 3,
  "Le travail du sol est fréquemment utilisé pour toutes les cultures saisonnières." = 0,
  "Certaines parcelles sont en travail du sol et d'autres en semis direct." = 1.5,
  "Semis direct pour toutes les cultures saisonnières." = 3
)
raw$SO5 <- score_single("seasonnal_soil_practice", SO5_lookup)

# SO6  Soil Erosion Practices (multi-select binary; max = 12)
# [E2] soilerosion1 blank label (legacy) → score 0; soilerosion2 = "None" → score 0
raw$SO6 <- score_binary(c(
  combat_soil_erosion_sl_soilerosion1  = 0,   # blank/legacy
  combat_soil_erosion_sl_soilerosion2  = 0,   # None
  combat_soil_erosion_sl_soilerosion3  = 1,   # Continuous planting
  combat_soil_erosion_sl_soilerosion4  = 2,   # Mulch cocoa
  combat_soil_erosion_sl_soilerosion5  = 1,   # Mulch seasonal
  combat_soil_erosion_sl_soilerosion6  = 1,   # No/minimal tillage
  combat_soil_erosion_sl_soilerosion7  = 1,   # Terraces
  combat_soil_erosion_sl_soilerosion8  = 2,   # Vegetation barriers
  combat_soil_erosion_sl_soilerosion9  = 2,   # Agroforestry cocoa
  combat_soil_erosion_sl_soilerosion10 = 1,   # Agroforestry other
  combat_soil_erosion_sl_soilerosion11 = 1    # Other measures
))

4.2 Water MANAGEMENT

# WM1  Water Saving Practices (multi-select binary; max = 4)
raw$WM1 <- score_binary(c(
  proactive_recycling_water_sl_recyclingwater1 = 0,  # None
  proactive_recycling_water_sl_recyclingwater2 = 1,  # Rainwater harvesting
  proactive_recycling_water_sl_recyclingwater3 = 1,  # Drip irrigation
  proactive_recycling_water_sl_recyclingwater4 = 1,  # Micro-watersheds
  proactive_recycling_water_sl_recyclingwater5 = 1   # Mulch
))

# WM2  Water Body Protection (single-select; conditional; max = 2)
# [E5] waterbodies1 = no water bodies → not scored (NA)
WM2_lookup <- c(
  waterbodies1 = NA_real_,
  waterbodies2 = 0,  waterbodies3 = 0,
  waterbodies4 = 1,  waterbodies5 = 2,
  "Aucun plan d'eau dans votre exploitation" = NA_real_,
  "Aucun plan d'eau" = NA_real_,
  "Aucune mesure de protection" = 0,
  "Aucune pratique de protection de l'eau" = 0,
  "Une pratique de protection des plans d'eau" = 1,
  "Deux ou plusieurs pratiques de protection des plans d'eau" = 2
)
raw$WM2 <- score_single("quality_water_bodies", WM2_lookup)

# WM3  Integrated Water (single-select; conditional on WM2; max = 3)
WM3_lookup <- c(
  integratingwater1 = 0, integratingwater2 = 1.5, integratingwater3 = 3,
  "Aucune pratique intégrant les plans d'eau de l'exploitation à la production végétale ou animale" = 0,
  "Une pratique intégrant les plans d'eau de l'exploitation à la production végétale OU animale." = 1.5,
  "Plusieurs pratiques intégrant les plans d'eau de l'exploitation à la production végétale ET/OU animale." = 3
)
raw$WM3 <- ifelse(is.na(raw$WM2),
                  NA_real_,
                  score_single("integrating_water", WM3_lookup))

4.3 CROP MANAGEMENT

# CR1  Crop Diversification (count of crop types; max = 8 for normalisation)
crop_groups <- list(
  mais     = c("crop_mais_sl_culturecrop1",          "crop_mais_sl_culturecrop2"),
  manioc   = c("crop_manioc_sl_culturecrop1",         "crop_manioc_sl_culturecrop2"),
  cocoa    = c("crop_cocoa_sl_culturecrop1",           "crop_cocoa_sl_culturecrop2"),
  arbres   = c("crop_arbres_fruit_sl_culturecrop1",    "crop_arbres_fruit_sl_culturecrop2"),
  plantain = c("crop_plantain_banane_sl_culturecrop1", "crop_plantain_banane_sl_culturecrop2"),
  igname   = c("crop_igname_sl_culturecrop1",          "crop_igname_sl_culturecrop2"),
  arcahide = c("crop_arcahide_sl_culturecrop1",         "crop_arcahide_sl_culturecrop2")
)
cr1 <- rep(0L, nrow(df))
for (cols in crop_groups) {
  present <- pmax(bin_col(cols[1]), bin_col(cols[2]))
  cr1 <- cr1 + present
}
if ("crops_others" %in% names(df)) {
  cr1 <- cr1 + as.integer(tolower(as.character(df$crops_others)) == "oui")
}
raw$CR1 <- cr1

# CR2  Intercropping (two single-selects; max = 9)
CR2_cacao_lookup <- c(
  cropassociation1 = 0, cropassociation2 = 2,
  cropassociation3 = 4, cropassociation4 = 6,
  "Le cacao est la seule culture sur toutes mes parcelles." = 0,
  "Le cacao est la seule culture, mais sur certaines parcelles, j'ai une petite production d'autres cultures." = 2,
  "Le cacao est la principale culture, mais j'ai une production importante d'autres cultures sur les memes parcelles." = 4,
  "Associez le cacao a d'autres cultures ; la production d'autres cultures est aussi importante que celle du cacao." = 6
)
CR2_other_lookup <- c(
  cropwithoutassociation1 = 0,
  cropwithoutassociation2 = 1.5,
  cropwithoutassociation3 = 3,
  "Cultures pures dans toutes mes autres parcelles" = 0,
  "Associer plusieurs cultures dans certaines de mes autres parcelles" = 1.5,
  "Associer plusieurs cultures dans toutes mes autres parcelles" = 3
)
raw$CR2 <- replace_na(score_single("crop_association",       CR2_cacao_lookup), 0) +
           replace_na(score_single("crop_without_association", CR2_other_lookup), 0)

# CR4  Ecological Plants (single-select; max = 3)
CR4_lookup <- c(
  uncultivateplant1 = 0, uncultivateplant2 = 1,
  uncultivateplant3 = 2, uncultivateplant4 = 3,
  plantecological1  = 0, plantecological2  = 1,
  plantecological3  = 2, plantecological4  = 3,
  "Je ne possède pas de plantes cultivées à des fins écologiques." = 0,
  "Un type de plante est cultivé pour un seul bénéfice écologique (par exemple, protection des pollinisateurs, régulation des nuisibles, purification de l'eau ou protection des cours d'eau). N'inclut pas les arbres ni les cultures de couverture." = 1,
  "Deux types de plantes sont cultivés pour un ou plusieurs bénéfices écologiques." = 2,
  "Trois plantes ou plus sont cultivées pour plusieurs bénéfices écologiques." = 3
)
raw$CR4 <- score_single("uncultivate_plant_ecological", CR4_lookup)

4.4 LIVESTOCK

no_animals <- bin_col("type_animal_sl_noanimal") == 1L

# AN1  Feed Access (conditional; max = 3)
AN1_lookup <- c(
  accessnutritious1 = NA_real_,
  accessnutritious2 = 0, accessnutritious3 = 1,
  accessnutritious4 = 2, accessnutritious5 = 3,
  "Acune application" = NA_real_,
  "Le bétail passe trois mois ou plus de l'année sans accès à une quantité suffisante d'aliments." = 0,
  "Le bétail passe un ou deux mois de l'année sans accès à une quantité suffisante d'aliments." = 1,
  "Le bétail a un accès constant à l'alimentation, mais manque d'accès régulier à certains nutriments." = 2,
  "Le bétail a un accès constant à une alimentation nutritive et diversifiée." = 3
)
raw$AN1 <- ifelse(no_animals, NA_real_, score_single("access_nutritious", AN1_lookup))

# AN2  Water for Livestock (conditional; max = 1)
AN2_lookup <- c(
  accessdrink1 = NA_real_, accessdrink2 = 0,
  accessdrink3 = 0,        accessdrink4 = 1,
  "Aucune application" = NA_real_,
  "Le bétail n'a pas accès à l'eau." = 0,
  "Le bétail a un accès irrégulier à l'eau et/ou l'eau fournie n'est pas propre." = 0,
  "Le bétail a un accès constant à de l'eau propre." = 1
)
raw$AN2 <- ifelse(no_animals, NA_real_, score_single("access_drink", AN2_lookup))

# AN3  Animal Diversity (multi-select binary; max = 10)
animal_cols <- paste0("type_animal_sl_", c(
  "typeanimal2","typeanimal3","typeanimal4","typeanimal5",
  "typeanimal6","typeanimal7","typeanimal8","typeanimal9",
  "typeanimal10","_neg_1"
))
raw$AN3 <- Reduce("+", lapply(animal_cols, bin_col))

# AN4  Veterinary Care (conditional; max = 3)
# [E3] "no vet care → 0" option missing from survey
AN4_lookup <- c(
  receivemedical1 = NA_real_,
  receivemedical2 = 1, receivemedical3 = 1,
  receivemedical4 = 2, receivemedical5 = 3,
  "Aucune application" = NA_real_,
  "Les animaux bénéficient de soins vétérinaires ou avec des plantes médicinales" = 1,
  "Certains animaux recoivent des soins vétérinaires ou des medicaments lorsqu'ils sont clairement malades." = 1,
  "Tous les animaux recoivent des soins vétérinaires des qu'une maladie est detectee." = 2,
  "Tous les animaux bénéficient de soins préventifs (vaccination, vermifugation, déparasitage, etc.) et reçoivent des soins vétérinaires et des médicaments dès qu'une maladie est détectée." = 3
)
raw$AN4 <- ifelse(no_animals, NA_real_, score_single("receive_medical", AN4_lookup))

# AN5  Crop-Livestock Integration (conditional; max = 3)
AN5_lookup <- c(
  integratebreed1 = 0, integratebreed2 = 1,
  integratebreed3 = 1.5, integratebreed4 = 3,
  "Aucune gestion d'élevage intégrée n'est pratiquée." = 0,
  "L'intégration cultures-élevage se limite à l'utilisation de fumier animal comme amendement du sol et/ou de cultures et d'adventices pour l'alimentation du bétail." = 1,
  "Un système intégré cultures-élevage or mixte est pratiqué." = 1.5,
  "Au moins deux systèmes intégrés cultures-élevage ou mixtes sont pratiqués." = 3
)
raw$AN5 <- ifelse(no_animals, NA_real_, score_single("intagrate_breed", AN5_lookup))

4.5 TREES & AGROFORESTRY

# TR1  Tree Integration (single-select; max = 3)
TR1_lookup <- c(
  cultivationbreeding1 = 0, cultivationbreeding2 = 1,
  cultivationbreeding3 = 2, cultivationbreeding4 = 3,
  "Les arbres ne sont pas intégrés aux systèmes de culture ou d'élevage/l'exploitation est dépourvue d'arbres." = 0,
  "Une pratique agroforestière* est utilisée pour intégrer les arbres aux systèmes de culture/d'élevage (par exemple, culture en couloirs, sylvopastoralisme, arbres d'ombrage, arbres à bois d'œuvre, etc.)." = 1,
  "Deux pratiques agroforestières sont utilisées pour intégrer les arbres aux systèmes de culture/d'élevage." = 2,
  "Trois pratiques agroforestières ou plus sont utilisées pour intégrer les arbres aux systèmes de culture/d'élevage." = 3
)
raw$TR1 <- score_single("cultivation_breeding", TR1_lookup)

# TR2  Shade Level – average over up to 4 plots (max = 3)
shade_plots <- c("niveauombrageune","niveauombragedeux",
                 "niveauombragetrois","niveauombragequatre")
shade_mat <- sapply(shade_plots, function(col) {
  if (col %in% names(df)) shade_score(df[[col]]) else rep(NA_real_, nrow(df))
})
raw$TR2 <- rowMeans(shade_mat, na.rm = TRUE)

# TR3  Perennial Species (single-select; max = 4)
TR3_lookup <- c(
  especenombre1 = 0, especenombre2 = 1, especenombre3 = 2,
  especenombre4 = 3, especenombre5 = 4,
  "0" = 0, "1 to 3" = 1, "4 to 5" = 2, "6 to 10" = 3, ">10" = 4
)
raw$TR3 <- score_single("espece_nombre", TR3_lookup)

# TR4  Forest Products (can be negative; max = 10, min = -1)
raw$TR4 <- score_binary(c(
  produit_forestier_sl_produitforesterie1  =  1,
  produit_forestier_sl_produitforesterie2  =  1,
  produit_forestier_sl_produitforesterie3  = -1,  # commercial firewood
  produit_forestier_sl_produitforesterie4  =  1,
  produit_forestier_sl_produitforesterie5  =  1,
  produit_forestier_sl_produitforesterie6  =  1,
  produit_forestier_sl_produitforesterie7  =  1,
  produit_forestier_sl_produitforesterie8  =  1,
  produit_forestier_sl_produitforesterie9  =  1,
  produit_forestier_sl_produitforesterie10 =  1,
  produit_forestier_sl_1                   =  1   # other forest product
))

# TR5  Tree Fodder (conditional; max = 4)
TR5_lookup <- c(
  fourragearboricole1 = NA_real_,
  fourragearboricole2 = 1, fourragearboricole3 = 2,
  fourragearboricole4 = 3, fourragearboricole5 = 4,
  "L’exploitation ne possède pas d’élevage." = NA_real_,
  "Aucun arbre n’est utilisé comme fourrage sur l’exploitation, ou il n’y a pas d’arbres sur l’exploitation." = 1,
  "Un seul type de fourrage arboricole est disponible une partie de l’année (récolté ou intégré aux pâturages)." = 2,
  "Deux types de fourrage arboricole sont disponibles une partie de l’année (récolté ou intégré aux pâturages)." = 3,
  "Plus de deux types de fourrage arboricole sont disponibles toute l’année (récolté ou intégré aux pâturages)." = 4
)
raw$TR5 <- ifelse(no_animals, NA_real_, score_single("fourrage_production", TR5_lookup))

4.6 PEST & DISEASE MANAGEMENT

# PD1  Chemical Weeding  (two sub-questions; max = 9)
# [E6] Two parallel question sets; coalesce: lutte_herbicide or lutte_cacao
PD1_cacao_lookup <- c(
  herbicidecacao1 = 0, herbicidecacao2 = 2,
  herbicidecacao3 = 4, herbicidecacao4 = 6,
  luttecacao1 = 0,     luttecacao2 = 2,
  luttecacao3 = 4,     luttecacao4 = 6,
  "Dans toutes mes parcelles" = 0,
  "Dans la plupart de mes parcelles" = 2,
  "Uniquement dans certaines parcelles ou conditions" = 4,
  "Jamais" = 6
)
PD1_other_lookup <- c(
  herbicidesans1 = 0, herbicidesans2 = 1,
  herbicidesans3 = 2, herbicidesans4 = 3,
  luttecacaosans1 = 0, luttecacaosans2 = 1,
  luttecacaosans3 = 2, luttecacaosans4 = 3,
  "Dans toutes mes parcelles" = 0,
  "Dans la plupart de mes parcelles" = 1,
  "Uniquement dans certaines parcelles ou conditions" = 2,
  "Jamais" = 3
)
pd1_cacao <- coalesce2(score_single("lutte_herbicide", PD1_cacao_lookup),
                       score_single("lutte_cacao",     PD1_cacao_lookup))
pd1_other <- coalesce2(score_single("lutte_cacao_sans",    PD1_other_lookup),
                       score_single("lutte_herbicide_sans", PD1_other_lookup))
raw$PD1 <- replace_na(pd1_cacao, 0) + replace_na(pd1_other, 0)

# PD2  Manual/Mechanical Weeding Effectiveness (single-select; max = 3)
PD2_lookup <- c(
  desherbagecacao1 = 0, desherbagecacao2 = 1.5, desherbagecacao3 = 3,
  "La plupart de mes parcelles présentent des mauvaises herbes non maîtrisées" = 0,
  "Dans la plupart de mes parcelles, certaines mauvaises herbes ne sont pas bien maîtrisées" = 1.5,
  "Dans toutes mes parcelles, les mauvaises herbes sont maîtrisées. Les mauvaises herbes n'affectent pas mon cacao." = 3
)
raw$PD2 <- score_single("desherbage_cacao", PD2_lookup)

# PD3  Weeding Residue Management (multi-select binary; max = 12)
raw$PD3 <- score_binary(c(
  # Cacao plots (each practice = 2; not-used or burned = 0)
  `residu_valeur_sl_residuvaleur1` = 0,   # not used
  `residu_valeur_sl_residuvaleur2` = 2,   # incorporated
  `residu_valeur_sl_residuvaleur3` = 2,   # mulch
  `residu_valeur_sl_residuvaleur4` = 2,   # composting
  `residu_valeur_sl_residuvaleur5` = 2,   # fertiliser
  `residu_valeur_sl_residuvaleur6` = 0,   # burned
  # Other plots (each = 1)
  `residu_valeur_sans_sl_residuvaleursans1` = 0,
  `residu_valeur_sans_sl_residuvaleursans2` = 1,
  `residu_valeur_sans_sl_residuvaleursans3` = 1,
  `residu_valeur_sans_sl_residuvaleursans4` = 1,
  `residu_valeur_sans_sl_residuvaleursans5` = 1,
  `residu_valeur_sans_sl_residuvaleursans6` = 0
))

# PD4  Chemical Pest Control (two single-selects; max = 9)
PD4_cacao_lookup <- c(
  chimiquemaladie1 = 0, chimiquemaladie2 = 2,
  chimiquemaladie3 = 4, chimiquemaladie4 = 6,
  "Toujours sur la plupart de mes parcelles" = 0,
  "Sur certaines parcelles, certaines années" = 2,
  "Seulement dans quelques cas spécifiques" = 4,
  "Jamais" = 6
)
PD4_other_lookup <- c(
  chimiquemaladiesans1 = 0, chimiquemaladiesans2 = 1,
  chimiquemaladiesans3 = 2, chimiquemaladiesans4 = 3,
  "Toujours sur la plupart de mes parcelles" = 0,
  "Sur certaines parcelles, certaines années" = 1,
  "Seulement dans quelques cas spécifiques" = 2,
  "Jamais" = 3
)
raw$PD4 <- replace_na(score_single("chimique_maladie",      PD4_cacao_lookup), 0) +
           replace_na(score_single("chimique_maladie_sans",  PD4_other_lookup), 0)

# PD5  Hazardous Pesticides (can be negative; max = 3, min = -12)
raw$PD5 <- score_binary(c(
  `pesticide_cacao_sl_pesticidecacao1` =  2,  # none used → bonus
  `pesticide_cacao_sl_pesticidecacao2` = -2,
  `pesticide_cacao_sl_pesticidecacao3` = -2,
  `pesticide_cacao_sl_pesticidecacao4` = -2,
  `pesticide_cacao_sl_pesticidecacao5` = -2,
  `pesticide_cacao_sl_pesticidecacao6` = -2,
  `pesticide_cacao_sans_sl_pesticidecacaosans1` =  1,
  `pesticide_cacao_sans_sl_pesticidecacaosans2` = -1,
  `pesticide_cacao_sans_sl_pesticidecacaosans3` = -1,
  `pesticide_cacao_sans_sl_pesticidecacaosans4` = -1,
  `pesticide_cacao_sans_sl_pesticidecacaosans5` = -1,
  `pesticide_cacao_sans_sl_pesticidecacaosans6` = -1
))

# PD6  IPM Practices (multi-select binary; max = 7)
raw$PD6 <- score_binary(c(
  lutte_integre_sl_lutteintegre1 = 0,  # no IPM
  lutte_integre_sl_lutteintegre2 = 1,  lutte_integre_sl_lutteintegre3 = 1,
  lutte_integre_sl_lutteintegre4 = 1,  lutte_integre_sl_lutteintegre5 = 1,
  lutte_integre_sl_lutteintegre6 = 1,  lutte_integre_sl_lutteintegre7 = 1,
  lutte_integre_sl_4             = 1   # other IPM
))

# PD7  Biological Control (multi-select binary; max = 2)
raw$PD7 <- score_binary(c(
  pratique_maladie_sl_pratiquemaladie1 = 0,
  pratique_maladie_sl_pratiquemaladie2 = 1,
  pratique_maladie_sl_pratiquemaladie3 = 1
))

# PD8  Pesticide Safety (3 sub-questions; max = 6, min = -3)
PD8_q1_lookup <- c(
  equipementprotec1 = -1, equipementprotec2 = 0, equipementprotec3 = 2,
  "Aucune protection" = -1,
  "Protection de base (par exemple, bottes/gants uniquement)" = 0,
  "Protection complète (masque, gants, manches longues, bottes)" = 2
)
# stockage: binary sub-columns — take max score if multiple selected
pd8_stock <- pmax(
  ifelse(bin_col("stockage_pesticide_sl_stockagepest1") == 1, -1, NA_real_),
  ifelse(bin_col("stockage_pesticide_sl_stockagepest2") == 1,  0, NA_real_),
  ifelse(bin_col("stockage_pesticide_sl_stockagepest3") == 1,  2, NA_real_),
  na.rm = TRUE
)
PD8_q3_lookup <- c(
  bouteillepest1 = -1, bouteillepest2 = 0, bouteillepest3 = 2,
  "Nous laissons les emballages et bouteilles vides à n'importe quel endroit de l'exploitation" = -1,
  "Nous disposons d'un emplacement dédié dans l'exploitation où nous enterrons les emballages et bouteilles vides" = 0,
  "Nous les accumulons dans un local ou un conteneur spécial, puis nous les rapportons à la ville ou aux fournisseurs, où ils sont éliminés de manière appropriée." = 2
)
raw$PD8 <- replace_na(score_single("equipement_protection", PD8_q1_lookup), 0) +
           replace_na(pd8_stock, 0) +
           replace_na(score_single("bouteille_pesticide", PD8_q3_lookup), 0)

4.7 ENERGY

# EN1  Renewable Energy Use (single-select; max = 3)
# [E4] exploitationenergie5 "No reserves" is erroneous → NA
EN1_lookup <- c(
  exploitationenergie1 = 0, exploitationenergie2 = 1,
  exploitationenergie3 = 2, exploitationenergie4 = 3,
  exploitationenergie5 = NA_real_,
  "Aucune énergie renouvelable n’est utilisée ou produite sur l’exploitation" = 0,
  "La majorité de l'énergie est non renouvelable, mais une petite quantité d'énergie renouvelable est utilisée" = 1,
  "La moitié de l’énergie utilisée est renouvelable et produite sur l’exploitation, l’autre moitié est achetée" = 2,
  "Presque toute l’énergie nécessaire est couverte par des énergies renouvelables. L'utilisation des énergies non-renouvelables est très faible" = 3
)
raw$EN1 <- score_single("energie_exploitation", EN1_lookup)

# EN2  Energy Saving Practices (multi-select binary; max = 6)
raw$EN2 <- score_binary(c(
  energie_reserve_sl_energiereserve1 = 0,  # None
  energie_reserve_sl_energiereserve2 = 1,  energie_reserve_sl_energiereserve3 = 1,
  energie_reserve_sl_energiereserve4 = 1,  energie_reserve_sl_energiereserve5 = 1,
  energie_reserve_sl_energiereserve6 = 1,  energie_reserve_sl_6               = 1
))

5.1 Menage

# HH1
HH1_lookup <- c(
  activnonagricole1 = 0, activnonagricole2 = 1, activnonagricole3 = 2,
  "Aucun membre du ménage n'exerce d'activité non agricole" = 0,
  "Au moins un membre du ménage exerce un travail non agricole à temps partiel pour un revenu complémentaire" = 1,
  "Deux ou plus membres du ménage exercent un travail non agricole à temps partiel pour un revenu complémentaire" = 2
)
raw$HH1 <- score_single("non_agricole", HH1_lookup)

# HH2
HH2_lookup <- c(
  alimentationbesoin1 = 0, alimentationbesoin2 = 1,
  alimentationbesoin3 = 2, alimentationbesoin4 = 3,
  "Le ménage n'a pas accès à une quantité suffisante d'aliments venant de son exploitation ou du marché, pour répondre à ses besoins nutritionnels tout au long de l'année" = 0,
  "Le ménage a accès à une quantité suffisante de nourriture; venant de son exploitation ou du marché, mais son alimentation n'est pas suffisamment diversifiée pour répondre aux besoins nutritionnels du ménage" = 1,
  "Le ménage a accès à une quantité suffisante d'aliments sains et variés, venant de son exploitation ou du marché, pour répondre à ses besoins nutritionnels une majeure partie de l'année" = 2,
  "Le ménage a accès régulièrement à une quantité suffisante d'aliments sains, variés et culturellement acceptés, venant de son exploitation ou du marché, pour répondre à ses besoins nutritionnels en toute période de l'année" = 3
)
raw$HH2 <- score_single("alimentation_besoin", HH2_lookup)

# HH3
HH3_lookup <- c(
  servicefinance1 = 0, servicefinance2 = 1, servicefinance3 = 2,
  "Aucun membre du ménage n'a accès à des services financiers ou de façon très limitée (service informel)" = 0,
  "Le ménage a accès à des services financiers pour couvrir ses coûts (de production) et ses besoins de base." = 1,
  "Le ménage a accès à des services financiers pour des investissements plus importants" = 2
)
raw$HH3 <- score_single("service_finance", HH3_lookup)

# HH4  Land Tenure (multi-select binary; take max; max = 3)
hh4_mat <- sapply(c(
  docfoncier1 = 0, docfoncier2 = 1, docfoncier3 = 2, docfoncier4 = 3
), function(sc) {
  col <- paste0("doc_foncier_sl_", names(sc))
  ifelse(bin_col(col) == 1L, sc, NA_real_)
})
raw$HH4 <- apply(hh4_mat, 1, max, na.rm = TRUE)

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf

raw$HH4[!is.finite(raw$HH4)] <- NA_real_

# Simpler HH4 using pmax:
hh4_scores <- c(doc_foncier_sl_docfoncier1 = 0, doc_foncier_sl_docfoncier2 = 1,
                doc_foncier_sl_docfoncier3 = 2, doc_foncier_sl_docfoncier4 = 3)
raw$HH4 <- do.call(pmax, c(
  lapply(names(hh4_scores), function(col)
    ifelse(bin_col(col) == 1L, hh4_scores[[col]], NA_real_)),
  list(na.rm = TRUE)
))

# HH5  Nutritional Habits (multi-select binary; max = 7)
raw$HH5 <- score_binary(setNames(rep(1, 7), paste0(
  "hab_nutrition_sl_habnutrition", 1:7
)))
# HH6  – shared Likert lookup used for all three sub-questions
likert5 <- c(
  humancompet1 = 0, humancompet2 = 1, humancompet3 = 2, humancompet4 = 3, humancompet5 = 4,
  humandecision1 = 0, humandecision2 = 1, humandecision3 = 2, humandecision4 = 3, humandecision5 = 4,
  humanpropriete1 = 0, humanpropriete2 = 1, humanpropriete3 = 2, humanpropriete4 = 3, humanpropriete5 = 4,
  "Très en désaccord" = 0,
  "En désaccord" = 1,
  "Pas d'opinion" = 2,
  "D'accord" = 3,
  "Tout à fait d'accord" = 4
)
hh6_mat <- cbind(
  score_single("human_competence", likert5),
  score_single("human_decision",   likert5),
  score_single("human_propriete",  likert5)
)
raw$HH6 <- rowMeans(hh6_mat, na.rm = TRUE)

5.2 Main d’oeuvre

# WR1  Workforce size (placeholder = 1; scoring formula to be defined)
raw$WR1 <- 1

# WR2  Wages vs. 3500 FCFA/day reference (max = 3)
# salaire_mensuel assumed monthly → daily = /26
# journalier_employe already daily
MAX_DAILY <- 3500
sal_monthly  <- suppressWarnings(as.numeric(df$salaire_mensuel))
sal_unit     <- as.character(df$unite_salaire)
# Convert permanent wage to daily; flag campaign wages as NA
daily_perm <- case_when(
  sal_unit %in% c("unitésalaire1", "unitesalaire1") ~ sal_monthly,
  sal_unit %in% c("unitésalaire2", "unitesalaire2") ~ sal_monthly / 26,
  TRUE ~ NA_real_   # campaign wage (unitésalaire3) not reliably convertible
)
sal_occasion <- suppressWarnings(as.numeric(df$journalier_employe))
wr2_perm <- pmin(replace_na(daily_perm,  0) / MAX_DAILY, 1) * 3
wr2_occ  <- pmin(replace_na(sal_occasion, 0) / MAX_DAILY, 1) * 3
raw$WR2  <- rowMeans(cbind(wr2_perm, wr2_occ), na.rm = TRUE)

# WR3  Employee health & safety (multi-select binary; max = 6)
raw$WR3 <- score_binary(c(
  sanitaire_employe_sl_sanitairemploye1 = 1,
  sanitaire_employe_sl_sanitairemploye2 = 1,
  sanitaire_employe_sl_sanitairemploye3 = 1,
  sanitaire_employe_sl_sanitairemploye4 = 1,
  sanitaire_employe_sl_sanitairemploye5 = 1,
  sanitaire_employe_sl_5                = 1
))

5.3 Communite

# CM1  Community Group Membership (multi-select binary; max = 6)
raw$CM1 <- score_binary(setNames(rep(1L, 6), paste0(
  "activ_communautaire_sl_activcommunautaire", 1:6
)))

# CM2  Collective Resource Management (two multi-selects; max = 17)
raw$CM2 <- score_binary(setNames(rep(1L, 12), paste0(
               "collect_communautaire_sl_collectcommunautaire", 1:12))) +
           score_binary(setNames(rep(1L, 5), paste0(
               "gestion_ressource_sl_gestionressource", 1:5)))

# CM3  (note: "Pas d'opinion" = 1.5, different from HH6)
CM3_lookup <- c(
  pouvoirlegal1 = 0, pouvoirlegal2 = 1, pouvoirlegal3 = 1.5,
  pouvoirlegal4 = 2, pouvoirlegal5 = 3,
  "Très en désaccord" = 0,
  "En désaccord" = 1,
  "Pas d'opinion" = 1.5,
  "D'accord" = 2,
  "Tout à fait d'accord" = 3
)
raw$CM3 <- score_single("pouvoir_legal", CM3_lookup)

# CM4
CM4_lookup <- c(
  activcocreation1 = 0, activcocreation2 = 1,
  activcocreation3 = 2, activcocreation4 = 3,
  "Non, n'a jamais participé à des activités de co-création" = 0,
  "Peu de fois" = 1,
  "Quelquefois" = 2,
  "Quelque fois" = 2,   # variant spelling found in data
  "Fréquemment" = 3
)
raw$CM4 <- score_single("activ_cocreation", CM4_lookup)

# CM5
CM5_lookup <- c(
  setNames(0:4, paste0("confiancegest", 1:5)),
  "Très en désaccord" = 0, "En désaccord" = 1, "Pas d'opinion" = 2,
  "D'accord" = 3, "Tout à fait d'accord" = 4
)
raw$CM5 <- score_single("confiance_gest", CM5_lookup)

# CM6
CM6_lookup <- c(
  setNames(0:4, paste0("promcommunautaire", 1:5)),
  "Très en désaccord" = 0, "En désaccord" = 1, "Pas d'opinion" = 2,
  "D'accord" = 3, "Tout à fait d'accord" = 4
)
raw$CM6 <- score_single("prom_communautaire", CM6_lookup)

# CM7
CM7_lookup <- c(
  setNames(0:4, paste0("commpouvoir", 1:5)),
  "Très en désaccord" = 0, "En désaccord" = 1, "Pas d'opinion" = 2,
  "D'accord" = 3, "Tout à fait d'accord" = 4
)
raw$CM7 <- score_single("comm_pouvoir", CM7_lookup)

5.4 Chaine de valeur

# VC1  Sales Channel – max score per buyer type (max = 3)
vc1_cacao <- do.call(pmax, c(list(
  ifelse(bin_col("acheteur_cacao_sl_acheteurcacao1") == 1L, 3, NA_real_),
  ifelse(bin_col("acheteur_cacao_sl_acheteurcacao2") == 1L, 3, NA_real_),
  ifelse(bin_col("acheteur_cacao_sl_acheteurcacao3") == 1L, 1, NA_real_)
), list(na.rm = TRUE)))
vc1_other <- do.call(pmax, c(list(
  ifelse(bin_col("acheteur_cacao_sans_sl_acheteursans1") == 1L, 3, NA_real_),
  ifelse(bin_col("acheteur_cacao_sans_sl_acheteursans2") == 1L, 3, NA_real_),
  ifelse(bin_col("acheteur_cacao_sans_sl_acheteursans3") == 1L, 1, NA_real_)
), list(na.rm = TRUE)))
raw$VC1 <- rowMeans(cbind(vc1_cacao, vc1_other), na.rm = TRUE)

# VC2  Cooperative Membership (two yes/no; max = 2)
raw$VC2 <- as.integer(df$coop_cacao      %in% c("ouicoop1", "Oui")) +
           as.integer(df$coop_cacao_sans  %in% c("nonouicoop1", "Oui"))

# VC3  Farm Records (two yes/no; max = 2)
raw$VC3 <- as.integer(df$registre_cacao      %in% c("registrecacao1", "oui")) +
           as.integer(df$registre_cacao_sans  %in% c("registresans1",  "oui"))

# VC4  Premium Price (single-select; max = 3)
VC4_lookup <- c(
  prixpremium1 = 0, prixpremiumautre1 = 1,
  prixpremiumautre2 = 2, prixpremiumautre3 = 3,
  "Jamais" = 0,
  "Quelquefois" = 1,
  "Quelque fois" = 1,   # variant spelling
  "Fréquemment" = 2,
  "Toujours" = 3
)
raw$VC4 <- score_single("prix_premium", VC4_lookup)

# POL1  Policy Participation (single-select; max = 4)
# Note: column name has capital I: polItique_agri
# POL1
POL1_lookup <- c(
  politiqueagri1 = 0, politiqueagri2 = 1, politiqueagri3 = 2,
  politiqueagri4 = 3, politiqueagri5 = 4,
  "Ne participe pas et n'est pas informé des politiques" = 0,
  "A été informé de certaines politiques" = 1,
  "A été consulté pour certaines politiques" = 2,
  "A été activement engagé dans des processus de prise de décisions" = 3,
  "Est en mesure d'influencer les processus de prise de décisions" = 4
)
pol_col <- if ("politique_agri" %in% names(df)) "politique_agri" else
           grep("politiqu.*agri", names(df), value = TRUE, ignore.case = TRUE)[1]
raw$POL1 <- score_single(pol_col, POL1_lookup)

—-Save database on absolute values

write_xlsx(raw, path = "D:/Mes Donnees/CANALLs/AET Scoring/Data/absolute_values.xlsx")

—6. NORMALISE — distance-to-target (0-1)—

norms <- list(
  # Farm
  SO1  = c(18, 0),  SO2  = c(3, 0),   SO3  = c(4, 0),   SO4  = c(3, 0),
  SO5  = c(3, 0),   SO6  = c(12, 0),
  WM1  = c(4, 0),   WM2  = c(2, 0),   WM3  = c(3, 0),
  CR1  = c(8, 0),   CR2  = c(9, 0),   CR4  = c(3, 0),
  AN1  = c(3, 0),   AN2  = c(1, 0),   AN3  = c(10, 0),
  AN4  = c(3, 0),   AN5  = c(3, 0),
  TR1  = c(3, 0),   TR2  = c(3, 0),   TR3  = c(4, 0),
  TR4  = c(10, -1), TR5  = c(4, 0),
  PD1  = c(9, 0),   PD2  = c(3, 0),   PD3  = c(12, 0),
  PD4  = c(9, 0),   PD5  = c(3, -12), PD6  = c(7, 0),
  PD7  = c(2, 0),   PD8  = c(6, -3),
  EN1  = c(3, 0),   EN2  = c(6, 0),
  # Social
  HH1  = c(2, 0),   HH2  = c(3, 0),   HH3  = c(2, 0),
  HH4  = c(3, 0),   HH5  = c(7, 0),   HH6  = c(4, 0),
  WR1  = c(1, 0),   WR2  = c(3, 0),   WR3  = c(6, 0),
  CM1  = c(6, 0),   CM2  = c(17, 0),  CM3  = c(3, 0),
  CM4  = c(3, 0),   CM5  = c(4, 0),   CM6  = c(4, 0),  CM7 = c(4, 0),
  VC1  = c(3, 0),   VC2  = c(2, 0),   VC3  = c(2, 0),
  VC4  = c(3, 0),   POL1 = c(4, 0)
)

norm <- tibble(farmer_id = raw$farmer_id)
for (ind in names(norms)) {
  if (!ind %in% names(raw)) next
  mx <- norms[[ind]][1]
  mn <- norms[[ind]][2]
  s  <- raw[[ind]]
  if (ind == "CR1") s <- pmin(s, 8)       # cap at 8
  norm[[paste0(ind, "_n")]] <- norm_dtt(s, mx, mn)
}
norm <- tibble(
  farmer_id = raw$farmer_id,
  zone      = raw$zone,
  Type      = raw$Type
)
for (ind in names(norms)) {
  if (!ind %in% names(raw)) next
  mx <- norms[[ind]][1]
  mn <- norms[[ind]][2]
  s  <- raw[[ind]]
  if (ind == "CR1") s <- pmin(s, 8)
  norm[[paste0(ind, "_n")]] <- norm_dtt(s, mx, mn)
}

norm <- norm[norm$farmer_id != "_NA", ]
raw  <- raw[raw$farmer_id  != "_NA", ]

write_xlsx(norm, path = "D:/Mes Donnees/CANALLs/AET Scoring/Data/norm.xlsx")

— 7. Fill gaps and not aplicables using averages for same type

Different approaches where considered. For now, we decided to fill the gaps using average of the same farm type (i.e., type-specific mean imputation). As the database grows , farm types may exclude completely some indicators (e.g., a farm type may be without considering livivestock)

First lets check the NA per variable per type

# ── NA count per indicator per Type ──────────────────────────────────────────

# Get all normalised indicator columns
ind_cols <- names(norm)[str_detect(names(norm), "_n$")]

# Count NAs per indicator per Type
na_by_type <- norm %>%
  group_by(Type) %>%
  summarise(
    n_farms = n(),
    across(all_of(ind_cols),
           ~ sum(is.na(.x)),
           .names = "{.col}"),
    .groups = "drop"
  ) %>%
  # Add a total row across all types
  bind_rows(
    norm %>%
      summarise(
        Type    = "ALL",
        n_farms = n(),
        across(all_of(ind_cols), ~ sum(is.na(.x)))
      )
  )

# Also express as % of farms in each type
na_pct_by_type <- norm %>%
  group_by(Type) %>%
  summarise(
    n_farms = n(),
    across(all_of(ind_cols),
           ~ round(sum(is.na(.x)) / n() * 100, 1),
           .names = "{.col}"),
    .groups = "drop"
  ) %>%
  bind_rows(
    norm %>%
      summarise(
        Type    = "ALL",
        n_farms = n(),
        across(all_of(ind_cols),
               ~ round(sum(is.na(.x)) / n() * 100, 1))
      )
  )

# Print both tables
cat("\n=== NA COUNT per Type ===\n")

## 
## === NA COUNT per Type ===

print(as.data.frame(na_by_type))

##         Type n_farms SO1_n SO2_n SO3_n SO4_n SO5_n SO6_n WM1_n WM2_n WM3_n
## 1     Forest      26     0     0     0    20     0     0     0    15    15
## 2   Savannah      23     0     0     0    15     0     0     0    11    11
## 3 Transition      10     0     0     0     7     0     0     0     4     4
## 4        ALL      59     0     0     0    42     0     0     0    30    30
##   CR1_n CR2_n CR4_n AN1_n AN2_n AN3_n AN4_n AN5_n TR1_n TR2_n TR3_n TR4_n TR5_n
## 1     1     0     0    22    20     0    18    12     0     0     0     0    24
## 2     0     0     0    14     9     0    16     8     0     5     0     0    15
## 3     0     0     0     6     7     0     7     4     0     1     0     0     7
## 4     1     0     0    42    36     0    41    24     0     6     0     0    46
##   PD1_n PD2_n PD3_n PD4_n PD5_n PD6_n PD7_n PD8_n EN1_n EN2_n HH1_n HH2_n HH3_n
## 1     0     0     0     0     0     0     0     0     1     0     2     0     0
## 2     0     0     0     0     0     0     0     0     3     0     4     0     0
## 3     0     0     0     0     0     0     0     0     1     0     0     0     0
## 4     0     0     0     0     0     0     0     0     5     0     6     0     0
##   HH4_n HH5_n HH6_n WR1_n WR2_n WR3_n CM1_n CM2_n CM3_n CM4_n CM5_n CM6_n CM7_n
## 1     0     0     0     0     0     0     0     0     0     0     0     0     0
## 2     0     0     0     0     0     0     0     0     0     0     0     0     0
## 3     0     0     1     0     0     0     0     0     0     0     0     0     0
## 4     0     0     1     0     0     0     0     0     0     0     0     0     0
##   VC1_n VC2_n VC3_n VC4_n POL1_n
## 1     0     0     0     0      0
## 2     0     0     0     0      0
## 3     0     0     0     0      0
## 4     0     0     0     0      0

cat("\n=== NA % per Type ===\n")

## 
## === NA % per Type ===

print(as.data.frame(na_pct_by_type))

##         Type n_farms SO1_n SO2_n SO3_n SO4_n SO5_n SO6_n WM1_n WM2_n WM3_n
## 1     Forest      26     0     0     0  76.9     0     0     0  57.7  57.7
## 2   Savannah      23     0     0     0  65.2     0     0     0  47.8  47.8
## 3 Transition      10     0     0     0  70.0     0     0     0  40.0  40.0
## 4        ALL      59     0     0     0  71.2     0     0     0  50.8  50.8
##   CR1_n CR2_n CR4_n AN1_n AN2_n AN3_n AN4_n AN5_n TR1_n TR2_n TR3_n TR4_n TR5_n
## 1   3.8     0     0  84.6  76.9     0  69.2  46.2     0   0.0     0     0  92.3
## 2   0.0     0     0  60.9  39.1     0  69.6  34.8     0  21.7     0     0  65.2
## 3   0.0     0     0  60.0  70.0     0  70.0  40.0     0  10.0     0     0  70.0
## 4   1.7     0     0  71.2  61.0     0  69.5  40.7     0  10.2     0     0  78.0
##   PD1_n PD2_n PD3_n PD4_n PD5_n PD6_n PD7_n PD8_n EN1_n EN2_n HH1_n HH2_n HH3_n
## 1     0     0     0     0     0     0     0     0   3.8     0   7.7     0     0
## 2     0     0     0     0     0     0     0     0  13.0     0  17.4     0     0
## 3     0     0     0     0     0     0     0     0  10.0     0   0.0     0     0
## 4     0     0     0     0     0     0     0     0   8.5     0  10.2     0     0
##   HH4_n HH5_n HH6_n WR1_n WR2_n WR3_n CM1_n CM2_n CM3_n CM4_n CM5_n CM6_n CM7_n
## 1     0     0   0.0     0     0     0     0     0     0     0     0     0     0
## 2     0     0   0.0     0     0     0     0     0     0     0     0     0     0
## 3     0     0  10.0     0     0     0     0     0     0     0     0     0     0
## 4     0     0   1.7     0     0     0     0     0     0     0     0     0     0
##   VC1_n VC2_n VC3_n VC4_n POL1_n
## 1     0     0     0     0      0
## 2     0     0     0     0      0
## 3     0     0     0     0      0
## 4     0     0     0     0      0

write_xlsx(na_by_type, path = "D:/Mes Donnees/CANALLs/AET Scoring/Data/na_by_type.xlsx")

for mow will replace all the NA by the “median”o of the farm type, for all variables. this needs to be re-visited once we have more data

# ── Median imputation by Type ─────────────────────────────────────────────────
# Replace NA in all normalised indicator columns with the median
# of the same Type group (covers both structural and data quality NAs)

ind_cols_n <- names(norm)[str_detect(names(norm), "_n$")]

norm <- norm %>%
  group_by(Type) %>%
  mutate(across(
    all_of(ind_cols_n),
    ~ ifelse(is.na(.x), median(.x, na.rm = TRUE), .x)
  )) %>%
  ungroup()

— 8. Relativize comparing each indicator to the average of the type

# --- 8. Relativize normalized scores by Type maximum -------------------------
# Divide each farm's normalized score by the maximum of that indicator
# within the same Type group.
# Result: values 0-1 where 1 = at or above the best farm in the Type
# 0/0 case (Type max = 0): assign 1 (neutral)

ind_cols_n <- names(norm)[str_detect(names(norm), "_n$")]
rel_cols   <- str_replace(ind_cols_n, "_n$", "_r")

indicators_relativized <- norm %>%
  select(farmer_id, zone, Type) %>%
  bind_cols(
    norm %>%
      select(all_of(ind_cols_n)) %>%
      bind_cols(select(norm, Type)) %>%
      group_by(Type) %>%
      mutate(across(
        all_of(ind_cols_n),
        ~ {
          type_max <- max(.x, na.rm = TRUE)
          if (is.na(type_max) || type_max == 0) {
            rep(1, length(.x))
          } else {
            .x / type_max
          }
        },
        .names = "{str_replace(.col, '_n$', '_r')}"
      )) %>%
      ungroup() %>%
      select(all_of(rel_cols))
  )

# ── Check 0/0 cases ──────────────────────────────────────────────────────────
zero_type_max <- norm %>%
  select(all_of(ind_cols_n), Type) %>%
  group_by(Type) %>%
  summarise(across(all_of(ind_cols_n),
                   ~ as.integer(max(.x, na.rm = TRUE) == 0 |
                                 is.na(max(.x, na.rm = TRUE))),
                   .names = "{.col}"),
            .groups = "drop") %>%
  pivot_longer(-Type, names_to = "indicator", values_to = "is_zero") %>%
  filter(is_zero == 1)

if (nrow(zero_type_max) > 0) {
  cat("\nIndicators with Type max = 0 (assigned neutral score of 1):\n")
  print(zero_type_max)
} else {
  cat("\nNo 0/0 cases found.\n")
}

## 
## Indicators with Type max = 0 (assigned neutral score of 1):
## # A tibble: 2 × 3
##   Type       indicator is_zero
##   <chr>      <chr>       <int>
## 1 Transition SO4_n           1
## 2 Transition AN2_n           1

write_xlsx(indicators_relativized,
           path = "D:/Mes Donnees/CANALLs/AET Scoring/Data/indicators_relativized.xlsx")
message("✓ Saved to indicators_relativized.xlsx")

## ✓ Saved to indicators_relativized.xlsx

— 9. Weigh indicators values

#Now transform the normalized scores to weighed scored

# --- 9. Weight the relativized scores ----------------------------------------
# Multiply each _r score by its overall weight
# Result: weighted scores as proportion of maximum achievable weight per indicator
# Composite scores on 0-100 scale

Weights_indicators <- read_excel("D:/Mes Donnees/CANALLs/AET Scoring/Data/Weights_indicators_per.xlsx")

weight_vec <- setNames(
  Weights_indicators$Overal_weight,
  Weights_indicators$Indicator
)

# Match _r columns to weights (via indicator name)
# _r columns named e.g. SO1_r; weights keyed on SO1_n → strip suffix, remap
common_inds   <- intersect(ind_cols_n, names(weight_vec))  # matched on _n names

missing_from_norm <- setdiff(names(weight_vec), ind_cols_n)
if (length(missing_from_norm) > 0)
  message("In weights but missing from norm (skipped): ",
          paste(missing_from_norm, collapse = ", "))

## In weights but missing from norm (skipped): CR3_n

# Renormalise weights so they sum to 100
w_present      <- weight_vec[common_inds]
w_present_norm <- w_present / sum(w_present) * 100

# Column name vectors
r_cols_matched <- str_replace(common_inds, "_n$", "_r")  # relativized cols
weighted_cols  <- str_replace(common_inds, "_n$", "_w")  # output weighted cols

# Farm vs social split
farm_inds    <- common_inds[str_detect(common_inds, "^(SO|WM|CR|AN|TR|PD|EN)")]
social_inds  <- common_inds[str_detect(common_inds, "^(HH|WR|CM|VC|PO)")]
farm_w_cols  <- str_replace(farm_inds,   "_n$", "_w")
social_w_cols<- str_replace(social_inds, "_n$", "_w")

# ── Build indicators_weighted ─────────────────────────────────────────────────
indicators_weighted <- indicators_relativized %>%
  select(farmer_id, zone, Type)

# Add relativized columns
for (col in r_cols_matched) {
  indicators_weighted[[col]] <- indicators_relativized[[col]]
}

# Add weighted columns: relativized score × renormalised weight
for (i in seq_along(common_inds)) {
  indicators_weighted[[weighted_cols[i]]] <-
    indicators_relativized[[r_cols_matched[i]]] * w_present_norm[i]
}

# Farm index: rescaled to 0-100
farm_w_sum <- sum(w_present_norm[farm_inds])
indicators_weighted$farm_index <- rowSums(
  indicators_weighted[, farm_w_cols], na.rm = FALSE
) / farm_w_sum * 100

# Social index: rescaled to 0-100
social_w_sum <- sum(w_present_norm[social_inds])
indicators_weighted$social_index <- rowSums(
  indicators_weighted[, social_w_cols], na.rm = FALSE
) / social_w_sum * 100

# Overall: average of farm and social
indicators_weighted$overall_index <- rowMeans(
  cbind(indicators_weighted$farm_index,
        indicators_weighted$social_index),
  na.rm = TRUE
)

# ── Add absolute weighted scores (_a) ─────────────────────────────────────────
# _a = normalized score (_n) × renormalised weight
# No relativization — pure absolute weighted score

abs_cols <- str_replace(common_inds, "_n$", "_a")

# Pull normalised scores directly from norm
for (i in seq_along(common_inds)) {
  indicators_weighted[[abs_cols[i]]] <-
    norm[[common_inds[i]]] * w_present_norm[i]
}

# Absolute composite scores
farm_a_cols   <- str_replace(farm_inds,   "_n$", "_a")
social_a_cols <- str_replace(social_inds, "_n$", "_a")

indicators_weighted$farm_index_a <- rowSums(
  indicators_weighted[, farm_a_cols], na.rm = FALSE
) / farm_w_sum * 100

indicators_weighted$social_index_a <- rowSums(
  indicators_weighted[, social_a_cols], na.rm = FALSE
) / social_w_sum * 100

indicators_weighted$overall_index_a <- rowMeans(
  cbind(indicators_weighted$farm_index_a,
        indicators_weighted$social_index_a),
  na.rm = TRUE
)

# ── Summary ───────────────────────────────────────────────────────────────────
cat("\n=== Absolute weighted composite scores (0-100) ===\n")

## 
## === Absolute weighted composite scores (0-100) ===

cat("100 = farm achieves the theoretical maximum on all indicators\n\n")

## 100 = farm achieves the theoretical maximum on all indicators

cat("Farm index_a   — mean:", round(mean(indicators_weighted$farm_index_a,    na.rm=TRUE), 2),
    "  range: [", round(min(indicators_weighted$farm_index_a,    na.rm=TRUE), 2), ",",
                  round(max(indicators_weighted$farm_index_a,    na.rm=TRUE), 2), "]\n")

## Farm index_a   — mean: 26.02   range: [ 14.65 , 53.68 ]

cat("Social index_a — mean:", round(mean(indicators_weighted$social_index_a,  na.rm=TRUE), 2),
    "  range: [", round(min(indicators_weighted$social_index_a,  na.rm=TRUE), 2), ",",
                  round(max(indicators_weighted$social_index_a,  na.rm=TRUE), 2), "]\n")

## Social index_a — mean: 42.39   range: [ 22.14 , 60.66 ]

cat("Overall_a      — mean:", round(mean(indicators_weighted$overall_index_a, na.rm=TRUE), 2), "\n")

## Overall_a      — mean: 34.2

— 10. Visualization of results

Absolute values for all the indicators in each dimension for each farm type

# --- 10. Radar chart visualization by Type -----------------------------------

library(ggplot2)
library(patchwork)

## Warning: package 'patchwork' was built under R version 4.3.3

library(dplyr)
library(tidyr)
library(stringr)
library(purrr)

type_colors <- c("Forest"     = "#1D9E75",
                 "Savannah"   = "#BA7517",
                 "Transition" = "#534AB7")

# Weights for normalizing _a to 0-100%
farm_weights_map   <- setNames(w_present_norm[farm_inds],
                               str_remove(farm_inds,   "_n$"))
social_weights_map <- setNames(w_present_norm[social_inds],
                               str_remove(social_inds, "_n$"))

abs_cols_farm   <- str_replace(farm_inds,   "_n$", "_a")
abs_cols_social <- str_replace(social_inds, "_n$", "_a")

# ── Helper: convert radar data to cartesian coordinates ──────────────────────
# Uses manual cartesian instead of coord_polar to ensure reliable polygon closure

prep_radar <- function(df, ind_cols, weights_map) {
  labels <- str_remove(ind_cols, "_a$")
  n      <- length(labels)
  # Angles start at top (pi/2) and go clockwise
  angles <- seq(pi / 2, pi / 2 - 2 * pi, length.out = n + 1)[1:n]

  means <- df %>%
    select(Type, all_of(ind_cols)) %>%
    group_by(Type) %>%
    summarise(across(everything(), ~ mean(.x, na.rm = TRUE)),
              .groups = "drop")

  long <- means %>%
    pivot_longer(-Type, names_to = "col", values_to = "value") %>%
    mutate(
      indicator = str_remove(col, "_a$"),
      pct       = value / weights_map[indicator] * 100,
      idx       = match(indicator, labels),
      angle     = angles[idx],
      x         = pct * cos(angle),
      y         = pct * sin(angle)
    ) %>%
    arrange(Type, idx)

  # Close polygon: repeat first row with same angle
  closure <- long %>%
    filter(idx == 1) %>%
    mutate(idx = n + 1)

  bind_rows(long, closure) %>%
    mutate(indicator = factor(indicator, levels = labels))
}

# ── Helper: build and plot one radar ─────────────────────────────────────────
plot_radar <- function(radar_data, comp_data, title) {
  labels <- levels(radar_data$indicator)
  n      <- length(labels)
  angles <- seq(pi / 2, pi / 2 - 2 * pi, length.out = n + 1)[1:n]

  # Axis label positions (outside the 100% circle)
  label_r    <- 120
  axis_df    <- tibble(
    indicator = labels,
    x         = label_r * cos(angles),
    y         = label_r * sin(angles)
  )

  # Grid circles (25 / 50 / 75 / 100)
  grid_vals <- c(25, 50, 75, 100)
  circle_df <- map_dfr(grid_vals, function(r) {
    theta <- seq(0, 2 * pi, length.out = 200)
    tibble(x = r * cos(theta), y = r * sin(theta), r = r)
  })

  # Spokes (center → 100%)
  spoke_df <- tibble(
    x    = 100 * cos(angles),
    y    = 100 * sin(angles),
    xend = 0, yend = 0
  )

  # Legend labels include composite score
  legend_labels <- setNames(
    paste0(comp_data$Type, "  (", round(comp_data$score, 1), ")"),
    comp_data$Type
  )

  ggplot() +
    # Grid circles
    geom_path(data   = circle_df,
              aes(x  = x, y = y, group = factor(r)),
              color  = "grey88", linewidth = 0.35) +
    # Spokes
    geom_segment(data = spoke_df,
                 aes(x = x, y = y, xend = xend, yend = yend),
                 color = "grey88", linewidth = 0.35) +
    # % labels on vertical axis
    geom_text(data  = tibble(r = grid_vals,
                             x = 3,
                             y = grid_vals),
              aes(x = x, y = y, label = paste0(r, "%")),
              size = 3.5, color = "grey65", hjust = 0) +    # was 2.3
    # Polygons
    geom_polygon(data  = radar_data,
                 aes(x = x, y = y,
                     color = Type, fill = Type, group = Type),
                 alpha = 0.13, linewidth = 0.85) +
    # Points (exclude closure row)
    geom_point(data = filter(radar_data, idx <= n),
               aes(x = x, y = y, color = Type),
               size = 1.6) +
    # Indicator labels
    geom_text(data  = axis_df,
              aes(x = x, y = y, label = indicator),
              size  = 4, color = "grey25",                  # was 2.5
              lineheight = 0.85, fontface = "plain") +
    coord_equal(
      xlim = c(-label_r - 10, label_r + 10),
      ylim = c(-label_r - 10, label_r + 10)
    ) +
    scale_color_manual(values = type_colors, labels = legend_labels) +
    scale_fill_manual(values  = type_colors, labels = legend_labels) +
    labs(title  = title,
         color  = "Type  (dimension index 0–100)",
         fill   = "Type  (dimension index 0–100)") +
    theme_void(base_size = 10) +
theme(
      legend.position  = "bottom",
      legend.direction = "horizontal",
      legend.text      = element_text(size = 15),           # was 9
      legend.title     = element_text(size = 15, color = "grey40"),  # was 8.5
      plot.title       = element_text(face  = "bold", size = 17,     # was 12
                                      hjust = 0.5,
                                      margin = margin(b = 8)),
      plot.margin      = margin(10, 10, 10, 10)
    )
}

# ── Prepare data ──────────────────────────────────────────────────────────────
radar_farm   <- prep_radar(indicators_weighted, abs_cols_farm,   farm_weights_map)
radar_social <- prep_radar(indicators_weighted, abs_cols_social, social_weights_map)

comp_farm <- indicators_weighted %>%
  group_by(Type) %>%
  summarise(score = mean(farm_index_a, na.rm = TRUE), .groups = "drop")

comp_social <- indicators_weighted %>%
  group_by(Type) %>%
  summarise(score = mean(social_index_a, na.rm = TRUE), .groups = "drop")

# ── Build plots ───────────────────────────────────────────────────────────────
p_farm <- plot_radar(
  radar_farm, comp_farm,
  "Farm dimension — absolute weighted scores by Type (%  max achievable)"
)

p_social <- plot_radar(
  radar_social, comp_social,
  "Social dimension — absolute weighted scores by Type (%  max achievable)"
)

# ── Combine ───────────────────────────────────────────────────────────────────
p_combined <- p_farm / p_social +
plot_annotation(
    title   = "FarMAE Agroecology Index — radar charts by farm Type",
    caption = paste0(
      "Each spoke = % of maximum achievable weighted score for that indicator.\n",
      "Legend value = mean dimension composite index (0–100) across farms of that Type."
    ),
    theme = theme(
      plot.title   = element_text(face = "bold", size = 18, hjust = 0.5),  # was 14
      plot.caption = element_text(size = 12, color = "grey50", hjust = 0)  # was 8
    )
  )

# ── Save ──────────────────────────────────────────────────────────────────────
dir.create("D:/Mes Donnees/CANALLs/AET Scoring/Outputs", showWarnings = FALSE)

ggsave(
  "D:/Mes Donnees/CANALLs/AET Scoring/Outputs/radar_by_type.png",
  p_combined,
  width  = 14,
  height = 20,
  dpi    = 300
)

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

message("✓ Radar chart saved to Outputs/radar_by_type.png")

## ✓ Radar chart saved to Outputs/radar_by_type.png

now a table that descrive the indicators, with their means and also the CV to show how informative they are

# --- 11. Summary table: mean per Type + informativeness ----------------------

library(writexl)

# Short descriptions for each indicator (4-6 words)
indicator_descriptions <- tribble(
  ~indicator, ~description,
  "SO1", "Soil nutrient availability practices",
  "SO2", "Balanced fertiliser use",
  "SO3", "Crop and household residue use",
  "SO4", "Livestock effluent management",
  "SO5", "Seasonal tillage practices",
  "SO6", "Soil erosion control practices",
  "WM1", "Water saving practices",
  "WM2", "Water body protection",
  "WM3", "Integrated water management",
  "CR1", "Crop type diversification",
  "CR2", "Intercropping and association",
  "CR4", "Ecologically beneficial plants",
  "AN1", "Livestock feed access",
  "AN2", "Livestock water access",
  "AN3", "Animal species diversity",
  "AN4", "Veterinary care quality",
  "AN5", "Crop-livestock integration",
  "TR1", "Tree integration in farm",
  "TR2", "Cocoa shade level",
  "TR3", "Perennial species number",
  "TR4", "Forest product use",
  "TR5", "Tree fodder sources",
  "PD1", "Chemical weed control",
  "PD2", "Manual weeding effectiveness",
  "PD3", "Weed residue management",
  "PD4", "Chemical pest control",
  "PD5", "Hazardous pesticide avoidance",
  "PD6", "Integrated pest management",
  "PD7", "Biological pest control",
  "PD8", "Pesticide handling safety",
  "EN1", "Renewable energy use",
  "EN2", "Energy saving practices",
  "HH1", "Off-farm income diversification",
  "HH2", "Food access and diversity",
  "HH3", "Financial services access",
  "HH4", "Land tenure security",
  "HH5", "Household nutrition practices",
  "HH6", "Women in assets and decisions",
  "WR1", "Permanent workforce size",
  "WR2", "Worker daily wage level",
  "WR3", "Worker wellbeing practices",
  "CM1", "Community group membership",
  "CM2", "Shared resource management",
  "CM3", "Community decision influence",
  "CM4", "Co-creation participation",
  "CM5", "Community trust level",
  "CM6", "Traditional values perception",
  "CM7", "Gender balance perception",
  "VC1", "Sales channel diversity",
  "VC2", "Cooperative membership",
  "VC3", "Farm record keeping",
  "VC4", "Certification scheme benefits",
  "POL1","Policy participation level"
)

# Dimension labels
farm_inds_labels   <- str_remove(farm_inds,   "_n$")
social_inds_labels <- str_remove(social_inds, "_n$")

indicator_descriptions <- indicator_descriptions %>%
  mutate(dimension = case_when(
    indicator %in% farm_inds_labels   ~ "Farm",
    indicator %in% social_inds_labels ~ "Social",
    TRUE ~ NA_character_
  ))

# ── Compute mean and CV per indicator per Type ────────────────────────────────
# Use normalised _n columns (0-1 scale) for CV so it is comparable across indicators

compute_stats <- function(df_norm, ind_cols_n) {
  labels <- str_remove(ind_cols_n, "_n$")

  # Per-Type stats
  by_type <- df_norm %>%
    select(Type, all_of(ind_cols_n)) %>%
    group_by(Type) %>%
    summarise(
      across(all_of(ind_cols_n),
             list(
               mean = ~ round(mean(.x, na.rm = TRUE) * 100, 1),
               cv   = ~ {
                 m <- mean(.x, na.rm = TRUE)
                 s <- sd(.x,   na.rm = TRUE)
                 if (is.na(m) || m < 0.05) NA_real_   # CV unstable near 0
                 else round(s / m * 100, 1)
               }
             ),
             .names = "{str_remove(.col, '_n$')}_{.fn}"),
      .groups = "drop"
    )

  # Overall stats (all Types pooled)
  overall <- df_norm %>%
    select(all_of(ind_cols_n)) %>%
    summarise(
      across(all_of(ind_cols_n),
             list(
               mean = ~ round(mean(.x, na.rm = TRUE) * 100, 1),
               cv   = ~ {
                 m <- mean(.x, na.rm = TRUE)
                 s <- sd(.x,   na.rm = TRUE)
                 if (is.na(m) || m < 0.05) NA_real_
                 else round(s / m * 100, 1)
               }
             ),
             .names = "{str_remove(.col, '_n$')}_{.fn}")
    ) %>%
    mutate(Type = "ALL")

  bind_rows(by_type, overall)
}

all_ind_n <- c(farm_inds, social_inds)
stats_wide <- compute_stats(norm, all_ind_n)

# ── Reshape to long then pivot to final table format ─────────────────────────
stats_long <- stats_wide %>%
  pivot_longer(-Type,
               names_to  = c("indicator", ".value"),
               names_pattern = "^(.+)_(mean|cv)$")

summary_table <- stats_long %>%
  pivot_wider(
    id_cols     = indicator,
    names_from  = Type,
    values_from = c(mean, cv),
    names_glue  = "{Type}_{.value}"
  ) %>%
  # Join descriptions and dimension
  left_join(indicator_descriptions, by = "indicator") %>%
  # Reorder columns cleanly
  select(
    dimension, indicator, description,
    Forest_mean,     Forest_cv,
    Savannah_mean,   Savannah_cv,
    Transition_mean, Transition_cv,
    ALL_mean,        ALL_cv
  ) %>%
  arrange(dimension, indicator)

# Rename for readability
summary_table <- summary_table %>%
  rename(
    Dimension          = dimension,
    Indicator          = indicator,
    Description        = description,
    `Forest mean (%)`      = Forest_mean,
    `Forest CV (%)`        = Forest_cv,
    `Savannah mean (%)`    = Savannah_mean,
    `Savannah CV (%)`      = Savannah_cv,
    `Transition mean (%)` = Transition_mean,
    `Transition CV (%)`   = Transition_cv,
    `Overall mean (%)`    = ALL_mean,
    `Overall CV (%)`      = ALL_cv
  )

# ── Print to console ──────────────────────────────────────────────────────────
cat("\n=== Indicator summary table ===\n")

## 
## === Indicator summary table ===

cat("Mean = % of theoretical maximum (from normalised score × 100)\n")

## Mean = % of theoretical maximum (from normalised score × 100)

cat("CV   = Coefficient of Variation (SD/mean × 100):\n")

## CV   = Coefficient of Variation (SD/mean × 100):

cat("       High CV → indicator discriminates well between farms\n")

##        High CV → indicator discriminates well between farms

cat("       Low CV  → farms score similarly, less informative\n")

##        Low CV  → farms score similarly, less informative

cat("       NA      → mean < 5%, CV unreliable\n\n")

##        NA      → mean < 5%, CV unreliable

print(summary_table, n = Inf)

## # A tibble: 52 × 11
##    Dimension Indicator Description             `Forest mean (%)` `Forest CV (%)`
##    <chr>     <chr>     <chr>                               <dbl>           <dbl>
##  1 Farm      AN1       Livestock feed access                80.8            21.6
##  2 Farm      AN2       Livestock water access               96.2            20.4
##  3 Farm      AN3       Animal species diversi…               9.6           116. 
##  4 Farm      AN4       Veterinary care quality              35.9            36.4
##  5 Farm      AN5       Crop-livestock integra…               7.1           315. 
##  6 Farm      CR1       Crop type diversificat…              82.7            20.5
##  7 Farm      CR2       Intercropping and asso…              33.8            70.1
##  8 Farm      EN1       Renewable energy use                  5.1           398. 
##  9 Farm      EN2       Energy saving practices               0.6            NA  
## 10 Farm      PD1       Chemical weed control                45.3            63.2
## 11 Farm      PD2       Manual weeding effecti…              38.5           112. 
## 12 Farm      PD3       Weed residue management              17             102. 
## 13 Farm      PD4       Chemical pest control                46.2            74.5
## 14 Farm      PD5       Hazardous pesticide av…              76.2            39.1
## 15 Farm      PD6       Integrated pest manage…              10.4           113. 
## 16 Farm      PD7       Biological pest control              19.2           196. 
## 17 Farm      PD8       Pesticide handling saf…              44.9            58.8
## 18 Farm      SO1       Soil nutrient availabi…              10.7            76.3
## 19 Farm      SO2       Balanced fertiliser use              43.6            98  
## 20 Farm      SO3       Crop and household res…              16.3           156. 
## 21 Farm      SO4       Livestock effluent man…              35.9            63.9
## 22 Farm      SO5       Seasonal tillage pract…              32.7           106. 
## 23 Farm      SO6       Soil erosion control p…               1              NA  
## 24 Farm      TR1       Tree integration in fa…              25.6            92.3
## 25 Farm      TR2       Cocoa shade level                    70.2            24.2
## 26 Farm      TR3       Perennial species numb…              73.1            39.8
## 27 Farm      TR4       Forest product use                   32.2            28  
## 28 Farm      TR5       Tree fodder sources                  25               0  
## 29 Farm      WM1       Water saving practices                4.8            NA  
## 30 Farm      WM2       Water body protection                 3.8            NA  
## 31 Farm      WM3       Integrated water manag…               1.9            NA  
## 32 Social    CM1       Community group member…              19.2            31.9
## 33 Social    CM2       Shared resource manage…              17.4            51.9
## 34 Social    CM3       Community decision inf…              53.8            23.7
## 35 Social    CM4       Co-creation participat…              46.2            64.8
## 36 Social    CM5       Community trust level                51.9            38.3
## 37 Social    CM6       Traditional values per…              60.6            33.4
## 38 Social    CM7       Gender balance percept…              56.7            38.5
## 39 Social    HH1       Off-farm income divers…              42.3            64.2
## 40 Social    HH2       Food access and divers…              38.5            80.2
## 41 Social    HH3       Financial services acc…              34.6            89.2
## 42 Social    HH4       Land tenure security                 61.5            76.1
## 43 Social    HH5       Household nutrition pr…              38.5            54.4
## 44 Social    HH6       Women in assets and de…              52.2            42.2
## 45 Social    POL1      Policy participation l…              21.2            98.6
## 46 Social    VC1       Sales channel diversity              87.2            24.4
## 47 Social    VC2       Cooperative membership               44.2            92.3
## 48 Social    VC3       Farm record keeping                  11.5           282. 
## 49 Social    VC4       Certification scheme b…               5.1           302. 
## 50 Social    WR1       Permanent workforce si…             100               0  
## 51 Social    WR2       Worker daily wage level              34.2            80.5
## 52 Social    WR3       Worker wellbeing pract…              22.4            36  
## # ℹ 6 more variables: `Savannah mean (%)` <dbl>, `Savannah CV (%)` <dbl>,
## #   `Transition mean (%)` <dbl>, `Transition CV (%)` <dbl>,
## #   `Overall mean (%)` <dbl>, `Overall CV (%)` <dbl>

# ── Export ────────────────────────────────────────────────────────────────────
write_xlsx(
  list(
    Summary        = summary_table,
    Interpretation = tibble(
      Metric = c("Mean (%)", "CV (%)"),
      Meaning = c(
        "Average normalised score across farms of that Type, expressed as % of the theoretical maximum. 100% = all farms at maximum.",
        "Coefficient of Variation = SD / mean × 100. High CV (>50%) = indicator is discriminating well between farms. Low CV (<20%) = farms score similarly, indicator adds little information. NA = mean too close to 0 for reliable CV."
      )
    )
  ),
  path = "D:/Mes Donnees/CANALLs/AET Scoring/Data/indicator_summary_table.xlsx"
)
message("✓ Saved to indicator_summary_table.xlsx")

## ✓ Saved to indicator_summary_table.xlsx

# ── Formatted summary table using ggplot2 ─────────────────────────────────────
library(ggplot2)
# ── Formatted summary table using ggplot2 ─────────────────────────────────────

# ── Formatted summary table using ggplot2 — split by dimension ───────────────

# Column definitions
tbl_cols <- tribble(
  ~field,                   ~label,        ~xmin,  ~xmax,  ~type,
  "Indicator",              "Ind.",         0.00,   0.70,   "id",
  "Description",            "Description",  0.70,   3.20,   "text",
  "Forest mean (%)",        "Mean%",        3.20,   4.00,   "mean",
  "Forest CV (%)",          "CV%",          4.00,   4.65,   "cv",
  "Savannah mean (%)",      "Mean%",        4.65,   5.45,   "mean",
  "Savannah CV (%)",        "CV%",          5.45,   6.10,   "cv",
  "Transition mean (%)",    "Mean%",        6.10,   6.90,   "mean",
  "Transition CV (%)",      "CV%",          6.90,   7.55,   "cv",
  "Overall mean (%)",       "Mean%",        7.55,   8.35,   "mean",
  "Overall CV (%)",         "CV%",          8.35,   9.00,   "cv"
) %>% mutate(xcenter = (xmin + xmax) / 2)

# Color palettes
mean_pal <- colorRampPalette(c("#ffffff","#c7e9c0","#41ab5d","#00441b"))(101)
cv_pal   <- colorRampPalette(c("#ffffff","#fee0d2","#fc9272","#cb181d"))(151)

get_fill <- function(val, type, even_row) {
  if (type == "mean") {
    if (is.na(val)) return("#eeeeee")
    return(mean_pal[min(max(round(val), 0), 100) + 1])
  }
  if (type == "cv") {
    if (is.na(val)) return("#eeeeee")
    return(cv_pal[min(max(round(val), 0), 150) + 1])
  }
  return(if (even_row) "#f7f7f7" else "#ffffff")
}

# ── Core plot function ────────────────────────────────────────────────────────
make_dim_table <- function(data, dim_label, dim_color, filename) {

  n <- nrow(data)

  cell_rows <- list()
  for (i in seq_len(n)) {
    row  <- data[i, ]
    even <- i %% 2 == 0
    for (j in seq_len(nrow(tbl_cols))) {
      cl  <- tbl_cols[j, ]
      raw <- row[[cl$field]]
      num <- suppressWarnings(as.numeric(raw))
      txt <- if (is.na(raw) || raw == "NA") "—" else as.character(raw)

      cell_rows[[length(cell_rows) + 1]] <- data.frame(
        xmin  = cl$xmin,  xmax = cl$xmax,
        ymin  = -i,       ymax = -(i - 1),
        x     = if (cl$field == "Description") cl$xmin + 0.06 else cl$xcenter,
        y     = -(i - 0.5),
        lab   = txt,
        fill  = get_fill(num, cl$type, even),
        hjust = if (cl$field == "Description") 0 else 0.5,
        fsz   = if (cl$field == "Description") 6.7 else 6.8,
        stringsAsFactors = FALSE
      )
    }
  }
  cells_df <- do.call(rbind, cell_rows)

  spanners <- tribble(
    ~xmin,  ~xmax,  ~label,         ~fill,
    0.00,   0.70,   "Indicator",    "#2c3e50",
    0.70,   3.20,   "Description",  "#2c3e50",
    3.20,   4.65,   "Forest",       "#154360",
    4.65,   6.10,   "Savannah",     "#7d6608",
    6.10,   7.55,   "Transition",   "#4a235a",
    7.55,   9.00,   "Overall",      "#1b4f3a"
  ) %>% mutate(x = (xmin + xmax) / 2, y = 1.5, ymin = 1, ymax = 2)

  col_headers <- tbl_cols %>%
    mutate(y = 0.5, ymin = 0, ymax = 1, fill = "#34495e")

  title_y <- 3.2
  cap_y   <- -(n + 2.2)
  leg_y   <- -(n + 1.4)

  p <- ggplot() +

    geom_rect(data = cells_df,
              aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, fill=I(fill)),
              color = "grey82", linewidth = 0.15) +
    geom_text(data = cells_df,
              aes(x=x, y=y, label=lab, hjust=hjust, size=I(fsz)),
              color = "grey15", vjust = 0.5) +

    annotate("text", x = -0.08, y = -(n / 2),
             label = dim_label, size = 9.2, fontface = "bold",
             color = dim_color, angle = 90, hjust = 0.5) +

    geom_rect(data = spanners,
              aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, fill=I(fill)),
              color = "grey60", linewidth = 0.15) +
    geom_text(data = spanners,
              aes(x=x, y=y, label=label),
              size = 8.6, color = "white", fontface = "bold", vjust = 0.5) +

    geom_rect(data = col_headers,
              aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, fill=I(fill)),
              color = "grey60", linewidth = 0.15) +
    geom_text(data = col_headers,
              aes(x=xcenter, y=y, label=label),
              size = 7.6, color = "white", fontface = "bold",
              vjust = 0.5, hjust = 0.5, lineheight = 0.85) +

    annotate("text", x = 4.5, y = title_y,
             label = paste0("FarMAE — ", dim_label,
                            " Dimension: Indicator Summary by Farm Type"),
             size = 13.9, fontface = "bold", hjust = 0.5, color = "#1a1a1a") +

    annotate("rect",
             xmin = 0.0, xmax = 0.55,
             ymin = leg_y - 0.15, ymax = leg_y + 0.15,
             fill = "#41ab5d", color = NA) +
    annotate("text", x = 0.65, y = leg_y,
             label = "Mean: white = 0%  →  dark green = 100% of theoretical maximum",
             size = 6.7, hjust = 0, color = "grey40") +
    annotate("rect",
             xmin = 4.6, xmax = 5.15,
             ymin = leg_y - 0.15, ymax = leg_y + 0.15,
             fill = "#fc9272", color = NA) +
    annotate("text", x = 5.25, y = leg_y,
             label = "CV: white = low discrimination  →  dark red = high discrimination",
             size = 6.7, hjust = 0, color = "grey40") +

    annotate("text", x = 4.5, y = cap_y,
             label = paste0(
               "Mean = average normalised score as % of theoretical maximum.   ",
               "CV = Coefficient of Variation (SD / mean × 100).   ",
               "High CV (>50%) = indicator discriminates well between farms.   ",
               "Low CV (<20%) = farms score similarly, less informative.   ",
               "— = CV unreliable (mean < 5%).\n",
               "Source: FarMAE survey, Cameroon 2026.   ",
               "N = 59 farms  (Forest n = 26  |  Savannah n = 23  |  Transition n = 10).   ",
               "Scores normalised using distance-to-target method.   ",
               "Missing and not-applicable values imputed using within-type median."
             ),
             size = 6.1, hjust = 0.5, color = "grey50", lineheight = 1.3) +

    coord_cartesian(
      clip = "off",
      xlim = c(-0.25, 9.0),
      ylim = c(cap_y - 0.4, title_y + 0.5)
    ) +
    theme_void() +
    theme(
      plot.background = element_rect(fill = "white", color = NA),
      plot.margin     = margin(15, 20, 15, 30)
    )

  ggsave(
    filename,
    p,
    width     = 22,
    height    = n * 0.86 + 10,
    dpi       = 300,
    bg        = "white",
    limitsize = FALSE
  )
  message("✓ Saved: ", filename)
}

# ── Generate one table per dimension ─────────────────────────────────────────
make_dim_table(
  data      = filter(summary_table, Dimension == "Farm"),
  dim_label = "FARM",
  dim_color = "#154360",
  filename  = "D:/Mes Donnees/CANALLs/AET Scoring/Outputs/indicator_table_farm.png"
)

## ✓ Saved: D:/Mes Donnees/CANALLs/AET Scoring/Outputs/indicator_table_farm.png

make_dim_table(
  data      = filter(summary_table, Dimension == "Social"),
  dim_label = "SOCIAL",
  dim_color = "#4a235a",
  filename  = "D:/Mes Donnees/CANALLs/AET Scoring/Outputs/indicator_table_social.png"
)

## ✓ Saved: D:/Mes Donnees/CANALLs/AET Scoring/Outputs/indicator_table_social.png

Now weiguted socres per farm for each dimension, separating by farm type

library(patchwork)

# ── Individual farm radar charts by Type ─────────────────────────────────────
# One figure per Type (Forest, Savannah, Transition)
# Two panels per figure: Farm indicators and Social indicators
# Each line = one farm; values = _w columns expressed as % of Type maximum

# Weights for normalizing _w back to 0–100%
farm_w_weights   <- setNames(w_present_norm[farm_inds],   str_remove(farm_inds,   "_n$"))
social_w_weights <- setNames(w_present_norm[social_inds], str_remove(social_inds, "_n$"))

farm_w_cols_plot   <- str_replace(farm_inds,   "_n$", "_w")
social_w_cols_plot <- str_replace(social_inds, "_n$", "_w")
farm_labels        <- str_remove(farm_inds,    "_n$")
social_labels      <- str_remove(social_inds,  "_n$")

type_colors <- c("Forest"     = "#1D9E75",
                 "Savannah"   = "#BA7517",
                 "Transition" = "#534AB7")

# ── Helper: cartesian radar data for individual farms ────────────────────────
prep_indiv_radar <- function(df, ind_cols, weights_map, labels_vec) {
  n      <- length(labels_vec)
  angles <- seq(pi/2, pi/2 - 2*pi, length.out = n + 1)[1:n]

  long <- df %>%
    select(farmer_id, all_of(ind_cols)) %>%
    pivot_longer(-farmer_id, names_to = "col", values_to = "value") %>%
    mutate(
      indicator = str_remove(col, "_w$"),
      pct       = value / weights_map[indicator] * 100,
      idx       = match(indicator, labels_vec),
      angle     = angles[idx],
      x         = pct * cos(angle),
      y         = pct * sin(angle)
    ) %>%
    arrange(farmer_id, idx)

  # Close polygons
  closure <- long %>%
    filter(idx == 1) %>%
    mutate(idx   = n + 1,
           angle = angles[1],
           x     = pct * cos(angle),
           y     = pct * sin(angle))

  bind_rows(long, closure) %>% arrange(farmer_id, idx)
}

# ── Helper: plot one radar panel ─────────────────────────────────────────────
plot_indiv_radar <- function(radar_data, labels_vec, color, title) {
  n      <- length(labels_vec)
  angles <- seq(pi/2, pi/2 - 2*pi, length.out = n + 1)[1:n]

  label_r <- 120
  axis_df <- tibble(
    indicator = labels_vec,
    x         = label_r * cos(angles),
    y         = label_r * sin(angles)
  )

  grid_vals <- c(25, 50, 75, 100)
  circle_df <- do.call(rbind, lapply(grid_vals, function(r) {
    theta <- seq(0, 2 * pi, length.out = 200)
    data.frame(x = r * cos(theta), y = r * sin(theta), r = r)
  }))

  spoke_df <- data.frame(
    x    = 100 * cos(angles),
    y    = 100 * sin(angles),
    xend = 0, yend = 0
  )

  ggplot() +
    # Grid circles
    geom_path(data = circle_df,
              aes(x = x, y = y, group = factor(r)),
              color = "grey88", linewidth = 0.35) +
    # Spokes
    geom_segment(data = spoke_df,
                 aes(x = x, y = y, xend = xend, yend = yend),
                 color = "grey88", linewidth = 0.35) +
    # Grid % labels
    geom_text(data = data.frame(r = grid_vals, x = 3, y = grid_vals),
              aes(x = x, y = y, label = paste0(r, "%")),
              size = 3.5, color = "grey65", hjust = 0) +
    # One polygon per farm
    geom_polygon(data  = radar_data,
                 aes(x = x, y = y, group = farmer_id),
                 fill  = color, alpha = 0.07,
                 color = color, linewidth = 0.5) +
    # Indicator labels
    geom_text(data = axis_df,
              aes(x = x, y = y, label = indicator),
              size = 3.8, color = "grey25", lineheight = 0.85) +
    coord_equal(
      xlim = c(-label_r - 10, label_r + 10),
      ylim = c(-label_r - 10, label_r + 10)
    ) +
    labs(title = title) +
    theme_void(base_size = 11) +
    theme(
      plot.title  = element_text(face  = "bold", size = 14,
                                 hjust = 0.5, margin = margin(b = 8)),
      plot.margin = margin(10, 10, 10, 10)
    )
}

# ── Generate one figure per Type ─────────────────────────────────────────────
for (tp in c("Forest", "Savannah", "Transition")) {

  df_type <- indicators_weighted %>% filter(Type == tp)
  color   <- type_colors[tp]
  n_farms <- nrow(df_type)

  # Farm panel
  radar_farm_indiv <- prep_indiv_radar(
    df_type, farm_w_cols_plot, farm_w_weights, farm_labels
  )
  p_farm_indiv <- plot_indiv_radar(
    radar_farm_indiv, farm_labels, color,
    paste0("Farm indicators  (n = ", n_farms, " farms)")
  )

  # Social panel
  radar_social_indiv <- prep_indiv_radar(
    df_type, social_w_cols_plot, social_w_weights, social_labels
  )
  p_social_indiv <- plot_indiv_radar(
    radar_social_indiv, social_labels, color,
    paste0("Social indicators  (n = ", n_farms, " farms)")
  )

  # Combine Farm + Social side by side
  p_combined <- p_farm_indiv + p_social_indiv +
    plot_annotation(
      title   = paste0("FarMAE — Individual farm profiles: ", tp, " Type"),
      caption = paste0(
        "Each polygon = one farm.   ",
        "Values = weighted relativized scores (_w) expressed as % of ",
        "maximum achievable weight per indicator.\n",
        "100% = farm matches the best farm in the ", tp,
        " Type for that indicator.   ",
        "Scores normalised using distance-to-target method.   ",
        "Missing values imputed using within-type median."
      ),
      theme = theme(
        plot.title   = element_text(face  = "bold", size = 16, hjust = 0.5),
        plot.caption = element_text(size  = 9,  color = "grey50", hjust = 0,
                                    margin = margin(t = 8))
      )
    )

  ggsave(
    paste0("D:/Mes Donnees/CANALLs/AET Scoring/Outputs/radar_individual_",
           tolower(tp), ".png"),
    p_combined,
    width  = 22,
    height = 12,
    dpi    = 300,
    bg     = "white"
  )
  message("✓ Saved: radar_individual_", tolower(tp), ".png")
}

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

## ✓ Saved: radar_individual_forest.png

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

## ✓ Saved: radar_individual_savannah.png

## Warning: annotation$theme is not a valid theme.
## Please use `theme()` to construct themes.

## ✓ Saved: radar_individual_transition.png

NOw the table with values per farmer, per indicator and agregated per dimension and whole farm

# ── Excel export: per-farm scores (_w) + dimension + overall indices ──────────

library(writexl)

# Build export table: context + all _w columns + composite scores
export_w <- indicators_weighted %>%
  select(
    farmer_id, zone, Type,
    all_of(weighted_cols),          # all _w indicator columns
    farm_index, social_index, overall_index   # relativized composites
  ) %>%
  # Round all numeric columns to 3 decimal places
  mutate(across(where(is.numeric), ~ round(.x, 3)))

# Add a readable header by renaming _w columns to just the indicator name
names(export_w) <- names(export_w) %>%
  str_replace("_w$", "")

# Split into farm and social indicator columns for separate sheets
farm_cols_export   <- c("farmer_id", "zone", "Type",
                        str_remove(farm_w_cols,   "_w$"),
                        "farm_index")
social_cols_export <- c("farmer_id", "zone", "Type",
                        str_remove(social_w_cols, "_w$"),
                        "social_index")
overall_cols       <- c("farmer_id", "zone", "Type",
                        "farm_index", "social_index", "overall_index")

write_xlsx(
  list(
    All_indicators = export_w,
    Farm_dimension = export_w %>% select(all_of(farm_cols_export)),
    Social_dimension = export_w %>% select(all_of(social_cols_export)),
    Composite_scores = export_w %>% select(all_of(overall_cols))
  ),
  path = "D:/Mes Donnees/CANALLs/AET Scoring/Data/scores_per_farm_w.xlsx"
)
message("✓ Saved to scores_per_farm_w.xlsx")

## ✓ Saved to scores_per_farm_w.xlsx

data

Diego

2026-05-23

—1.Set up—

2.Data upload

3.Helper functions

—4.Farm Dimention scores—

4.1 SOIL MANAGEMENT

4.2 Water MANAGEMENT

4.3 CROP MANAGEMENT

4.4 LIVESTOCK

4.5 TREES & AGROFORESTRY

4.6 PEST & DISEASE MANAGEMENT

4.7 ENERGY

5.1 Menage

5.2 Main d’oeuvre

5.3 Communite

5.4 Chaine de valeur

—-Save database on absolute values

—6. NORMALISE — distance-to-target (0-1)—

— 7. Fill gaps and not aplicables using averages for same type

First lets check the NA per variable per type

for mow will replace all the NA by the “median”o of the farm type, for all variables. this needs to be re-visited once we have more data

— 8. Relativize comparing each indicator to the average of the type

— 9. Weigh indicators values

— 10. Visualization of results

Absolute values for all the indicators in each dimension for each farm type

now a table that descrive the indicators, with their means and also the CV to show how informative they are

Now weiguted socres per farm for each dimension, separating by farm type

NOw the table with values per farmer, per indicator and agregated per dimension and whole farm

data

Diego

2026-05-23

—1.Set up—

2.Data upload

3.Helper functions

—4.Farm Dimention scores—

4.1 SOIL MANAGEMENT

4.2 Water MANAGEMENT

4.3 CROP MANAGEMENT

4.4 LIVESTOCK

4.5 TREES & AGROFORESTRY

4.6 PEST & DISEASE MANAGEMENT

4.7 ENERGY

5.—Social Dimention scores—

5.1 Menage

5.2 Main d’oeuvre

5.3 Communite

5.4 Chaine de valeur

—-Save database on absolute values

—6. NORMALISE — distance-to-target (0-1)—

— 7. Fill gaps and not aplicables using averages for same type

First lets check the NA per variable per type

for mow will replace all the NA by the “median”o of the farm type, for all variables. this needs to be re-visited once we have more data

— 8. Relativize comparing each indicator to the average of the type

— 9. Weigh indicators values

— 10. Visualization of results

Absolute values for all the indicators in each dimension for each farm type

now a table that descrive the indicators, with their means and also the CV to show how informative they are

Now weiguted socres per farm for each dimension, separating by farm type

NOw the table with values per farmer, per indicator and agregated per dimension and whole farm