#we wor with the database yield_n_out_clean which is the raw after eliminatin of yield outliers and NAs:

#Exploration of output variables DQ_future_adoption: farmers where asked at harvest time of each season, if they will adopt the practice in other plots in the future. DQ_Preference: Farmers ranked the treatments based on prefference DQ_CA_real_adoption: At weeding time, starting in season 2, farmers were asked if they acually adopted the practice in any of their other plots DQ_CA_spillover: Farmers where asked if any of their neighbours adopted the practice

Based on some concepts from Abetu et al., 2026 and Rogeres et al., 2003 we will call: DQ_Preference = Perception of the practice CA as compared to conv (persuation) = Perception DQ_future_adoption = farmers intention to adopt the practice (desicion) = Adoption_intention DQ_CA_real_adoption = Farmers actually test the practice (implementation) = Adoption_behaviour DQ_CA_spillover = dissemination of the CA to neighbour farmers = Diffusion

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#bring database from folder
yield_n_out_clean <- read.csv("D:/Mes Donnees/OAF_CIRAD/CA_MoU/data/processed/yield_n_out_clean.csv")


yield_n_out_clean <- yield_n_out_clean %>%
  rename(
    Perception = DQ_Preference,
    Adoption_intention = DQ_future_adoption,
    Adoption_behaviour = DQ_CA_real_adoption,
    Diffusion = DQ_CA_spillover
  )

Based on an initial exploration of variable: -Adoption Intential and Perception are complete; only 2 NA in the first. However, data is missing for 2023B for Adoption behabiour and DIffusion All the variables are 1 (yes to adoption, positive perception and positive to difussion) or 0 (no)

# Updated variable names
vars <- c("Adoption_intention", "Perception", "Adoption_behaviour", "Diffusion")

# Function to summarize each variable
summarize_variable <- function(var, data) {
  cat("Variable:", var, "\n")
  cat("Type:", class(data[[var]]), "\n")
  cat("Total non-NA values:", sum(!is.na(data[[var]])), "\n")
  cat("Total NA values:", sum(is.na(data[[var]])), "\n\n")
}

# Apply summary
for (v in vars) {
  summarize_variable(v, yield_n_out_clean)
}
## Variable: Adoption_intention 
## Type: integer 
## Total non-NA values: 11244 
## Total NA values: 2 
## 
## Variable: Perception 
## Type: integer 
## Total non-NA values: 11246 
## Total NA values: 0 
## 
## Variable: Adoption_behaviour 
## Type: integer 
## Total non-NA values: 9145 
## Total NA values: 2101 
## 
## Variable: Diffusion 
## Type: integer 
## Total non-NA values: 9145 
## Total NA values: 2101
# Variables with NA to analyze
na_vars <- c("Adoption_behaviour", "Diffusion")

# Categorical grouping variables
group_vars <- c("IQR_Season", "IQF_environment")

# Loop to examine NA distribution
for (na_var in na_vars) {
  cat("NA distribution for:", na_var, "\n\n")
  
  for (grp_var in group_vars) {
    cat("Grouped by:", grp_var, "\n")
    
    tab <- with(yield_n_out_clean, table(yield_n_out_clean[[grp_var]], is.na(yield_n_out_clean[[na_var]])))
    colnames(tab) <- c("Not_NA", "NA")
    print(tab)
    cat("\n")
  }
  
  cat("----------------------------------------------------------\n\n")
}
## NA distribution for: Adoption_behaviour 
## 
## Grouped by: IQR_Season 
##      
##       Not_NA   NA
##   23A   2227    0
##   23B      0 2073
##   24A   1872    0
##   24B   1894    4
##   25A   1600    0
##   25B   1552   24
## 
## Grouped by: IQF_environment 
##              
##               Not_NA   NA
##   Highland      1648  500
##   Mid_Low_Dry   3080  666
##   Mid_Low_Wet   2573  549
##   Transition    1844  386
## 
## ----------------------------------------------------------
## 
## NA distribution for: Diffusion 
## 
## Grouped by: IQR_Season 
##      
##       Not_NA   NA
##   23A   2227    0
##   23B      0 2073
##   24A   1872    0
##   24B   1894    4
##   25A   1600    0
##   25B   1552   24
## 
## Grouped by: IQF_environment 
##              
##               Not_NA   NA
##   Highland      1648  500
##   Mid_Low_Dry   3080  666
##   Mid_Low_Wet   2573  549
##   Transition    1844  386
## 
## ----------------------------------------------------------

analyzing adoption of CA practices (B, C, D), computing predicted adoption percentages, and creating a clean summary table excluding Treatment A, with easy-to-read labels:

Notes: I elinates 23 A and B from the analysis beause missing data in 23B and no sense of 23A (first season). AS for yield, there is no difference among CA tratments. Insteand of averaging what I will no onwards is to use treat C which is the one that performed betted in yield and even as a trend seem to by little better fro adoption and prefference that the others. Also is the one that OAF has predilection for

library(dplyr)
library(tidyr)
library(purrr)
library(multcompView)

# Define binary adoption variables
vars <- c("Adoption_intention", "Perception", "Adoption_behaviour", "Diffusion")

# Clean and reshape the data
data_clean <- yield_n_out_clean %>%
  filter(Treat_code %in% c("B", "C", "D"), !IQR_Season %in% c("23A", "23B")) %>%
  filter(Treat_code %in% c("B", "C", "D")) %>%  # remove Treatment A
  dplyr::select(IQF_environment, Treat_code, all_of(vars)) %>%
  pivot_longer(
    cols = all_of(vars),
    names_to = "outcome",
    values_to = "value"
  ) %>%
  filter(!is.na(value))  # remove NA responses

# Run chi-square test and post-hoc comparisons per outcome and environment
chi_results <- map_dfr(vars, function(var) {
  map_dfr(unique(data_clean$IQF_environment), function(env) {

    # Subset data for this outcome × environment
    sub_df <- data_clean %>%
      filter(outcome == var, IQF_environment == env)

    # Create contingency table
    tbl <- table(sub_df$Treat_code, sub_df$value)

    # Skip if not enough data
    if (any(rowSums(tbl) == 0) || nrow(tbl) < 2 || ncol(tbl) < 2) return(NULL)

    # Run chi-square test
    chi <- chisq.test(tbl)

    # Post-hoc pairwise comparison
    pw <- pairwise.prop.test(tbl[, "1"], rowSums(tbl), p.adjust.method = "BH")

    # Extract all treatment levels
    all_treats <- rownames(tbl)
    letters_all <- multcompLetters(pw$p.value)$Letters

    # Ensure all treatments get a letter — assign "a" if missing
    letters_complete <- setNames(rep("a", length(all_treats)), all_treats)
    letters_complete[names(letters_all)] <- letters_all

    # Summarize proportions
    prop_df <- as.data.frame.matrix(tbl)
    prop_df$total <- rowSums(tbl)
    prop_df$percent_1s <- round(100 * prop_df$`1` / prop_df$total, 1)
    prop_df$Treatment <- rownames(prop_df)
    prop_df$letter <- letters_complete[rownames(prop_df)]

    prop_df %>%
      dplyr::select(Treatment, percent_1s, letter) %>%
      mutate(
        outcome = var,
        environment = env,
        chi_p_value = chi$p.value
      )
  })
})
## Warning in prop.test(x[c(i, j)], n[c(i, j)], ...): Chi-squared approximation
## may be incorrect
# Reorder columns
chi_results <- chi_results %>%
  dplyr::select(outcome, environment, Treatment, percent_1s, letter, chi_p_value)

Treatment C across seasons

library(dplyr)
library(tidyr)
library(purrr)
library(multcompView)

# Binary variables of interest
vars <- c("Adoption_intention", "Perception", "Adoption_behaviour", "Diffusion")

# Clean data: only Treatment C, exclude seasons 23A and 23B
data_clean <- yield_n_out_clean %>%
  filter(Treat_code == "C", !IQR_Season %in% c("23A", "23B")) %>%
  dplyr::select(IQR_block,IQF_environment, IQR_Season, all_of(vars)) %>%
  pivot_longer(
    cols = all_of(vars),
    names_to = "outcome",
    values_to = "value"
  ) %>%
  filter(!is.na(value))  # remove NA values

# Chi-square analysis comparing Treatment C across seasons for each outcome × environment
season_chi_results <- map_dfr(vars, function(var) {
  map_dfr(unique(data_clean$IQF_environment), function(env) {
    
    sub_df <- data_clean %>%
      filter(outcome == var, IQF_environment == env)
    
    tbl <- table(sub_df$IQR_Season, sub_df$value)

    # Skip if "1" is missing or all counts in "1" are zero
    if (!"1" %in% colnames(tbl) || all(tbl[, "1"] == 0) || nrow(tbl) < 2 || ncol(tbl) < 2) {
      return(NULL)
    }

    # Run chi-square test
    chi <- suppressWarnings(chisq.test(tbl))

    # Pairwise comparisons
    prop_success <- tbl[, "1"]
    prop_total <- rowSums(tbl)

    # Handle edge cases
    if (any(prop_total == 0)) return(NULL)

    pw <- suppressWarnings(pairwise.prop.test(prop_success, prop_total, p.adjust.method = "BH"))

    # Assign significance letters
    all_seasons <- rownames(tbl)
    letters_all <- multcompLetters(pw$p.value)$Letters
    letters_complete <- setNames(rep("a", length(all_seasons)), all_seasons)
    letters_complete[names(letters_all)] <- letters_all

    # Summarize results
    prop_df <- as.data.frame.matrix(tbl)
    prop_df$total <- rowSums(tbl)
    prop_df$percent_1s <- round(100 * prop_df$`1` / prop_df$total, 1)
    prop_df$Season <- rownames(prop_df)
    prop_df$letter <- letters_complete[rownames(prop_df)]

    prop_df %>%
      dplyr::select(Season, percent_1s, letter) %>%
      mutate(
        outcome = var,
        environment = env,
        chi_p_value = chi$p.value
      )
  })
})

# Reorder and print
season_chi_results <- season_chi_results %>%
  dplyr::select(outcome, environment, Season, percent_1s, letter, chi_p_value)

How do response change along the time?

library(ggplot2)

# Convert Season to factor with proper ordering
season_chi_results$Season <- factor(season_chi_results$Season, levels = sort(unique(season_chi_results$Season)))

# Plot: Evolution of Adoption Metrics for Treatment C across Seasons
ggplot(season_chi_results, aes(x = Season, y = percent_1s, group = outcome, color = outcome)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  geom_text(aes(label = letter), vjust = -0.5, size = 3.5, show.legend = FALSE) +
  facet_wrap(~ environment, ncol = 2) +
  labs(
    title = "Adoption Metrics Over Time (Treatment C Only)",
    subtitle = "Proportion of 1s (%) across Seasons and Environments",
    y = "% with value == 1",
    x = "Season",
    color = "Adoption Metric"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 12),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Alluvial plots to see how is the evolution of adoption indicators per farmer across seasons

library(ggplot2)
library(ggalluvial)
## Warning: package 'ggalluvial' was built under R version 4.3.3
library(dplyr)

# Ensure variables are factors for alluvial plot
data_alluvial <- data_clean %>%
  mutate(
    IQR_Season = factor(IQR_Season, levels = sort(unique(IQR_Season))),
    value = factor(value, levels = c(0, 1), labels = c("No", "Yes"))
  )

# Example: Plot alluvial by outcome and environment
ggplot(data_alluvial,
       aes(x = IQR_Season, stratum = value, alluvium = interaction(IQR_block, outcome),
           fill = value, label = value)) +
  geom_flow(stat = "alluvium", lode.guidance = "forward", alpha = 0.6, linewidth = 0.3) +
  geom_stratum(width = 0.3) +
  facet_wrap(~ outcome, scales = "free_y") +
  labs(
    title = "Farmer-Level Evolution of Binary Outcomes Over Seasons",
    x = "Season", y = "Number of farmers",
    fill = "Response"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Now how do the adoption variables relat to each other, in sequencial way Perception>Adoption intention>Adoption behaviour> Diffusion

library(tidyr)
library(dplyr)

# Convert value to factor (optional)
data_clean <- data_clean %>%
  mutate(value = factor(value, levels = c(0, 1), labels = c("No", "Yes")))

# Pivot wider so each outcome becomes a column
data_wide <- data_clean %>%
  pivot_wider(
    names_from = outcome,
    values_from = value
  )



library(ggplot2)
library(ggalluvial)

# Make sure variables are in order
response_stages <- c("Perception", "Adoption_intention", "Adoption_behaviour", "Diffusion")

# Loop over seasons to plot one per season
unique_seasons <- unique(data_wide$IQR_Season)

for (season in unique_seasons) {
  
  data_season <- data_wide %>% filter(IQR_Season == season)
  
  # Plot
  p <- ggplot(data_season,
              aes(axis1 = Perception,
                  axis2 = Adoption_intention,
                  axis3 = Adoption_behaviour,
                  axis4 = Diffusion)) +
    geom_alluvium(aes(fill = Perception), width = 1/12, alpha = 0.6) +
    geom_stratum(width = 1/12, fill = "gray90", color = "black") +
    geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
    scale_x_discrete(limits = response_stages, expand = c(.05, .05)) +
    labs(
      title = paste("Farmer Progression -", season),
      x = "Stage",
      y = "Number of Farmers"
    ) +
    theme_minimal(base_size = 13)

  print(p)
}
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

#How the response of Yes to intention of adoption is actually seen in adoption the following season

Notes: there is a relation among farmers having a adoption intention and adoption behoviour the following season, but its not that strong. - Most of the farmers say that they have intention to adopt, but they do not (Only 10% of those with intention to adopt actually adopted next season) - Among the ones that adopt, there has more proportion of those that the previous season say they were going to adopt (30% more aprox)

library(dplyr)
library(tidyr)

# 1. Convert Yes/No to numeric
data_wide_bin <- data_wide %>%
  mutate(
    Adoption_intention = ifelse(Adoption_intention == "Yes", 1, 0),
    Adoption_behaviour = ifelse(Adoption_behaviour == "Yes", 1, 0),
    IQR_Season = factor(IQR_Season, levels = c("24A", "24B", "25A", "25B"))
  )

# 2. Add lag of intention
data_with_lag <- data_wide_bin %>%
  arrange(IQR_block, IQR_Season) %>%
  group_by(IQR_block) %>%
  mutate(Intention_prev = lag(Adoption_intention)) %>%
  ungroup()

# 3. Compute summary for those with behaviour == 1
summary_behaviour <- data_with_lag %>%
  filter(IQR_Season %in% c("24B", "25A", "25B")) %>%
  filter(Adoption_behaviour == 1) %>%
  group_by(IQR_Season) %>%
  summarise(
    N_behaviour_1 = n(),
    With_prev_intention_1 = sum(Intention_prev == 1, na.rm = TRUE),
    `% with prev. intention (among adopters)` = round(100 * With_prev_intention_1 / N_behaviour_1, 1),
    .groups = "drop"
  )

# 4. Compute % of previous intention among *all* farmers
summary_all <- data_with_lag %>%
  filter(IQR_Season %in% c("24B", "25A", "25B")) %>%
  group_by(IQR_Season) %>%
  summarise(
    N_total = n(),
    Prev_intention_all = sum(Intention_prev == 1, na.rm = TRUE),
    `% with prev. intention (all farmers)` = round(100 * Prev_intention_all / N_total, 1),
    .groups = "drop"
  )

# 5. Merge both summaries
summary_combined <- left_join(summary_behaviour, summary_all, by = "IQR_Season")

# 6. Print result
print(summary_combined)
## # A tibble: 3 × 7
##   IQR_Season N_behaviour_1 With_prev_intention_1 % with prev. intentio…¹ N_total
##   <fct>              <int>                 <int>                   <dbl>   <int>
## 1 24B                   26                    18                    69.2     475
## 2 25A                   57                    45                    78.9     453
## 3 25B                   48                    38                    79.2     446
## # ℹ abbreviated name: ¹​`% with prev. intention (among adopters)`
## # ℹ 2 more variables: Prev_intention_all <int>,
## #   `% with prev. intention (all farmers)` <dbl>
library(dplyr)

# Make sure your seasons are correctly ordered
data_with_lag <- data_with_lag %>%
  mutate(
    IQR_Season = factor(IQR_Season, levels = c("24A", "24B", "25A", "25B", "26A", "26B")),
    Adoption_intention = as.numeric(Adoption_intention),
    Adoption_behaviour = as.numeric(Adoption_behaviour)
  )

# Identify farmers with Adoption_intention = 1 in a season,
# and check if they had Adoption_behaviour = 1 in the following season
intention_implemented <- data_with_lag %>%
  arrange(IQR_block, IQR_Season) %>%
  group_by(IQR_block) %>%
  mutate(
    Behaviour_next = lead(Adoption_behaviour),
    Season_next = lead(IQR_Season)
  ) %>%
  ungroup() %>%
  filter(Adoption_intention == 1, !is.na(Behaviour_next)) %>%
  group_by(IQR_Season) %>%
  summarise(
    N_with_intention_1 = n(),
    N_implemented_next = sum(Behaviour_next == 1, na.rm = TRUE),
    Percent_implemented = round(100 * N_implemented_next / N_with_intention_1, 1),
    .groups = "drop"
  )

# Print summary
print(intention_implemented)
## # A tibble: 3 × 4
##   IQR_Season N_with_intention_1 N_implemented_next Percent_implemented
##   <fct>                   <int>              <int>               <dbl>
## 1 24A                       284                 19                 6.7
## 2 24B                       307                 46                15  
## 3 25A                       288                 36                12.5

Does the Adoption intension in a season predict the adoption behaviour in the follwoing season?

library(dplyr)

# 1. Convert Yes/No to binary 1/0
data_bin <- data_wide %>%
  mutate(
    Adoption_intention = ifelse(Adoption_intention == "Yes", 1, 0),
    Adoption_behaviour = ifelse(Adoption_behaviour == "Yes", 1, 0),
    IQR_Season = factor(IQR_Season, levels = c("24A", "24B", "25A", "25B"))
  )

# 2. Create previous season's intention per farmer
data_with_lag <- data_bin %>%
  arrange(IQR_block, IQR_Season) %>%
  group_by(IQR_block) %>%
  mutate(Intention_prev = lag(Adoption_intention)) %>%
  ungroup()

# 3. Filter to seasons where previous intention exists
data_filtered <- data_with_lag %>%
  filter(IQR_Season %in% c("24B", "25A", "25B")) %>%
  filter(!is.na(Intention_prev) & !is.na(Adoption_behaviour))

# 4. Create contingency table
contingency_table <- table(data_filtered$Intention_prev, data_filtered$Adoption_behaviour)
print(contingency_table)
##    
##       0   1
##   0 424  28
##   1 778 101
# 5. Run Chi-squared test
chi_test <- chisq.test(contingency_table)
print(chi_test)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 8.9687, df = 1, p-value = 0.002746
# Optional: add labels for readability
rownames(contingency_table) <- c("Prev Intention = 0", "Prev Intention = 1")
colnames(contingency_table) <- c("Behaviour = 0", "Behaviour = 1")

# Print nicely formatted
cat("\nContingency Table:\n")
## 
## Contingency Table:
print(contingency_table)
##                     
##                      Behaviour = 0 Behaviour = 1
##   Prev Intention = 0           424            28
##   Prev Intention = 1           778           101
cat("\nChi-squared Test Results:\n")
## 
## Chi-squared Test Results:
print(chi_test)
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 8.9687, df = 1, p-value = 0.002746

Same but by season

library(dplyr)

# Only test for seasons with a previous season (exclude first season)
seasons_to_test <- c("24B", "25A", "25B")

# Loop over each season and perform Chi-squared test
for (season in seasons_to_test) {
  cat("\n============================\n")
  cat("Season:", season, "\n")
  
  # Subset data
  df_season <- data_with_lag %>%
    filter(IQR_Season == season) %>%
    filter(!is.na(Intention_prev) & !is.na(Adoption_behaviour))
  
  # Create contingency table
  contingency_table <- table(df_season$Intention_prev, df_season$Adoption_behaviour)
  print(contingency_table)
  
  # Run chi-squared test
  chi_result <- chisq.test(contingency_table, correct = TRUE)
  print(chi_result)
}
## 
## ============================
## Season: 24B 
##    
##       0   1
##   0 153   6
##   1 264  18
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 0.886, df = 1, p-value = 0.3466
## 
## 
## ============================
## Season: 25A 
##    
##       0   1
##   0 133  12
##   1 261  45
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 3.1247, df = 1, p-value = 0.07712
## 
## 
## ============================
## Season: 25B 
##    
##       0   1
##   0 138  10
##   1 253  38
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 3.3795, df = 1, p-value = 0.06601

Do CA group together as for yield? If so, can we combine them as 1?

How can we assess overall potential adoption and disemination of CA (treatments and variables combined)

#What is the potential adoption as overall and for different AEZs?

#How does overall potential adoption change along time?

How do potential adoption differ for maize and for beans?

How is potential adoption modulated by performance of CA each season?

What is the overall potential adoption for each performance type?

What are the direct and indirect drivers of each individual adoption metric? And the overall adoption?

How much can we increase adoption per farmer reached if we target as compared to blanket?