#we wor with the database yield_n_out_clean which is the raw after eliminatin of yield outliers and NAs:
#Exploration of output variables DQ_future_adoption: farmers where asked at harvest time of each season, if they will adopt the practice in other plots in the future. DQ_Preference: Farmers ranked the treatments based on prefference DQ_CA_real_adoption: At weeding time, starting in season 2, farmers were asked if they acually adopted the practice in any of their other plots DQ_CA_spillover: Farmers where asked if any of their neighbours adopted the practice
Based on some concepts from Abetu et al., 2026 and Rogeres et al., 2003 we will call: DQ_Preference = Perception of the practice CA as compared to conv (persuation) = Perception DQ_future_adoption = farmers intention to adopt the practice (desicion) = Adoption_intention DQ_CA_real_adoption = Farmers actually test the practice (implementation) = Adoption_behaviour DQ_CA_spillover = dissemination of the CA to neighbour farmers = Diffusion
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#bring database from folder
yield_n_out_clean <- read.csv("D:/Mes Donnees/OAF_CIRAD/CA_MoU/data/processed/yield_n_out_clean.csv")
yield_n_out_clean <- yield_n_out_clean %>%
rename(
Perception = DQ_Preference,
Adoption_intention = DQ_future_adoption,
Adoption_behaviour = DQ_CA_real_adoption,
Diffusion = DQ_CA_spillover
)
Based on an initial exploration of variable: -Adoption Intential and Perception are complete; only 2 NA in the first. However, data is missing for 2023B for Adoption behabiour and DIffusion All the variables are 1 (yes to adoption, positive perception and positive to difussion) or 0 (no)
# Updated variable names
vars <- c("Adoption_intention", "Perception", "Adoption_behaviour", "Diffusion")
# Function to summarize each variable
summarize_variable <- function(var, data) {
cat("Variable:", var, "\n")
cat("Type:", class(data[[var]]), "\n")
cat("Total non-NA values:", sum(!is.na(data[[var]])), "\n")
cat("Total NA values:", sum(is.na(data[[var]])), "\n\n")
}
# Apply summary
for (v in vars) {
summarize_variable(v, yield_n_out_clean)
}
## Variable: Adoption_intention
## Type: integer
## Total non-NA values: 11244
## Total NA values: 2
##
## Variable: Perception
## Type: integer
## Total non-NA values: 11246
## Total NA values: 0
##
## Variable: Adoption_behaviour
## Type: integer
## Total non-NA values: 9145
## Total NA values: 2101
##
## Variable: Diffusion
## Type: integer
## Total non-NA values: 9145
## Total NA values: 2101
# Variables with NA to analyze
na_vars <- c("Adoption_behaviour", "Diffusion")
# Categorical grouping variables
group_vars <- c("IQR_Season", "IQF_environment")
# Loop to examine NA distribution
for (na_var in na_vars) {
cat("NA distribution for:", na_var, "\n\n")
for (grp_var in group_vars) {
cat("Grouped by:", grp_var, "\n")
tab <- with(yield_n_out_clean, table(yield_n_out_clean[[grp_var]], is.na(yield_n_out_clean[[na_var]])))
colnames(tab) <- c("Not_NA", "NA")
print(tab)
cat("\n")
}
cat("----------------------------------------------------------\n\n")
}
## NA distribution for: Adoption_behaviour
##
## Grouped by: IQR_Season
##
## Not_NA NA
## 23A 2227 0
## 23B 0 2073
## 24A 1872 0
## 24B 1894 4
## 25A 1600 0
## 25B 1552 24
##
## Grouped by: IQF_environment
##
## Not_NA NA
## Highland 1648 500
## Mid_Low_Dry 3080 666
## Mid_Low_Wet 2573 549
## Transition 1844 386
##
## ----------------------------------------------------------
##
## NA distribution for: Diffusion
##
## Grouped by: IQR_Season
##
## Not_NA NA
## 23A 2227 0
## 23B 0 2073
## 24A 1872 0
## 24B 1894 4
## 25A 1600 0
## 25B 1552 24
##
## Grouped by: IQF_environment
##
## Not_NA NA
## Highland 1648 500
## Mid_Low_Dry 3080 666
## Mid_Low_Wet 2573 549
## Transition 1844 386
##
## ----------------------------------------------------------
Notes: I elinates 23 A and B from the analysis beause missing data in 23B and no sense of 23A (first season). AS for yield, there is no difference among CA tratments. Insteand of averaging what I will no onwards is to use treat C which is the one that performed betted in yield and even as a trend seem to by little better fro adoption and prefference that the others. Also is the one that OAF has predilection for
library(dplyr)
library(tidyr)
library(purrr)
library(multcompView)
# Define binary adoption variables
vars <- c("Adoption_intention", "Perception", "Adoption_behaviour", "Diffusion")
# Clean and reshape the data
data_clean <- yield_n_out_clean %>%
filter(Treat_code %in% c("B", "C", "D"), !IQR_Season %in% c("23A", "23B")) %>%
filter(Treat_code %in% c("B", "C", "D")) %>% # remove Treatment A
dplyr::select(IQF_environment, Treat_code, all_of(vars)) %>%
pivot_longer(
cols = all_of(vars),
names_to = "outcome",
values_to = "value"
) %>%
filter(!is.na(value)) # remove NA responses
# Run chi-square test and post-hoc comparisons per outcome and environment
chi_results <- map_dfr(vars, function(var) {
map_dfr(unique(data_clean$IQF_environment), function(env) {
# Subset data for this outcome × environment
sub_df <- data_clean %>%
filter(outcome == var, IQF_environment == env)
# Create contingency table
tbl <- table(sub_df$Treat_code, sub_df$value)
# Skip if not enough data
if (any(rowSums(tbl) == 0) || nrow(tbl) < 2 || ncol(tbl) < 2) return(NULL)
# Run chi-square test
chi <- chisq.test(tbl)
# Post-hoc pairwise comparison
pw <- pairwise.prop.test(tbl[, "1"], rowSums(tbl), p.adjust.method = "BH")
# Extract all treatment levels
all_treats <- rownames(tbl)
letters_all <- multcompLetters(pw$p.value)$Letters
# Ensure all treatments get a letter — assign "a" if missing
letters_complete <- setNames(rep("a", length(all_treats)), all_treats)
letters_complete[names(letters_all)] <- letters_all
# Summarize proportions
prop_df <- as.data.frame.matrix(tbl)
prop_df$total <- rowSums(tbl)
prop_df$percent_1s <- round(100 * prop_df$`1` / prop_df$total, 1)
prop_df$Treatment <- rownames(prop_df)
prop_df$letter <- letters_complete[rownames(prop_df)]
prop_df %>%
dplyr::select(Treatment, percent_1s, letter) %>%
mutate(
outcome = var,
environment = env,
chi_p_value = chi$p.value
)
})
})
## Warning in prop.test(x[c(i, j)], n[c(i, j)], ...): Chi-squared approximation
## may be incorrect
# Reorder columns
chi_results <- chi_results %>%
dplyr::select(outcome, environment, Treatment, percent_1s, letter, chi_p_value)
library(dplyr)
library(tidyr)
library(purrr)
library(multcompView)
# Binary variables of interest
vars <- c("Adoption_intention", "Perception", "Adoption_behaviour", "Diffusion")
# Clean data: only Treatment C, exclude seasons 23A and 23B
data_clean <- yield_n_out_clean %>%
filter(Treat_code == "C", !IQR_Season %in% c("23A", "23B")) %>%
dplyr::select(IQR_block,IQF_environment, IQR_Season, all_of(vars)) %>%
pivot_longer(
cols = all_of(vars),
names_to = "outcome",
values_to = "value"
) %>%
filter(!is.na(value)) # remove NA values
# Chi-square analysis comparing Treatment C across seasons for each outcome × environment
season_chi_results <- map_dfr(vars, function(var) {
map_dfr(unique(data_clean$IQF_environment), function(env) {
sub_df <- data_clean %>%
filter(outcome == var, IQF_environment == env)
tbl <- table(sub_df$IQR_Season, sub_df$value)
# Skip if "1" is missing or all counts in "1" are zero
if (!"1" %in% colnames(tbl) || all(tbl[, "1"] == 0) || nrow(tbl) < 2 || ncol(tbl) < 2) {
return(NULL)
}
# Run chi-square test
chi <- suppressWarnings(chisq.test(tbl))
# Pairwise comparisons
prop_success <- tbl[, "1"]
prop_total <- rowSums(tbl)
# Handle edge cases
if (any(prop_total == 0)) return(NULL)
pw <- suppressWarnings(pairwise.prop.test(prop_success, prop_total, p.adjust.method = "BH"))
# Assign significance letters
all_seasons <- rownames(tbl)
letters_all <- multcompLetters(pw$p.value)$Letters
letters_complete <- setNames(rep("a", length(all_seasons)), all_seasons)
letters_complete[names(letters_all)] <- letters_all
# Summarize results
prop_df <- as.data.frame.matrix(tbl)
prop_df$total <- rowSums(tbl)
prop_df$percent_1s <- round(100 * prop_df$`1` / prop_df$total, 1)
prop_df$Season <- rownames(prop_df)
prop_df$letter <- letters_complete[rownames(prop_df)]
prop_df %>%
dplyr::select(Season, percent_1s, letter) %>%
mutate(
outcome = var,
environment = env,
chi_p_value = chi$p.value
)
})
})
# Reorder and print
season_chi_results <- season_chi_results %>%
dplyr::select(outcome, environment, Season, percent_1s, letter, chi_p_value)
library(ggplot2)
# Convert Season to factor with proper ordering
season_chi_results$Season <- factor(season_chi_results$Season, levels = sort(unique(season_chi_results$Season)))
# Plot: Evolution of Adoption Metrics for Treatment C across Seasons
ggplot(season_chi_results, aes(x = Season, y = percent_1s, group = outcome, color = outcome)) +
geom_line(size = 1) +
geom_point(size = 2) +
geom_text(aes(label = letter), vjust = -0.5, size = 3.5, show.legend = FALSE) +
facet_wrap(~ environment, ncol = 2) +
labs(
title = "Adoption Metrics Over Time (Treatment C Only)",
subtitle = "Proportion of 1s (%) across Seasons and Environments",
y = "% with value == 1",
x = "Season",
color = "Adoption Metric"
) +
theme_minimal() +
theme(
text = element_text(size = 12),
axis.text.x = element_text(angle = 45, hjust = 1)
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#Alluvial plots to see how is the evolution of adoption indicators per
farmer across seasons
library(ggplot2)
library(ggalluvial)
## Warning: package 'ggalluvial' was built under R version 4.3.3
library(dplyr)
# Ensure variables are factors for alluvial plot
data_alluvial <- data_clean %>%
mutate(
IQR_Season = factor(IQR_Season, levels = sort(unique(IQR_Season))),
value = factor(value, levels = c(0, 1), labels = c("No", "Yes"))
)
# Example: Plot alluvial by outcome and environment
ggplot(data_alluvial,
aes(x = IQR_Season, stratum = value, alluvium = interaction(IQR_block, outcome),
fill = value, label = value)) +
geom_flow(stat = "alluvium", lode.guidance = "forward", alpha = 0.6, linewidth = 0.3) +
geom_stratum(width = 0.3) +
facet_wrap(~ outcome, scales = "free_y") +
labs(
title = "Farmer-Level Evolution of Binary Outcomes Over Seasons",
x = "Season", y = "Number of farmers",
fill = "Response"
) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
#Now how do the adoption variables relat to each other, in sequencial
way Perception>Adoption intention>Adoption behaviour>
Diffusion
library(tidyr)
library(dplyr)
# Convert value to factor (optional)
data_clean <- data_clean %>%
mutate(value = factor(value, levels = c(0, 1), labels = c("No", "Yes")))
# Pivot wider so each outcome becomes a column
data_wide <- data_clean %>%
pivot_wider(
names_from = outcome,
values_from = value
)
library(ggplot2)
library(ggalluvial)
# Make sure variables are in order
response_stages <- c("Perception", "Adoption_intention", "Adoption_behaviour", "Diffusion")
# Loop over seasons to plot one per season
unique_seasons <- unique(data_wide$IQR_Season)
for (season in unique_seasons) {
data_season <- data_wide %>% filter(IQR_Season == season)
# Plot
p <- ggplot(data_season,
aes(axis1 = Perception,
axis2 = Adoption_intention,
axis3 = Adoption_behaviour,
axis4 = Diffusion)) +
geom_alluvium(aes(fill = Perception), width = 1/12, alpha = 0.6) +
geom_stratum(width = 1/12, fill = "gray90", color = "black") +
geom_text(stat = "stratum", aes(label = after_stat(stratum)), size = 3) +
scale_x_discrete(limits = response_stages, expand = c(.05, .05)) +
labs(
title = paste("Farmer Progression -", season),
x = "Stage",
y = "Number of Farmers"
) +
theme_minimal(base_size = 13)
print(p)
}
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.
#How the response of Yes to intention of adoption is actually seen in adoption the following season
Notes: there is a relation among farmers having a adoption intention and adoption behoviour the following season, but its not that strong. - Most of the farmers say that they have intention to adopt, but they do not (Only 10% of those with intention to adopt actually adopted next season) - Among the ones that adopt, there has more proportion of those that the previous season say they were going to adopt (30% more aprox)
library(dplyr)
library(tidyr)
# 1. Convert Yes/No to numeric
data_wide_bin <- data_wide %>%
mutate(
Adoption_intention = ifelse(Adoption_intention == "Yes", 1, 0),
Adoption_behaviour = ifelse(Adoption_behaviour == "Yes", 1, 0),
IQR_Season = factor(IQR_Season, levels = c("24A", "24B", "25A", "25B"))
)
# 2. Add lag of intention
data_with_lag <- data_wide_bin %>%
arrange(IQR_block, IQR_Season) %>%
group_by(IQR_block) %>%
mutate(Intention_prev = lag(Adoption_intention)) %>%
ungroup()
# 3. Compute summary for those with behaviour == 1
summary_behaviour <- data_with_lag %>%
filter(IQR_Season %in% c("24B", "25A", "25B")) %>%
filter(Adoption_behaviour == 1) %>%
group_by(IQR_Season) %>%
summarise(
N_behaviour_1 = n(),
With_prev_intention_1 = sum(Intention_prev == 1, na.rm = TRUE),
`% with prev. intention (among adopters)` = round(100 * With_prev_intention_1 / N_behaviour_1, 1),
.groups = "drop"
)
# 4. Compute % of previous intention among *all* farmers
summary_all <- data_with_lag %>%
filter(IQR_Season %in% c("24B", "25A", "25B")) %>%
group_by(IQR_Season) %>%
summarise(
N_total = n(),
Prev_intention_all = sum(Intention_prev == 1, na.rm = TRUE),
`% with prev. intention (all farmers)` = round(100 * Prev_intention_all / N_total, 1),
.groups = "drop"
)
# 5. Merge both summaries
summary_combined <- left_join(summary_behaviour, summary_all, by = "IQR_Season")
# 6. Print result
print(summary_combined)
## # A tibble: 3 × 7
## IQR_Season N_behaviour_1 With_prev_intention_1 % with prev. intentio…¹ N_total
## <fct> <int> <int> <dbl> <int>
## 1 24B 26 18 69.2 475
## 2 25A 57 45 78.9 453
## 3 25B 48 38 79.2 446
## # ℹ abbreviated name: ¹`% with prev. intention (among adopters)`
## # ℹ 2 more variables: Prev_intention_all <int>,
## # `% with prev. intention (all farmers)` <dbl>
library(dplyr)
# Make sure your seasons are correctly ordered
data_with_lag <- data_with_lag %>%
mutate(
IQR_Season = factor(IQR_Season, levels = c("24A", "24B", "25A", "25B", "26A", "26B")),
Adoption_intention = as.numeric(Adoption_intention),
Adoption_behaviour = as.numeric(Adoption_behaviour)
)
# Identify farmers with Adoption_intention = 1 in a season,
# and check if they had Adoption_behaviour = 1 in the following season
intention_implemented <- data_with_lag %>%
arrange(IQR_block, IQR_Season) %>%
group_by(IQR_block) %>%
mutate(
Behaviour_next = lead(Adoption_behaviour),
Season_next = lead(IQR_Season)
) %>%
ungroup() %>%
filter(Adoption_intention == 1, !is.na(Behaviour_next)) %>%
group_by(IQR_Season) %>%
summarise(
N_with_intention_1 = n(),
N_implemented_next = sum(Behaviour_next == 1, na.rm = TRUE),
Percent_implemented = round(100 * N_implemented_next / N_with_intention_1, 1),
.groups = "drop"
)
# Print summary
print(intention_implemented)
## # A tibble: 3 × 4
## IQR_Season N_with_intention_1 N_implemented_next Percent_implemented
## <fct> <int> <int> <dbl>
## 1 24A 284 19 6.7
## 2 24B 307 46 15
## 3 25A 288 36 12.5
Does the Adoption intension in a season predict the adoption behaviour in the follwoing season?
library(dplyr)
# 1. Convert Yes/No to binary 1/0
data_bin <- data_wide %>%
mutate(
Adoption_intention = ifelse(Adoption_intention == "Yes", 1, 0),
Adoption_behaviour = ifelse(Adoption_behaviour == "Yes", 1, 0),
IQR_Season = factor(IQR_Season, levels = c("24A", "24B", "25A", "25B"))
)
# 2. Create previous season's intention per farmer
data_with_lag <- data_bin %>%
arrange(IQR_block, IQR_Season) %>%
group_by(IQR_block) %>%
mutate(Intention_prev = lag(Adoption_intention)) %>%
ungroup()
# 3. Filter to seasons where previous intention exists
data_filtered <- data_with_lag %>%
filter(IQR_Season %in% c("24B", "25A", "25B")) %>%
filter(!is.na(Intention_prev) & !is.na(Adoption_behaviour))
# 4. Create contingency table
contingency_table <- table(data_filtered$Intention_prev, data_filtered$Adoption_behaviour)
print(contingency_table)
##
## 0 1
## 0 424 28
## 1 778 101
# 5. Run Chi-squared test
chi_test <- chisq.test(contingency_table)
print(chi_test)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 8.9687, df = 1, p-value = 0.002746
# Optional: add labels for readability
rownames(contingency_table) <- c("Prev Intention = 0", "Prev Intention = 1")
colnames(contingency_table) <- c("Behaviour = 0", "Behaviour = 1")
# Print nicely formatted
cat("\nContingency Table:\n")
##
## Contingency Table:
print(contingency_table)
##
## Behaviour = 0 Behaviour = 1
## Prev Intention = 0 424 28
## Prev Intention = 1 778 101
cat("\nChi-squared Test Results:\n")
##
## Chi-squared Test Results:
print(chi_test)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 8.9687, df = 1, p-value = 0.002746
Same but by season
library(dplyr)
# Only test for seasons with a previous season (exclude first season)
seasons_to_test <- c("24B", "25A", "25B")
# Loop over each season and perform Chi-squared test
for (season in seasons_to_test) {
cat("\n============================\n")
cat("Season:", season, "\n")
# Subset data
df_season <- data_with_lag %>%
filter(IQR_Season == season) %>%
filter(!is.na(Intention_prev) & !is.na(Adoption_behaviour))
# Create contingency table
contingency_table <- table(df_season$Intention_prev, df_season$Adoption_behaviour)
print(contingency_table)
# Run chi-squared test
chi_result <- chisq.test(contingency_table, correct = TRUE)
print(chi_result)
}
##
## ============================
## Season: 24B
##
## 0 1
## 0 153 6
## 1 264 18
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 0.886, df = 1, p-value = 0.3466
##
##
## ============================
## Season: 25A
##
## 0 1
## 0 133 12
## 1 261 45
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 3.1247, df = 1, p-value = 0.07712
##
##
## ============================
## Season: 25B
##
## 0 1
## 0 138 10
## 1 253 38
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: contingency_table
## X-squared = 3.3795, df = 1, p-value = 0.06601
#What is the potential adoption as overall and for different AEZs?
#How does overall potential adoption change along time?