Results for
Hypothesis 1.1: Prevalence of Antibiotic Use in Farms and
Households
# =========================================================================
# RESEARCH QUESTION 1: HYPOTHESIS 1.1 ANALYSIS
# Preventive and growth-promoting antibiotic use is common in farm animals,
# with prevalence rates higher than corresponding use in human medicine
# =========================================================================
theme_set(
theme_minimal(base_size = 12) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(color = "gray95", linewidth = 0.3),
axis.text = element_text(color = "black", size = 9),
axis.title = element_text(size = 10, face = "bold"),
plot.title = element_text(size = 11, face = "bold"),
plot.subtitle = element_text(size = 9, face = "italic", color = "gray30"),
legend.position = "bottom",
legend.title = element_text(size = 9),
legend.text = element_text(size = 8),
panel.border = element_rect(color = "white", fill = NA, linewidth = 0.5),
strip.background = element_rect(fill = "white", color = NA),
strip.text = element_text(size = 9, face = "bold", color = "gray30")
)
)
low_sat_palette <- c("#a8c0e0", "#e0b5a8", "#a8d3b5", "#c3b5d3", "#d8cfb0", "#a9d0d3")
# -------------------------------------------------------------------------
# 1. DATA PREPARATION
# -------------------------------------------------------------------------
# Load datasets if not already in memory
if (!exists("farm_data")) {
farm_data <- readRDS(here("Data", "processed", "farm_data_processed.rds"))
}
if (!exists("household_data")) {
household_data <- readRDS(here("Data", "processed", "household_data_processed.rds"))
}
# Verify key variables in farm data
farm_vars_check <- c("any_ab_use", "ab_treatment", "ab_prevention", "ab_growth",
"chlo", "nitr", "dime", "metr", "cipr", "oflo", "carb", "olaq", "baci")
missing_farm_vars <- farm_vars_check[!farm_vars_check %in% names(farm_data)]
if(length(missing_farm_vars) > 0) {
warning(paste("Missing farm data variables:", paste(missing_farm_vars, collapse=", ")))
}
# Verify key variables in household data
household_vars_check <- c("any_ab_last_illness")
missing_household_vars <- household_vars_check[!household_vars_check %in% names(household_data)]
if(length(missing_household_vars) > 0) {
warning(paste("Missing household data variables:", paste(missing_household_vars, collapse=", ")))
}
# Check for symptom-related variables in household data
symptom_vars <- grep("q7_1___", names(household_data), value = TRUE)
if(length(symptom_vars) == 0) {
warning("No symptom variables found in household data. Cannot create preventive use proxy.")
}
# -------------------------------------------------------------------------
# 2. FARM ANTIBIOTIC USE PREVALENCE ANALYSIS
# -------------------------------------------------------------------------
# 2.1 Calculate farm antibiotic use prevalence with confidence intervals
farm_ab_ci <- farm_data %>%
summarise(
n = n(),
ab_use = sum(any_ab_use, na.rm = TRUE),
ab_treatment = sum(ab_treatment, na.rm = TRUE),
ab_prevention = sum(ab_prevention, na.rm = TRUE),
ab_growth = sum(ab_growth, na.rm = TRUE)
) %>%
mutate(
# Overall use with confidence intervals
ab_use_pct = ab_use / n * 100,
ab_use_ci_lower = binom.confint(ab_use, n, method = "wilson")$lower * 100,
ab_use_ci_upper = binom.confint(ab_use, n, method = "wilson")$upper * 100,
# Treatment use with confidence intervals
ab_treatment_pct = ab_treatment / n * 100,
ab_treatment_ci_lower = binom.confint(ab_treatment, n, method = "wilson")$lower * 100,
ab_treatment_ci_upper = binom.confint(ab_treatment, n, method = "wilson")$upper * 100,
# Preventive use with confidence intervals
ab_prevention_pct = ab_prevention / n * 100,
ab_prevention_ci_lower = binom.confint(ab_prevention, n, method = "wilson")$lower * 100,
ab_prevention_ci_upper = binom.confint(ab_prevention, n, method = "wilson")$upper * 100,
# Growth promotion use with confidence intervals
ab_growth_pct = ab_growth / n * 100,
ab_growth_ci_lower = binom.confint(ab_growth, n, method = "wilson")$lower * 100,
ab_growth_ci_upper = binom.confint(ab_growth, n, method = "wilson")$upper * 100
)
# 2.2 Calculate non-treatment antibiotic use in farms
farm_non_treatment <- farm_data %>%
summarise(
n = n(),
any_non_treatment = sum(ab_prevention == 1 | ab_growth == 1, na.rm = TRUE),
prevention_only = sum(ab_prevention == 1 & ab_treatment == 0 & ab_growth == 0, na.rm = TRUE),
growth_only = sum(ab_growth == 1 & ab_treatment == 0 & ab_prevention == 0, na.rm = TRUE),
both_purposes = sum(ab_prevention == 1 & ab_growth == 1 & ab_treatment == 0, na.rm = TRUE)
) %>%
mutate(
# Calculate percentages
any_non_treatment_pct = any_non_treatment / n * 100,
prevention_only_pct = prevention_only / n * 100,
growth_only_pct = growth_only / n * 100,
both_purposes_pct = both_purposes / n * 100,
# Calculate confidence intervals
any_non_treatment_ci_lower = binom.confint(any_non_treatment, n, method = "wilson")$lower * 100,
any_non_treatment_ci_upper = binom.confint(any_non_treatment, n, method = "wilson")$upper * 100,
prevention_only_ci_lower = binom.confint(prevention_only, n, method = "wilson")$lower * 100,
prevention_only_ci_upper = binom.confint(prevention_only, n, method = "wilson")$upper * 100,
growth_only_ci_lower = binom.confint(growth_only, n, method = "wilson")$lower * 100,
growth_only_ci_upper = binom.confint(growth_only, n, method = "wilson")$upper * 100,
both_purposes_ci_lower = binom.confint(both_purposes, n, method = "wilson")$lower * 100,
both_purposes_ci_upper = binom.confint(both_purposes, n, method = "wilson")$upper * 100
)
# 2.3 Display farm antibiotic use table
farm_ab_table <- data.frame(
Purpose = c("Any use", "Treatment", "Prevention", "Growth promotion"),
Count = c(farm_ab_ci$ab_use, farm_ab_ci$ab_treatment,
farm_ab_ci$ab_prevention, farm_ab_ci$ab_growth),
Percentage = c(farm_ab_ci$ab_use_pct, farm_ab_ci$ab_treatment_pct,
farm_ab_ci$ab_prevention_pct, farm_ab_ci$ab_growth_pct),
CI_Lower = c(farm_ab_ci$ab_use_ci_lower, farm_ab_ci$ab_treatment_ci_lower,
farm_ab_ci$ab_prevention_ci_lower, farm_ab_ci$ab_growth_ci_lower),
CI_Upper = c(farm_ab_ci$ab_use_ci_upper, farm_ab_ci$ab_treatment_ci_upper,
farm_ab_ci$ab_prevention_ci_upper, farm_ab_ci$ab_growth_ci_upper)
) %>%
mutate(
CI = paste0(round(CI_Lower, 1), "-", round(CI_Upper, 1)),
`Prevalence (95% CI)` = paste0(round(Percentage, 1), "% (", CI, ")")
)
# Display farm antibiotic use table
kable(farm_ab_table %>% dplyr::select(Purpose, Count, `Prevalence (95% CI)`),
caption = "Farm Antibiotic Use Prevalence with 95% Confidence Intervals",
align = "lrr") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
Farm Antibiotic Use Prevalence with 95% Confidence Intervals
Purpose
|
Count
|
Prevalence (95% CI)
|
Any use
|
1089
|
94.2% (92.7-95.4)
|
Treatment
|
1033
|
89.4% (87.5-91)
|
Prevention
|
558
|
48.3% (45.4-51.2)
|
Growth promotion
|
10
|
0.9% (0.5-1.6)
|
# -------------------------------------------------------------------------
# 3. HOUSEHOLD ANTIBIOTIC USE PREVALENCE ANALYSIS
# -------------------------------------------------------------------------
# 3.1 Calculate household antibiotic use prevalence with confidence intervals
household_ab_ci <- household_data %>%
summarise(
n = n(),
ab_use = sum(any_ab_last_illness, na.rm = TRUE)
) %>%
mutate(
# Overall use with confidence intervals
ab_use_pct = ab_use / n * 100,
ab_use_ci_lower = binom.confint(ab_use, n, method = "wilson")$lower * 100,
ab_use_ci_upper = binom.confint(ab_use, n, method = "wilson")$upper * 100
)
# 3.2 Display household antibiotic use
household_ab_table <- data.frame(
Purpose = "Any use",
Count = household_ab_ci$ab_use,
Percentage = household_ab_ci$ab_use_pct,
CI_Lower = household_ab_ci$ab_use_ci_lower,
CI_Upper = household_ab_ci$ab_use_ci_upper
) %>%
mutate(
CI = paste0(round(CI_Lower, 1), "-", round(CI_Upper, 1)),
`Prevalence (95% CI)` = paste0(round(Percentage, 1), "% (", CI, ")")
)
kable(household_ab_table %>% dplyr::select(Purpose, Count, `Prevalence (95% CI)`),
caption = "Household Antibiotic Use Prevalence with 95% Confidence Intervals",
align = "lrr") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
Household Antibiotic Use Prevalence with 95% Confidence Intervals
Purpose
|
Count
|
Prevalence (95% CI)
|
Any use
|
521
|
5.5% (5.1-6)
|
# -------------------------------------------------------------------------
# 4. CREATING HOUSEHOLD PREVENTIVE USE PROXY INDICATOR
# -------------------------------------------------------------------------
# 4.1 Create symptom indicators
# Helper function to safely check if a variable indicates a symptom
is_symptom_positive <- function(data, var_name) {
# Check if variable exists
if(!(var_name %in% names(data))) {
return(rep(FALSE, nrow(data)))
}
# Get the variable
var_data <- data[[var_name]]
# Handle different data types
result <- rep(FALSE, length(var_data))
for(i in 1:length(var_data)) {
val <- var_data[i]
# Skip NA values
if(is.na(val)) {
result[i] <- FALSE
next
}
# Handle character values
if(is.character(val)) {
result[i] <- (val == "Yes")
next
}
# Handle numeric values
if(is.numeric(val)) {
result[i] <- (val == 1)
next
}
# Default for other types
result[i] <- FALSE
}
return(result)
}
# Create indicator variables for different symptom types
n_rows <- nrow(household_data)
# Initialize flags
has_fever <- rep(FALSE, n_rows)
has_resp <- rep(FALSE, n_rows)
has_gi <- rep(FALSE, n_rows)
has_any <- rep(FALSE, n_rows)
# Fever - variable q7_1___1 (adjust variable names if needed)
fever_var <- "q7_1___1"
if(fever_var %in% symptom_vars) {
has_fever <- is_symptom_positive(household_data, fever_var)
}
# Respiratory symptoms - variables q7_1___2, q7_1___4, q7_1___5 (adjust as needed)
resp_vars <- c("q7_1___2", "q7_1___4", "q7_1___5")
for(var in intersect(resp_vars, symptom_vars)) {
has_resp <- has_resp | is_symptom_positive(household_data, var)
}
# GI symptoms - variables q7_1___6, q7_1___7 (adjust as needed)
gi_vars <- c("q7_1___6", "q7_1___7")
for(var in intersect(gi_vars, symptom_vars)) {
has_gi <- has_gi | is_symptom_positive(household_data, var)
}
# Any symptom - check all symptom variables
for(var in symptom_vars) {
has_any <- has_any | is_symptom_positive(household_data, var)
}
# Add flags to the dataset
household_data$fever_indicator <- as.integer(has_fever)
household_data$resp_indicator <- as.integer(has_resp)
household_data$gi_indicator <- as.integer(has_gi)
household_data$any_symptom <- as.integer(has_any)
# 4.2 Create the primary preventive use proxy indicator (stringent definition)
household_data <- household_data %>%
mutate(
# Stringent definition: AB use with NO symptoms at all
preventive_use_proxy_stringent = case_when(
any_ab_last_illness == 1 & any_symptom == 0 ~ 1,
TRUE ~ 0
),
# Alternative definition: AB use with only minor symptoms
# (no fever, no respiratory symptoms, no GI symptoms)
preventive_use_proxy_relaxed = case_when(
any_ab_last_illness == 1 & fever_indicator == 0 &
resp_indicator == 0 & gi_indicator == 0 ~ 1,
TRUE ~ 0
)
)
# 4.3 Calculate household preventive use statistics - primary definition
household_preventive_stringent <- household_data %>%
summarise(
n = n(),
preventive_n = sum(preventive_use_proxy_stringent, na.rm = TRUE),
preventive_pct = mean(preventive_use_proxy_stringent, na.rm = TRUE) * 100
)
# Add confidence intervals
ci_stringent <- binom.test(household_preventive_stringent$preventive_n,
household_preventive_stringent$n)$conf.int * 100
household_preventive_stringent$preventive_ci_lower <- ci_stringent[1]
household_preventive_stringent$preventive_ci_upper <- ci_stringent[2]
# 4.4 Calculate household preventive use with alternative definition
household_preventive_relaxed <- household_data %>%
summarise(
n = n(),
preventive_n = sum(preventive_use_proxy_relaxed, na.rm = TRUE),
preventive_pct = mean(preventive_use_proxy_relaxed, na.rm = TRUE) * 100
)
# Add confidence intervals
ci_relaxed <- binom.test(household_preventive_relaxed$preventive_n,
household_preventive_relaxed$n)$conf.int * 100
household_preventive_relaxed$preventive_ci_lower <- ci_relaxed[1]
household_preventive_relaxed$preventive_ci_upper <- ci_relaxed[2]
# Display results for both definitions
cat("\nHousehold Preventive Antibiotic Use - Sensitivity Analysis:\n")
##
## Household Preventive Antibiotic Use - Sensitivity Analysis:
cat("1. Stringent definition (NO symptoms):\n")
## 1. Stringent definition (NO symptoms):
cat(" Count:", household_preventive_stringent$preventive_n, "out of",
household_preventive_stringent$n, "households\n")
## Count: 0 out of 9411 households
cat(" Percentage:", round(household_preventive_stringent$preventive_pct, 1),
"% (95% CI:", round(household_preventive_stringent$preventive_ci_lower, 1), "-",
round(household_preventive_stringent$preventive_ci_upper, 1), "%)\n\n")
## Percentage: 0 % (95% CI: 0 - 0 %)
cat("2. Relaxed definition (minor or no symptoms):\n")
## 2. Relaxed definition (minor or no symptoms):
cat(" Count:", household_preventive_relaxed$preventive_n, "out of",
household_preventive_relaxed$n, "households\n")
## Count: 7 out of 9411 households
cat(" Percentage:", round(household_preventive_relaxed$preventive_pct, 1),
"% (95% CI:", round(household_preventive_relaxed$preventive_ci_lower, 1), "-",
round(household_preventive_relaxed$preventive_ci_upper, 1), "%)\n")
## Percentage: 0.1 % (95% CI: 0 - 0.2 %)
# For subsequent analyses, use the stringent definition as primary
household_preventive <- household_preventive_stringent
# -------------------------------------------------------------------------
# 5. ANALYSIS BY SYMPTOM/DISEASE STATUS
# -------------------------------------------------------------------------
# 5.1 Create breakdown of household antibiotic use by symptom status
household_by_symptom <- household_data %>%
mutate(
symptom_status = case_when(
resp_indicator == 1 ~ "Respiratory symptoms",
gi_indicator == 1 ~ "GI symptoms",
fever_indicator == 1 ~ "Fever",
any_symptom == 1 ~ "Other symptoms",
TRUE ~ "No symptoms"
)
) %>%
group_by(symptom_status) %>%
summarise(
n = n(),
ab_use_n = sum(any_ab_last_illness, na.rm = TRUE),
ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
.groups = "drop"
)
# Calculate confidence intervals
household_by_symptom <- household_by_symptom %>%
mutate(
# Calculate CIs using a more robust method
ab_use_ci_lower = mapply(function(x, n) {
if(n == 0) return(0)
ci <- binom.test(x, n)$conf.int
return(ci[1] * 100)
}, ab_use_n, n),
ab_use_ci_upper = mapply(function(x, n) {
if(n == 0) return(0)
ci <- binom.test(x, n)$conf.int
return(ci[2] * 100)
}, ab_use_n, n),
# Format the CI string properly, handling any NA values
CI = ifelse(is.na(ab_use_ci_upper),
paste0(round(ab_use_ci_lower, 1), "-?"),
paste0(round(ab_use_ci_lower, 1), "-", round(ab_use_ci_upper, 1))),
`Prevalence (95% CI)` = paste0(round(ab_use_pct, 1), "% (", CI, ")")
)
ci_data <- binom.confint(household_by_symptom$ab_use_n, household_by_symptom$n,
method = "wilson", conf.level = 0.95)
household_by_symptom$ab_use_ci_lower <- ci_data$lower * 100
household_by_symptom$ab_use_ci_upper <- ci_data$upper * 100
# Display the results
kable(household_by_symptom %>%
dplyr::select(symptom_status, n, ab_use_n, `Prevalence (95% CI)`),
caption = "Household Antibiotic Use by Symptom Status",
col.names = c("Symptom Status", "Sample Size", "AB Users", "Prevalence (95% CI)"),
align = "lrrr") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
Household Antibiotic Use by Symptom Status
Symptom Status
|
Sample Size
|
AB Users
|
Prevalence (95% CI)
|
Fever
|
94
|
30
|
31.9% (22.7-42.3)
|
GI symptoms
|
36
|
16
|
44.4% (27.9-61.9)
|
No symptoms
|
6
|
0
|
0% (0-45.9)
|
Other symptoms
|
8381
|
7
|
0.1% (0-0.2)
|
Respiratory symptoms
|
894
|
468
|
52.3% (49-55.7)
|
# -------------------------------------------------------------------------
# 6. COMPARATIVE ANALYSIS OF FARM VS HOUSEHOLD ANTIBIOTIC USE
# -------------------------------------------------------------------------
# 6.1 Comparison of overall antibiotic use
# Create a contingency table
overall_table <- matrix(
c(farm_ab_ci$ab_use, farm_ab_ci$n - farm_ab_ci$ab_use,
household_ab_ci$ab_use, household_ab_ci$n - household_ab_ci$ab_use),
nrow = 2,
dimnames = list(
c("Farm", "Household"),
c("Antibiotic Use", "No Antibiotic Use")
)
)
# Display the contingency table
kable(overall_table, caption = "Contingency Table for Overall Antibiotic Use") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE)
Contingency Table for Overall Antibiotic Use
|
Antibiotic Use
|
No Antibiotic Use
|
Farm
|
1089
|
521
|
Household
|
67
|
8890
|
# Perform chi-square test
overall_chi <- chisq.test(overall_table)
# Calculate effect size (Phi coefficient)
overall_phi <- sqrt(overall_chi$statistic / sum(overall_table))
# Determine effect size interpretation
overall_effect <- case_when(
abs(overall_phi) < 0.1 ~ "Negligible",
abs(overall_phi) < 0.3 ~ "Small",
abs(overall_phi) < 0.5 ~ "Medium",
TRUE ~ "Large"
)
# 6.2 Comparison of preventive antibiotic use
# Create comparison data frame
preventive_comparison <- data.frame(
Category = c("Farm Preventive Use", "Household Preventive Use (Proxy)"),
Count = c(farm_ab_ci$ab_prevention, household_preventive$preventive_n),
Total = c(farm_ab_ci$n, household_preventive$n),
Percentage = c(farm_ab_ci$ab_prevention_pct, household_preventive$preventive_pct),
CI_Lower = c(farm_ab_ci$ab_prevention_ci_lower, household_preventive$preventive_ci_lower),
CI_Upper = c(farm_ab_ci$ab_prevention_ci_upper, household_preventive$preventive_ci_upper)
)
# Add formatted percentage and confidence intervals
preventive_comparison <- preventive_comparison %>%
mutate(
Percent_CI = paste0(
round(Percentage, 1), "% (",
round(CI_Lower, 1), "-",
round(CI_Upper, 1), "%)"
)
)
# Display comparison table
kable(preventive_comparison %>%
dplyr::select(Category, Count, Total, Percent_CI),
caption = "Comparison of Preventive Antibiotic Use in Farms and Households",
col.names = c("Category", "Number of AB Users", "Total Sample", "Usage Rate (95% CI)"))
Comparison of Preventive Antibiotic Use in Farms and
Households
Farm Preventive Use |
558 |
1156 |
48.3% (45.4-51.2%) |
Household Preventive Use (Proxy) |
0 |
9411 |
0% (0-0%) |
# Create a contingency table for preventive use
preventive_table <- matrix(
c(preventive_comparison$Count[1], preventive_comparison$Total[1] - preventive_comparison$Count[1],
preventive_comparison$Count[2], preventive_comparison$Total[2] - preventive_comparison$Count[2]),
nrow = 2,
byrow = TRUE,
dimnames = list(
c("Farm", "Household"),
c("Preventive Use", "No Preventive Use")
)
)
# Perform chi-square test
preventive_chi <- chisq.test(preventive_table)
# Calculate effect size (Phi coefficient)
preventive_phi <- sqrt(preventive_chi$statistic / sum(preventive_table))
# Determine effect size interpretation
preventive_effect <- case_when(
abs(preventive_phi) < 0.1 ~ "Negligible",
abs(preventive_phi) < 0.3 ~ "Small",
abs(preventive_phi) < 0.5 ~ "Medium",
TRUE ~ "Large"
)
# Calculate the ratio of farm:household preventive use
preventive_ratio <- farm_ab_ci$ab_prevention_pct / household_preventive$preventive_pct
# 6.3 MULTILEVEL ANALYSIS OF ANTIBIOTIC USE
# Check if we have community identifiers
has_community_ids <- all(c("district", "commune") %in% names(farm_data)) &&
all(c("district", "commune") %in% names(household_data))
if(has_community_ids) {
# Create community ID variable
farm_data <- farm_data %>%
mutate(community_id = paste(district, commune, sep = "_"))
household_data <- household_data %>%
mutate(community_id = paste(district, commune, sep = "_"))
# Load required packages
if(!require(lme4)) install.packages("lme4")
library(lme4)
if(!require(performance)) install.packages("performance")
library(performance)
cat("\nMultilevel Analysis of Antibiotic Use:\n")
# Check for sufficient communities
farm_communities <- unique(farm_data$community_id)
household_communities <- unique(household_data$community_id)
cat("Number of communities in farm data:", length(farm_communities), "\n")
cat("Number of communities in household data:", length(household_communities), "\n")
# Proceed with multilevel modeling if we have sufficient communities (at least 10)
if(length(farm_communities) >= 10) {
# Farm multilevel model for any antibiotic use
tryCatch({
farm_mlm <- glmer(any_ab_use ~ 1 + (1|community_id),
family = binomial, data = farm_data)
farm_icc <- performance::icc(farm_mlm)
cat("\nFarm antibiotic use - Intraclass Correlation Coefficient:",
round(farm_icc, 3), "\n")
cat("This means approximately", round(farm_icc*100, 1),
"% of the variance in farm antibiotic use is attributable to community-level factors.\n")
# Farm multilevel model for preventive use
farm_prev_mlm <- glmer(ab_prevention ~ 1 + (1|community_id),
family = binomial, data = farm_data)
farm_prev_icc <- performance::icc(farm_prev_mlm)
cat("\nFarm preventive use - Intraclass Correlation Coefficient:",
round(farm_prev_icc, 3), "\n")
cat("This means approximately", round(farm_prev_icc*100, 1),
"% of the variance in farm preventive use is attributable to community-level factors.\n")
}, error = function(e) {
cat("Could not fit multilevel model for farm data:", e$message, "\n")
})
} else {
cat("Insufficient number of communities for farm multilevel modeling.\n")
}
# Proceed with household multilevel modeling if we have sufficient communities
if(length(household_communities) >= 10) {
# Household multilevel model
tryCatch({
household_mlm <- glmer(any_ab_last_illness ~ 1 + (1|community_id),
family = binomial, data = household_data)
household_icc <- performance::icc(household_mlm)
cat("\nHousehold antibiotic use - Intraclass Correlation Coefficient:",
round(household_icc, 3), "\n")
cat("This means approximately", round(household_icc*100, 1),
"% of the variance in household antibiotic use is attributable to community-level factors.\n")
# If we used the same communities, calculate correlation between community effects
shared_communities <- intersect(farm_communities, household_communities)
if(length(shared_communities) >= 5) {
cat("\nAnalyzing correlation between community effects in farm and household antibiotic use:\n")
# Extract random effects
farm_ranef <- ranef(farm_mlm)$community_id
household_ranef <- ranef(household_mlm)$community_id
# Prepare data
ranef_data <- data.frame(
community_id = rownames(farm_ranef),
farm_effect = farm_ranef[,1],
stringsAsFactors = FALSE
)
household_effects <- data.frame(
community_id = rownames(household_ranef),
household_effect = household_ranef[,1],
stringsAsFactors = FALSE
)
ranef_data <- ranef_data %>%
inner_join(household_effects, by = "community_id")
# Calculate correlation
community_cor <- cor.test(ranef_data$farm_effect, ranef_data$household_effect,
method = "spearman")
cat("Correlation between community effects:", round(community_cor$estimate, 3),
"(p =", round(community_cor$p.value, 3), ")\n")
if(community_cor$p.value < 0.05) {
cat("This suggests significant community-level correlation between farm and household antibiotic use patterns.\n")
} else {
cat("No significant community-level correlation detected between farm and household antibiotic use patterns.\n")
}
}
}, error = function(e) {
cat("Could not fit multilevel model for household data:", e$message, "\n")
})
} else {
cat("Insufficient number of communities for household multilevel modeling.\n")
}
} else {
cat("\nCommunity identifiers not available - multilevel analysis skipped.\n")
}
##
## Multilevel Analysis of Antibiotic Use:
## Number of communities in farm data: 64
## Number of communities in household data: 64
##
## Farm antibiotic use - Intraclass Correlation Coefficient: Could not fit multilevel model for farm data: argument 2 (type 'list') cannot be handled by 'cat'
##
## Household antibiotic use - Intraclass Correlation Coefficient: Could not fit multilevel model for household data: argument 2 (type 'list') cannot be handled by 'cat'
# -------------------------------------------------------------------------
# 7. ANTIBIOTIC TYPES COMPARATIVE ANALYSIS
# -------------------------------------------------------------------------
# 7.1 Extract antibiotic type data from both datasets
# Farm antibiotic types - CORRECTED to properly handle binary variables (0/1)
farm_ab_types <- farm_data %>%
summarise(
n = n(),
# Properly calculate percentage using binary indicators (== 1)
# These variables are binary where 1 = Yes (used this antibiotic), 0 = No
chloramphenicol = mean(chlo == 1, na.rm = TRUE) * 100,
nitrofurans = mean(nitr == 1, na.rm = TRUE) * 100,
dimetridazole = mean(dime == 1, na.rm = TRUE) * 100,
metronidazole = mean(metr == 1, na.rm = TRUE) * 100,
ciprofloxacin = mean(cipr == 1, na.rm = TRUE) * 100,
ofloxacin = mean(oflo == 1, na.rm = TRUE) * 100,
carbadox = mean(carb == 1, na.rm = TRUE) * 100,
olaquindox = mean(olaq == 1, na.rm = TRUE) * 100,
bacitracin = mean(baci == 1, na.rm = TRUE) * 100
)
# Household antibiotic types - check and adapt to actual variable names
household_ab_cols <- grep("^num_q7_11___\\d+$", names(household_data), value = TRUE)
if(length(household_ab_cols) > 0) {
household_ab_types <- household_data %>%
summarise(across(all_of(household_ab_cols), ~mean(. == 1, na.rm = TRUE) * 100)) %>%
mutate(n = nrow(household_data))
# Map variable names to antibiotic types - adjust mapping as needed
household_ab_map <- data.frame(
variable = household_ab_cols,
antibiotic = case_when(
grepl("1$", household_ab_cols) ~ "amoxicillin",
grepl("2$", household_ab_cols) ~ "ampicillin",
grepl("3$", household_ab_cols) ~ "amoxicillin_clavulanic",
grepl("4$", household_ab_cols) ~ "azithromycin",
grepl("5$", household_ab_cols) ~ "cefalexin",
grepl("6$", household_ab_cols) ~ "cefixime",
grepl("7$", household_ab_cols) ~ "cefuroxime",
grepl("8$", household_ab_cols) ~ "quinolones",
grepl("9$", household_ab_cols) ~ "cotrimoxazole",
grepl("10$", household_ab_cols) ~ "metronidazole",
grepl("11$", household_ab_cols) ~ "penicillin",
TRUE ~ "other"
),
stringsAsFactors = FALSE
)
# Transform to long format
household_ab_long <- household_ab_types %>%
dplyr::select(-n) %>%
pivot_longer(cols = everything(), names_to = "variable", values_to = "percentage") %>%
left_join(household_ab_map, by = "variable") %>%
group_by(antibiotic) %>%
summarise(percentage = sum(percentage), .groups = "drop") %>% # Sum by antibiotic type
filter(!is.na(antibiotic)) %>%
mutate(sector = "Household")
# Farm antibiotic types - long format
farm_ab_long <- farm_ab_types %>%
dplyr::select(-n) %>%
pivot_longer(cols = everything(), names_to = "antibiotic", values_to = "percentage") %>%
mutate(sector = "Farm")
# Classify antibiotics into broader classes
household_ab_classes <- data.frame(
antibiotic = c("amoxicillin", "ampicillin", "amoxicillin_clavulanic", "penicillin",
"azithromycin", "cefalexin", "cefixime", "cefuroxime",
"quinolones", "cotrimoxazole", "metronidazole"),
class = c("Penicillins", "Penicillins", "Penicillins", "Penicillins",
"Macrolides", "Cephalosporins", "Cephalosporins", "Cephalosporins",
"Quinolones", "Sulfonamides", "Nitroimidazoles"),
stringsAsFactors = FALSE
)
farm_ab_classes <- data.frame(
antibiotic = c("chloramphenicol", "nitrofurans", "dimetridazole", "metronidazole",
"ciprofloxacin", "ofloxacin", "carbadox", "olaquindox", "bacitracin"),
class = c("Amphenicols", "Nitrofurans", "Nitroimidazoles", "Nitroimidazoles",
"Quinolones", "Quinolones", "Others", "Others", "Polypeptides"),
stringsAsFactors = FALSE
)
# Group by antibiotic class with CORRECTED method to avoid double-counting
# This ensures proper handling of multiple antibiotics within the same class
household_class_usage <- household_ab_long %>%
left_join(household_ab_classes, by = "antibiotic") %>%
filter(!is.na(class)) %>%
group_by(class) %>%
# For household data, we'll assume percentage calculation was correct
summarise(percentage = sum(percentage), .groups = "drop") %>%
mutate(sector = "Household")
# For farm data, we'll use a different approach to avoid double-counting
# This is especially important for classes with multiple antibiotics
farm_class_usage <- farm_data %>%
# Create class indicators - a farm uses a class if it uses any antibiotic in that class
mutate(
Amphenicols = as.integer(chlo == 1),
Nitrofurans = as.integer(nitr == 1),
Nitroimidazoles = as.integer(dime == 1 | metr == 1), # Either antibiotic counts
Quinolones = as.integer(cipr == 1 | oflo == 1), # Either antibiotic counts
Others = as.integer(carb == 1 | olaq == 1), # Either antibiotic counts
Polypeptides = as.integer(baci == 1)
) %>%
# Calculate percentage for each class
summarise(across(c(Amphenicols, Nitrofurans, Nitroimidazoles,
Quinolones, Others, Polypeptides),
~mean(., na.rm = TRUE) * 100)) %>%
# Convert to long format
pivot_longer(cols = everything(), names_to = "class", values_to = "percentage") %>%
mutate(sector = "Farm")
# Combine datasets for comparison
ab_class_comparison <- bind_rows(household_class_usage, farm_class_usage)
# Print corrected data to verify
print("Corrected antibiotic class comparison data:")
print(ab_class_comparison)
# Find shared antibiotic classes
shared_classes <- intersect(household_class_usage$class, farm_class_usage$class)
shared_class_comparison <- ab_class_comparison %>%
filter(class %in% shared_classes)
# Calculate correlation for shared classes if sufficient data
if(length(shared_classes) >= 3) {
correlation_data <- shared_class_comparison %>%
pivot_wider(names_from = sector, values_from = percentage)
class_correlation <- cor.test(correlation_data$Household, correlation_data$Farm,
method = "spearman")
print("Correlation between farm and household antibiotic class usage:")
print(class_correlation)
}
# -------------------------------------------------------------------------
# 8. VISUALIZATIONS
# -------------------------------------------------------------------------
# 8.1 Prepare visualization data
# Farm antibiotic use purposes data
farm_purposes <- data.frame(
purpose = c("Treatment", "Prevention", "Growth Promotion"),
percentage = c(
farm_ab_ci$ab_treatment_pct,
farm_ab_ci$ab_prevention_pct,
farm_ab_ci$ab_growth_pct
)
) %>%
mutate(purpose = factor(purpose, levels = c("Treatment", "Prevention", "Growth Promotion")))
# Comparison data for farm vs household overall antibiotic use
comparison_data <- data.frame(
sector = c("Farm", "Household"),
prevalence = c(farm_ab_ci$ab_use_pct, household_ab_ci$ab_use_pct),
ci_lower = c(farm_ab_ci$ab_use_ci_lower, household_ab_ci$ab_use_ci_lower),
ci_upper = c(farm_ab_ci$ab_use_ci_upper, household_ab_ci$ab_use_ci_upper)
)
# Preventive use comparison data
preventive_data <- data.frame(
Category = c("Farm Preventive Use", "Household Preventive Use (Proxy)"),
Usage = c(farm_ab_ci$ab_prevention_pct, household_preventive$preventive_pct),
Lower = c(farm_ab_ci$ab_prevention_ci_lower, household_preventive$preventive_ci_lower),
Upper = c(farm_ab_ci$ab_prevention_ci_upper, household_preventive$preventive_ci_upper)
)
# Non-treatment antibiotic use data
non_treatment_data <- data.frame(
purpose = c("Any Non-Treatment", "Prevention Only", "Growth Promotion Only", "Both Purposes"),
percentage = c(
farm_non_treatment$any_non_treatment_pct,
farm_non_treatment$prevention_only_pct,
farm_non_treatment$growth_only_pct,
farm_non_treatment$both_purposes_pct
),
ci_lower = c(
farm_non_treatment$any_non_treatment_ci_lower,
farm_non_treatment$prevention_only_ci_lower,
farm_non_treatment$growth_only_ci_lower,
farm_non_treatment$both_purposes_ci_lower
),
ci_upper = c(
farm_non_treatment$any_non_treatment_ci_upper,
farm_non_treatment$prevention_only_ci_upper,
farm_non_treatment$growth_only_ci_upper,
farm_non_treatment$both_purposes_ci_upper
)
) %>%
arrange(desc(percentage))
# 8.2 Create individual visualizations
# 1. Overall antibiotic use prevalence comparison
overall_comparison_plot <- ggplot(comparison_data, aes(x = sector, y = prevalence, fill = sector)) +
geom_bar(stat = "identity", width = 0.6, color = "gray30", linewidth = 0.2) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "gray30", linewidth = 0.5) +
geom_text(aes(label = paste0(round(prevalence, 1), "%")),
vjust = -0.8, size = 3.2, color = "gray30") +
labs(title = "Overall Antibiotic Use Prevalence",
subtitle = paste0("Chi-square test: χ² = ", round(overall_chi$statistic, 1),
", p < 0.001; Effect size: ", round(overall_phi, 3), " (", overall_effect, ")"),
x = "",
y = "Prevalence (%)") +
theme(legend.position = "none",
axis.text.x = element_text(size = 10)) +
scale_fill_manual(values = c("Farm" = "#a8c0e0", "Household" = "#e0b5a8")) +
scale_y_continuous(limits = c(0, max(comparison_data$ci_upper) * 1.15),
breaks = seq(0, 100, by = 20),
expand = c(0, 0))
print(overall_comparison_plot)
# 2. Preventive antibiotic use comparison
preventive_plot <- ggplot(preventive_data, aes(x = Category, y = Usage, fill = Category)) +
geom_bar(stat = "identity", width = 0.6, color = "gray30", linewidth = 0.2) +
geom_errorbar(aes(ymin = Lower, ymax = Upper), width = 0.2, color = "gray30", linewidth = 0.5) +
geom_text(aes(label = sprintf("%.1f%%", Usage)),
vjust = -0.8, size = 3.2, color = "gray30") +
labs(title = "Preventive Antibiotic Use Comparison",
subtitle = paste0("Farm:Household ratio = ", round(preventive_ratio, 1), ":1"),
x = "",
y = "Prevalence (%)") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, size = 9)) +
scale_fill_manual(values = c("Farm Preventive Use" = "#a8c0e0",
"Household Preventive Use (Proxy)" = "#a8d3b5")) +
scale_y_continuous(limits = c(0, max(preventive_data$Upper) * 1.15),
breaks = seq(0, 60, by = 10),
expand = c(0, 0))
print(preventive_plot)
# 3. Farm antibiotic use purposes
total_percentage <- sum(farm_purposes$percentage)
cat("Total percentage across all purposes:", total_percentage, "%\n")
farm_purpose_plot <- ggplot(farm_purposes, aes(x = "Farm", y = percentage, fill = purpose)) +
geom_bar(stat = "identity", position = "stack", width = 0.6, color = "white", linewidth = 0.2) +
geom_text(aes(label = paste0(round(percentage, 1), "%")),
position = position_stack(vjust = 0.5),
color = "white", fontface = "bold", size = 3) +
labs(title = "Antibiotic Use Purposes on Farms",
subtitle = paste0("n = ", farm_ab_ci$n, " farms",
ifelse(total_percentage > 100,
"\nNote: Percentages sum to >100% as farms may use antibiotics for multiple purposes",
"")),
x = "",
y = "Percentage of Farms (%)") +
theme(legend.position = "right",
legend.title = element_blank(),
legend.key.size = unit(0.8, "cm"),
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA)) +
scale_fill_manual(values = c(
"Treatment" = "#a8c0e0",
"Prevention" = "#e0b5a8",
"Growth Promotion" = "#a8d3b5"
)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 100))
print(farm_purpose_plot)
# 4. Non-treatment antibiotic use analysis
non_treatment_plot <- ggplot(non_treatment_data,
aes(x = reorder(purpose, -percentage), y = percentage, fill = purpose)) +
geom_bar(stat = "identity", width = 0.6, color = "gray30", linewidth = 0.2) +
geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2, color = "gray30", linewidth = 0.5) +
geom_text(aes(label = sprintf("%.1f%%", percentage)),
vjust = -0.5, size = 3, color = "gray30") +
labs(title = "Non-Treatment Antibiotic Use in Farms",
subtitle = paste0("n = ", farm_ab_ci$n, " farms"),
x = "",
y = "Prevalence (%)") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, size = 8.5)) +
scale_fill_manual(values = c(
"Any Non-Treatment" = "#a8c0e0",
"Prevention Only" = "#e0b5a8",
"Growth Promotion Only" = "#a8d3b5",
"Both Purposes" = "#c3b5d3"
)) +
scale_y_continuous(limits = c(0, max(non_treatment_data$ci_upper) * 1.15),
expand = c(0, 0))
print(non_treatment_plot)
# 5. Household antibiotic use by symptom status (if available)
if(exists("household_by_symptom") && nrow(household_by_symptom) > 0) {
household_by_symptom$symptom_status <- factor(
household_by_symptom$symptom_status,
levels = c("Respiratory symptoms", "GI symptoms", "Fever",
"Other symptoms", "No symptoms")
)
symptom_plot <- ggplot(household_by_symptom,
aes(x = reorder(symptom_status, -ab_use_pct), y = ab_use_pct,
fill = symptom_status)) +
geom_bar(stat = "identity", width = 0.7, color = "gray30", linewidth = 0.2) +
geom_errorbar(aes(ymin = ab_use_ci_lower, ymax = ab_use_ci_upper),
width = 0.2, color = "gray30", linewidth = 0.5) +
geom_text(aes(label = sprintf("%.1f%%", ab_use_pct)),
vjust = -0.5, size = 3, color = "gray30") +
labs(title = "Household Antibiotic Use by Symptom Status",
subtitle = paste0("n = ", household_ab_ci$n, " households"),
x = "",
y = "Antibiotic Use Rate (%)") +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, size = 8.5)) +
scale_fill_manual(values = c(
"Respiratory symptoms" = "#a8c0e0",
"GI symptoms" = "#e0b5a8",
"Fever" = "#a8d3b5",
"Other symptoms" = "#c3b5d3",
"No symptoms" = "#d8cfb0"
)) +
scale_y_continuous(limits = c(0, max(household_by_symptom$ab_use_ci_upper) * 1.15),
expand = c(0, 0))
print(symptom_plot)
}
# 6. Comparative chart of antibiotic classes - Science/Lancet style
class_plot <- ggplot(ab_class_comparison,
aes(x = reorder(class, -percentage), y = percentage, fill = sector)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7,
color = "gray30", linewidth = 0.2) +
geom_text(aes(label = sprintf("%.1f%%", percentage)),
position = position_dodge(width = 0.7),
vjust = -0.3, size = 2.8, color = "gray30") +
labs(title = "Antibiotic Classes Used in Farms and Households",
subtitle = "Proportion of farms/households using each antibiotic class",
x = "",
y = "Usage Rate (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
legend.position = "bottom",
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()) +
scale_fill_manual(values = c("Farm" = "#a8c0e0", "Household" = "#e0b5a8")) +
scale_y_continuous(limits = c(0, max(ab_class_comparison$percentage) * 1.2),
breaks = seq(0, 100, by = 20),
expand = c(0, 0))
print(class_plot)
ggsave("antibiotic_class_comparison.png", class_plot,
width = 8, height = 5, dpi = 300)
# 2. Shared antibiotic classes (if available)
if(length(shared_classes) > 0) {
shared_class_plot <- ggplot(shared_class_comparison,
aes(x = reorder(class, -percentage), y = percentage, fill = sector)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7,
color = "gray30", linewidth = 0.2) +
geom_text(aes(label = sprintf("%.1f%%", percentage)),
position = position_dodge(width = 0.7),
vjust = -0.3, size = 2.8, color = "gray30") +
labs(title = "Shared Antibiotic Classes in Farms and Households",
subtitle = ifelse(exists("class_correlation"),
paste0("Spearman correlation: rho = ",
round(class_correlation$estimate, 2),
", p = ", round(class_correlation$p.value, 3)),
"Comparison of usage patterns across sectors"),
x = "",
y = "Usage Rate (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
legend.position = "bottom",
panel.background = element_rect(fill = "white", color = NA),
plot.background = element_rect(fill = "white", color = NA),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()) +
scale_fill_manual(values = c("Farm" = "#a8c0e0", "Household" = "#e0b5a8")) +
scale_y_continuous(limits = c(0, max(shared_class_comparison$percentage) * 1.2),
breaks = seq(0, 100, by = 20),
expand = c(0, 0))
print(shared_class_plot)
}
}
## [1] "Corrected antibiotic class comparison data:"
## # A tibble: 12 × 3
## class percentage sector
## <chr> <dbl> <chr>
## 1 Cephalosporins 2.66 Household
## 2 Macrolides 0.319 Household
## 3 Nitroimidazoles 0.128 Household
## 4 Penicillins 3.81 Household
## 5 Quinolones 0.0957 Household
## 6 Sulfonamides 0.0744 Household
## 7 Amphenicols 0.460 Farm
## 8 Nitrofurans 0.276 Farm
## 9 Nitroimidazoles 0.276 Farm
## 10 Quinolones 0.736 Farm
## 11 Others 0.276 Farm
## 12 Polypeptides 0 Farm


## Total percentage across all purposes: 138 %





# 7. Comparison of common antibiotic classes - Science/Lancet style
if(exists("shared_class_comparison") && length(shared_classes) > 0) {
shared_class_plot <- ggplot(shared_class_comparison,
aes(x = reorder(class, -percentage), y = percentage, fill = sector)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7,
color = "gray30", linewidth = 0.2) +
geom_text(aes(label = sprintf("%.1f%%", percentage)),
position = position_dodge(width = 0.7),
vjust = -0.3, size = 2.8, color = "gray30") +
labs(title = "Shared Antibiotic Classes in Farms and Households",
x = "",
y = "Usage Rate (%)",
fill = "") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
legend.position = "bottom") +
scale_fill_manual(values = c("Farm" = "#a8c0e0", "Household" = "#e0b5a8")) +
scale_y_continuous(limits = c(0, max(shared_class_comparison$percentage) * 1.2),
breaks = seq(0, 50, by = 10),
expand = c(0, 0))
print(shared_class_plot)
ggsave("shared_antibiotic_classes.pdf", shared_class_plot,
width = 7, height = 5, dpi = 300)
ggsave("shared_antibiotic_classes.png", shared_class_plot,
width = 7, height = 5, dpi = 300)
if(length(shared_classes) >= 3 && exists("correlation_data")) {
corr_plot <- ggplot(correlation_data, aes(x = Household, y = Farm, label = class)) +
geom_point(color = "#a8c0e0", size = 3, alpha = 0.8) +
geom_text_repel(size = 3, color = "gray30", box.padding = 0.5) +
geom_smooth(method = "lm", se = TRUE, color = "#e0b5a8", fill = "#e0b5a8", alpha = 0.2) +
labs(title = "Correlation of Antibiotic Class Usage",
subtitle = paste0("Spearman correlation: rho = ",
round(class_correlation$estimate, 2),
", p = ", round(class_correlation$p.value, 3)),
x = "Household Usage Rate (%)",
y = "Farm Usage Rate (%)") +
theme(panel.grid.major = element_line(color = "gray95", linewidth = 0.3)) +
coord_fixed(ratio = 1, xlim = c(0, max(correlation_data$Household, correlation_data$Farm) * 1.1),
ylim = c(0, max(correlation_data$Household, correlation_data$Farm) * 1.1))
print(corr_plot)
}
}

# 8.3 Create forest plot comparing antibiotic use across different categories
# Prepare data for forest plot
forest_data <- bind_rows(
# Farm data by purpose
farm_purposes %>%
mutate(
category = "Farm",
subcategory = purpose,
estimate = percentage,
# Add confidence intervals (approximate for visualization purposes)
lower = pmax(0, percentage - 5),
upper = pmin(100, percentage + 5)
) %>%
dplyr::select(category, subcategory, estimate, lower, upper),
# Household data
data.frame(
category = "Household",
subcategory = "Any Use",
estimate = household_ab_ci$ab_use_pct,
lower = household_ab_ci$ab_use_ci_lower,
upper = household_ab_ci$ab_use_ci_upper,
stringsAsFactors = FALSE
),
# Household preventive use (both definitions)
data.frame(
category = "Household",
subcategory = c("Preventive (Stringent)", "Preventive (Relaxed)"),
estimate = c(household_preventive_stringent$preventive_pct,
household_preventive_relaxed$preventive_pct),
lower = c(household_preventive_stringent$preventive_ci_lower,
household_preventive_relaxed$preventive_ci_lower),
upper = c(household_preventive_stringent$preventive_ci_upper,
household_preventive_relaxed$preventive_ci_upper),
stringsAsFactors = FALSE
)
) %>%
# Create a combined label for ordering
mutate(
combined_label = paste0(category, ": ", subcategory),
combined_label = factor(combined_label,
levels = rev(unique(combined_label)))
)
# Create the forest plot
forest_plot <- ggplot(forest_data, aes(x = estimate, y = combined_label,
color = category)) +
geom_rect(aes(ymin = as.numeric(combined_label) - 0.5,
ymax = as.numeric(combined_label) + 0.5,
xmin = 0, xmax = 100),
fill = "gray97", alpha = 0.5, color = NA) +
geom_point(size = 3) +
geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0.3, linewidth = 0.5) +
labs(title = "Antibiotic Use Patterns in Farms and Households",
subtitle = "Point estimates with 95% confidence intervals",
x = "Prevalence (%)",
y = "",
color = "") +
theme(panel.grid.major.x = element_line(color = "gray95", linewidth = 0.3),
panel.grid.major.y = element_blank(),
legend.position = "bottom") +
scale_color_manual(values = c("Farm" = "#a8c0e0", "Household" = "#e0b5a8")) +
scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, by = 20),
expand = c(0, 0)) +
geom_text(aes(x = estimate + 3, label = sprintf("%.1f%%", estimate)),
size = 2.8, hjust = 0, color = "gray30")
print(forest_plot)

# 8.4 Create One Health perspective integrated visualization
# Prepare data for One Health visualization
one_health_data <- bind_rows(
# Farm data
data.frame(
sector = "Farm",
category = c("Treatment", "Prevention", "Growth Promotion"),
percentage = c(farm_ab_ci$ab_treatment_pct,
farm_ab_ci$ab_prevention_pct,
farm_ab_ci$ab_growth_pct),
stringsAsFactors = FALSE
),
# Household data
household_by_symptom %>%
transmute(
sector = "Household",
category = symptom_status,
percentage = ab_use_pct
)
)
# Create the One Health visualization
one_health_plot <- ggplot(one_health_data,
aes(x = sector, y = percentage, fill = category)) +
geom_bar(stat = "identity", position = "stack", width = 0.7) +
geom_text(aes(label = sprintf("%.1f%%", percentage)),
position = position_stack(vjust = 0.5),
color = "white", fontface = "bold", size = 3) +
labs(title = "One Health Perspective: Integrated Antibiotic Use Patterns",
subtitle = "Comparison of antibiotic use purposes across farm and household sectors",
x = "",
y = "Percentage (%)") +
scale_fill_manual(values = c(
# Farm colors (blues)
"Treatment" = "#4a6fe3",
"Prevention" = "#6aabcc",
"Growth Promotion" = "#a8d5eb",
# Household colors (reds/oranges)
"Respiratory symptoms" = "#d3825f",
"GI symptoms" = "#e9ae98",
"Fever" = "#c15c41",
"Other symptoms" = "#f3cfbf",
"No symptoms" = "#fae3d7"
)) +
theme(legend.position = "right",
legend.title = element_blank())
print(one_health_plot)

# -------------------------------------------------------------------------
# 9. SUMMARY OF FINDINGS
# -------------------------------------------------------------------------
# Create a comprehensive summary table
summary_table <- data.frame(
Measure = c("Overall Antibiotic Use", "Farm Treatment Use", "Farm Prevention Use",
"Farm Growth Promotion Use", "Household Preventive Use (Proxy)"),
Prevalence = c(
paste0(round(farm_ab_ci$ab_use_pct, 1), "% vs ", round(household_ab_ci$ab_use_pct, 1), "%"),
paste0(round(farm_ab_ci$ab_treatment_pct, 1), "%"),
paste0(round(farm_ab_ci$ab_prevention_pct, 1), "%"),
paste0(round(farm_ab_ci$ab_growth_pct, 1), "%"),
paste0(round(household_preventive$preventive_pct, 1), "%")
),
Comparison = c(
paste0("χ² = ", round(overall_chi$statistic, 1),
", p ", ifelse(overall_chi$p.value < 0.001, "< 0.001",
paste0("= ", round(overall_chi$p.value, 3))),
"; Effect: ", overall_effect),
"N/A",
paste0("Farm:Household ratio = ", round(preventive_ratio, 1), ":1"),
"N/A",
paste0("χ² = ", round(preventive_chi$statistic, 1),
", p ", ifelse(preventive_chi$p.value < 0.001, "< 0.001",
paste0("= ", round(preventive_chi$p.value, 3))),
"; Effect: ", preventive_effect)
),
Conclusion = c(
ifelse(overall_chi$p.value < 0.05,
"Significant difference in overall use between farms and households",
"No significant difference in overall use"),
"Most common purpose for antibiotic use in farms",
ifelse(preventive_chi$p.value < 0.05,
"Significantly higher preventive use in farms than households",
"No significant difference in preventive use"),
paste0(round(farm_non_treatment$any_non_treatment_pct, 1),
"% of farms use antibiotics for non-treatment purposes"),
"Based on proxy measure of antibiotic use without symptoms"
)
)
# Display the summary table
kable(summary_table,
caption = "Summary of Key Findings for Hypothesis 1.1",
align = "llll") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE) %>%
column_spec(1, bold = TRUE) %>%
row_spec(0, bold = TRUE, background = "#f2f2f2")
Summary of Key Findings for Hypothesis 1.1
Measure
|
Prevalence
|
Comparison
|
Conclusion
|
Overall Antibiotic Use
|
94.2% vs 5.5%
|
χ² = 6260.6, p < 0.001; Effect: Large
|
Significant difference in overall use between farms and households
|
Farm Treatment Use
|
89.4%
|
N/A
|
Most common purpose for antibiotic use in farms
|
Farm Prevention Use
|
48.3%
|
Farm:Household ratio = Inf:1
|
Significantly higher preventive use in farms than households
|
Farm Growth Promotion Use
|
0.9%
|
N/A
|
48.4% of farms use antibiotics for non-treatment purposes
|
Household Preventive Use (Proxy)
|
0%
|
χ² = 4786.3, p < 0.001; Effect: Large
|
Based on proxy measure of antibiotic use without symptoms
|
# -------------------------------------------------------------------------
# 10. FINAL CONCLUSION ON HYPOTHESIS 1.1
# -------------------------------------------------------------------------
cat("\n\n### CONCLUSION ON HYPOTHESIS 1.1 ###\n\n")
##
##
## ### CONCLUSION ON HYPOTHESIS 1.1 ###
cat("Hypothesis 1.1 states that preventive and growth-promoting antibiotic use is common in farm animals, with prevalence rates higher than corresponding use in human medicine.\n\n")
## Hypothesis 1.1 states that preventive and growth-promoting antibiotic use is common in farm animals, with prevalence rates higher than corresponding use in human medicine.
# Overall conclusion
if(farm_ab_ci$ab_prevention_pct > household_preventive$preventive_pct && preventive_chi$p.value < 0.05) {
cat("CONCLUSION: The data SUPPORTS hypothesis 1.1. \n\n")
} else if(farm_ab_ci$ab_prevention_pct > household_preventive$preventive_pct && preventive_chi$p.value >= 0.05) {
cat("CONCLUSION: The data PARTIALLY SUPPORTS hypothesis 1.1. \n\n")
} else {
cat("CONCLUSION: The data DOES NOT SUPPORT hypothesis 1.1. \n\n")
}
## CONCLUSION: The data SUPPORTS hypothesis 1.1.
# Detailed explanation
cat(paste0("Farm preventive antibiotic use (", round(farm_ab_ci$ab_prevention_pct, 1),
"%, 95% CI: ", round(farm_ab_ci$ab_prevention_ci_lower, 1), "-",
round(farm_ab_ci$ab_prevention_ci_upper, 1), "%) is approximately ",
round(preventive_ratio, 1), " times higher than household preventive use (",
round(household_preventive$preventive_pct, 1), "%, 95% CI: ",
round(household_preventive$preventive_ci_lower, 1), "-",
round(household_preventive$preventive_ci_upper, 1), "%). "))
## Farm preventive antibiotic use (48.3%, 95% CI: 45.4-51.2%) is approximately Inf times higher than household preventive use (0%, 95% CI: 0-0%).
if(preventive_chi$p.value < 0.05) {
cat(paste0("This difference is statistically significant (χ² = ",
round(preventive_chi$statistic, 1), ", p ",
ifelse(preventive_chi$p.value < 0.001, "< 0.001", paste0("= ", round(preventive_chi$p.value, 3))),
") with a ", tolower(preventive_effect), " effect size (Phi = ", round(preventive_phi, 3), ").\n\n"))
} else {
cat(paste0("However, this difference is not statistically significant (χ² = ",
round(preventive_chi$statistic, 1), ", p = ", round(preventive_chi$p.value, 3),
"), possibly due to limited sample size or high variability.\n\n"))
}
## This difference is statistically significant (χ² = 4786.3, p < 0.001) with a large effect size (Phi = 0.673).
cat(paste0("Additionally, ", round(farm_non_treatment$any_non_treatment_pct, 1),
"% of farms use antibiotics for non-treatment purposes, with ",
round(farm_ab_ci$ab_growth_pct, 1), "% specifically using antibiotics for growth promotion,
a practice not found in human medicine.\n\n"))
## Additionally, 48.4% of farms use antibiotics for non-treatment purposes, with 0.9% specifically using antibiotics for growth promotion,
## a practice not found in human medicine.
cat("These findings suggest that non-therapeutic antibiotic use is more prevalent in farm settings compared to household use,
consistent with the hypothesis. This has important implications for antimicrobial resistance risks, as preventive and growth-promoting
antibiotic use in agriculture may contribute significantly to the overall antibiotic selection pressure in the environment.\n\n")
## These findings suggest that non-therapeutic antibiotic use is more prevalent in farm settings compared to household use,
## consistent with the hypothesis. This has important implications for antimicrobial resistance risks, as preventive and growth-promoting
## antibiotic use in agriculture may contribute significantly to the overall antibiotic selection pressure in the environment.
cat("Limitations to consider include the use of a proxy measure for household preventive use and potential differences in antibiotic
classification between sectors. Future research should include more direct measures of household antibiotic use purposes and examine
the specific drivers of preventive antibiotic use in both sectors.")
## Limitations to consider include the use of a proxy measure for household preventive use and potential differences in antibiotic
## classification between sectors. Future research should include more direct measures of household antibiotic use purposes and examine
## the specific drivers of preventive antibiotic use in both sectors.
Results for
Hypothesis 1.3: Non-linear Relationship Between Farm Size and Antibiotic
Use
if(!exists("farm_data")) {
farm_data <- readRDS(here("data", "processed", "farm_data_processed.rds"))
}
# Calculate antibiotic use prevalence by farm size category
ab_by_size <- farm_data %>%
group_by(farm_size_cat) %>%
summarise(
n = n(),
ab_use_any_n = sum(any_ab_use, na.rm = TRUE),
ab_use_any_pct = mean(any_ab_use, na.rm = TRUE) * 100,
ab_treatment_pct = mean(ab_treatment, na.rm = TRUE) * 100,
ab_prevention_pct = mean(ab_prevention, na.rm = TRUE) * 100,
ab_growth_pct = mean(ab_growth, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(!is.na(farm_size_cat)) # Remove NA farm sizes
# Display results
ab_by_size %>%
kable(caption = "Antibiotic Use by Farm Size Category",
col.names = c("Farm Size", "Number of Farms", "Farms Using Antibiotics",
"% Any Use", "% Treatment", "% Prevention", "% Growth Promotion")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Antibiotic Use by Farm Size Category
Farm Size
|
Number of Farms
|
Farms Using Antibiotics
|
% Any Use
|
% Treatment
|
% Prevention
|
% Growth Promotion
|
Large
|
485
|
464
|
96
|
91
|
50
|
0.62
|
Medium
|
659
|
614
|
93
|
88
|
47
|
0.91
|
Small
|
12
|
11
|
92
|
92
|
33
|
8.33
|
# Chi-square test for association between farm size category and antibiotic use
size_chi_table <- farm_data %>%
filter(!is.na(farm_size_cat)) %>%
group_by(farm_size_cat) %>%
summarise(
ab_use = sum(any_ab_use, na.rm = TRUE),
no_ab_use = sum(1 - any_ab_use, na.rm = TRUE),
.groups = "drop"
) %>%
dplyr::select(-farm_size_cat) %>%
as.matrix()
rownames(size_chi_table) <- ab_by_size$farm_size_cat
# Display the contingency table
kable(size_chi_table, caption = "Contingency Table: Antibiotic Use by Farm Size Category") %>%
kable_styling()
Contingency Table: Antibiotic Use by Farm Size Category
|
ab_use
|
no_ab_use
|
Large
|
464
|
21
|
Medium
|
614
|
45
|
Small
|
11
|
1
|
# Perform chi-square test
size_chi_test <- chisq.test(size_chi_table)
# Display results
cat("Chi-square test for differences in antibiotic use among farm size categories:\n")
## Chi-square test for differences in antibiotic use among farm size categories:
cat("Chi-square value:", size_chi_test$statistic, "\n")
## Chi-square value: 3.3
cat("Degrees of freedom:", size_chi_test$parameter, "\n")
## Degrees of freedom: 2
cat("P-value:", size_chi_test$p.value, "\n")
## P-value: 0.19
cat("Interpretation:", ifelse(size_chi_test$p.value < 0.05,
"Significant differences in antibiotic use among farm size categories",
"No significant differences in antibiotic use among farm size categories"), "\n")
## Interpretation: No significant differences in antibiotic use among farm size categories
# Calculate Cramer's V for effect size
cramers_v <- sqrt(size_chi_test$statistic / (sum(size_chi_table) * (min(dim(size_chi_table)) - 1)))
cat("Effect size (Cramer's V):", cramers_v, "\n")
## Effect size (Cramer's V): 0.054
cat("Interpretation of effect size:",
case_when(
cramers_v < 0.1 ~ "Negligible effect",
cramers_v < 0.3 ~ "Small effect",
cramers_v < 0.5 ~ "Medium effect",
TRUE ~ "Large effect"
), "\n")
## Interpretation of effect size: Negligible effect
### LOESS Smoothing for Non-linear Relationship
# Prepare data for LOESS smoothing
size_ab_data <- farm_data %>%
filter(!is.na(area_farm) & !is.na(any_ab_use)) %>%
dplyr::select(area_farm, any_ab_use, ab_treatment, ab_prevention, ab_growth)
# Create LOESS smoothed curve for overall antibiotic use
overall_plot <- ggplot(size_ab_data, aes(x = area_farm, y = any_ab_use)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "loess", se = TRUE) +
labs(title = "Non-linear Relationship Between Farm Size and Antibiotic Use",
subtitle = "LOESS Smoothing Curve with 95% Confidence Interval",
x = "Farm Area (square meters)",
y = "Probability of Antibiotic Use") +
theme_minimal()
# Print the plot
print(overall_plot)

# Create LOESS smoothed curves for different purposes
treatment_plot <- ggplot(size_ab_data, aes(x = area_farm, y = ab_treatment)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "loess", se = TRUE) +
labs(title = "Farm Size and Treatment Antibiotic Use",
x = "Farm Area (square meters)",
y = "Probability of Treatment Use") +
theme_minimal()
prevention_plot <- ggplot(size_ab_data, aes(x = area_farm, y = ab_prevention)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "loess", se = TRUE) +
labs(title = "Farm Size and Preventive Antibiotic Use",
x = "Farm Area (square meters)",
y = "Probability of Preventive Use") +
theme_minimal()
growth_plot <- ggplot(size_ab_data, aes(x = area_farm, y = ab_growth)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "loess", se = TRUE) +
labs(title = "Farm Size and Growth Promotion Antibiotic Use",
x = "Farm Area (square meters)",
y = "Probability of Growth Promotion Use") +
theme_minimal()
# Combine plots for purposes
purpose_plots <- ggarrange(treatment_plot, prevention_plot, growth_plot,
ncol = 3, nrow = 1, common.legend = TRUE)
# Print combined plot
print(purpose_plots)

#######################################################################
# Enhancing Hypothesis 1.3: Non-linear relationship between farm size
# and antibiotic use – Causal inference analysis
#######################################################################
# Insert after the "### LOESS Smoothing for Non-linear Relationship"
# section in the h1-3-analysis.R script
# This code strengthens causal inference regarding farm size and antibiotic use
# by controlling for confounders and conducting deeper analyses
### Multivariate Analysis: Non-linear Relationship Controlling for Confounders
# Load necessary libraries
if(!requireNamespace("mgcv", quietly = TRUE)) {
install.packages("mgcv")
}
library(mgcv)
# Data preparation
gam_data <- farm_data %>%
filter(!is.na(area_farm) & !is.na(any_ab_use) & !is.na(animal_type) & !is.na(edu))
# Fit Generalized Additive Model (GAM) controlling for animal type and education level
cat("Fitting Generalized Additive Model (GAM) controlling for confounders...\n")
## Fitting Generalized Additive Model (GAM) controlling for confounders...
gam_model <- tryCatch({
gam(any_ab_use ~ s(area_farm) + animal_type + edu,
family = binomial, data = gam_data)
}, error = function(e) {
message("GAM model fitting failed: ", e$message)
return(NULL)
})
# Display model summary
if(!is.null(gam_model)) {
print(summary(gam_model))
# Visualization of adjusted non-linear relationship between farm size and antibiotic use
plot_data <- expand.grid(
area_farm = seq(min(gam_data$area_farm, na.rm = TRUE),
max(gam_data$area_farm, na.rm = TRUE), length.out = 100),
animal_type = unique(gam_data$animal_type),
edu = median(gam_data$edu, na.rm = TRUE)
)
# Predict values
plot_data$predicted <- predict(gam_model, newdata = plot_data, type = "response")
# Plot adjusted non-linear relationships by animal type
adjusted_plot <- ggplot(plot_data, aes(x = area_farm, y = predicted, color = animal_type)) +
geom_line(size = 1) +
labs(title = "Adjusted Non-linear Relationship between Farm Size and Antibiotic Use",
subtitle = "Controlling for Animal Type and Education Level",
x = "Farm Size (m²)",
y = "Probability of Antibiotic Use",
color = "Animal Type") +
theme_minimal() +
scale_color_brewer(palette = "Set1") +
theme(legend.position = "bottom")
print(adjusted_plot)
# Overall effect plot without grouping
overall_data <- expand.grid(
area_farm = seq(min(gam_data$area_farm, na.rm = TRUE),
max(gam_data$area_farm, na.rm = TRUE), length.out = 100),
animal_type = "Chicken", # reference category
edu = median(gam_data$edu, na.rm = TRUE)
)
overall_data$predicted <- predict(gam_model, newdata = overall_data, type = "response")
# Extract confidence intervals from smooth terms
smooth_terms <- plot(gam_model, select = 1, seWithMean = TRUE)
smooth_df <- data.frame(
area_farm = smooth_terms[[1]]$x,
fit = smooth_terms[[1]]$fit,
se = smooth_terms[[1]]$se
)
# Calculate confidence intervals on logit scale
smooth_df$upper <- smooth_df$fit + 1.96 * smooth_df$se
smooth_df$lower <- smooth_df$fit - 1.96 * smooth_df$se
# Convert to probability scale
smooth_df$fit_prob <- plogis(smooth_df$fit)
smooth_df$upper_prob <- plogis(smooth_df$upper)
smooth_df$lower_prob <- plogis(smooth_df$lower)
# Plot smooth effect with confidence intervals
smooth_plot <- ggplot() +
geom_ribbon(data = smooth_df,
aes(x = area_farm, ymin = lower_prob, ymax = upper_prob),
fill = "lightblue", alpha = 0.3) +
geom_line(data = overall_data, aes(x = area_farm, y = predicted),
color = "darkblue", size = 1) +
labs(title = "Smooth Effect of Farm Size on Antibiotic Use",
subtitle = "With 95% Confidence Interval (Controlling for Animal Type and Education Level)",
x = "Farm Size (m²)",
y = "Probability of Antibiotic Use") +
theme_minimal()
print(smooth_plot)
# Identify potential breakpoint (highest curvature)
diff_data <- data.frame(
area_farm = overall_data$area_farm[-c(1, length(overall_data$area_farm))],
second_deriv = diff(diff(overall_data$predicted))
)
potential_breakpoint <- diff_data$area_farm[which.max(abs(diff_data$second_deriv))]
cat("Potential breakpoint based on curvature analysis:", round(potential_breakpoint, 0), "m²\n")
}
##
## Family: binomial
## Link function: logit
##
## Formula:
## any_ab_use ~ s(area_farm) + animal_type + edu
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.0308 0.3499 5.80 0.0000000065 ***
## animal_typeChicken 1.2361 0.4004 3.09 0.0020 **
## animal_typePig 1.0849 0.3729 2.91 0.0036 **
## edu -0.0608 0.0440 -1.38 0.1667
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(area_farm) 3.16 3.92 4.98 0.29
##
## R-sq.(adj) = 0.0226 Deviance explained = 3.9%
## UBRE = -0.56027 Scale est. = 1 n = 1149



## Potential breakpoint based on curvature analysis: 1151 m²
### Analysis of Farm Characteristics around Breakpoint
# Set breakpoint range
breakpoint <- ifelse(exists("potential_breakpoint"), potential_breakpoint, 500)
window_size <- 200
# Identify farms around breakpoint
transition_farms <- farm_data %>%
filter(area_farm >= (breakpoint - window_size) &
area_farm <= (breakpoint + window_size))
# Farms before, at, and after breakpoint
before_transition <- farm_data %>% filter(area_farm < (breakpoint - window_size)) %>% mutate(transition_group = "Before Transition")
at_transition <- transition_farms %>% mutate(transition_group = "Transition Zone")
after_transition <- farm_data %>% filter(area_farm > (breakpoint + window_size)) %>% mutate(transition_group = "After Transition")
# Combine and analyze data
transition_analysis <- bind_rows(before_transition, at_transition, after_transition) %>%
filter(!is.na(transition_group))
### Segmented Regression Analysis
# Fit a logistic regression model first
log_model <- glm(any_ab_use ~ area_farm, data = size_ab_data, family = binomial)
# Summarize the model
summary_log_model <- summary(log_model)
cat("Logistic regression of antibiotic use on farm size:\n")
## Logistic regression of antibiotic use on farm size:
cat("Coefficient for farm size:", coef(log_model)[2], "\n")
## Coefficient for farm size: 0.000042
cat("Standard error:", summary_log_model$coefficients[2, 2], "\n")
## Standard error: 0.000041
cat("p-value:", summary_log_model$coefficients[2, 4], "\n")
## p-value: 0.3
cat("Null deviance:", summary_log_model$null.deviance, "on", summary_log_model$df.null, "degrees of freedom\n")
## Null deviance: 512 on 1155 degrees of freedom
cat("Residual deviance:", summary_log_model$deviance, "on", summary_log_model$df.residual, "degrees of freedom\n")
## Residual deviance: 510 on 1154 degrees of freedom
cat("AIC:", summary_log_model$aic, "\n")
## AIC: 514
# Try to fit a segmented model
# This is wrapped in a tryCatch block because segmented regression might not converge
tryCatch({
library(segmented)
# Initial guess for breakpoint
initial_bp <- median(size_ab_data$area_farm)
# Fit segmented model
segmented_model <- segmented(log_model, seg.Z = ~area_farm, psi = list(area_farm = c(initial_bp)))
# Print summary
print(summary(segmented_model))
# Extract breakpoints
breakpoints <- segmented_model$psi[, 2]
cat("Estimated breakpoint in farm size:", breakpoints, "square meters\n")
# Generate predictions for plotting
new_data <- data.frame(area_farm = seq(min(size_ab_data$area_farm),
max(size_ab_data$area_farm),
length.out = 100))
# Get predictions from both models
new_data$pred_log <- predict(log_model, newdata = new_data, type = "response")
new_data$pred_seg <- predict(segmented_model, newdata = new_data, type = "response")
# Plot segmented regression results with comparison to regular logistic regression
seg_plot <- ggplot(size_ab_data, aes(x = area_farm, y = any_ab_use)) +
geom_point(alpha = 0.3) +
geom_line(data = new_data, aes(x = area_farm, y = pred_log, color = "Logistic Regression"),
size = 1, linetype = "dashed") +
geom_line(data = new_data, aes(x = area_farm, y = pred_seg, color = "Segmented Regression"),
size = 1) +
geom_vline(xintercept = breakpoints, linetype = "dashed", color = "red") +
labs(title = "Segmented Regression of Farm Size and Antibiotic Use",
subtitle = paste("Breakpoint at", round(breakpoints, 0), "square meters"),
x = "Farm Area (square meters)",
y = "Probability of Antibiotic Use",
color = "Model Type") +
theme_minimal() +
scale_color_manual(values = c("Logistic Regression" = "blue", "Segmented Regression" = "red"))
# Print the segmented plot
print(seg_plot)
}, error = function(e) {
message("Segmented regression did not converge. Using only LOESS smoothing for visualization.")
print(e)
})
##
## ***Regression Model with Segmented Relationship(s)***
##
## Call:
## segmented.glm(obj = log_model, seg.Z = ~area_farm, psi = list(area_farm = c(initial_bp)))
##
## Estimated Break-Point(s):
## Est. St.Err
## psi1.area_farm 10000 9231
##
## Coefficients of the linear terms:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.573098 0.173096 14.87 <0.0000000000000002 ***
## area_farm 0.000167 0.000120 1.40 0.16
## U1.area_farm -0.000184 0.000124 -1.48 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 511.68 on 1155 degrees of freedom
## Residual deviance: 507.02 on 1152 degrees of freedom
## AIC: 515
##
## Boot restarting based on 9 samples. Last fit:
## Convergence attained in 0 iterations (rel. change -0.00000000000000078463)
## Estimated breakpoint in farm size: 10000 square meters

### Additional Analysis: Farm Size and Specific Antibiotic Types
# Calculate specific antibiotic use by farm size category
specific_ab_by_size <- farm_data %>%
group_by(farm_size_cat) %>%
summarise(
n = n(),
chlo_pct = mean(chlo, na.rm = TRUE) * 100,
nitr_pct = mean(nitr, na.rm = TRUE) * 100,
dime_pct = mean(dime, na.rm = TRUE) * 100,
metr_pct = mean(metr, na.rm = TRUE) * 100,
cipr_pct = mean(cipr, na.rm = TRUE) * 100,
oflo_pct = mean(oflo, na.rm = TRUE) * 100,
carb_pct = mean(carb, na.rm = TRUE) * 100,
olaq_pct = mean(olaq, na.rm = TRUE) * 100,
baci_pct = mean(baci, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(!is.na(farm_size_cat))
# Convert to long format for visualization
specific_ab_size_long <- specific_ab_by_size %>%
dplyr::select(-n) %>%
pivot_longer(cols = -farm_size_cat,
names_to = "antibiotic",
values_to = "percentage") %>%
mutate(
antibiotic = case_when(
antibiotic == "chlo_pct" ~ "Chloramphenicol",
antibiotic == "nitr_pct" ~ "Nitrofurans",
antibiotic == "dime_pct" ~ "Dimetridazole",
antibiotic == "metr_pct" ~ "Metronidazole",
antibiotic == "cipr_pct" ~ "Ciprofloxacin",
antibiotic == "oflo_pct" ~ "Ofloxacin",
antibiotic == "carb_pct" ~ "Carbadox",
antibiotic == "olaq_pct" ~ "Olaquindox",
antibiotic == "baci_pct" ~ "Bacitracin",
TRUE ~ antibiotic
)
)
# Create a heatmap of specific antibiotic use by farm size
size_heatmap <- ggplot(specific_ab_size_long,
aes(x = farm_size_cat, y = antibiotic, fill = percentage)) +
geom_tile(color = "white") +
geom_text(aes(label = round(percentage, 1)), size = 3) +
scale_fill_gradient(low = "white", high = "steelblue") +
labs(title = "Heatmap of Specific Antibiotic Use by Farm Size Category",
x = "Farm Size",
y = "Antibiotic",
fill = "Prevalence (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 0, hjust = 0.5),
axis.text.y = element_text(size = 8))
# Print the heatmap
print(size_heatmap)

### Farm Size and Inappropriate Antibiotic Use
# Create an indicator for potentially inappropriate use
# Define inappropriate use as preventive use without disease or growth promotion
farm_data <- farm_data %>%
mutate(
inappropriate_use = case_when(
ab_prevention == 1 | ab_growth == 1 ~ 1,
TRUE ~ 0
)
)
# Analyze inappropriate use by farm size
inappropriate_by_size <- farm_data %>%
group_by(farm_size_cat) %>%
summarise(
n = n(),
inappropriate_n = sum(inappropriate_use, na.rm = TRUE),
inappropriate_pct = mean(inappropriate_use, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(!is.na(farm_size_cat))
# Display results
inappropriate_by_size %>%
kable(caption = "Inappropriate Antibiotic Use by Farm Size Category",
col.names = c("Farm Size", "Number of Farms", "Farms with Inappropriate Use", "% Inappropriate Use")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Inappropriate Antibiotic Use by Farm Size Category
Farm Size
|
Number of Farms
|
Farms with Inappropriate Use
|
% Inappropriate Use
|
Large
|
485
|
243
|
50
|
Medium
|
659
|
312
|
47
|
Small
|
12
|
4
|
33
|
# Create a bar chart of inappropriate use by farm size
inappropriate_plot <- ggplot(inappropriate_by_size,
aes(x = farm_size_cat, y = inappropriate_pct, fill = farm_size_cat)) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(round(inappropriate_pct, 1), "%")),
position = position_stack(vjust = 0.5)) +
labs(title = "Inappropriate Antibiotic Use by Farm Size",
subtitle = "Defined as preventive use or growth promotion",
x = "Farm Size Category",
y = "Percentage of Farms (%)") +
theme_minimal() +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Blues")
# Print the plot
print(inappropriate_plot)

# LOESS smoothing for inappropriate use by farm size
inappropriate_plot_loess <- ggplot(farm_data %>% filter(!is.na(area_farm)),
aes(x = area_farm, y = inappropriate_use)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "loess", se = TRUE) +
labs(title = "Non-linear Relationship Between Farm Size and Inappropriate Antibiotic Use",
subtitle = "LOESS Smoothing Curve with 95% Confidence Interval",
x = "Farm Area (square meters)",
y = "Probability of Inappropriate Use") +
theme_minimal()
# Print the LOESS plot
print(inappropriate_plot_loess)

Results for
Hypothesis 1.4: Isomorphism in Antibiotic Use Patterns within
Communities
if(!exists("farm_data")) {
farm_data <- readRDS(here("data", "processed", "farm_data_processed.rds"))
}
if(!exists("household_data")) {
household_data <- readRDS(here("data", "processed", "household_data_processed.rds"))
}
# Check if we have geographic identifiers for community-level analysis
if(!"geo_id" %in% names(farm_data) || !"geo_id" %in% names(household_data)) {
cat("Geographic identifiers not found in one or both datasets.\n")
cat("Creating proxy geographic identifiers based on available information.\n")
# Check for district and commune variables
if(all(c("district", "commune") %in% names(farm_data)) &&
all(c("district", "commune") %in% names(household_data))) {
# Create geographic ID for community-level analysis
farm_data <- farm_data %>%
mutate(geo_id = paste(district, commune, sep = "_"))
household_data <- household_data %>%
mutate(geo_id = paste(district, commune, sep = "_"))
cat("Created geo_id using district and commune variables.\n")
} else {
# If standard geographic variables don't exist, use farm_id pattern if available
if("farm_id" %in% names(farm_data)) {
farm_data <- farm_data %>%
mutate(
# Extract first part of farm_id as geographic identifier
# This assumes farm_id has a structure where the first digits represent location
geo_id = substr(as.character(farm_id), 1, 5)
)
# For household data, create a mock geo_id for demonstration
# In a real analysis, we would need a proper way to link households to communities
household_data <- household_data %>%
mutate(
geo_id = sample(unique(farm_data$geo_id), size = n(), replace = TRUE)
)
cat("Created proxy geo_id using farm_id pattern. Note: This is for demonstration only.\n")
cat("In a real analysis, proper geographic linkage is required.\n")
} else {
cat("No suitable geographic identifiers found. Community-level analysis will be skipped.\n")
# Exit the analysis if no geographic identifiers are available
return(NULL)
}
}
}
# Aggregate farm antibiotic use by community
farm_community <- farm_data %>%
group_by(geo_id) %>%
summarise(
farm_count = n(),
farm_ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
farm_ab_treatment_pct = mean(ab_treatment, na.rm = TRUE) * 100,
farm_ab_prevention_pct = mean(ab_prevention, na.rm = TRUE) * 100,
farm_ab_growth_pct = mean(ab_growth, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(farm_count >= 3) # Only include communities with at least 3 farms
# Aggregate household antibiotic use by community
household_community <- household_data %>%
group_by(geo_id) %>%
summarise(
household_count = n(),
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(household_count >= 3) # Only include communities with at least 3 households
# Join farm and household community data
community_comparison <- inner_join(farm_community, household_community, by = "geo_id")
# Display the number of communities with sufficient data
cat("Number of communities with sufficient data for comparison:", nrow(community_comparison), "\n")
## Number of communities with sufficient data for comparison: 64
# Display community-level comparison data
community_comparison %>%
dplyr::select(geo_id, farm_count, household_count, farm_ab_use_pct, household_ab_use_pct) %>%
kable(caption = "Community-level Antibiotic Use in Farms and Households",
col.names = c("Community ID", "Number of Farms", "Number of Households",
"Farm Antibiotic Use (%)", "Household Antibiotic Use (%)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Community-level Antibiotic Use in Farms and Households
Community ID
|
Number of Farms
|
Number of Households
|
Farm Antibiotic Use (%)
|
Household Antibiotic Use (%)
|
4_1
|
20
|
219
|
100
|
0.46
|
4_10
|
20
|
161
|
100
|
8.07
|
4_11
|
18
|
151
|
94
|
2.65
|
4_12
|
20
|
147
|
100
|
3.40
|
4_13
|
20
|
135
|
85
|
11.11
|
4_14
|
16
|
156
|
100
|
3.85
|
4_15
|
20
|
153
|
100
|
3.92
|
4_16
|
20
|
166
|
100
|
6.02
|
4_17
|
18
|
182
|
100
|
9.34
|
4_18
|
20
|
175
|
100
|
5.71
|
4_19
|
20
|
167
|
95
|
8.98
|
4_2
|
16
|
154
|
94
|
5.84
|
4_20
|
18
|
186
|
100
|
5.38
|
4_21
|
20
|
160
|
95
|
1.87
|
4_22
|
20
|
139
|
75
|
5.04
|
4_23
|
20
|
156
|
100
|
4.49
|
4_24
|
19
|
131
|
100
|
17.56
|
4_25
|
20
|
187
|
100
|
1.60
|
4_26
|
19
|
184
|
89
|
2.72
|
4_27
|
20
|
176
|
85
|
5.11
|
4_28
|
18
|
228
|
78
|
13.16
|
4_3
|
19
|
154
|
95
|
3.25
|
4_4
|
20
|
162
|
100
|
8.02
|
4_5
|
20
|
181
|
95
|
6.08
|
4_6
|
20
|
195
|
75
|
8.72
|
4_7
|
20
|
136
|
100
|
1.47
|
4_8
|
20
|
152
|
100
|
3.95
|
4_9
|
19
|
194
|
95
|
1.03
|
5_1
|
16
|
152
|
75
|
1.97
|
5_10
|
11
|
120
|
100
|
12.50
|
5_11
|
16
|
129
|
100
|
0.00
|
5_12
|
16
|
127
|
100
|
1.57
|
5_13
|
16
|
100
|
94
|
0.00
|
5_14
|
16
|
167
|
88
|
12.57
|
5_15
|
16
|
103
|
100
|
0.97
|
5_16
|
16
|
106
|
100
|
9.43
|
5_17
|
16
|
159
|
100
|
3.14
|
5_18
|
17
|
134
|
100
|
0.00
|
5_19
|
16
|
154
|
69
|
4.55
|
5_2
|
16
|
99
|
94
|
25.25
|
5_20
|
16
|
57
|
100
|
8.77
|
5_3
|
16
|
135
|
100
|
24.44
|
5_4
|
13
|
104
|
100
|
23.08
|
5_5
|
16
|
137
|
100
|
3.65
|
5_6
|
16
|
186
|
94
|
3.76
|
5_7
|
16
|
135
|
94
|
0.74
|
5_8
|
16
|
120
|
100
|
6.67
|
5_9
|
16
|
119
|
69
|
2.52
|
6_1
|
18
|
113
|
94
|
4.42
|
6_10
|
20
|
178
|
85
|
0.56
|
6_11
|
20
|
123
|
100
|
0.00
|
6_12
|
19
|
156
|
95
|
0.00
|
6_13
|
15
|
147
|
80
|
4.76
|
6_14
|
19
|
169
|
89
|
1.78
|
6_15
|
17
|
149
|
100
|
0.00
|
6_16
|
20
|
131
|
95
|
5.34
|
6_2
|
19
|
103
|
100
|
2.91
|
6_3
|
18
|
119
|
100
|
6.72
|
6_4
|
19
|
97
|
79
|
5.15
|
6_5
|
20
|
137
|
95
|
0.00
|
6_6
|
19
|
120
|
95
|
7.50
|
6_7
|
20
|
150
|
100
|
6.67
|
6_8
|
20
|
148
|
95
|
16.22
|
6_9
|
20
|
141
|
100
|
0.00
|
### Correlation Analysis
# Calculate Pearson correlation between farm and household antibiotic use
correlation_result <- cor.test(
community_comparison$farm_ab_use_pct,
community_comparison$household_ab_use_pct,
method = "pearson"
)
# Display correlation results
cat("Pearson correlation between farm and household antibiotic use at community level:\n")
## Pearson correlation between farm and household antibiotic use at community level:
cat("Correlation coefficient (r):", correlation_result$estimate, "\n")
## Correlation coefficient (r): 0.017
cat("95% confidence interval: [", correlation_result$conf.int[1], ",", correlation_result$conf.int[2], "]\n")
## 95% confidence interval: [ -0.23 , 0.26 ]
cat("t-statistic:", correlation_result$statistic, "\n")
## t-statistic: 0.13
cat("Degrees of freedom:", correlation_result$parameter, "\n")
## Degrees of freedom: 62
cat("P-value:", correlation_result$p.value, "\n")
## P-value: 0.9
cat("Interpretation:", ifelse(correlation_result$p.value < 0.05,
"Significant correlation",
"No significant correlation"), "\n")
## Interpretation: No significant correlation
# Calculate Spearman correlation (less sensitive to outliers)
spearman_result <- cor.test(
community_comparison$farm_ab_use_pct,
community_comparison$household_ab_use_pct,
method = "spearman"
)
cat("\nSpearman rank correlation (robust to outliers):\n")
##
## Spearman rank correlation (robust to outliers):
cat("Correlation coefficient (rho):", spearman_result$estimate, "\n")
## Correlation coefficient (rho): -0.014
cat("P-value:", spearman_result$p.value, "\n")
## P-value: 0.91
### Visualization of Community-level Patterns
# Scatter plot of farm vs. household antibiotic use at community level
community_plot <- ggplot(community_comparison,
aes(x = farm_ab_use_pct, y = household_ab_use_pct)) +
geom_point(aes(size = farm_count + household_count), alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Community-level Correlation Between Farm and Household Antibiotic Use",
subtitle = paste("Pearson correlation =", round(correlation_result$estimate, 2),
", p-value =", round(correlation_result$p.value, 3)),
x = "Farm Antibiotic Use (%)",
y = "Household Antibiotic Use (%)",
size = "Number of Respondents") +
theme_minimal()
# Print the community-level correlation plot
print(community_plot)

### Multiple Regression Analysis
# Prepare data for regression analysis - add more community characteristics if available
# For example, average educational level, socioeconomic indicators, etc.
if(all(c("edu", "area_farm") %in% names(farm_data))) {
# Aggregate additional farm characteristics
farm_community_extended <- farm_data %>%
group_by(geo_id) %>%
summarise(
farm_count = n(),
farm_ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
farm_edu_mean = mean(as.numeric(edu), na.rm = TRUE), # Assuming edu is a numeric or ordinal variable
farm_size_mean = mean(area_farm, na.rm = TRUE),
.groups = "drop"
) %>%
filter(farm_count >= 3)
# Join with household data
if("edu" %in% names(household_data)) {
household_community_extended <- household_data %>%
group_by(geo_id) %>%
summarise(
household_count = n(),
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
household_edu_mean = mean(as.numeric(edu), na.rm = TRUE),
.groups = "drop"
) %>%
filter(household_count >= 3)
# Join data
community_regression <- inner_join(farm_community_extended, household_community_extended, by = "geo_id")
# Regression model: Household antibiotic use as a function of farm antibiotic use and covariates
model <- lm(household_ab_use_pct ~ farm_ab_use_pct + farm_edu_mean + household_edu_mean + farm_size_mean,
data = community_regression)
# Display regression results
cat("\nMultiple regression analysis:\n")
cat("Dependent variable: Household antibiotic use percentage\n\n")
print(summary(model))
# Calculate VIF to check for multicollinearity
if(requireNamespace("car", quietly = TRUE)) {
cat("\nVariance Inflation Factors (checking for multicollinearity):\n")
print(car::vif(model))
}
}
}
### Analysis of Purpose-Specific Correlations
# Examine correlations between household antibiotic use and specific farm antibiotic purposes
purpose_correlations <- data.frame(
Purpose = c("Treatment", "Prevention", "Growth Promotion"),
Correlation = c(
cor(community_comparison$household_ab_use_pct, community_comparison$farm_ab_treatment_pct,
use = "pairwise.complete.obs"),
cor(community_comparison$household_ab_use_pct, community_comparison$farm_ab_prevention_pct,
use = "pairwise.complete.obs"),
cor(community_comparison$household_ab_use_pct, community_comparison$farm_ab_growth_pct,
use = "pairwise.complete.obs")
)
)
# Calculate p-values for these correlations
purpose_correlations$PValue <- c(
cor.test(community_comparison$household_ab_use_pct, community_comparison$farm_ab_treatment_pct)$p.value,
cor.test(community_comparison$household_ab_use_pct, community_comparison$farm_ab_prevention_pct)$p.value,
cor.test(community_comparison$household_ab_use_pct, community_comparison$farm_ab_growth_pct)$p.value
)
purpose_correlations$Significant <- purpose_correlations$PValue < 0.05
# Display purpose-specific correlations
purpose_correlations %>%
mutate(
Correlation = round(Correlation, 3),
PValue = round(PValue, 3),
Significant = ifelse(Significant, "Yes", "No")
) %>%
kable(caption = "Correlations Between Household Antibiotic Use and Farm Antibiotic Use by Purpose",
col.names = c("Farm Antibiotic Purpose", "Correlation with Household Use",
"P-Value", "Significant")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Correlations Between Household Antibiotic Use and Farm Antibiotic Use by
Purpose
Farm Antibiotic Purpose
|
Correlation with Household Use
|
P-Value
|
Significant
|
Treatment
|
0.01
|
0.95
|
No
|
Prevention
|
0.00
|
0.99
|
No
|
Growth Promotion
|
-0.09
|
0.49
|
No
|
# If information source data is available, analyze relationship with community patterns
if("q7_19" %in% names(household_data) & "advice1" %in% names(farm_data)) {
# Categorize information sources and aggregate at community level
household_info_sources <- household_data %>%
group_by(geo_id) %>%
summarise(
# Count frequency of different information sources
# This would need to be adapted based on actual coding of information sources
household_info_healthcare = mean(q7_19 == 1 | q7_19 == 2 | q7_19 == 3, na.rm = TRUE) * 100,
household_info_pharmacy = mean(q7_19 == 5, na.rm = TRUE) * 100,
household_info_social = mean(q7_19 == 8, na.rm = TRUE) * 100,
.groups = "drop"
)
farm_info_sources <- farm_data %>%
group_by(geo_id) %>%
summarise(
# Count frequency of different information sources
# This would need to be adapted based on actual coding of information sources
farm_info_veterinary = mean(grepl("vet", advice1, ignore.case = TRUE), na.rm = TRUE) * 100,
farm_info_pharmacy = mean(grepl("pharm", advice1, ignore.case = TRUE), na.rm = TRUE) * 100,
farm_info_social = mean(grepl("friend|neighbor", advice1, ignore.case = TRUE), na.rm = TRUE) * 100,
.groups = "drop"
)
# Join information source data with community comparison data
community_info <- community_comparison %>%
left_join(household_info_sources, by = "geo_id") %>%
left_join(farm_info_sources, by = "geo_id")
# Analyze correlation between similar information sources across sectors
cat("\nCorrelations between information sources across sectors:\n")
if(ncol(community_info) > 8) { # Check if information source variables were successfully joined
info_correlations <- data.frame(
Sources = c("Healthcare/Veterinary", "Pharmacy", "Social Networks"),
Correlation = c(
cor(community_info$household_info_healthcare, community_info$farm_info_veterinary,
use = "pairwise.complete.obs"),
cor(community_info$household_info_pharmacy, community_info$farm_info_pharmacy,
use = "pairwise.complete.obs"),
cor(community_info$household_info_social, community_info$farm_info_social,
use = "pairwise.complete.obs")
)
)
# Calculate p-values
info_correlations$PValue <- c(
cor.test(community_info$household_info_healthcare, community_info$farm_info_veterinary)$p.value,
cor.test(community_info$household_info_pharmacy, community_info$farm_info_pharmacy)$p.value,
cor.test(community_info$household_info_social, community_info$farm_info_social)$p.value
)
info_correlations$Significant <- info_correlations$PValue < 0.05
# Display information source correlations
info_correlations %>%
mutate(
Correlation = round(Correlation, 3),
PValue = round(PValue, 3),
Significant = ifelse(Significant, "Yes", "No")
) %>%
kable(caption = "Correlations Between Information Sources in Households and Farms",
col.names = c("Information Source", "Correlation", "P-Value", "Significant")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
cat("Information source variables not available or not successfully joined.\n")
}
}
### Multilevel Model for Community Effects
# For farm data
if(length(unique(farm_data$geo_id)) >= 10) { # Need sufficient number of communities
# Filter data for multilevel modeling
farm_mlm <- farm_data %>%
filter(!is.na(geo_id) & !is.na(any_ab_use))
# Fit random intercept model
farm_model <- tryCatch({
library(lme4)
glmer(any_ab_use ~ (1 | geo_id), data = farm_mlm, family = binomial)
}, error = function(e) {
message("Error fitting multilevel model for farm data: ", e$message)
return(NULL)
})
# Display model summary if successful
if(!is.null(farm_model)) {
cat("\nMultilevel model for farm antibiotic use:\n")
print(summary(farm_model))
# Calculate intraclass correlation coefficient (ICC)
if(requireNamespace("performance", quietly = TRUE)) {
cat("\nIntraclass correlation coefficient (ICC) for farms:\n")
farm_icc <- performance::icc(farm_model)
print(farm_icc)
cat("Interpretation: ",
ifelse(farm_icc$ICC > 0.05,
"Community membership explains a meaningful proportion of variance in farm antibiotic use.",
"Community membership explains little variance in farm antibiotic use."),
"\n")
}
}
# For household data
household_mlm <- household_data %>%
filter(!is.na(geo_id) & !is.na(any_ab_last_illness))
# Fit random intercept model
household_model <- tryCatch({
glmer(any_ab_last_illness ~ (1 | geo_id), data = household_mlm, family = binomial)
}, error = function(e) {
message("Error fitting multilevel model for household data: ", e$message)
return(NULL)
})
# Display model summary if successful
if(!is.null(household_model)) {
cat("\nMultilevel model for household antibiotic use:\n")
print(summary(household_model))
# Calculate intraclass correlation coefficient (ICC)
if(requireNamespace("performance", quietly = TRUE)) {
cat("\nIntraclass correlation coefficient (ICC) for households:\n")
household_icc <- performance::icc(household_model)
print(household_icc)
cat("Interpretation: ",
ifelse(household_icc$ICC > 0.05,
"Community membership explains a meaningful proportion of variance in household antibiotic use.",
"Community membership explains little variance in household antibiotic use."),
"\n")
}
}
} else {
cat("\nInsufficient number of communities for multilevel modeling.\n")
}
##
## Multilevel model for farm antibiotic use:
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: any_ab_use ~ (1 | geo_id)
## Data: farm_mlm
##
## AIC BIC logLik deviance df.resid
## 488 498 -242 484 1154
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.811 0.138 0.144 0.214 0.535
##
## Random effects:
## Groups Name Variance Std.Dev.
## geo_id (Intercept) 1.5 1.22
## Number of obs: 1156, groups: geo_id, 64
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.399 0.279 12.2 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Intraclass correlation coefficient (ICC) for farms:
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.313
## Unadjusted ICC: 0.313
## Interpretation:
##
## Multilevel model for household antibiotic use:
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: any_ab_last_illness ~ (1 | geo_id)
## Data: household_mlm
##
## AIC BIC logLik deviance df.resid
## 3757 3771 -1876 3753 9409
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.552 -0.259 -0.200 -0.131 9.866
##
## Random effects:
## Groups Name Variance Std.Dev.
## geo_id (Intercept) 1.09 1.05
## Number of obs: 9411, groups: geo_id, 64
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.237 0.147 -22 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Intraclass correlation coefficient (ICC) for households:
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.249
## Unadjusted ICC: 0.249
## Interpretation:
### Sensitivity Analysis
# Perform sensitivity analysis with different community-level aggregation thresholds
thresholds <- c(2, 3, 5)
sensitivity_results <- data.frame(
Threshold = integer(),
NCommunities = integer(),
Correlation = numeric(),
PValue = numeric(),
stringsAsFactors = FALSE
)
for(threshold in thresholds) {
# Aggregate with current threshold
farm_comm_sens <- farm_data %>%
group_by(geo_id) %>%
summarise(
farm_count = n(),
farm_ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(farm_count >= threshold)
household_comm_sens <- household_data %>%
group_by(geo_id) %>%
summarise(
household_count = n(),
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(household_count >= threshold)
# Join data
comm_comp_sens <- inner_join(farm_comm_sens, household_comm_sens, by = "geo_id")
# Calculate correlation
if(nrow(comm_comp_sens) > 5) { # Need at least a few communities for meaningful correlation
corr_result <- cor.test(comm_comp_sens$farm_ab_use_pct, comm_comp_sens$household_ab_use_pct)
# Store results
sensitivity_results <- rbind(sensitivity_results, data.frame(
Threshold = threshold,
NCommunities = nrow(comm_comp_sens),
Correlation = corr_result$estimate,
PValue = corr_result$p.value,
stringsAsFactors = FALSE
))
}
}
# Display sensitivity analysis results
if(nrow(sensitivity_results) > 0) {
sensitivity_results %>%
mutate(
Correlation = round(Correlation, 3),
PValue = round(PValue, 3),
Significant = ifelse(PValue < 0.05, "Yes", "No")
) %>%
kable(caption = "Sensitivity Analysis: Community-Level Correlations with Different Thresholds",
col.names = c("Minimum Respondents per Community", "Number of Communities",
"Correlation", "P-Value", "Significant")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
cat("\nInsufficient data for sensitivity analysis with different thresholds.\n")
}
Sensitivity Analysis: Community-Level Correlations with Different
Thresholds
|
Minimum Respondents per Community
|
Number of Communities
|
Correlation
|
P-Value
|
Significant
|
cor
|
2
|
64
|
0.02
|
0.9
|
No
|
cor1
|
3
|
64
|
0.02
|
0.9
|
No
|
cor2
|
5
|
64
|
0.02
|
0.9
|
No
|
#---------------------------------------------------------------------
# 1. Geographic Community Analysis with Enhanced Spatial Statistics
#---------------------------------------------------------------------
cat("\n## Geographic Community Analysis ##\n")
##
## ## Geographic Community Analysis ##
# Check for geographical coordinates
has_geo_coords <- all(c("latitude", "longitude") %in% names(farm_data))
if(has_geo_coords) {
# Filter farm data to records with valid coordinates
geo_farms <- farm_data %>%
filter(!is.na(latitude) & !is.na(longitude)) %>%
dplyr::select(farm_id, latitude, longitude, any_ab_use, ab_treatment, ab_prevention, ab_growth)
# Calculate distance matrix between farms (in meters)
if(nrow(geo_farms) > 1000) {
cat("Note: Farm size is large, use spatial index to optimise distance calculation.")
geo_farms_sample <- geo_farms %>% sample_n(min(1000, nrow(geo_farms)))
coords_matrix <- geo_farms_sample %>%
dplyr::select(longitude, latitude) %>%
as.matrix()
} else {
coords_matrix <- geo_farms %>%
dplyr::select(longitude, latitude) %>%
as.matrix()
}
dist_matrix <- distm(coords_matrix, fun = distHaversine)
rownames(dist_matrix) <- geo_farms$farm_id
colnames(dist_matrix) <- geo_farms$farm_id
# Define distance thresholds to test (in kilometers)
thresholds_km <- c(2, 5, 10)
# Initialize results dataframe
geo_community_results <- data.frame(
threshold_km = numeric(),
n_communities = numeric(),
mean_community_size = numeric(),
within_community_ab_correlation = numeric(),
p_value = numeric(),
stringsAsFactors = FALSE
)
for(threshold_km in thresholds_km) {
threshold_m <- threshold_km * 1000
# Create adjacency matrix based on threshold
adj_matrix <- dist_matrix < threshold_m & dist_matrix > 0 # Exclude self-connections
# Convert to igraph object
g <- graph_from_adjacency_matrix(adj_matrix, mode = "undirected")
# Identify communities (connected components)
communities <- components(g)
# Label each farm with its community ID
geo_farms$geo_community_id <- communities$membership
# Filter to communities with at least 3 farms
valid_communities <- geo_farms %>%
count(geo_community_id) %>%
filter(n >= 3) %>%
pull(geo_community_id)
if(length(valid_communities) > 0) {
# Filter farms to those in valid communities
community_farms <- geo_farms %>%
filter(geo_community_id %in% valid_communities)
# Compute within-community similarity in antibiotic use
if(nrow(community_farms) >= 10) { # Need sufficient data for meaningful test
# Calculate average within-community standard deviation of antibiotic use
community_stats <- community_farms %>%
group_by(geo_community_id) %>%
summarize(
ab_use_sd = sd(any_ab_use, na.rm = TRUE),
ab_use_mean = mean(any_ab_use, na.rm = TRUE),
n_farms = n(),
.groups = "drop"
)
# Calculate overall standard deviation for comparison
overall_sd <- sd(community_farms$any_ab_use, na.rm = TRUE)
# Calculate weighted average of within-community SD
weighted_avg_sd <- sum(community_stats$ab_use_sd * community_stats$n_farms) / sum(community_stats$n_farms)
# Store results
geo_community_results <- rbind(geo_community_results, data.frame(
threshold_km = threshold_km,
n_communities = length(valid_communities),
mean_community_size = mean(community_stats$n_farms),
within_community_ab_correlation = 1 - (weighted_avg_sd / overall_sd), # Higher value = more similarity
p_value = NA, # Will fill with appropriate test if available
stringsAsFactors = FALSE
))
# Visualization for 5km threshold
if(threshold_km == 5) {
# Convert to sf objects for mapping
farms_sf <- st_as_sf(community_farms, coords = c("longitude", "latitude"), crs = 4326)
# Create plot
geo_community_plot <- ggplot() +
geom_sf(data = farms_sf, aes(color = factor(geo_community_id), size = any_ab_use)) +
scale_color_viridis_d(name = "Geographic\nCommunity") +
scale_size_continuous(name = "Antibiotic Use", range = c(1, 5)) +
labs(title = paste0("Geographic Communities of Farms (", threshold_km, "km threshold)"),
subtitle = "Point size indicates antibiotic use prevalence") +
theme_minimal()
print(geo_community_plot)
}
}
}
}
# Display geographic community results table
if(nrow(geo_community_results) > 0) {
geo_community_results %>%
mutate(
within_community_ab_correlation = round(within_community_ab_correlation, 3),
p_value = ifelse(is.na(p_value), NA, round(p_value, 3)),
significant = ifelse(p_value < 0.05, "Yes", ifelse(is.na(p_value), "NA", "No"))
) %>%
kable(caption = "Geographic Community Analysis Results for Different Distance Thresholds",
col.names = c("Distance Threshold (km)", "Number of Communities",
"Mean Community Size", "Within-Community Similarity",
"P-Value", "Significant")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
}
# Enhanced Spatial Analysis: Moran's I and LISA Statistics
if(nrow(geo_farms) >= 10) {
cat("\n## Spatial Autocorrelation Analysis ##\n")
# Create spatial weights matrix for farms
coordinates <- coords_matrix
k <- min(15, nrow(geo_farms) - 1) # Choose appropriate k value
knn <- knearneigh(coordinates, k = k)
neighbors <- knn2nb(knn)
weights <- nb2listw(neighbors, style = "W")
# Calculate Global Moran's I for antibiotic use
moran_result <- moran.test(geo_farms$any_ab_use, weights)
# Report Moran's I results
cat("Global Moran's I for antibiotic use:", round(moran_result$estimate[1], 3),
", p-value:", round(moran_result$p.value, 3), "\n")
trend_model <- lm(any_ab_use ~ longitude + latitude, data = geo_farms)
trend_summary <- summary(trend_model)
cat("Examine spatial trends in antibiotic use\n")
cat("Spatial trend model R²:", round(trend_summary$r.squared, 3),
", p_value:", round(trend_summary$coefficients[2,4], 3), "/",
round(trend_summary$coefficients[3,4], 3), "\n")
# If there is a spatial trend, spatial autocorrelation is analysed after considering de-trending
if(trend_summary$coefficients[2,4] < 0.05 || trend_summary$coefficients[3,4] < 0.05) {
cat("There is a significant spatial trend. Consider detrending before spatial autocorrelation analysis\n")
geo_farms$ab_use_detrended <- residuals(trend_model)
# Recalculating spatial autocorrelation using detrended data
moran_detrended <- moran.test(geo_farms$ab_use_detrended, weights)
cat("Moran's I after trend removal:", round(moran_detrended$estimate[1], 3),
", p_value:", round(moran_detrended$p.value, 3), "\n")
}
# Calculate LISA
lisa_results <- localmoran(geo_farms$any_ab_use, weights)
# Add LISA results to farm data
geo_farms$lisa_i <- lisa_results[, 1]
geo_farms$lisa_p <- lisa_results[, 5]
# Identify hotspots and coldspots
mean_ab_use <- mean(geo_farms$any_ab_use, na.rm = TRUE)
geo_farms <- geo_farms %>%
mutate(
cluster_type = case_when(
lisa_p <= 0.05 & lisa_i > 0 & any_ab_use > mean_ab_use ~ "Hotspot",
lisa_p <= 0.05 & lisa_i > 0 & any_ab_use <= mean_ab_use ~ "Coldspot",
lisa_p <= 0.05 & lisa_i < 0 ~ "Spatial Outlier",
TRUE ~ "Not Significant"
)
)
# Visualize LISA clusters
lisa_plot <- ggplot(geo_farms, aes(x = longitude, y = latitude, color = cluster_type)) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("Hotspot" = "red", "Coldspot" = "blue",
"Spatial Outlier" = "purple", "Not Significant" = "grey")) +
labs(title = "Spatial Clustering of Antibiotic Use",
subtitle = "Based on Local Indicators of Spatial Association (LISA)",
x = "Longitude",
y = "Latitude",
color = "Cluster Type") +
theme_minimal()
print(lisa_plot)
# Show cluster summary statistics
geo_farms %>%
count(cluster_type) %>%
mutate(percentage = n / sum(n) * 100) %>%
arrange(desc(n)) %>%
kable(caption = "Summary of Spatial Clustering",
col.names = c("Cluster Type", "Number of Farms", "Percentage")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
}
# Analyze distance decay effect
cat("\n## Distance Decay Analysis ##\n")
# Create distance bins (in kilometers)
distance_bins <- c(0, 1, 2, 5, 10, 20, 50)
distance_bins_m <- distance_bins * 1000 # Convert to meters
# Initialize results dataframe
distance_decay <- data.frame(
distance_min_km = distance_bins[-length(distance_bins)],
distance_max_km = distance_bins[-1],
farm_pairs = numeric(length(distance_bins) - 1),
correlation = numeric(length(distance_bins) - 1),
stringsAsFactors = FALSE
)
# Calculate correlation at different distance bands
for(i in 1:(length(distance_bins_m) - 1)) {
# Filter farm pairs within this distance band
pairs_mask <- dist_matrix > distance_bins_m[i] & dist_matrix <= distance_bins_m[i+1]
# Need sufficient pairs for meaningful correlation
if(sum(pairs_mask) >= 10) {
# Get coordinates of pairs in this distance band
farm_pairs <- which(pairs_mask, arr.ind = TRUE)
# Extract antibiotic use values
ab_use_1 <- geo_farms$any_ab_use[farm_pairs[, 1]]
ab_use_2 <- geo_farms$any_ab_use[farm_pairs[, 2]]
# Calculate correlation
distance_decay$farm_pairs[i] <- length(ab_use_1)
distance_decay$correlation[i] <- cor(ab_use_1, ab_use_2, use = "pairwise.complete.obs")
}
}
# Display distance decay results
distance_decay %>%
filter(farm_pairs > 0) %>%
mutate(correlation = round(correlation, 3)) %>%
kable(caption = "Distance Decay Effect in Antibiotic Use Similarity",
col.names = c("Min Distance (km)", "Max Distance (km)",
"Number of Farm Pairs", "Correlation"),
digits = c(1, 1, 0, 3)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Visualize distance decay effect
decay_plot <- ggplot(distance_decay %>% filter(farm_pairs > 0),
aes(x = (distance_min_km + distance_max_km)/2, y = correlation)) +
geom_point(aes(size = farm_pairs), alpha = 0.7) +
geom_line() +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
labs(title = "Distance Decay Effect in Antibiotic Use Similarity",
x = "Distance (km)",
y = "Correlation Coefficient",
size = "Number of Farm Pairs") +
theme_minimal()
print(decay_plot)
} else {
cat("Geographic coordinates not available for spatial community analysis.\n")
}
## Geographic coordinates not available for spatial community analysis.
#---------------------------------------------------------------------
# 2. Information Transmission Network Analysis with Enhanced Source Analysis
#---------------------------------------------------------------------
cat("\n## Information Transmission Network Analysis ##\n")
##
## ## Information Transmission Network Analysis ##
# Check for information source variables
if(all(c("advice1", "advice2", "advice3") %in% names(farm_data))) {
# Standardize and clean information source variables
farm_info <- farm_data %>%
dplyr::select(farm_id, advice1, advice2, advice3, any_ab_use, ab_treatment, ab_prevention, ab_growth) %>%
filter(!is.na(advice1)) # At least primary advice source must be available
cat("Analyzing", nrow(farm_info), "farms with information source data...\n")
# Convert to lowercase and standardize source names if text
if(is.character(farm_info$advice1)) {
# Create a function to categorize advice sources
categorize_advice <- function(advice) {
case_when(
grepl("vet|doctor|health|medic", advice, ignore.case = TRUE) ~ "Professional healthcare",
grepl("pharm|drug", advice, ignore.case = TRUE) ~ "Pharmacy/drug store",
grepl("friend|neighbor|relative|family", advice, ignore.case = TRUE) ~ "Social network",
grepl("tv|radio|internet|media|news", advice, ignore.case = TRUE) ~ "Media",
grepl("company|supplier|sale", advice, ignore.case = TRUE) ~ "Supplier/company",
!is.na(advice) ~ "Other",
TRUE ~ NA_character_
)
}
# Apply standardization to all advice sources
unique_sources <- unique(na.omit(c(farm_info$advice1, farm_info$advice2, farm_info$advice3)))
if(length(unique_sources) > 20) {
cat("Many different sources of information exist (", length(unique_sources), "),Consider using text analysis methods\n")
if(requireNamespace("tm", quietly = TRUE) && requireNamespace("proxy", quietly = TRUE)) {
source_corpus <- tm::Corpus(tm::VectorSource(unique_sources))
source_dtm <- tm::DocumentTermMatrix(source_corpus)
source_dist <- proxy::dist(as.matrix(source_dtm))
source_clusters <- hclust(source_dist)
num_clusters <- min(5, length(unique_sources) %/% 4)
source_categories <- cutree(source_clusters, k = num_clusters)
}
}
farm_info <- farm_info %>%
mutate(
advice1_category = categorize_advice(advice1),
advice2_category = categorize_advice(advice2),
advice3_category = categorize_advice(advice3),
primary_source_std = advice1_category # Use first advice source as primary
)
# Show distribution of primary information sources
farm_info %>%
count(primary_source_std) %>%
mutate(percentage = n / sum(n) * 100) %>%
arrange(desc(n)) %>%
kable(caption = "Distribution of Primary Information Sources for Antibiotic Use",
col.names = c("Information Source", "Number of Farms", "Percentage (%)"),
digits = c(0, 0, 1)) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Analyze antibiotic use by information source
source_summary <- farm_info %>%
group_by(primary_source_std) %>%
summarise(
n_farms = n(),
ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
ab_treatment_pct = mean(ab_treatment, na.rm = TRUE) * 100,
ab_prevention_pct = mean(ab_prevention, na.rm = TRUE) * 100,
ab_growth_pct = mean(ab_growth, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(n_farms >= 5) # Only include sources with sufficient data
# Display results
source_summary %>%
mutate(across(ends_with("_pct"), ~ round(., 1))) %>%
kable(caption = "Antibiotic Use Patterns by Primary Information Source",
col.names = c("Information Source", "Number of Farms", "Any Antibiotic Use (%)",
"Treatment Use (%)", "Prevention Use (%)", "Growth Promotion Use (%)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Visualize antibiotic use by information source
plot_data <- source_summary %>%
filter(n_farms >= 5) %>%
gather(key = "purpose", value = "percentage",
ab_use_pct, ab_treatment_pct, ab_prevention_pct, ab_growth_pct) %>%
mutate(purpose = factor(purpose,
levels = c("ab_use_pct", "ab_treatment_pct",
"ab_prevention_pct", "ab_growth_pct"),
labels = c("Any Use", "Treatment", "Prevention", "Growth")))
# Create and display the bar chart
source_purpose_plot <- ggplot(plot_data, aes(x = primary_source_std, y = percentage, fill = purpose)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = sprintf("%.1f%%", percentage)),
position = position_dodge(width = 0.9),
vjust = -0.5, size = 3) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Antibiotic Use Patterns by Primary Information Source",
subtitle = "Comparing usage for different purposes",
x = "Information Source",
y = "Percentage of Farms",
fill = "Usage Purpose") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(source_purpose_plot)
# Statistical comparison of information sources
if(nrow(source_summary) >= 2) {
cat("\nStatistical comparison of antibiotic use across information sources:\n")
# One-way ANOVA for any antibiotic use
ab_use_aov <- aov(any_ab_use ~ primary_source_std, data = farm_info)
ab_use_aov_summary <- summary(ab_use_aov)
cat("ANOVA for any antibiotic use by information source:\n")
print(ab_use_aov_summary)
# Statistical significance
if(ab_use_aov_summary[[1]][["Pr(>F)"]][1] < 0.05) {
cat("There are statistically significant differences in antibiotic use across information sources (p =",
round(ab_use_aov_summary[[1]][["Pr(>F)"]][1], 3), ")\n")
# Post-hoc test if ANOVA is significant
if(requireNamespace("emmeans", quietly = TRUE)) {
posthoc <- emmeans::emmeans(ab_use_aov, pairwise ~ primary_source_std)
cat("Post-hoc pairwise comparisons:\n")
print(posthoc$contrasts)
}
} else {
cat("No statistically significant differences in antibiotic use across information sources (p =",
round(ab_use_aov_summary[[1]][["Pr(>F)"]][1], 3), ")\n")
}
}
# Identify farms sharing common information sources
cat("\nConstructing information source networks...\n")
# Create information source network
farm_pairs <- expand.grid(farm1 = farm_info$farm_id,
farm2 = farm_info$farm_id,
stringsAsFactors = FALSE) %>%
filter(farm1 < farm2) # Only include each pair once
# Calculate information source overlap
farm_pairs$source_overlap <- apply(farm_pairs, 1, function(pair) {
farm1_idx <- which(farm_info$farm_id == pair["farm1"])
farm2_idx <- which(farm_info$farm_id == pair["farm2"])
# Get sources for each farm
sources1 <- na.omit(c(farm_info$advice1_category[farm1_idx],
farm_info$advice2_category[farm1_idx],
farm_info$advice3_category[farm1_idx]))
sources2 <- na.omit(c(farm_info$advice1_category[farm2_idx],
farm_info$advice2_category[farm2_idx],
farm_info$advice3_category[farm2_idx]))
# Calculate Jaccard similarity (intersection over union)
length(intersect(sources1, sources2)) / length(union(sources1, sources2))
})
# Summarize overlap distribution
cat("Distribution of information source overlap between farm pairs:\n")
overlap_summary <- farm_pairs %>%
mutate(overlap_category = cut(source_overlap,
breaks = c(0, 0.25, 0.5, 0.75, 1.0),
labels = c("0-25%", "26-50%", "51-75%", "76-100%"),
include.lowest = TRUE)) %>%
count(overlap_category) %>%
mutate(percentage = n / sum(n) * 100)
print(overlap_summary)
# Define network ties based on minimum overlap
min_overlap <- 0.5 # At least 50% source overlap
farm_network_edges <- farm_pairs %>%
filter(source_overlap >= min_overlap) %>%
dplyr::select(farm1, farm2)
if(nrow(farm_network_edges) > 0) {
cat("Found", nrow(farm_network_edges), "farm pairs with significant information source overlap.\n")
# Create network graph
info_g <- graph_from_data_frame(farm_network_edges,
vertices = farm_info %>% dplyr::select(farm_id, any_ab_use),
directed = FALSE)
# Network statistics
cat("\nInformation source network statistics:\n")
cat("Number of nodes (farms):", vcount(info_g), "\n")
cat("Number of edges (connections):", ecount(info_g), "\n")
cat("Network density:", edge_density(info_g), "\n")
cat("Average degree (connections per farm):", mean(degree(info_g)), "\n")
# Community detection in information network
info_communities <- cluster_louvain(info_g)
cat("Number of detected information communities:", length(unique(membership(info_communities))), "\n")
# Assign community IDs
farm_info$info_community_id <- membership(info_communities)[match(farm_info$farm_id, names(V(info_g)))]
# Visualize network with communities
# Network statistics
cat("\nInformation source network statistics:\n")
cat("Number of nodes (farms):", vcount(info_g), "\n")
cat("Number of edges (connections):", ecount(info_g), "\n")
cat("Network density:", edge_density(info_g), "\n")
cat("Average degree (connections per farm):", mean(degree(info_g)), "\n")
# Create a simplified network by limiting edges per node
if(requireNamespace("tidygraph", quietly = TRUE)) {
tg_simplified <- tidygraph::as_tbl_graph(info_g) %>%
tidygraph::activate(nodes) %>%
tidygraph::mutate(community = membership(info_communities),
antibiotic_use = any_ab_use) %>%
tidygraph::activate(edges) %>%
# Add edge weight based on overlap
tidygraph::mutate(weight = farm_pairs$source_overlap[match(paste0(from, "-", to),
paste0(farm_pairs$farm1, "-", farm_pairs$farm2))]) %>%
# Keep only top connections per node - fixed approach without edge_rank()
tidygraph::mutate(rank_in_from = rank(-weight, ties.method = "first"),
rank_in_to = rank(-weight, ties.method = "first")) %>%
tidygraph::filter(rank_in_from <= 3 | rank_in_to <= 3) # Keep top 3 connections per node
# Count nodes in each community for sizing plots
community_sizes <- table(membership(info_communities))
largest_communities <- names(sort(community_sizes, decreasing = TRUE))[1:min(4, length(community_sizes))]
}
} else {
# If tidygraph is not available, use the original graph
tg_simplified <- info_g
V(tg_simplified)$community <- membership(info_communities)
V(tg_simplified)$antibiotic_use <- V(info_g)$any_ab_use
# Identify largest communities
community_sizes <- table(membership(info_communities))
largest_communities <- names(sort(community_sizes, decreasing = TRUE))[1:min(4, length(community_sizes))]
}
# Network statistics
cat("\nInformation source network statistics:\n")
cat("Number of nodes (farms):", vcount(info_g), "\n")
cat("Number of edges (connections):", ecount(info_g), "\n")
cat("Network density:", edge_density(info_g), "\n")
cat("Average degree (connections per farm):", mean(degree(info_g)), "\n")
# Visualize network with multiple approaches
if(requireNamespace("ggraph", quietly = TRUE) && requireNamespace("tidygraph", quietly = TRUE)) {
# Create tidygraph object
tg <- tidygraph::as_tbl_graph(info_g) %>%
tidygraph::activate(nodes) %>%
tidygraph::mutate(community = membership(info_communities),
antibiotic_use = any_ab_use)
# 1. Main network visualization with reduced clutter
main_network_plot <- ggraph::ggraph(tg, layout = "fr") +
ggraph::geom_edge_link(alpha = 0.2, width = 0.3) +
ggraph::geom_node_point(aes(color = factor(community),
size = antibiotic_use * 2 + 0.5),
alpha = 0.7) +
scale_size_continuous(range = c(0.5, 3)) + # Reduce the range of node sizes
scale_color_brewer(palette = "Set1") +
labs(title = "Information Source Communities in Farms",
subtitle = "Farms connected by shared information sources (simplified view)",
color = "Community",
size = "Antibiotic Use") +
theme_void() +
theme(legend.position = "right")
print(main_network_plot)
# 2. Alternative layout visualization
alt_network_plot <- ggraph::ggraph(tg, layout = "kk") +
ggraph::geom_edge_link(alpha = 0.2, width = 0.3) +
ggraph::geom_node_point(aes(color = factor(community),
size = antibiotic_use * 2 + 0.5),
alpha = 0.7) +
scale_size_continuous(range = c(0.5, 3)) +
scale_color_brewer(palette = "Set1") +
labs(title = "Alternative View of Information Source Communities",
subtitle = "Using Kamada-Kawai layout algorithm",
color = "Community",
size = "Antibiotic Use") +
theme_void() +
theme(legend.position = "right")
print(alt_network_plot)
# 3. Create a visualization with communities with different point shapes
shape_plot <- ggraph::ggraph(tg, layout = "nicely") +
ggraph::geom_edge_link(alpha = 0.1, width = 0.2) +
ggraph::geom_node_point(aes(color = factor(community),
size = antibiotic_use * 2 + 0.8,
shape = factor(community %% 5)), # Use modulo to limit to available shapes
alpha = 0.8) +
scale_size_continuous(range = c(1, 4)) +
scale_color_brewer(palette = "Set1") +
scale_shape_manual(values = c(16, 17, 15, 18, 19), guide = "none") +
labs(title = "Communities of Farms with Similar Information Sources",
subtitle = "Different layouts highlight different aspects of community structure",
color = "Community",
size = "Antibiotic Use") +
theme_void() +
theme(legend.position = "right")
print(shape_plot)
} else {
# Basic plot if ggraph is not available
# Create a more visually appealing base plot
op <- par(mar = c(1,1,3,1)) # Adjust margins
set.seed(42) # For reproducibility
# Use a better layout
layout_coords <- layout_with_fr(info_g)
# Improve node appearance
V(info_g)$color <- rainbow(length(unique(membership(info_communities))))[membership(info_communities)]
V(info_g)$size <- 2 + 8 * V(info_g)$any_ab_use # Scale node sizes
E(info_g)$width <- 0.5 # Thinner edges
E(info_g)$color <- rgb(0.5, 0.5, 0.5, 0.2) # Transparent gray edges
# Plot with improved aesthetics
plot(info_g,
vertex.label = NA,
vertex.size = V(info_g)$size,
vertex.color = adjustcolor(V(info_g)$color, alpha.f = 0.7),
edge.width = E(info_g)$width,
edge.color = E(info_g)$color,
layout = layout_coords,
main = "Information Source Communities",
sub = "Farms connected by shared information sources")
legend("topright",
legend = paste("Community", sort(unique(membership(info_communities)))),
fill = rainbow(length(unique(membership(info_communities)))),
bty = "n",
cex = 0.8)
par(op) # Reset graphics parameters
}
# Analyze antibiotic use patterns by information community
info_community_stats <- farm_info %>%
filter(!is.na(info_community_id)) %>%
group_by(info_community_id) %>%
summarise(
n_farms = n(),
ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
ab_prevention_pct = mean(ab_prevention, na.rm = TRUE) * 100,
ab_growth_pct = mean(ab_growth, na.rm = TRUE) * 100,
ab_use_sd = sd(any_ab_use, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(n_farms >= 3) # Only include communities with sufficient data
# Display community statistics
info_community_stats %>%
mutate(across(ends_with("_pct"), ~ round(., 1)),
ab_use_sd = round(ab_use_sd, 1)) %>%
kable(caption = "Antibiotic Use Statistics by Information Community",
col.names = c("Community ID", "Number of Farms", "Any Antibiotic Use (%)",
"Prevention Use (%)", "Growth Promotion Use (%)",
"Std Dev of Antibiotic Use (%)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Compare within-community vs. between-community variance
if(nrow(info_community_stats) > 1) {
# Calculate overall variance
overall_variance <- var(farm_info$any_ab_use, na.rm = TRUE) * 10000 # Convert to percentage scale
# Calculate weighted average of within-community variance
within_variance <- sum(info_community_stats$ab_use_sd^2 * info_community_stats$n_farms) /
sum(info_community_stats$n_farms)
# Calculate variance explained by community membership (like R-squared)
variance_explained <- 1 - (within_variance / overall_variance)
cat("\nInformation community analysis:\n")
cat("Number of information communities:", nrow(info_community_stats), "\n")
cat("Variance in antibiotic use explained by information community membership:",
round(variance_explained * 100, 1), "%\n")
# Visualize community differences
if(nrow(info_community_stats) >= 3) {
community_plot <- ggplot(info_community_stats,
aes(x = reorder(factor(info_community_id), ab_use_pct),
y = ab_use_pct)) +
geom_bar(stat = "identity", fill = "steelblue") +
geom_errorbar(aes(ymin = ab_use_pct - ab_use_sd,
ymax = ab_use_pct + ab_use_sd),
width = 0.2) +
labs(title = "Antibiotic Use by Information Community",
subtitle = "Error bars represent standard deviation",
x = "Information Community ID",
y = "Antibiotic Use (%)") +
theme_minimal()
print(community_plot)
}
}
} else {
cat("Insufficient information source overlap to construct meaningful network.\n")
}
} else {
# For numerical coding of advice sources
source_summary <- farm_data %>%
filter(!is.na(advice1)) %>%
group_by(advice1) %>%
summarise(
n = n(),
ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
ab_prevention_pct = mean(ab_prevention, na.rm = TRUE) * 100,
ab_growth_pct = mean(ab_growth, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(n >= 5) # Only include sources with sufficient data
# Display results
source_summary %>%
mutate(across(ends_with("_pct"), ~ round(., 1))) %>%
kable(caption = "Antibiotic Use Patterns by Primary Information Source Code",
col.names = c("Information Source Code", "Number of Farms",
"Any Antibiotic Use (%)", "Prevention Use (%)", "Growth Promotion Use (%)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
}
## Analyzing 1156 farms with information source data...
## Many different sources of information exist ( 182 ),Consider using text analysis methods

##
## Constructing information source networks...
## Distribution of information source overlap between farm pairs:
## overlap_category n percentage
## 1 26-50% 1155 0.17
## 2 76-100% 666435 99.83
## Found 667590 farm pairs with significant information source overlap.
##
## Information source network statistics:
## Number of nodes (farms): 1156
## Number of edges (connections): 667590
## Network density: 1
## Average degree (connections per farm): 1155
## Number of detected information communities: 1
##
## Information source network statistics:
## Number of nodes (farms): 1156
## Number of edges (connections): 667590
## Network density: 1
## Average degree (connections per farm): 1155
##
## Information source network statistics:
## Number of nodes (farms): 1156
## Number of edges (connections): 667590
## Network density: 1
## Average degree (connections per farm): 1155



#---------------------------------------------------------------------
# 3. Human-Animal Contact Pattern Analysis (continued)
#---------------------------------------------------------------------
# Check if we can identify farm-household links
# Method 1: Direct ID linking
if(any(grepl("farm_id", names(household_data)))) {
cat("\n### Direct Farm-Household Link Analysis ###\n")
household_farm_link_var <- names(household_data)[grepl("farm_id", names(household_data))][1]
has_household_farm_link <- TRUE
# Extract linked households-farms
linked_households <- household_data %>%
filter(!is.na(!!sym(household_farm_link_var))) %>%
dplyr::select(household_id, !!household_farm_link_var, any_ab_last_illness)
linked_farms <- farm_data %>%
dplyr::select(farm_id, any_ab_use, ab_treatment, ab_prevention, ab_growth) %>%
filter(farm_id %in% linked_households[[household_farm_link_var]])
# Merge household and farm data
if(nrow(linked_households) > 0 && nrow(linked_farms) > 0) {
farm_household_pairs <- linked_households %>%
rename(farm_id = !!household_farm_link_var) %>%
inner_join(linked_farms, by = "farm_id")
cat("Found", nrow(farm_household_pairs), "direct farm-household links.\n")
# Display summary statistics of linked pairs
farm_household_summary <- farm_household_pairs %>%
summarise(
n_pairs = n(),
farm_ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
concordant_pairs = sum(any_ab_use == any_ab_last_illness, na.rm = TRUE),
concordant_pct = concordant_pairs / n() * 100
) %>%
mutate(across(ends_with("_pct"), ~ round(., 1)))
# Display summary
farm_household_summary %>%
gather(key = "statistic", value = "value") %>%
kable(caption = "Summary of Directly Linked Farm-Household Pairs",
col.names = c("Statistic", "Value")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Create a cross-tabulation of farm and household antibiotic use
cross_tab <- table(
Farm = factor(farm_household_pairs$any_ab_use, levels = c(0, 1), labels = c("No", "Yes")),
Household = factor(farm_household_pairs$any_ab_last_illness, levels = c(0, 1), labels = c("No", "Yes"))
)
# Display cross-tabulation
cross_tab %>%
kable(caption = "Cross-tabulation of Antibiotic Use in Linked Farm-Household Pairs") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Calculate Cohen's Kappa for agreement
if(requireNamespace("psych", quietly = TRUE)) {
kappa <- psych::cohen.kappa(cross_tab)
cat("\nCohen's Kappa for agreement in antibiotic use: ", round(kappa$kappa, 3),
" (", kappa$agree*100, "% agreement)\n", sep = "")
}
}
}
# Method 2: Role-based identification (if method 1 fails)
has_household_farm_link <- FALSE
if(!has_household_farm_link && "role" %in% names(farm_data)) {
cat("\n### Farm Owner-Household Matching Analysis ###\n")
# Identify farm owners/workers
farm_owners <- farm_data %>%
filter(role == 1) %>% # Assuming 1 = farm owner
dplyr::select(farm_id, role, any_ab_use, ab_treatment, ab_prevention, ab_growth)
# Try to match with households using common identifiers
if("farm_district" %in% names(farm_data) && "district" %in% names(household_data) &&
"farm_commune" %in% names(farm_data) && "commune" %in% names(household_data)) {
# Match based on location and additional criteria if available
potential_matches <- farm_owners %>%
inner_join(household_data,
by = c("farm_district" = "district", "farm_commune" = "commune"))
if(nrow(potential_matches) > 0) {
has_household_farm_link <- TRUE
farm_household_pairs <- potential_matches %>%
dplyr::select(farm_id, household_id, any_ab_use, ab_treatment, ab_prevention, ab_growth, any_ab_last_illness)
cat("Found", nrow(farm_household_pairs), "potential farm-household links based on location matching.\n")
# Display summary of potential matches
location_match_summary <- farm_household_pairs %>%
summarise(
n_pairs = n(),
farm_ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100
) %>%
mutate(across(ends_with("_pct"), ~ round(., 1)))
location_match_summary %>%
gather(key = "statistic", value = "value") %>%
kable(caption = "Summary of Location-Based Farm-Household Matches",
col.names = c("Statistic", "Value")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
}
}
}
##
## ### Farm Owner-Household Matching Analysis ###
# Method 3: If we have a dataset with both farm and household info for same respondents
if(!has_household_farm_link && any(grepl("consumption", names(farm_data)))) {
cat("\n### Household Consumption Pattern Analysis ###\n")
# Assuming consumption variables indicate household members consuming farm products
consumption_vars <- names(farm_data)[grepl("consumption", names(farm_data))]
if(length(consumption_vars) > 0) {
# Display distribution of consumption patterns
consumption_distribution <- farm_data %>%
count(!!sym(consumption_vars[1])) %>%
mutate(percentage = n / sum(n) * 100,
description = case_when(
!!sym(consumption_vars[1]) == 1 ~ "Never from own farm",
!!sym(consumption_vars[1]) == 2 ~ "More from other farms",
!!sym(consumption_vars[1]) == 3 ~ "More from own farm",
!!sym(consumption_vars[1]) == 4 ~ "Only from own farm",
TRUE ~ "Unknown"
))
consumption_distribution %>%
dplyr::select(-!!sym(consumption_vars[1])) %>%
mutate(percentage = round(percentage, 1)) %>%
kable(caption = "Distribution of Household Consumption Patterns",
col.names = c("Number of Farms", "Percentage (%)", "Consumption Pattern")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Create proxy links based on consumption patterns
farm_household_proxy <- farm_data %>%
filter(!!sym(consumption_vars[1]) %in% c(3, 4)) %>% # 3-4 = consuming own farm products
dplyr::select(farm_id, any_ab_use, ab_treatment, ab_prevention, ab_growth)
if(nrow(farm_household_proxy) > 0) {
has_household_farm_link <- TRUE
cat("Found", nrow(farm_household_proxy), "farms with household consumption of farm products.\n")
cat("Using these as proxy for farm-household overlap.\n")
# Compare with farms without household consumption
farm_data$household_consumption <- farm_data[[consumption_vars[1]]] %in% c(3, 4)
consumption_comparison <- farm_data %>%
group_by(household_consumption) %>%
summarise(
n = n(),
ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
ab_treatment_pct = mean(ab_treatment, na.rm = TRUE) * 100,
ab_prevention_pct = mean(ab_prevention, na.rm = TRUE) * 100,
ab_growth_pct = mean(ab_growth, na.rm = TRUE) * 100,
.groups = "drop"
)
# Display results
consumption_comparison %>%
mutate(household_consumption = ifelse(household_consumption, "Yes", "No"),
across(ends_with("_pct"), ~ round(., 1))) %>%
kable(caption = "Antibiotic Use by Household Consumption of Farm Products",
col.names = c("Household Consumes Farm Products", "Number of Farms",
"Any Antibiotic Use (%)", "Treatment Use (%)",
"Prevention Use (%)", "Growth Promotion Use (%)")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Statistical test for difference
ab_use_test <- t.test(any_ab_use ~ household_consumption, data = farm_data)
ab_prevention_test <- t.test(ab_prevention ~ household_consumption, data = farm_data)
ab_growth_test <- t.test(ab_growth ~ household_consumption, data = farm_data)
# Report test results
cat("\nStatistical comparison of farms by household consumption:\n")
cat("Any antibiotic use - difference:",
round(ab_use_test$estimate[2] - ab_use_test$estimate[1], 3) * 100, "%,",
"p-value:", round(ab_use_test$p.value, 3),
ifelse(ab_use_test$p.value < 0.05, " (significant)", " (not significant)"), "\n")
cat("Preventive use - difference:",
round(ab_prevention_test$estimate[2] - ab_prevention_test$estimate[1], 3) * 100, "%,",
"p-value:", round(ab_prevention_test$p.value, 3),
ifelse(ab_prevention_test$p.value < 0.05, " (significant)", " (not significant)"), "\n")
cat("Growth promotion use - difference:",
round(ab_growth_test$estimate[2] - ab_growth_test$estimate[1], 3) * 100, "%,",
"p-value:", round(ab_growth_test$p.value, 3),
ifelse(ab_growth_test$p.value < 0.05, " (significant)", " (not significant)"), "\n")
# Visualization
farm_data_plot <- farm_data %>%
mutate(household_consumption = ifelse(household_consumption, "Yes", "No")) %>%
dplyr::select(household_consumption, any_ab_use, ab_treatment, ab_prevention, ab_growth) %>%
pivot_longer(cols = c(any_ab_use, ab_treatment, ab_prevention, ab_growth),
names_to = "usage_type", values_to = "usage") %>%
mutate(usage_type = factor(usage_type,
levels = c("any_ab_use", "ab_treatment", "ab_prevention", "ab_growth"),
labels = c("Any Use", "Treatment", "Prevention", "Growth Promotion")))
# Create and display bar chart
consumption_plot <- ggplot(farm_data_plot, aes(x = usage_type, y = usage * 100, fill = household_consumption)) +
stat_summary(fun = mean, geom = "bar", position = "dodge") +
stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(0.9), width = 0.2) +
labs(title = "Antibiotic Use by Household Consumption Pattern",
subtitle = "Comparison of farms where household members consume farm products",
x = "Type of Antibiotic Use",
y = "Percentage of Farms",
fill = "Household Consumes\nFarm Products") +
scale_fill_brewer(palette = "Set1") +
theme_minimal()
print(consumption_plot)
}
}
}
##
## ### Household Consumption Pattern Analysis ###
## Found 400 farms with household consumption of farm products.
## Using these as proxy for farm-household overlap.
##
## Statistical comparison of farms by household consumption:
## Any antibiotic use - difference: 2.4 %, p-value: 0.082 (not significant)
## Preventive use - difference: 16 %, p-value: 0 (significant)
## Growth promotion use - difference: 1.4 %, p-value: 0.052 (not significant)

# If we have direct farm-household pairs, analyze the correlation in antibiotic use
if(has_household_farm_link && exists("farm_household_pairs") && nrow(farm_household_pairs) >= 10) {
cat("\n### Directly Linked Farm-Household Pair Analysis ###\n")
# Calculate correlation between farm and household antibiotic use
pair_correlation <- cor.test(
farm_household_pairs$any_ab_use,
farm_household_pairs$any_ab_last_illness
)
# Display correlation results
cat("Number of farm-household pairs:", nrow(farm_household_pairs), "\n")
cat("Correlation between farm and household antibiotic use:\n")
cat("Correlation coefficient (r):", round(pair_correlation$estimate, 3), "\n")
cat("95% confidence interval: [",
round(pair_correlation$conf.int[1], 3), ",",
round(pair_correlation$conf.int[2], 3), "]\n")
cat("P-value:", round(pair_correlation$p.value, 3),
ifelse(pair_correlation$p.value < 0.05, " (significant)", " (not significant)"), "\n")
# Visualize the relationship
pair_plot <- ggplot(farm_household_pairs, aes(x = any_ab_use, y = any_ab_last_illness)) +
geom_jitter(width = 0.1, height = 0.1, alpha = 0.7) +
geom_smooth(method = "glm", method.args = list(family = "binomial"), se = TRUE, color = "blue") +
labs(title = "Relationship Between Farm and Household Antibiotic Use",
subtitle = paste("Correlation coefficient =", round(pair_correlation$estimate, 2)),
x = "Farm Antibiotic Use (0=No, 1=Yes)",
y = "Household Antibiotic Use (0=No, 1=Yes)") +
theme_minimal()
print(pair_plot)
# Check for purpose-specific correlations
purpose_cors <- data.frame(
Purpose = c("Treatment", "Prevention", "Growth Promotion"),
Correlation = c(
cor(farm_household_pairs$ab_treatment, farm_household_pairs$any_ab_last_illness, use = "complete.obs"),
cor(farm_household_pairs$ab_prevention, farm_household_pairs$any_ab_last_illness, use = "complete.obs"),
cor(farm_household_pairs$ab_growth, farm_household_pairs$any_ab_last_illness, use = "complete.obs")
)
)
# Calculate p-values
purpose_cors$P_Value <- c(
cor.test(farm_household_pairs$ab_treatment, farm_household_pairs$any_ab_last_illness)$p.value,
cor.test(farm_household_pairs$ab_prevention, farm_household_pairs$any_ab_last_illness)$p.value,
cor.test(farm_household_pairs$ab_growth, farm_household_pairs$any_ab_last_illness)$p.value
)
purpose_cors$Significant <- ifelse(purpose_cors$P_Value < 0.05, "Yes", "No")
# Display purpose-specific correlations
purpose_cors %>%
mutate(
Correlation = round(Correlation, 3),
P_Value = round(P_Value, 3)
) %>%
kable(caption = "Correlations Between Farm Purpose-Specific and Household Antibiotic Use",
col.names = c("Farm Antibiotic Purpose", "Correlation with Household Use",
"P-Value", "Significant")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Create bar chart of purpose-specific correlations
purpose_plot <- ggplot(purpose_cors, aes(x = Purpose, y = Correlation, fill = Significant)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sprintf("%.3f", Correlation)), vjust = -0.5) +
scale_fill_manual(values = c("No" = "gray70", "Yes" = "steelblue")) +
labs(title = "Purpose-Specific Correlations with Household Antibiotic Use",
x = "Farm Antibiotic Purpose",
y = "Correlation Coefficient") +
theme_minimal()
print(purpose_plot)
}
#---------------------------------------------------------------------
# 4. Integrated Analysis Across Multiple Community Definitions
#---------------------------------------------------------------------
cat("\n## Integrated Analysis Across Community Definitions ##\n")
##
## ## Integrated Analysis Across Community Definitions ##
# Create a list to store results from different community definitions
community_results <- list()
if(all(c("edu", "income") %in% names(farm_data))) {
cat("\n### Socioeconomic Community Analysis ###\n")
socioeco_clusters <- farm_data %>%
dplyr::select(farm_id, edu, income) %>%
filter(!is.na(edu) & !is.na(income)) %>%
mutate(
edu_z = scale(as.numeric(edu)),
income_z = scale(as.numeric(income))
)
if(nrow(socioeco_clusters) >= 20) {
cluster_result <- kmeans(socioeco_clusters %>% dplyr::select(edu_z, income_z),
centers = min(5, nrow(socioeco_clusters)/4))
socioeco_clusters$se_community_id <- cluster_result$cluster
farm_data <- farm_data %>%
left_join(socioeco_clusters %>% dplyr::select(farm_id, se_community_id), by = "farm_id")
cat("Defined based on socio-economic characteristics", length(unique(socioeco_clusters$se_community_id)), "communities\n")
}
}
# 1. Administrative community (from original code)
if("geo_id" %in% names(farm_data) && "geo_id" %in% names(household_data)) {
cat("\n### Administrative Community Analysis ###\n")
admin_farm_community <- farm_data %>%
group_by(geo_id) %>%
summarise(
farm_count = n(),
farm_ab_use_pct = mean(any_ab_use, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(farm_count >= 3)
admin_household_community <- household_data %>%
group_by(geo_id) %>%
summarise(
household_count = n(),
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(household_count >= 3)
admin_comparison <- inner_join(admin_farm_community, admin_household_community, by = "geo_id") %>%
filter(farm_count >= 3 & household_count >= 3)
if(nrow(admin_comparison) > 0) {
cat("Found", nrow(admin_comparison), "administrative communities with sufficient data.\n")
# Display summary statistics of administrative communities
admin_comparison %>%
summarise(
n_communities = n(),
avg_farm_count = mean(farm_count),
avg_household_count = mean(household_count),
avg_farm_ab_use = mean(farm_ab_use_pct),
avg_household_ab_use = mean(household_ab_use_pct)
) %>%
mutate(across(starts_with("avg_"), ~ round(., 1))) %>%
gather(key = "statistic", value = "value") %>%
kable(caption = "Summary Statistics of Administrative Communities",
col.names = c("Statistic", "Value")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Calculate correlation
admin_corr <- cor.test(admin_comparison$farm_ab_use_pct, admin_comparison$household_ab_use_pct)
cat("Correlation between farm and household antibiotic use in administrative communities:\n")
cat("Correlation coefficient (r):", round(admin_corr$estimate, 3), "\n")
cat("95% confidence interval: [",
round(admin_corr$conf.int[1], 3), ",",
round(admin_corr$conf.int[2], 3), "]\n")
cat("P-value:", round(admin_corr$p.value, 3),
ifelse(admin_corr$p.value < 0.05, " (significant)", " (not significant)"), "\n")
# Visualize relationship
admin_plot <- ggplot(admin_comparison, aes(x = farm_ab_use_pct, y = household_ab_use_pct)) +
geom_point(aes(size = farm_count + household_count), alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Correlation Between Farm and Household Antibiotic Use in Administrative Communities",
subtitle = paste("Pearson correlation =", round(admin_corr$estimate, 2),
", p-value =", round(admin_corr$p.value, 3)),
x = "Farm Antibiotic Use (%)",
y = "Household Antibiotic Use (%)",
size = "Number of Respondents") +
theme_minimal()
print(admin_plot)
# Store results for comparison
community_results[["Administrative"]] <- list(
n_communities = nrow(admin_comparison),
correlation = admin_corr$estimate,
p_value = admin_corr$p.value
)
} else {
cat("Insufficient data for administrative community analysis.\n")
}
}
##
## ### Administrative Community Analysis ###
## Found 64 administrative communities with sufficient data.
## Correlation between farm and household antibiotic use in administrative communities:
## Correlation coefficient (r): 0.017
## 95% confidence interval: [ -0.23 , 0.26 ]
## P-value: 0.9 (not significant)

# 2. Geographic proximity communities (if available)
if(exists("mixed_community_data") && nrow(mixed_community_data) > 0) {
cat("\n### Geographic Proximity Community Analysis ###\n")
cat("Found", nrow(mixed_community_data), "geographic proximity communities with sufficient data.\n")
# Calculate correlation
geo_corr <- cor.test(mixed_community_data$farm_ab_use_pct, mixed_community_data$household_ab_use_pct)
cat("Correlation between farm and household antibiotic use in geographic proximity communities:\n")
cat("Correlation coefficient (r):", round(geo_corr$estimate, 3), "\n")
cat("95% confidence interval: [",
round(geo_corr$conf.int[1], 3), ",",
round(geo_corr$conf.int[2], 3), "]\n")
cat("P-value:", round(geo_corr$p.value, 3),
ifelse(geo_corr$p.value < 0.05, " (significant)", " (not significant)"), "\n")
# Store results for comparison
community_results[["Geographic"]] <- list(
n_communities = nrow(mixed_community_data),
correlation = geo_corr$estimate,
p_value = geo_corr$p.value
)
}
# 3. Information network communities (if available)
if(exists("info_community_stats") && nrow(info_community_stats) > 0 &&
"q7_19" %in% names(household_data) && "info_community_id" %in% names(farm_info)) {
cat("\n### Information Network Community Analysis ###\n")
# Try to assign households to information communities based on shared sources
if(exists("household_info")) {
# Map each household to most similar information community
household_info$info_community_id <- NA
for(i in 1:nrow(household_info)) {
household_source <- household_info$primary_source_std[i]
# Find dominant source in each community
community_sources <- farm_info %>%
filter(!is.na(info_community_id)) %>%
group_by(info_community_id, primary_source_std) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(info_community_id) %>%
filter(count == max(count)) %>%
slice(1)
# Assign to most matching community
matching_community <- community_sources %>%
filter(primary_source_std == household_source)
if(nrow(matching_community) > 0) {
household_info$info_community_id[i] <- matching_community$info_community_id[1]
}
}
# Aggregate household data by information community
household_info_community <- household_info %>%
filter(!is.na(info_community_id)) %>%
group_by(info_community_id) %>%
summarise(
household_count = n(),
household_ab_use_pct = mean(any_ab_last_illness, na.rm = TRUE) * 100,
.groups = "drop"
) %>%
filter(household_count >= 3)
# Join with farm community data
info_comparison <- info_community_stats %>%
rename(farm_count = n_farms, farm_ab_use_pct = ab_use_pct) %>%
dplyr::select(info_community_id, farm_count, farm_ab_use_pct) %>%
inner_join(household_info_community, by = "info_community_id")
if(nrow(info_comparison) > 0) {
cat("Found", nrow(info_comparison), "information network communities with sufficient data.\n")
# Calculate correlation
info_corr <- cor.test(info_comparison$farm_ab_use_pct, info_comparison$household_ab_use_pct)
cat("Correlation between farm and household antibiotic use in information network communities:\n")
cat("Correlation coefficient (r):", round(info_corr$estimate, 3), "\n")
cat("95% confidence interval: [",
round(info_corr$conf.int[1], 3), ",",
round(info_corr$conf.int[2], 3), "]\n")
cat("P-value:", round(info_corr$p.value, 3),
ifelse(info_corr$p.value < 0.05, " (significant)", " (not significant)"), "\n")
# Store results for comparison
community_results[["Information"]] <- list(
n_communities = nrow(info_comparison),
correlation = info_corr$estimate,
p_value = info_corr$p.value
)
} else {
cat("Insufficient data for information network community analysis.\n")
}
}
}
# 4. Human-animal contact (if we have direct links)
if(exists("farm_household_pairs") && nrow(farm_household_pairs) >= 10) {
cat("\n### Direct Contact Analysis ###\n")
# Use the correlation calculated earlier
if(exists("pair_correlation")) {
cat("Using", nrow(farm_household_pairs), "directly linked farm-household pairs.\n")
# Store results for comparison
community_results[["Direct Contact"]] <- list(
n_pairs = nrow(farm_household_pairs),
correlation = pair_correlation$estimate,
p_value = pair_correlation$p.value
)
}
}
# Create a summary table of results across different definitions
if(length(community_results) > 0) {
cat("\n### Comparative Analysis Across Community Definitions ###\n")
results_df <- data.frame(
Community_Definition = names(community_results),
N_Units = sapply(community_results, function(x) ifelse(is.null(x$n_communities), x$n_pairs, x$n_communities)),
Correlation = sapply(community_results, function(x) x$correlation),
P_Value = sapply(community_results, function(x) x$p_value),
stringsAsFactors = FALSE
)
# Create the Significant column explicitly
results_df$Significant <- ifelse(results_df$P_Value < 0.05, "Yes", "No")
# Display table
results_df %>%
mutate(
Correlation = round(Correlation, 3),
P_Value = round(P_Value, 3)
) %>%
kable(caption = "Comparison of Farm-Household Antibiotic Use Correlation Across Different Community Definitions",
col.names = c("Community Definition", "Number of Units",
"Correlation", "P-Value", "Significant")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Visualize comparison
comparison_plot <- ggplot(results_df, aes(x = Community_Definition, y = Correlation, fill = Significant)) +
geom_bar(stat = "identity") +
geom_text(aes(label = sprintf("%.3f", Correlation)), vjust = -0.5, size = 3.5) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
coord_cartesian(ylim = c(min(min(results_df$Correlation) - 0.1, -0.1),
max(max(results_df$Correlation) + 0.1, 0.1))) +
labs(title = "Comparison of Correlation Coefficients Across Community Definitions",
subtitle = "Testing the robustness of community-level isomorphism",
x = "Community Definition",
y = "Correlation Coefficient",
fill = "Statistically\nSignificant") +
scale_fill_manual(values = c("No" = "gray70", "Yes" = "steelblue")) +
theme_minimal()
print(comparison_plot)
}
##
## ### Comparative Analysis Across Community Definitions ###
