Getting Started

Install packages

Import data

df_raw <- read_excel("FarmerSurvey_Master_260123.xlsx", 
    sheet = "Raw") %>% 
  clean_names() # Uses janitor packages to clean names of objects
view (df_raw)

Data Preparation

Renaming columns

Extract column names into a dataframe

var_names <- tibble(old_name = names(df_raw))
view(var_names)

Create a renaming table

# Naming conventions: 
# One Health theme: wb = wellbeing, aw = animal welfare, pl = planet 
# COM-B: cap = capability, mot = motivation, opp = opportunity

rename_key <- tribble(
  ~old_name, ~new_name, ~theme, ~com_b, ~reverse_scored, 
  "id", "id", NA, NA, NA,
  
  "start_time", "start_time", NA, NA, NA,
  
  "what_are_the_last_four_digits_of_your_phone_number_this_cannot_be_used_to_identify_you_we_ask_this_only_so_that_you_can_choose_to_withdraw_from_the_study_within_seven_7_days_of_submission_or", "phone", NA, NA, NA,

  "what_country_do_you_live_in", "country", NA, NA, NA,

  "what_is_your_age_in_years_this_question_is_optional", "age", NA, NA, NA,

  "what_is_your_gender_this_question_is_optional", "gender", NA, NA, NA,

  "how_long_have_you_been_farming_if_you_were_raised_in_farming_please_include_your_childhood_years_as_applicable", "farming_time", NA, NA, NA,

  "what_is_your_farming_situation", "farm_type", NA, NA, NA,

  "what_type_s_of_livestock_do_you_farm_please_select_all_that_apply", "livestock", NA, NA, NA,

  "do_you_also_have_arable_crops", "arable", NA, NA, NA,

  "is_your_farm_fully_organic", "organic", NA, NA, NA,

  "do_you_consider_your_farm_to_be_regenerative_agroecological_if_you_are_unsure_please_select_no", "regen", NA, NA, NA,

  "is_your_farm_a_member_of_any_assurance_schemes", "schemes", NA, NA, NA,

  "which_one_s", "schemes_which", NA, NA, NA,

  "is_there_anything_else_you_would_like_us_to_know_about_your_farm_farming_system_please_do_not_disclose_any_personal_information_like_farm_name_or_specific_location_this_question_is_optional", "farm_freetext", NA, NA, NA,

  "i_am_self_confident", "wb_mot_confident", "wellbeing", "motivation", FALSE,

  "other_farmers_come_to_me_for_advice", "wb_mot_advice", "wellbeing", "motivation", FALSE,
  
  "i_am_physically_healthy", "wb_cap_healthy", "wellbeing", "capability", FALSE,
  
  "overall_i_am_satisfied_with_myself", "wb_mot_satisfied", "wellbeing", "motivation", FALSE,
  
  "when_i_learn_new_information_i_put_it_into_action", "wb_mot_action", "wellbeing", "motivation", FALSE,
  
  "i_enjoy_interacting_with_other_people", "wb_mot_interact", "wellbeing", "motivation", FALSE,
  
  "i_have_broken_bad_habits_to_improve_my_wellbeing", "wb_cap_badhabit", "wellbeing", "capability", FALSE,
  
  "i_have_access_to_the_resources_i_need_such_as_time_and_money_to_live_a_good_life", "wb_opp_money", "wellbeing", "opportunity", FALSE,
  
  "it_is_important_to_me_to_have_the_approval_of_my_friends_and_family", "wb_opp_approval", "wellbeing", "opportunity", FALSE,
  
  "i_adapt_easily_to_change", "wb_mot_adapt", "wellbeing", "motivation", FALSE,
  
  "i_find_looking_after_my_own_well_being_rewarding", "wb_mot_reward", "wellbeing", "motivation", FALSE,
  
  "i_can_do_things_as_well_as_most_other_people", "wb_mot_ability", "wellbeing", "motivation", FALSE,
  
  "i_have_people_who_i_can_turn_to_for_help", "wb_opp_help", "wellbeing", "opportunity", FALSE,
  
  "i_take_a_positive_attitude_toward_myself", "wb_mot_posatt", "wellbeing", "motivation", FALSE,
  
  "i_know_ways_to_improve_my_own_mental_wellbeing", "wb_cap_improve", "wellbeing", "capability", FALSE,
  
  "i_have_people_around_me_who_support_me", "wb_opp_support", "wellbeing", "opportunity", FALSE,

  "is_there_anything_else_you_would_like_us_to_know_about_your_wellbeing_please_do_not_disclose_any_personal_information_that_could_identify_you_this_question_is_optional", "wellbeing_freetext", NA, NA, NA,
  
  "i_know_what_to_do_to_improve_the_wellbeing_of_my_animals", "aw_cap_improve", "animal", "capability", FALSE,
  
  "i_have_access_to_all_the_resources_e_g_time_money_i_need_to_take_good_care_of_my_animals", "aw_opp_resource", "animal", "opportunity", FALSE,
  
  "improving_the_wellbeing_of_my_animals_will_be_beneficial_for_my_farm_and_business", "aw_mot_benefit", "animal", "motivation", FALSE,
  
  "the_benefits_of_taking_good_care_of_my_animals_outweighs_the_costs", "aw_mot_outweigh", "animal", "motivation", FALSE,
  
  "i_feel_emotionally_connected_to_my_animals", "aw_mot_connect", "animal", "motivation", FALSE,
  
  "other_farmers_i_know_take_steps_to_improve_the_wellbeing_of_their_animals", "aw_opp_farmers", "animal", "opportunity", FALSE,
  
  "my_physical_health_makes_it_difficult_to_take_good_care_of_my_animals", "aw_cap_health", "animal", "capability", TRUE,
  
  "a_main_responsibility_of_a_farmer_is_to_uphold_the_wellbeing_of_his_her_animals", "aw_mot_resp", "animal", "motivation", FALSE,
  
  "the_wellbeing_of_my_animals_is_one_of_the_most_important_things_in_my_life", "aw_mot_priority", "animal", "motivation", FALSE,
  
  "taking_good_care_of_my_animals_is_within_my_control", "aw_mot_control", "animal", "motivation", FALSE,
  
  "others_i_work_with_e_g_purchasers_industry_associates_pressure_me_to_improve_the_wellbeing_of_my_animals", "aw_opp_pressure", "animal", "opportunity", FALSE,
  
  "i_know_my_animals_have_feelings", "aw_cap_feelings", "animal", "capability", FALSE,
  
  "i_have_other_people_who_help_me_improve_the_lives_of_my_animals", "aw_opp_others", "animal", "opportunity", FALSE,
  
  "i_care_about_how_my_animals_feel", "aw_mot_feel", "animal", "motivation", FALSE,
  
  "i_will_experience_benefits_rewards_if_i_take_steps_to_improve_the_wellbeing_of_my_animals", "aw_mot_worth", "animal", "motivation", FALSE,
  
  "if_animal_welfare_is_maximised_there_are_negative_impacts_on_farm_productivity_and_economics", "aw_mot_tradeoff", "animal", "motivation", TRUE,

    "is_there_anything_else_you_would_like_us_to_know_about_your_views_and_experiences_with_your_livestock_this_question_is_optional", "livestock_freetext", NA, NA, NA,

  "if_i_do_things_to_reduce_my_environmental_impact_the_health_and_wellbeing_of_my_animals_will_also_improve", "pl_mot_mutual", "planet", "motivation", FALSE,

  "i_am_worried_about_the_health_of_the_planet", "pl_mot_ecoanxiety", "planet", "motivation", FALSE,
  
  "i_feel_pressure_from_others_to_reduce_my_environmental_impact", "pl_opp_pressure", "planet", "opportunity", FALSE,
  
  "i_have_access_to_all_the_resources_i_need_e_g_time_money_to_take_actions_that_minimise_negative_environmental_impacts", "pl_opp_resource", "planet", "opportunity", FALSE,
  
  "the_environment_is_being_harmed_by_the_combined_actions_of_individual_people", "pl_cap_harm", "planet", "capability", FALSE,
  
  "my_poor_physical_health_makes_me_unable_to_do_things_that_would_benefit_the_environment_e_g_walking_places_instead_of_driving", "pl_cap_health", "planet", "capability", TRUE,
  
  "i_know_what_i_have_to_do_to_be_more_environmentally_friendly", "pl_cap_know", "planet", "capability", FALSE,
  
  "caring_for_the_environment_is_our_social_responsibility", "pl_opp_know", "planet", "opportunity", FALSE,
  
  "i_feel_a_sense_of_connection_to_the_planet_and_to_wildlife", "pl_mot_connect", "planet", "motivation", FALSE,
  
  "taking_care_of_the_environment_comes_at_the_expense_of_my_farm_efficiency_productivity", "pl_mot_productivity", "planet", "motivation", TRUE,
  
  "a_consideration_for_the_environment_is_an_important_part_of_being_a_farmer", "pl_mot_identity", "planet", "motivation", FALSE,
  
  "it_is_within_my_power_to_positively_impact_the_environment", "pl_mot_power", "planet", "motivation", FALSE,
  
  "the_benefits_of_preserving_the_environment_outweigh_the_downsides_e_g_costs", "pl_mot_outweigh", "planet", "motivation", FALSE,
  
  "other_farmers_i_know_are_taking_steps_to_minimise_negative_environmental_impacts", "pl_opp_peers", "planet", "opportunity", FALSE,
  
  "environmental_conservation_is_among_my_top_goals", "pl_mot_peers", "planet", "motivation", FALSE,
  
  "i_will_experience_benefits_rewards_if_i_take_action_to_preserve_the_environment", "pl_mot_benfit", "planet", "motivation", FALSE,
  
  "is_there_anything_else_you_would_like_us_to_know_about_your_views_on_the_environment_this_question_is_optional", "planet_freetext", NA, NA, NA,

  "is_there_anything_else_you_would_like_to_add_this_question_is_optional", "anything_else", NA, NA, NA,

)

Safety Check

# Check for unmatched column names with 
setdiff(rename_key$old_name, names(df_raw))
## character(0)
# Check for duplicated new names in the rename key
rename_key %>%
  count(new_name) %>%
  filter(n > 1)
## # A tibble: 0 × 2
## # ℹ 2 variables: new_name <chr>, n <int>

Apply the renaming table to the dataframe

# Create a new data frame (df_tidy) to rename columns in df_raw using a lookup table (rename_key)
df_tidy <- df_raw %>%
  rename_with( # For each selected column name (.x),
    # find its position in rename_key$old_name
    # and replace it with the corresponding new_name
    ~ rename_key$new_name[
        match(.x, rename_key$old_name)
      ],
    # Apply this renaming only to columns listed in rename_key$old_name
    .cols = rename_key$old_name
  )

# save the renaming key as a .csv file for reproducibilty
write_csv(rename_key, "variable_rename_key.csv")

# Now we have a new dataframe called 'df_tidy' which is the same as df_raw but with the renamed column headings to use in subsequent steps of the analyses. 

Code Likert Data

Define Likert mapping

# This identifies the five levels of the likert scale used in the original dataset
likert_levels_5 <- c(
  "Strongly disagree",
  "Disagree",
  "Neutral",
  "Agree",
  "Strongly agree"
)

Identify likert items

This is possible because in an earlier chunk, metadata were assigned to the statement columns with info regarding their Theme, COM-B component, and if they were reverse coded

likert_items <- rename_key %>%
  filter(!is.na(theme), !is.na(com_b)) %>%   # keeps only scored items
  pull(new_name) %>%
  unique()

view(likert_items) # This gives us 48 items in the likert_items string which is correct as there are 16*3 likert statements

Convert text to ordered factor –> numeric

# Convert the Likert-scale columns to ordered factors
df_tidy <- df_tidy %>%
  mutate(across( 
    all_of(likert_items), # selects all of (and only) the likert columns
    ~ factor(., levels = likert_levels_5, ordered = TRUE)
  )) # using the specified 5-point likert levels, turn likert columns into ordered factors (changes the class from character to ordinal)

# Define a numeric mapping for 5-point Likert responses
map_5 <- c(
  "Strongly disagree" = 1,
  "Disagree"          = 2,
  "Neutral"           = 3,
  "Agree"             = 4,
  "Strongly agree"    = 5
)

# Recode likert items from ordered factors to numeric values
df_tidy <- df_tidy %>%
  mutate(across(all_of(likert_items), #select all the likert items
                ~ unname(map_5[as.character(.)]) %>% as.numeric())) #convert factors to characters, map to numeric values using map_5, and coerce to numbers

# NB: we convert factor --> character because converting a factor directly to numeric would corrupt the scale by indexing the map_5 as factor integers rather than labels. The latter (labels) is correct for Likert scales. If we converted to numeric, we could get results that are incorrect and behave unpredictably depending on level ordering. In short, we need text labels to safely map them to predefined numeric scales. If we didn't, we would accidentally use R's internal integer codes instead of intended Likert scoring. 

# if labels don't match exactly we will get NAs so let's do a quick check:


na_introduced <- colSums(is.na(df_tidy[ , likert_items]))
na_introduced[na_introduced > 0]  # inspect if any appear. 
## pl_cap_harm 
##           1
# one does, as this somehow was missed by the respondent. This is fine as the missing value won't break the workflow since we are using aggregate scores not PCA. The Mann-Whitney tests will automatically drop NAs. 

Reverse code items

reverse_items <- rename_key %>%
  filter(reverse_scored == TRUE)%>%
  pull(new_name)
# reverse code the identified items in a new dataframe called 'df_reverse'
df_tidy <- df_tidy %>%
  mutate(across(all_of(reverse_items), ~ 6 - .))
# List of reversed items
identical(df_tidy$aw_cap_health, df_tidy$aw_cap_health)
## [1] TRUE
# Sanity checks
sapply(df_tidy[ , reverse_items], class)             # should be "numeric"
##       aw_cap_health     aw_mot_tradeoff       pl_cap_health pl_mot_productivity 
##           "numeric"           "numeric"           "numeric"           "numeric"
summary(df_tidy[ , reverse_items])                   # ranges should be 1..5
##  aw_cap_health   aw_mot_tradeoff pl_cap_health   pl_mot_productivity
##  Min.   :2.000   Min.   :1.000   Min.   :1.000   Min.   :1.000      
##  1st Qu.:4.000   1st Qu.:3.000   1st Qu.:4.000   1st Qu.:2.250      
##  Median :4.000   Median :4.000   Median :4.000   Median :3.000      
##  Mean   :3.909   Mean   :3.545   Mean   :3.682   Mean   :2.909      
##  3rd Qu.:4.000   3rd Qu.:4.750   3rd Qu.:4.000   3rd Qu.:3.750      
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000

Create Composite Scores

# Map of items → theme & COM-B (only Likert items)
item_map <- rename_key %>%
  filter(!is.na(theme), !is.na(com_b)) %>%
  select(item = new_name, theme, com_b) %>%
  distinct()

id_cols <- c("id", "regen")


# Long format with metadata
df_long <- df_tidy %>%
  pivot_longer(
    cols = all_of(item_map$item),
    names_to = "item",
    values_to = "score"
  ) %>%
  left_join(item_map, by = "item")


#ensure farmer type is a 2-level factor

df_long <- df_long %>% #df long creates a long-format dataset where each row represent one participant, one item, and one score
  mutate(regen = factor(regen, levels = c("Yes", "No")))
df_long <- df_long %>%
  # fix any inconsistent yes/no data
  mutate(
    regen = trimws(regen),
    regen = tools::toTitleCase(tolower(regen))
  ) %>%
  # assign clean labels
  mutate(
    regen = factor(
      regen,
      levels = c("Yes", "No"),
      labels = c("Regenerative", "Conventional")
    )
  )

Create COM-B x theme composites (both sum and mean)

Because one participant has a missing item, the mean may be preferable (not influenced by the non-missing value). By computing both, mean will be primary and sum as a sensitivity check

df_composites <- df_long %>% 
  group_by(across(all_of(id_cols)), theme, com_b) %>% #groups data by participant ID (id_cols), theme, and COM-B component. 
  summarise(
    composite_sum  = sum(score, na.rm = TRUE), #adds up all item scores contributing to that construct, producing a total scale score = overall magnitude
    composite_mean = mean(score, na.rm = TRUE), #average score across items, ignores missing values, standardises scale to 1-5 regardless of number of items. more interpretable for likert scales
    n_items        = sum(!is.na(score)), # counts how many non-missing items contribute to that composite
    .groups = "drop" # removes grouping structure after summarising which can otherwise influence results downstream
  ) %>%
  unite("theme_com_b", theme, com_b, remove = FALSE) # creates new composite variable for plotting, summarising, modelling

Sanity check

#How many items contributed to each composite (by design)?
df_composites %>%
  count(theme, com_b, n_items)
## # A tibble: 10 × 4
##    theme     com_b       n_items     n
##    <chr>     <chr>         <int> <int>
##  1 animal    capability        3    22
##  2 animal    motivation        9    22
##  3 animal    opportunity       4    22
##  4 planet    capability        2     1
##  5 planet    capability        3    21
##  6 planet    motivation        9    22
##  7 planet    opportunity       4    22
##  8 wellbeing capability        3    22
##  9 wellbeing motivation        9    22
## 10 wellbeing opportunity       4    22
# Quick distribution snapshot
df_composites %>%
  group_by(theme_com_b, regen) %>%
  summarise(
    n = dplyr::n(),
    median_sum  = median(composite_sum),
    IQR_sum     = IQR(composite_sum),
    median_mean = median(composite_mean),
    IQR_mean    = IQR(composite_mean),
    .groups = "drop"
  )
## # A tibble: 18 × 7
##    theme_com_b           regen         n median_sum IQR_sum median_mean IQR_mean
##    <chr>                 <fct>     <int>      <dbl>   <dbl>       <dbl>    <dbl>
##  1 animal_capability     Regenera…    11         13     1          4.33    0.333
##  2 animal_capability     Conventi…    11         12     1.5        4       0.5  
##  3 animal_motivation     Regenera…    11         39     2.5        4.33    0.278
##  4 animal_motivation     Conventi…    11         39     4          4.33    0.444
##  5 animal_opportunity    Regenera…    11         14     3          3.5     0.75 
##  6 animal_opportunity    Conventi…    11         15     3.5        3.75    0.875
##  7 planet_capability     Regenera…    11         13     2          4.33    0.667
##  8 planet_capability     Conventi…    11         11     2          3.67    0.667
##  9 planet_motivation     Regenera…    11         36     3.5        4       0.389
## 10 planet_motivation     Conventi…    11         37     9.5        4.11    1.06 
## 11 planet_opportunity    Regenera…    11         14     1.5        3.5     0.375
## 12 planet_opportunity    Conventi…    11         14     2.5        3.5     0.625
## 13 wellbeing_capability  Regenera…    11         10     1.5        3.33    0.500
## 14 wellbeing_capability  Conventi…    11         10     4          3.33    1.33 
## 15 wellbeing_motivation  Regenera…    11         33     6          3.67    0.667
## 16 wellbeing_motivation  Conventi…    11         32     7.5        3.56    0.833
## 17 wellbeing_opportunity Regenera…    11         15     2.5        3.75    0.625
## 18 wellbeing_opportunity Conventi…    11         15     4.5        3.75    1.12

Two rows for planet x capability as one respondent skipped one question they are parsed onto a separate row. Overall, the above code transforms item-level Likert data into participant-level construct scores aligned with COM-B x One Health themes, then validating structural integrity and inspecting group-level distributions.

Mann-Whitney U tests for each One Health theme x COM-B composite

library (rstatix)

mw_results <- df_composites %>%
  group_by(theme_com_b) %>%
  wilcox_test(composite_mean ~ regen, detailed = TRUE) %>%
  mutate(test = "Mann-Whitney U") %>%
  select(theme_com_b, test, statistic, p, method, alternative, n1, n2)

view(mw_results)
delta_results <- df_composites %>%
  group_by(theme_com_b) %>%
  summarise(
    # Using effsize:: explicitly prevents conflicts
    cliff     = effsize::cliff.delta(composite_mean ~ regen)$estimate,
    conf_low  = effsize::cliff.delta(composite_mean ~ regen)$conf.int[1],
    conf_high = effsize::cliff.delta(composite_mean ~ regen)$conf.int[2]
  )

view(delta_results)
test_table <- mw_results %>%
  left_join(delta_results, by = "theme_com_b") %>%
  mutate(
    p_holm = p.adjust(p, method = "holm")
  ) %>%
  arrange(theme_com_b)

test_table
## # A tibble: 9 × 12
##   theme_com_b      test  statistic     p method alternative    n1    n2    cliff
##   <chr>            <chr>     <dbl> <dbl> <chr>  <chr>       <int> <int>    <dbl>
## 1 animal_capabili… Mann…      82   0.157 Wilco… two.sided      11    11  0.355  
## 2 animal_motivati… Mann…      61.5 0.974 Wilco… two.sided      11    11  0.0165 
## 3 animal_opportun… Mann…      54   0.69  Wilco… two.sided      11    11 -0.107  
## 4 planet_capabili… Mann…      82.5 0.152 Wilco… two.sided      11    11  0.364  
## 5 planet_motivati… Mann…      61   1     Wilco… two.sided      11    11  0.00826
## 6 planet_opportun… Mann…      57.5 0.867 Wilco… two.sided      11    11 -0.0496 
## 7 wellbeing_capab… Mann…      61.5 0.973 Wilco… two.sided      11    11  0.0165 
## 8 wellbeing_motiv… Mann…      68.5 0.62  Wilco… two.sided      11    11  0.132  
## 9 wellbeing_oppor… Mann…      69   0.594 Wilco… two.sided      11    11  0.140  
## # ℹ 3 more variables: conf_low <dbl>, conf_high <dbl>, p_holm <dbl>
write_csv(test_table, "COMB_OHtheme_results.csv")

Tests just across COM-B components

First aggregate scores just according to COM-B, not One Health theme

df_composites_overall <- df_long %>%
  group_by(id, regen, com_b) %>%
  summarise(
    composite_mean = mean(score, na.rm = TRUE),
    composite_sum  = sum(score, na.rm = TRUE),
    n_items = sum(!is.na(score)),
    .groups = "drop"
  )

Now calculate medians and IQRs for both groups across COM-B composites

# ensure consistent COM-B order in outputs/plots
df_composites_overall <- df_composites_overall %>%
  mutate(
    com_b = factor(com_b, levels = c("capability", "opportunity", "motivation"),
                   labels = c("Capability", "Opportunity", "Motivation"))
  )


# Summary for composite_mean
comb_summary_mean <- df_composites_overall %>%
  group_by(com_b, regen) %>%
  summarise(
    n          = sum(!is.na(composite_mean)),
    median     = median(composite_mean, na.rm = TRUE),
    IQR        = IQR(composite_mean, na.rm = TRUE),
    min        = suppressWarnings(min(composite_mean, na.rm = TRUE)),
    max        = suppressWarnings(max(composite_mean, na.rm = TRUE)),
    n_missing  = sum(is.na(composite_mean)),
    .groups = "drop"
  ) %>%
  # round to two decimal places
  mutate(
    median = round(median, 2),
    IQR    = round(IQR, 2),
    min    = round(min, 2),
    max    = round(max, 2)
  ) %>%
  arrange(com_b, regen)

# Save as CSV
write_csv(comb_summary_mean, "COMB_overall_median_IQR_mean.csv")

# print a neat table 
comb_summary_mean
## # A tibble: 6 × 8
##   com_b       regen            n median   IQR   min   max n_missing
##   <fct>       <fct>        <int>  <dbl> <dbl> <dbl> <dbl>     <int>
## 1 Capability  Regenerative    11   4.11  0.44  3.22  4.67         0
## 2 Capability  Conventional    11   3.78  0.72  2.62  4.78         0
## 3 Opportunity Regenerative    11   3.5   0.67  3     4.25         0
## 4 Opportunity Conventional    11   3.67  0.79  2.58  4.58         0
## 5 Motivation  Regenerative    11   4.04  0.26  3.26  4.3          0
## 6 Motivation  Conventional    11   3.7   0.63  3.04  4.74         0
mw_results_COMB <- df_composites_overall %>%
  group_by(com_b) %>%
  wilcox_test(composite_mean ~ regen, detailed = TRUE) %>%
  mutate(test = "Mann-Whitney U") %>%
  select(com_b, test, statistic, p, method, alternative, n1, n2)

view(mw_results_COMB)
delta_results_COMB <- df_composites_overall %>%
  group_by(com_b) %>%
  summarise(
    # Using effsize:: explicitly prevents conflicts
    cliff     = effsize::cliff.delta(composite_mean ~ regen)$estimate,
    conf_low  = effsize::cliff.delta(composite_mean ~ regen)$conf.int[1],
    conf_high = effsize::cliff.delta(composite_mean ~ regen)$conf.int[2]
  )

view(delta_results_COMB)
test_table_COMB <- mw_results_COMB %>%
  left_join(delta_results_COMB, by = "com_b") %>%
  mutate(
    p_holm = p.adjust(p, method = "holm")
  ) %>%
  arrange(com_b)

test_table_COMB
## # A tibble: 3 × 12
##   com_b    test  statistic     p method alternative    n1    n2   cliff conf_low
##   <fct>    <chr>     <dbl> <dbl> <chr>  <chr>       <int> <int>   <dbl>    <dbl>
## 1 Capabil… Mann…      79.5 0.221 Wilco… two.sided      11    11 0.314     -0.130
## 2 Opportu… Mann…      61   1     Wilco… two.sided      11    11 0.00826   -0.448
## 3 Motivat… Mann…      77   0.292 Wilco… two.sided      11    11 0.273     -0.204
## # ℹ 2 more variables: conf_high <dbl>, p_holm <dbl>
write_csv(test_table_COMB, "COMB_only_results.csv")

Produce violin plots for high-level COM-B only

p_comb <- ggplot(df_composites_overall,
       aes(x = regen,
           y = composite_mean,
           fill = regen)) +
  
  # Violin
  geom_violin(trim = FALSE, alpha = 0.6, width = 0.9, colour = NA) +
  
  # Boxplot overlay
  geom_boxplot(width = 0.25, alpha = 0.7, outlier.shape = NA) +
  
  # Individual points
  geom_jitter(width = 0.05, size = 2, alpha = 0.9) +
  
  # Facet by COM‑B component
  facet_wrap(~ com_b, scales = "free_y") +
  
  # Colours
  scale_fill_brewer(palette = "Set2") +
  
  labs(
    x = "Farm type",
    y = "Mean score"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "none",
    strip.text = element_text(size = 12, face = "bold")
  )

p_comb #prints plot

ggsave(
  filename = "COMB_overall_violin.png",
  plot = p_comb,
  width = 10,          # inches
  height = 6,          # inches
  dpi = 300            # publication quality
)
df_tidy$farming_time <- fct_relevel(
  df_tidy$farming_time,
  "3-5 years",
  "6-10 years",
  "11-15 years",
  "16-20 years",
  "21-30 years",
  "31-40 years",
  "41-50 years",
  "51-60 years"
)

#create plot


p_farming_time <- ggplot(
  df_tidy,
  aes(
    x = farming_time,
    y = after_stat(count / sum(count)),
    fill = farming_time
  )
) +
  
  # Bars
  geom_bar(
    stat = "count",
    alpha = 0.7,
    width = 0.9,
    colour = NA
  ) +
  
  # Percentage labels on bars
  geom_text(
    stat = "count",
    aes(
      label = scales::percent(
        after_stat(count / sum(count)),
        accuracy = 1
      )
    ),
    vjust = -0.4,
    size = 3.2
  ) +
  
  # Y-axis as percentages
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    expand = expansion(mult = c(0, 0.08))  # room for labels
  ) +
  
  # Colours to match other figure
  scale_fill_brewer(palette = "Set2") +
  
  labs(
    x = "Years of farming experience",
    y = "Percentage of respondents"
  ) +
  
  # Theme to match existing plot
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 9),  # smaller x-axis labels
    panel.grid.major = element_blank(),  # remove gridlines
    panel.grid.minor = element_blank()

  )

p_farming_time

ggsave(
  filename = "Farming_Experience.png",
  plot = p_farming_time,
  width = 10,          # inches
  height = 6,          # inches
  dpi = 300            # publication quality
)