Install and load packages

# Install pacman ("package manager") if needed
if (!require("pacman")) install.packages("pacman")

# Load contributed packages with pacman
pacman::p_load(pacman, ggplot2, rworldmap, tidyverse, psych, dplyr, tidyr, countrycode)

## Import data

df1 <- read_csv("CH1_Scoping_DataCharting_241226.csv")

Rename headings

colnames(df1)
##  [1] "Covidence #"                                
##  [2] "Study ID...2"                               
##  [3] "Title...3"                                  
##  [4] "Study ID...4"                               
##  [5] "Title...5"                                  
##  [6] "Lead author"                                
##  [7] "Lead author institution"                    
##  [8] "Journal"                                    
##  [9] "Publication year"                           
## [10] "Country of study"                           
## [11] "Aim of study"                               
## [12] "Theoretical Framework"                      
## [13] "Data Type"                                  
## [14] "Data Collection Methodology"                
## [15] "Study Design"                               
## [16] "Analytical Methodology"                     
## [17] "Target Behaviour"                           
## [18] "Intervention Designed or Delivered"         
## [19] "Intervention designed based on HBC science?"
## [20] "Intervention Function"                      
## [21] "Mechanism for Change Identified?"           
## [22] "Mechanism for Change"                       
## [23] "Intervention Details"                       
## [24] "Theme"                                      
## [25] "One Health/Welfare component"               
## [26] "Participant Description"                    
## [27] "Total Number of Participants"               
## [28] "Male Proportion"                            
## [29] "Farming Context & Species"                  
## [30] "Findings"                                   
## [31] "Overall Impact of Intervention"
colnames(df1)[9] <- "year"
colnames(df1)[10] <- "country"
colnames(df1)[11] <- "aim"
colnames(df1)[12] <- "framework"
colnames(df1)[13] <- "type"
colnames(df1)[14] <- "datacoll"
colnames(df1)[15] <- "design"
colnames(df1)[16] <- "analysis"
colnames(df1)[18] <- "approach"
colnames(df1)[19] <- "hbc"
colnames(df1)[20] <- "bcw"
colnames(df1)[25] <- "component"
colnames(df1)[27] <- "totpar"
colnames(df1)[28] <- "maleprop"
colnames(df1)[29] <- "species"
colnames(df1)[31] <- "impact"

Generate world map

#Split the 'country' column into multiple rows
dfcountry <- df1 %>%
  separate_rows(country, sep = ";")

#Calculate frequency of studies per country
country_counts <- dfcountry %>%
  count(country, name = "value")

#Add IS03 codes
country_counts$ISO3 <- countrycode(country_counts$country, "country.name", "iso3c")

#Join data to a map
map_data <- joinCountryData2Map(country_counts,
                                joinCode = "ISO3",
                                nameJoinColumn = "ISO3",
                                verbose = TRUE)
## 28 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
##      failedCodes failedCountries
## 217 codes from the map weren't represented in your data
# Colour palette
green_palette <- c("#7bccc4", "#43a2ca", "#0868ac")  # Colourblind-friendly shades from ColourBrewer

breaks <- c(0, 1, 2, 3)


# Adjust margins to reduce white space
par(mar = c(1, 0.5, 0.5, 0.5))  # Smaller margins around the map

# Plot the map
mapCountryData(map_data, nameColumnToPlot = "value",
               catMethod = breaks, 
               colourPalette = green_palette,       # sets the palette to the vector above
               mapTitle = "",                       # to keep the title blank
               addLegend = FALSE,                   # to set a custom legend
               missingCountryCol = "lightgrey")

# Define legend labels and colors
legend_labels <- c("1", "2", "3")  # Because three frequencies in the dataset
legend_colors <- green_palette    # So that the legend colours are consistent with the map

# Add the custom legend
legend("bottom",                       # Position: bottom 
       legend = legend_labels,         # Calls the legend labels defined above
       fill = legend_colors,           # Calls the legend colours defined above
       title = "Number of Studies",    # Legend title
       horiz = TRUE,                   # Legend position is horizontal
       cex = 0.8)                      # Text size

# Reset margins to default for subsequent plots 
par(mar = c(5, 4, 4, 2) + 0.1)

Generate frequency histogram of study year

# Histogram for the df1 dataset
ggplot(df1, aes(x = year, fill = type)) +         
    geom_histogram(binwidth = 1,          
                   position = "stack",         # Show proportions of each type within each bar
                   color = "lightgrey",        # Border color for better visibility
                   alpha = 1.0) +              # Transparency
    labs(title = "",  
         x = "Year of Publication",                         
         y = "Number of Studies") +                 # Change y-axis label to indicate proportion
    scale_x_continuous(breaks = seq(min(df1$year), max(df1$year), by = 1)) +  # Show every year
    theme_classic() +
    theme(panel.grid.major = element_blank(),       # Remove major gridlines
          panel.grid.minor = element_blank(),       # Remove minor gridlines
          legend.position = c(0.15, 0.8)) +
    scale_fill_manual(values = c("Quantitative" = "#8da0cb", 
                                  "Qualitative" = "#66c2a5", 
                                  "Mixed-Methods" = "#fc8d62"), 
                      name = "Data Type")           # Custom colors and legend title

Calculating quantitative descriptives

Methodology

#summarise overall methodology
table(df1$type)
## 
## Mixed-Methods   Qualitative  Quantitative 
##             2             4            15
# summarise theoretical frameworks
table(df1$framework)
## 
##                                             ADKAR 
##                                                 3 
##                                             COM-B 
##                                                 3 
##           Easy, Attractive, Social, Timely (EAST) 
##                                                 1 
##                   Extended Parallel Process Model 
##                                                 1 
## Self-Determination Theory; Transtheoretical Model 
##                                                 1 
##                       Theory of Planned Behaviour 
##                                                11 
##                  Transtheoretical Model of Change 
##                                                 1
# data collection methodology

#Split the 'country' column into multiple rows
dfdata <- df1 %>%
  separate_rows(datacoll, sep = "; ")

#Calculate frequency of each data collection type
datacoll_counts <- dfdata %>%
  count(datacoll, name = "value")

print(datacoll_counts)
## # A tibble: 3 × 2
##   datacoll    value
##   <chr>       <int>
## 1 Focus group     2
## 2 Interview       4
## 3 Survey         17
#Calculate frequency of each type of study design
design_counts <- df1 %>%
  count(design, name = "value")

print(design_counts)
## # A tibble: 5 × 2
##   design                      value
##   <chr>                       <int>
## 1 Control                         1
## 2 Cross-sectional                12
## 3 Longitudinal                    6
## 4 Longitudinal control            1
## 5 Randomised controlled trial     1
# analytical methodology 

#Split the 'analysis' column into multiple rows
dfanalysis <- df1 %>%
  separate_rows(analysis, sep = "; ")

#Calculate frequency of each data collection type
analysis_counts <- dfanalysis %>%
  count(analysis, name = "value") 

# Sort the data frame in descending order of frequency
sorted_analysis_counts <- analysis_counts %>%
  arrange(desc(value))

print(sorted_analysis_counts)
## # A tibble: 14 × 2
##    analysis                                 value
##    <chr>                                    <int>
##  1 Descriptive                                  8
##  2 Non-Parametric Tests                         7
##  3 Logistic Regression                          5
##  4 Thematic Analysis (Deductive)                5
##  5 Linear Regression                            4
##  6 Structural Equation Modelling                3
##  7 t-tests                                      2
##  8 Confirmatory Factor Analysis                 1
##  9 Contingent Valuation Method                  1
## 10 Interpretative Phenomenological Analysis     1
## 11 Linear mixed model                           1
## 12 Paired t-tests                               1
## 13 Pearson's correlation, ANOVA                 1
## 14 Thematic Analysis (Inductive)                1

Population

# Summarise sample sizes

df1$totpar <- as.numeric(df1$totpar) # Thought i needed this but I don't as already numeric
summary(df1$totpar)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    23.0    82.0   155.0   215.1   230.0   928.0
sd_totpar <- sd(df1$totpar) #standard deviation of sample sizes
print(sd_totpar)
## [1] 225.3558
# Convert 'maleprop' to numeric, replacing 'na' with NA
df1$maleprop <- as.numeric(ifelse(df1$maleprop == "na", NA, df1$maleprop))

summary(df1$maleprop) #create a summary of this variable
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.5700  0.7550  0.7975  0.7946  0.8575  1.0000       7
sd_maleprop <- sd(df1$maleprop, na.rm = TRUE)      # Standard deviation, excluding NAs
print(sd_maleprop)
## [1] 0.1197829
#calculate weighted average of male participants across all studies
weighted_avg <- sum(df1$totpar * df1$maleprop, na.rm = TRUE) / sum(df1$totpar, na.rm = TRUE)

# Print the result
cat("Weighted Average:", weighted_avg, "\n")
## Weighted Average: 0.5746802

farming species/contexts

# Replace all variations of dairy* with "dairy"
df1 <- df1 %>%
  mutate(species = str_replace(species, "Dairy.*", "Dairy"))

# Replace two variations of broilers with "Broilers"
df1 <- df1 %>%
  mutate(species = str_replace(species, "Broiler poultry|Conventional broiler farms", "Broilers"))

species_counts <- df1 %>%
  count(species, name = "value") 

print(species_counts)
## # A tibble: 10 × 2
##    species                                                                 value
##    <chr>                                                                   <int>
##  1 35 broiler producers, 22 layer producers, 24 breeders, 19 turkey produ…     1
##  2 Broilers                                                                    2
##  3 Cattle                                                                      5
##  4 Cattle, other livestock                                                     1
##  5 Cattle, sheep, goats, deer, bison, horses, buffalo                          1
##  6 Dairy                                                                       7
##  7 Pig                                                                         1
##  8 Sheep breeding + other livestock species.                                   1
##  9 Varied - mostly plant breeding in combination with pig or beef. Some i…     1
## 10 poultry                                                                     1

Intervention characteristics

Intervention design

approach_counts <- df1 %>%
  count(approach, name = "value") 

print(approach_counts)
## # A tibble: 2 × 2
##   approach  value
##   <chr>     <int>
## 1 Delivered    15
## 2 Designed      6
# Count the number of assessed (i.e. delivered) interventions that are based on HBC science
delivered_yes_count <- df1 %>%
  filter(approach == "Delivered" & hbc == "Yes") %>%
  summarise(count = n())

# Print the result
print(delivered_yes_count)
## # A tibble: 1 × 1
##   count
##   <int>
## 1     5
#Split the 'analysis' column into multiple rows
dfBCW <- df1 %>%
  separate_rows(bcw, sep = "; ") 

#Calculate frequency of each data collection type
BCW_counts <- dfBCW %>%
  count(bcw, name = "value") 

# Sort the data frame in descending order of frequency
sorted_BCW_counts <- BCW_counts %>%
  arrange(desc(value))

print(sorted_BCW_counts)
## # A tibble: 9 × 2
##   bcw                         value
##   <chr>                       <int>
## 1 Education                      11
## 2 Enablement                     11
## 3 Incentivisation                 5
## 4 Modelling                       5
## 5 Restrictions                    4
## 6 Training                        4
## 7 Persuasion                      3
## 8 Coercion                        2
## 9 Environmental restructuring     2

Intervention focus

component_counts <- df1 %>%
  count(component, name = "value") 

print(component_counts)
## # A tibble: 5 × 2
##   component          value
##   <chr>              <int>
## 1 Animal                 6
## 2 Animal; Human          4
## 3 Animal; One Health     1
## 4 Human                  4
## 5 Planet                 6

Intervention outcome

# Figure out the frequency of different outcomes for delivered/assessed interventions
impact_count <- df1 %>%
  filter(approach == "Delivered") %>%
  count(impact, name = "value") 

# Print the result
print(impact_count)
## # A tibble: 4 × 2
##   impact   value
##   <chr>    <int>
## 1 Negative     1
## 2 Null         3
## 3 Positive    10
## 4 n/a          1
# Figure out the frequency of different outcomes for delivered/assessed interventions
impactHBC_count <- df1 %>%
  filter(approach == "Delivered" & hbc == "Yes") %>%
  count(impact, name = "value") 

# Print the result
print(impactHBC_count)
## # A tibble: 2 × 2
##   impact   value
##   <chr>    <int>
## 1 Null         1
## 2 Positive     4