# Install pacman ("package manager") if needed
if (!require("pacman")) install.packages("pacman")
# Load contributed packages with pacman
pacman::p_load(pacman, ggplot2, rworldmap, tidyverse, psych, dplyr, tidyr, countrycode)
## Import data
df1 <- read_csv("CH1_Scoping_DataCharting_241226.csv")
colnames(df1)
## [1] "Covidence #"
## [2] "Study ID...2"
## [3] "Title...3"
## [4] "Study ID...4"
## [5] "Title...5"
## [6] "Lead author"
## [7] "Lead author institution"
## [8] "Journal"
## [9] "Publication year"
## [10] "Country of study"
## [11] "Aim of study"
## [12] "Theoretical Framework"
## [13] "Data Type"
## [14] "Data Collection Methodology"
## [15] "Study Design"
## [16] "Analytical Methodology"
## [17] "Target Behaviour"
## [18] "Intervention Designed or Delivered"
## [19] "Intervention designed based on HBC science?"
## [20] "Intervention Function"
## [21] "Mechanism for Change Identified?"
## [22] "Mechanism for Change"
## [23] "Intervention Details"
## [24] "Theme"
## [25] "One Health/Welfare component"
## [26] "Participant Description"
## [27] "Total Number of Participants"
## [28] "Male Proportion"
## [29] "Farming Context & Species"
## [30] "Findings"
## [31] "Overall Impact of Intervention"
colnames(df1)[9] <- "year"
colnames(df1)[10] <- "country"
colnames(df1)[11] <- "aim"
colnames(df1)[12] <- "framework"
colnames(df1)[13] <- "type"
colnames(df1)[14] <- "datacoll"
colnames(df1)[15] <- "design"
colnames(df1)[16] <- "analysis"
colnames(df1)[18] <- "approach"
colnames(df1)[19] <- "hbc"
colnames(df1)[20] <- "bcw"
colnames(df1)[25] <- "component"
colnames(df1)[27] <- "totpar"
colnames(df1)[28] <- "maleprop"
colnames(df1)[29] <- "species"
colnames(df1)[31] <- "impact"
#Split the 'country' column into multiple rows
dfcountry <- df1 %>%
separate_rows(country, sep = ";")
#Calculate frequency of studies per country
country_counts <- dfcountry %>%
count(country, name = "value")
#Add IS03 codes
country_counts$ISO3 <- countrycode(country_counts$country, "country.name", "iso3c")
#Join data to a map
map_data <- joinCountryData2Map(country_counts,
joinCode = "ISO3",
nameJoinColumn = "ISO3",
verbose = TRUE)
## 28 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
## failedCodes failedCountries
## 217 codes from the map weren't represented in your data
# Colour palette
green_palette <- c("#7bccc4", "#43a2ca", "#0868ac") # Colourblind-friendly shades from ColourBrewer
breaks <- c(0, 1, 2, 3)
# Adjust margins to reduce white space
par(mar = c(1, 0.5, 0.5, 0.5)) # Smaller margins around the map
# Plot the map
mapCountryData(map_data, nameColumnToPlot = "value",
catMethod = breaks,
colourPalette = green_palette, # sets the palette to the vector above
mapTitle = "", # to keep the title blank
addLegend = FALSE, # to set a custom legend
missingCountryCol = "lightgrey")
# Define legend labels and colors
legend_labels <- c("1", "2", "3") # Because three frequencies in the dataset
legend_colors <- green_palette # So that the legend colours are consistent with the map
# Add the custom legend
legend("bottom", # Position: bottom
legend = legend_labels, # Calls the legend labels defined above
fill = legend_colors, # Calls the legend colours defined above
title = "Number of Studies", # Legend title
horiz = TRUE, # Legend position is horizontal
cex = 0.8) # Text size
# Reset margins to default for subsequent plots
par(mar = c(5, 4, 4, 2) + 0.1)
# Histogram for the df1 dataset
ggplot(df1, aes(x = year, fill = type)) +
geom_histogram(binwidth = 1,
position = "stack", # Show proportions of each type within each bar
color = "lightgrey", # Border color for better visibility
alpha = 1.0) + # Transparency
labs(title = "",
x = "Year of Publication",
y = "Number of Studies") + # Change y-axis label to indicate proportion
scale_x_continuous(breaks = seq(min(df1$year), max(df1$year), by = 1)) + # Show every year
theme_classic() +
theme(panel.grid.major = element_blank(), # Remove major gridlines
panel.grid.minor = element_blank(), # Remove minor gridlines
legend.position = c(0.15, 0.8)) +
scale_fill_manual(values = c("Quantitative" = "#8da0cb",
"Qualitative" = "#66c2a5",
"Mixed-Methods" = "#fc8d62"),
name = "Data Type") # Custom colors and legend title
#summarise overall methodology
table(df1$type)
##
## Mixed-Methods Qualitative Quantitative
## 2 4 15
# summarise theoretical frameworks
table(df1$framework)
##
## ADKAR
## 3
## COM-B
## 3
## Easy, Attractive, Social, Timely (EAST)
## 1
## Extended Parallel Process Model
## 1
## Self-Determination Theory; Transtheoretical Model
## 1
## Theory of Planned Behaviour
## 11
## Transtheoretical Model of Change
## 1
# data collection methodology
#Split the 'country' column into multiple rows
dfdata <- df1 %>%
separate_rows(datacoll, sep = "; ")
#Calculate frequency of each data collection type
datacoll_counts <- dfdata %>%
count(datacoll, name = "value")
print(datacoll_counts)
## # A tibble: 3 × 2
## datacoll value
## <chr> <int>
## 1 Focus group 2
## 2 Interview 4
## 3 Survey 17
#Calculate frequency of each type of study design
design_counts <- df1 %>%
count(design, name = "value")
print(design_counts)
## # A tibble: 5 × 2
## design value
## <chr> <int>
## 1 Control 1
## 2 Cross-sectional 12
## 3 Longitudinal 6
## 4 Longitudinal control 1
## 5 Randomised controlled trial 1
# analytical methodology
#Split the 'analysis' column into multiple rows
dfanalysis <- df1 %>%
separate_rows(analysis, sep = "; ")
#Calculate frequency of each data collection type
analysis_counts <- dfanalysis %>%
count(analysis, name = "value")
# Sort the data frame in descending order of frequency
sorted_analysis_counts <- analysis_counts %>%
arrange(desc(value))
print(sorted_analysis_counts)
## # A tibble: 14 × 2
## analysis value
## <chr> <int>
## 1 Descriptive 8
## 2 Non-Parametric Tests 7
## 3 Logistic Regression 5
## 4 Thematic Analysis (Deductive) 5
## 5 Linear Regression 4
## 6 Structural Equation Modelling 3
## 7 t-tests 2
## 8 Confirmatory Factor Analysis 1
## 9 Contingent Valuation Method 1
## 10 Interpretative Phenomenological Analysis 1
## 11 Linear mixed model 1
## 12 Paired t-tests 1
## 13 Pearson's correlation, ANOVA 1
## 14 Thematic Analysis (Inductive) 1
# Summarise sample sizes
df1$totpar <- as.numeric(df1$totpar) # Thought i needed this but I don't as already numeric
summary(df1$totpar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23.0 82.0 155.0 215.1 230.0 928.0
sd_totpar <- sd(df1$totpar) #standard deviation of sample sizes
print(sd_totpar)
## [1] 225.3558
# Convert 'maleprop' to numeric, replacing 'na' with NA
df1$maleprop <- as.numeric(ifelse(df1$maleprop == "na", NA, df1$maleprop))
summary(df1$maleprop) #create a summary of this variable
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.5700 0.7550 0.7975 0.7946 0.8575 1.0000 7
sd_maleprop <- sd(df1$maleprop, na.rm = TRUE) # Standard deviation, excluding NAs
print(sd_maleprop)
## [1] 0.1197829
#calculate weighted average of male participants across all studies
weighted_avg <- sum(df1$totpar * df1$maleprop, na.rm = TRUE) / sum(df1$totpar, na.rm = TRUE)
# Print the result
cat("Weighted Average:", weighted_avg, "\n")
## Weighted Average: 0.5746802
# Replace all variations of dairy* with "dairy"
df1 <- df1 %>%
mutate(species = str_replace(species, "Dairy.*", "Dairy"))
# Replace two variations of broilers with "Broilers"
df1 <- df1 %>%
mutate(species = str_replace(species, "Broiler poultry|Conventional broiler farms", "Broilers"))
species_counts <- df1 %>%
count(species, name = "value")
print(species_counts)
## # A tibble: 10 × 2
## species value
## <chr> <int>
## 1 35 broiler producers, 22 layer producers, 24 breeders, 19 turkey produ… 1
## 2 Broilers 2
## 3 Cattle 5
## 4 Cattle, other livestock 1
## 5 Cattle, sheep, goats, deer, bison, horses, buffalo 1
## 6 Dairy 7
## 7 Pig 1
## 8 Sheep breeding + other livestock species. 1
## 9 Varied - mostly plant breeding in combination with pig or beef. Some i… 1
## 10 poultry 1
approach_counts <- df1 %>%
count(approach, name = "value")
print(approach_counts)
## # A tibble: 2 × 2
## approach value
## <chr> <int>
## 1 Delivered 15
## 2 Designed 6
# Count the number of assessed (i.e. delivered) interventions that are based on HBC science
delivered_yes_count <- df1 %>%
filter(approach == "Delivered" & hbc == "Yes") %>%
summarise(count = n())
# Print the result
print(delivered_yes_count)
## # A tibble: 1 × 1
## count
## <int>
## 1 5
#Split the 'analysis' column into multiple rows
dfBCW <- df1 %>%
separate_rows(bcw, sep = "; ")
#Calculate frequency of each data collection type
BCW_counts <- dfBCW %>%
count(bcw, name = "value")
# Sort the data frame in descending order of frequency
sorted_BCW_counts <- BCW_counts %>%
arrange(desc(value))
print(sorted_BCW_counts)
## # A tibble: 9 × 2
## bcw value
## <chr> <int>
## 1 Education 11
## 2 Enablement 11
## 3 Incentivisation 5
## 4 Modelling 5
## 5 Restrictions 4
## 6 Training 4
## 7 Persuasion 3
## 8 Coercion 2
## 9 Environmental restructuring 2
component_counts <- df1 %>%
count(component, name = "value")
print(component_counts)
## # A tibble: 5 × 2
## component value
## <chr> <int>
## 1 Animal 6
## 2 Animal; Human 4
## 3 Animal; One Health 1
## 4 Human 4
## 5 Planet 6
# Figure out the frequency of different outcomes for delivered/assessed interventions
impact_count <- df1 %>%
filter(approach == "Delivered") %>%
count(impact, name = "value")
# Print the result
print(impact_count)
## # A tibble: 4 × 2
## impact value
## <chr> <int>
## 1 Negative 1
## 2 Null 3
## 3 Positive 10
## 4 n/a 1
# Figure out the frequency of different outcomes for delivered/assessed interventions
impactHBC_count <- df1 %>%
filter(approach == "Delivered" & hbc == "Yes") %>%
count(impact, name = "value")
# Print the result
print(impactHBC_count)
## # A tibble: 2 × 2
## impact value
## <chr> <int>
## 1 Null 1
## 2 Positive 4