Loading the 'metafor' package (version 4.6-0). For an
introduction to the package please type: help(metafor)
library(ggplot2)# import data filedat <-read.csv("/Users/XingYu/Desktop/RStudio/Meta-analysis/data_demo.csv")dat$case <-1:13# add a new column named "case"# meta-analysis with two random effects - author.year and case, as some studies may have more than one caseses <-rma.mv(yi = lnR,V = var,random =~1| Author.year / case,data = dat,method ="REML")# Default forest plot to show all studiesmeta::forest(es)
### Create subgroups of data for each taxaes.in <-rma.mv(yi = lnR,V = var,random =~1| Author.year / case,data = dat[which(dat$organisms =="invertebrates"),],method ="REML")es.bi <-rma.mv(yi = lnR,V = var,random =~1| Author.year / case,data = dat[which(dat$organisms =="bird"),],method ="REML")es.pl <-rma.mv(yi = lnR,V = var,random =~1| Author.year / case,data = dat[which(dat$organisms =="plant"),],method ="REML")es.am <-rma.mv(yi = lnR,V = var,random =~1| Author.year / case,data = dat[which(dat$organisms =="amphibians"),],method ="REML")# Meta Regression, the impact of duration on effect sizemr <-rma.mv(yi = lnR,V = var,mods =~ duration,random =~1| Author.year / case,data = dat,method="REML")### forestplot to display the impact of categorical variable (taxa) on effect sizee.s <-data.frame(category=c("Amphibian","Bird","Invertebrate","Plant","Overall Mean"),mean.effect.size=c(es.am$b,es.bi$b,es.in$b,es.pl$b,es$b),ci.lb=c(es.am$ci.lb,es.bi$ci.lb,es.in$ci.lb,es.pl$ci.lb,es$ci.lb),ci.ub=c(es.am$ci.ub,es.bi$ci.ub,es.in$ci.ub,es.pl$ci.ub,es$ci.ub))e.s$category <-factor(e.s$category,levels=rev(e.s$category))ggplot(e.s,aes(x=category,y=mean.effect.size,label=mean.effect.size,fill=category))+scale_x_discrete()+geom_hline(yintercept=0,lty="solid")+#y=0 solid linescale_fill_manual(values=c("pink","black","black","black","black"))+geom_errorbar(aes(ymin=ci.lb, ymax=ci.ub),width =0.2,col="grey")+geom_point(size=3,shape=23,stroke =1.5)+#use alpha to control how transparent of the points,stroke control the with of the ring of the pointguides(fill=FALSE, colour=FALSE)+# do not show legendxlab("")+#x lab and y labylab("")+theme_bw()+theme(panel.grid.major =element_blank(), panel.grid.minor =element_blank(),panel.background=element_rect(fill ="white"))+coord_flip()
Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
### Bubble Plot to display the meta-regression resultsggplot(dat,aes(x=duration,y=lnR,size=log10(1/var),fill=organisms))+scale_size_continuous(range=c(0.5,20))+geom_hline(yintercept=0,lty="dashed")+#y=0 dashed linegeom_vline(xintercept=0, lty="dashed")+#x=0 dashed linegeom_point(alpha=I(0.7),shape=21,stroke =1.2)+#use alpha to control how transparent of the points,stroke control the with of the ring of the pointgeom_abline(slope=mr$b[2],intercept=mr$b[1],lty="dotdash",colour="purple")+# regreesion line of microbeguides(size =FALSE)+# do not show legend of "size"xlab("Duration")+#x lab and y labylab("Effect Size")+xlim(-0.5,NA)+# x limitation and y limitationylim(NA,1.5)+#scale_y_continuous(limits=c(-1.5,1.5), breaks=seq(-1.5,1.5,0.5))+theme_bw()+theme(panel.grid.major =element_blank(), panel.grid.minor =element_blank(),panel.background=element_rect(fill ="white"))
# https://bookdown.org/MathiasHarrer/Doing_Meta_Analysis_in_R/ for more advanced codes and analysis
Grazing in Forest Meta-analysis
Data Processing
Fill the Missing Data
Second, for studies that did not provide standard deviations or other variability measurements, we used predictive mean matching to impute missing values of standard deviations (Furukawa et al., 2006) with R package “mice.”
# install.packages("mice")library(mice)
Attaching package: 'mice'
The following object is masked from 'package:stats':
filter
The following objects are masked from 'package:base':
cbind, rbind
data_raw <-read.csv("/Users/XingYu/Desktop/RStudio/Meta-analysis/raw_data_grazing_forest.csv")### Specify the columns to use as predictors for imputing SD_high and SD_lowpredictor_columns <-c("Plots_ungrazed", "Plots_grazed", "Mean_high", "SD_high", "Mean_low", "SD_low")### Set the imputation method for all columns to "" initially, so no columns are imputed by defaultmethod <-make.method(data_raw)method[] <-""### Apply predictive mean matching ("pmm") only to SD_high and SD_lowmethod["SD_high"] <-"pmm"method["SD_low"] <-"pmm"### Create a predictor matrix to specify which columns should be used for imputing SD_high and SD_lowpredictorMatrix <-make.predictorMatrix(data_raw)predictorMatrix[,] <-0# Set all columns to 0 initially (meaning they’re not used as predictors)predictorMatrix["SD_high", predictor_columns] <-1# Use only specified columns as predictors for SD_highpredictorMatrix["SD_low", predictor_columns] <-1# Use only specified columns as predictors for SD_low### Apply mice with the specified methods and predictor matriximputed_data <-mice(data_raw, method = method, predictorMatrix = predictorMatrix, m =5, seed =500)
### Complete the data set by filling in imputed valuesdata_raw_imputed <-complete(imputed_data)# zero_sd_rows <- data_raw_imputed %>% filter(SD_high == 0 | SD_low == 0)# print(zero_sd_rows)
Hedges’ d (Effect Sizes)
For each pair of variables from grazed (i) and ungrazed (n) sites, mean values (x) of response variables, Hedges’ d was calculated as
where J is a weighting factor based on the number of replicates (N), calculated as:
and S is the pooled standard deviation based on the standard deviations (s), calculated as:
Ungrazed (n) is low, grazed (i) is high?
It is an issue that the authors didn’t provide the mean the sd of the ungrazed group (controlled). Therefore the following methods I use to calculate the hedges’ d is fault. I can only use the fake “d” to practice the meta-analysis procedure… but it is often that the low value is from the treatment group.
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
### Loading the datadata <- data_raw_imputed### Function to calculate Hedges' dcalculate_hedges_d <-function(mean_high, sd_high, mean_low, sd_low, n_high, n_low) {## Pooled standard deviation pooled_sd <-sqrt(((n_high -1) * sd_high^2+ (n_low -1) * sd_low^2) / (n_high + n_low -2))## Correction factor for small sample sizes J <-1- (3/ (4* (n_high + n_low -2) -1))## Hedges' d calculation hedges_d <- ((mean_high - mean_low) / pooled_sd) * Jreturn(hedges_d)}### Applying the function to calculate Hedges' ddata <- data %>%rowwise() %>%mutate(Hedges_d =calculate_hedges_d(Mean_high, SD_high, Mean_low, SD_low, Plots_grazed, Plots_ungrazed))### Check the result# data$Hedges_d
The variance of Hedges’ d
The variance of Hedges’ d was calculated as:
Ungrazed (n) is low, grazed (i) is high?
### Function to calculate the variance of Hedges' dcalculate_var_hedges_d <-function(d, n_high, n_low) { var_d <- (n_high + n_low) / (n_high * n_low) + (d^2) / (2* (n_high + n_low))return(var_d)}### Applying the function to calculate the variance of Hedges' ddata <- data %>%mutate(Var_Hedges_d =calculate_var_hedges_d(Hedges_d, Plots_grazed, Plots_ungrazed))### Check the result# data$Var_Hedges_d### Save the data# write.csv(data, '/Users/XingYu/Desktop/data_he.csv', row.names = FALSE)
A Test Code
library(metafor)library(ggplot2)# Step 1: Filter out rows with NA or infinite values in Hedges_d or Var_Hedges_ddata_filtered <- data %>%filter(!is.na(Hedges_d), !is.na(Var_Hedges_d), is.finite(Hedges_d), is.finite(Var_Hedges_d))# Confirm the number of rows after filteringcat("Number of rows after filtering:", nrow(data_filtered), "\n")
Number of rows after filtering: 1933
# Step 2: Randomly select 30 rows from the filtered datadata_me <- data_filtered[sample(1:nrow(data_filtered), 30), ]# Meta-analysis with two random effects - using study identifier (e.g., Citation) and NO as case identifieres <-rma.mv(yi = Hedges_d,V = Var_Hedges_d,random =~1| Citation / NO,data = data_me,method ="REML")# Default forest plot to show all studiesforest(es)
Change the Directions of Negative Impact
First, we coined all the variables, a larger value of which indicates more negative impacts, such as the percentage of trampled ground cover and abortive seed ratio (Eldridge et al., 2016). We multiplied the Hedges’ d values of these variables by −1 to place those values in the same direction as other variables.
Yes, some variables, although with a large value, have negative effect. We should change its directions.
data_he <- data### Define the list of variables where a larger value indicates a more negative impact### I don't have time to go through all the "Measured aspect" column and identify negative impacts, I can only give the example code herenegative_impact_aspects <-c("trampled cover", "Abortive seeds")### Multiply Hedges_d by -1 for rows where Measured.aspect is in negative_impact_aspectsdata_he <- data_he %>%mutate(Hedges_d =if_else(Measured.aspect %in% negative_impact_aspects, -Hedges_d, Hedges_d))### Display the transformed data_he to confirm# print(head(data_he[, c("Measured.aspect", "Hedges_d")]))### Save the data# write.csv(data_he, '/Users/XingYu/Desktop/data_full.csv', row.names = FALSE)
Plotting
We used the random-effects model to estimate the effect size and used bootstrap procedure to estimate confidence intervals, which is more appropriate for ecological meta-analysis (Nakagawa & Santos, 2012).
We first tested the overall effect, and then analyzed livestock effects on different taxon groups and ecosystem attributes.
We did further assessment of indicators related to reproduction and forest carbon stock (plant biomass, litter carbon, and soil carbon), as they are critical for forest regeneration and the carbon cycle.
We also classified soil function indicators into hydrologic functions, greenhouse gas emissions, and soil nutrients. Soil is generally a source of CO2 and N2O emission but a sink of CH4 (Tang et al., 2019). We coined the CH4 uptake data to unify the direction of soil impact on greenhouse gas emissions.
Categorical Moderators
meta-analysis on different ways of data grouping.
Impacts on Different Taxon
library(metafor)library(ggplot2)library(dplyr)### Remove rows with NA, -Inf, or Inf in the Hedges_d and Var_Hedges_d columnsdata_full <- data_he %>%filter(!is.na(Hedges_d) &!is.infinite(Hedges_d) &!is.na(Var_Hedges_d) &!is.infinite(Var_Hedges_d))### Create a list to store results for each Taxonresults <-list()### Loop over each Taxon to fit the random-effects model and extract relevant statisticsfor (taxon inunique(data_full$Taxon)) {## Subset the data for the current Taxon subset_data <-subset(data_full, Taxon == taxon)## Check if the subset has any rows left after filteringif (nrow(subset_data) >0) {## Fit the random-effects model using rma.uni model <-rma.uni(yi = Hedges_d, vi = Var_Hedges_d, data = subset_data, method ="REML")## Store the results in a data frame results[[taxon]] <-data.frame(Taxon = taxon,mean_effect = model$b[1],ci_lower = model$ci.lb[1],ci_upper = model$ci.ub[1],p_value =ifelse(model$pval[1] <0.05, "p < 0.05", "p > 0.05") ) } else {## If no valid data points, skip this taxon or handle accordinglycat("Skipping taxon:", taxon, "due to lack of valid data points.\n") }}
Skipping taxon: NA due to lack of valid data points.
### Combine the results for all Taxon groups into a single data framedata_taxon <-do.call(rbind, results)### Drop the Unclassified plantsdata_taxon <- data_taxon %>%filter(Taxon !="Unclassified plants")### Define the desired order and new names for Taxonnew_order <-c("Soil", "Invertebrates", "Amphibians & Reptiles", "Small Mammals", "Medium & Large Mammals", "Birds", "Ground Plants", "Understory Plants", "Trees")### Create a lookup table to map old names to new namesname_mapping <-c("Soil"="Soil", "Invertebrates"="Invertebrates", "Amphi_reptiles"="Amphibians & Reptiles", "Small mammals"="Small Mammals", "M_L mammals"="Medium & Large Mammals", "Birds"="Birds", "Ground plants"="Ground Plants", "Understory plants"="Understory Plants", "Trees"="Trees")### Apply the new names and orderdata_taxon$Taxon <-factor(data_taxon$Taxon, levels =names(name_mapping), labels = new_order)### Plot using ggplot2ggplot(data_taxon, aes(x = mean_effect, y = Taxon, fill = p_value)) +## Bar plotgeom_bar(stat ="identity", aes(fill = p_value), color ="black", width =0.6) +## Add error bargeom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height =0.2, color ="black") +scale_fill_manual(values =c("p < 0.05"="gray", "p > 0.05"="white")) +labs(x ="Hedges' d", y =NULL) +theme_test() +geom_vline(xintercept =0, linetype ="dashed") +## Legend settingtheme(legend.position =c(0.9, 0.9), # Position the legend in the top-right corner within the plotlegend.title =element_blank(), # Remove legend titlelegend.key.size =unit(0.5, "cm"),legend.text =element_text(size =8),axis.text.y =element_text(color ="black") )
Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.
Impacts on Key Ecosystem Attributes
### Define the target attributestarget_attributes <-c("Richness", "Abundance", "Structure", "Function")### Filter data to include rows with the target attributes in either columndata_at <- data_full %>%filter((Key.ecosystem.attributes %in% target_attributes | Abundance.and.richness %in% target_attributes) & Plots_grazed >=3)### Create a list to store results for each attributeresults <-list()### Loop over each unique attribute to fit the random-effects model and extract relevant statisticsfor (attribute in target_attributes) {## Subset the data for the current attribute in either column subset_data <-subset(data_at, Key.ecosystem.attributes == attribute | Abundance.and.richness == attribute)## Check if the subset has any rows left after filteringif (nrow(subset_data) >0) {## Fit the random-effects model using rma.uni model <-rma.uni(yi = Hedges_d, vi = Var_Hedges_d, data = subset_data, method ="REML")## Store the results in a data frame results[[attribute]] <-data.frame(Attribute = attribute,mean_effect = model$b[1],ci_lower = model$ci.lb[1],ci_upper = model$ci.ub[1],p_value =ifelse(model$pval[1] <0.05, "p < 0.05", "p > 0.05") ) } else {cat("Skipping attribute:", attribute, "due to lack of valid data points.\n") }}### Combine the results for all attributes into a single data framedata_attr <-do.call(rbind, results)### Define the order for the attributesdata_attr$Attribute <-factor(data_attr$Attribute,levels =c("Richness", "Abundance", "Structure", "Function"))### Plot using ggplot2ggplot(data_attr, aes(x = Attribute, y = mean_effect, fill = p_value)) +## Plot the bargeom_bar(stat ="identity", color ="black", width =0.6) +## Plot the error bargeom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width =0.15, color ="black") +scale_fill_manual(values =c("p < 0.05"="gray", "p > 0.05"="white")) +labs(x =NULL, y ="Hedges' d") +geom_hline(yintercept =0, linetype ="dashed") +theme_test() +theme(legend.position =c(0.9, 0.9),legend.title =element_blank(),legend.key.size =unit(0.5, "cm"),legend.text =element_text(size =10),axis.text.x =element_text(color ="black", size =14),axis.text.y =element_text(color ="black", size =10),axis.title.y =element_text(size =14) )
Caterpillar Plots (Grazing impacts on Trees)
### Filter data for the specified Taxontaxon_of_interest <-"Trees"data_cat <- data_full %>%filter(Taxon == taxon_of_interest)### Fit a random-effects model to get summary statisticsmodel <-rma(yi = Hedges_d, vi = Var_Hedges_d, data = data_cat, method ="REML")### Add the predicted effect size and confidence intervals from the model to the data framedata_cat$pred <-predict(model)$preddata_cat$ci_lower <-predict(model)$ci.lbdata_cat$ci_upper <-predict(model)$ci.ub# write.csv(data_cat, '/Users/XingYu/Desktop/data_caterpillar.csv', row.names = FALSE)### Create the caterpillar plotggplot(data_cat, aes(x = Hedges_d, y =reorder(NO, Hedges_d))) +geom_point(size =1) +geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height =0.2, color ="gray") +# Add missing "+"geom_vline(aes(xintercept = pred), linetype ="dashed", color ="black", size =0.8) +geom_vline(xintercept =0, linetype ="dashed", color ="#3C3D37", size =0.6) +xlim(-10, 10) +labs(x ="Hedges' d") +theme_test() +theme(axis.text.x =element_text(size =10),axis.text.y =element_blank(), # Hide y-axis textaxis.ticks.y =element_blank(), # Hide y-axis ticksaxis.title.y =element_blank() # Hide y-axis title )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning: Removed 14 rows containing missing values or values outside the scale range
(`geom_point()`).
Spatial Autocorrelation (didn’t test)
As spatial autocorrelation could impact the results, we first examined whether this issue existed in our study using Moran’s I.
As the result did not showed a significant spatial autocorrelation (p = 0.26), we did not include the spatial correlation structure in our random-effects model.
Publication Bias
We examined the potential publication bias in our dataset in three steps.
We used the funnel plot to show the relationship between standardized mean differences and inverse standard errors (Eldridge et al., 2020). Significant values indicated potential publication bias.
We tested the funnel asymmetry with modified Egger regression (Egger et al., 1997).
The “trim and fill” test was applied to examine asymmetry in funnel plots and impute ‘missing’ effect sizes to make the funnel plot symmetrical (Jennions & Møller, 2002). The results are shown in Appendix S4.