Meta Assignment 401

The Sample Code

library(metafor)

Loading required package: Matrix

Loading required package: metadat

Loading required package: numDeriv


Loading the 'metafor' package (version 4.6-0). For an
introduction to the package please type: help(metafor)

library(ggplot2)

# import data file
dat <- read.csv("/Users/XingYu/Desktop/RStudio/Meta-analysis/data_demo.csv")
dat$case <- 1:13 # add a new column named "case"

# meta-analysis with two random effects - author.year and case, as some studies may have more than one cases
es <- rma.mv(
  yi = lnR,
  V = var,
  random = ~ 1 | Author.year / case,
  data = dat,
  method = "REML"
)

# Default forest plot to show all studies
meta::forest(es)

### Create subgroups of data for each taxa
es.in <- rma.mv(
  yi = lnR,
  V = var,
  random = ~ 1 | Author.year / case,
  data = dat[which(dat$organisms == "invertebrates"),],
  method = "REML"
)

es.bi <- rma.mv(
  yi = lnR,
  V = var,
  random = ~ 1 | Author.year / case,
  data = dat[which(dat$organisms == "bird"),],
  method = "REML"
)

es.pl <- rma.mv(
  yi = lnR,
  V = var,
  random = ~ 1 | Author.year / case,
  data = dat[which(dat$organisms == "plant"),],
  method = "REML"
)

es.am <- rma.mv(
  yi = lnR,
  V = var,
  random = ~ 1 | Author.year / case,
  data = dat[which(dat$organisms == "amphibians"),],
  method = "REML"
)

# Meta Regression, the impact of duration on effect size
mr <- rma.mv(
  yi = lnR,
  V = var,
  mods = ~ duration,
  random = ~ 1 | Author.year / case,
  data = dat,
  method="REML"
)

### forestplot to display the impact of categorical variable (taxa) on effect size
e.s <- data.frame(category=c("Amphibian","Bird","Invertebrate","Plant","Overall Mean"),
                  mean.effect.size=c(es.am$b,es.bi$b,es.in$b,es.pl$b,es$b),
                  ci.lb=c(es.am$ci.lb,es.bi$ci.lb,es.in$ci.lb,es.pl$ci.lb,es$ci.lb),
                  ci.ub=c(es.am$ci.ub,es.bi$ci.ub,es.in$ci.ub,es.pl$ci.ub,es$ci.ub))

e.s$category <- factor(e.s$category,levels=rev(e.s$category))

ggplot(e.s,aes(x=category,y=mean.effect.size,label=mean.effect.size,fill=category))+
  scale_x_discrete()+
  geom_hline(yintercept=0,lty="solid")+ #y=0 solid line
  scale_fill_manual(values=c("pink","black","black","black","black"))+
  geom_errorbar(aes(ymin=ci.lb, ymax=ci.ub),width = 0.2,col="grey")+
  geom_point(size=3,shape=23,stroke = 1.5)+ #use alpha to control how transparent of the points,stroke control the with of the ring of the point
  guides(fill=FALSE, colour=FALSE)+ # do not show legend
  xlab("")+ #x lab and y lab
  ylab("")+
  theme_bw()+
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background=element_rect(fill = "white"))+
  coord_flip()

Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.

### Bubble Plot to display the meta-regression results
ggplot(dat,aes(x=duration,y=lnR,size=log10(1/var),fill=organisms))+
  scale_size_continuous(range=c(0.5,20))+
  geom_hline(yintercept=0,lty="dashed")+ #y=0 dashed line
  geom_vline(xintercept=0, lty="dashed")+ #x=0 dashed line
  geom_point(alpha=I(0.7),shape=21,stroke = 1.2)+#use alpha to control how transparent of the points,stroke control the with of the ring of the point
  geom_abline(slope=mr$b[2],intercept=mr$b[1],lty="dotdash",colour="purple")+ # regreesion line of microbe
  guides(size = FALSE)+ # do not show legend of "size"
  xlab("Duration")+ #x lab and y lab
  ylab("Effect Size")+
  xlim(-0.5,NA)+ # x limitation and y limitation
  ylim(NA,1.5)+#scale_y_continuous(limits=c(-1.5,1.5), breaks=seq(-1.5,1.5,0.5))+
  theme_bw()+
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background=element_rect(fill = "white"))

# https://bookdown.org/MathiasHarrer/Doing_Meta_Analysis_in_R/ for more advanced codes and analysis

Grazing in Forest Meta-analysis

Data Processing

Fill the Missing Data

Second, for studies that did not provide standard deviations or other variability measurements, we used predictive mean matching to impute missing values of standard deviations (Furukawa et al., 2006) with R package “mice.”

# install.packages("mice")
library(mice)


Attaching package: 'mice'

The following object is masked from 'package:stats':

    filter

The following objects are masked from 'package:base':

    cbind, rbind

data_raw <- read.csv("/Users/XingYu/Desktop/RStudio/Meta-analysis/raw_data_grazing_forest.csv")

### Specify the columns to use as predictors for imputing SD_high and SD_low
predictor_columns <- c("Plots_ungrazed", "Plots_grazed", "Mean_high", "SD_high", "Mean_low", "SD_low")

### Set the imputation method for all columns to "" initially, so no columns are imputed by default
method <- make.method(data_raw)
method[] <- ""

### Apply predictive mean matching ("pmm") only to SD_high and SD_low
method["SD_high"] <- "pmm"
method["SD_low"] <- "pmm"

### Create a predictor matrix to specify which columns should be used for imputing SD_high and SD_low
predictorMatrix <- make.predictorMatrix(data_raw)
predictorMatrix[,] <- 0  # Set all columns to 0 initially (meaning they’re not used as predictors)
predictorMatrix["SD_high", predictor_columns] <- 1   # Use only specified columns as predictors for SD_high
predictorMatrix["SD_low", predictor_columns] <- 1    # Use only specified columns as predictors for SD_low

### Apply mice with the specified methods and predictor matrix
imputed_data <- mice(data_raw, method = method, predictorMatrix = predictorMatrix, m = 5, seed = 500)


 iter imp variable
  1   1  SD_high  SD_low
  1   2  SD_high  SD_low
  1   3  SD_high  SD_low
  1   4  SD_high  SD_low
  1   5  SD_high  SD_low
  2   1  SD_high  SD_low
  2   2  SD_high  SD_low
  2   3  SD_high  SD_low
  2   4  SD_high  SD_low
  2   5  SD_high  SD_low
  3   1  SD_high  SD_low
  3   2  SD_high  SD_low
  3   3  SD_high  SD_low
  3   4  SD_high  SD_low
  3   5  SD_high  SD_low
  4   1  SD_high  SD_low
  4   2  SD_high  SD_low
  4   3  SD_high  SD_low
  4   4  SD_high  SD_low
  4   5  SD_high  SD_low
  5   1  SD_high  SD_low
  5   2  SD_high  SD_low
  5   3  SD_high  SD_low
  5   4  SD_high  SD_low
  5   5  SD_high  SD_low

### Complete the data set by filling in imputed values
data_raw_imputed <- complete(imputed_data)

# zero_sd_rows <- data_raw_imputed %>% filter(SD_high == 0 | SD_low == 0)
# print(zero_sd_rows)

Hedges’ d (Effect Sizes)

For each pair of variables from grazed (i) and ungrazed (n) sites, mean values (x) of response variables, Hedges’ d was calculated as

where J is a weighting factor based on the number of replicates (N), calculated as:

and S is the pooled standard deviation based on the standard deviations (s), calculated as:

Ungrazed (n) is low, grazed (i) is high?

It is an issue that the authors didn’t provide the mean the sd of the ungrazed group (controlled). Therefore the following methods I use to calculate the hedges’ d is fault. I can only use the fake “d” to practice the meta-analysis procedure… but it is often that the low value is from the treatment group.

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

### Loading the data
data <- data_raw_imputed

### Function to calculate Hedges' d
calculate_hedges_d <- function(mean_high, sd_high, mean_low, sd_low, n_high, n_low) {
  ## Pooled standard deviation
  pooled_sd <- sqrt(((n_high - 1) * sd_high^2 + (n_low - 1) * sd_low^2) / (n_high + n_low - 2))
  
  ## Correction factor for small sample sizes
  J <- 1 - (3 / (4 * (n_high + n_low - 2) - 1))
  
  ## Hedges' d calculation
  hedges_d <- ((mean_high - mean_low) / pooled_sd) * J
  return(hedges_d)
}

### Applying the function to calculate Hedges' d
data <- data %>%
  rowwise() %>%
  mutate(Hedges_d = calculate_hedges_d(Mean_high, SD_high, Mean_low, SD_low, Plots_grazed, Plots_ungrazed))

### Check the result
# data$Hedges_d

The variance of Hedges’ d

The variance of Hedges’ d was calculated as:

Ungrazed (n) is low, grazed (i) is high?

### Function to calculate the variance of Hedges' d
calculate_var_hedges_d <- function(d, n_high, n_low) {
  var_d <- (n_high + n_low) / (n_high * n_low) + (d^2) / (2 * (n_high + n_low))
  return(var_d)
}

### Applying the function to calculate the variance of Hedges' d
data <- data %>%
  mutate(Var_Hedges_d = calculate_var_hedges_d(Hedges_d, Plots_grazed, Plots_ungrazed))

### Check the result
# data$Var_Hedges_d

### Save the data
# write.csv(data, '/Users/XingYu/Desktop/data_he.csv', row.names = FALSE)

A Test Code

library(metafor)
library(ggplot2)

# Step 1: Filter out rows with NA or infinite values in Hedges_d or Var_Hedges_d
data_filtered <- data %>%
  filter(!is.na(Hedges_d), !is.na(Var_Hedges_d), is.finite(Hedges_d), is.finite(Var_Hedges_d))

# Confirm the number of rows after filtering
cat("Number of rows after filtering:", nrow(data_filtered), "\n")

Number of rows after filtering: 1933

# Step 2: Randomly select 30 rows from the filtered data
data_me <- data_filtered[sample(1:nrow(data_filtered), 30), ]

# Meta-analysis with two random effects - using study identifier (e.g., Citation) and NO as case identifier
es <- rma.mv(
  yi = Hedges_d,
  V = Var_Hedges_d,
  random = ~ 1 | Citation / NO,
  data = data_me,
  method = "REML"
)

# Default forest plot to show all studies
forest(es)

Change the Directions of Negative Impact

First, we coined all the variables, a larger value of which indicates more negative impacts, such as the percentage of trampled ground cover and abortive seed ratio (Eldridge et al., 2016). We multiplied the Hedges’ d values of these variables by −1 to place those values in the same direction as other variables.

Yes, some variables, although with a large value, have negative effect. We should change its directions.

data_he <- data

### Define the list of variables where a larger value indicates a more negative impact
### I don't have time to go through all the "Measured aspect" column and identify negative impacts, I can only give the example code here
negative_impact_aspects <- c("trampled cover", "Abortive seeds")

### Multiply Hedges_d by -1 for rows where Measured.aspect is in negative_impact_aspects
data_he <- data_he %>%
  mutate(Hedges_d = if_else(Measured.aspect %in% negative_impact_aspects, -Hedges_d, Hedges_d))

### Display the transformed data_he to confirm
# print(head(data_he[, c("Measured.aspect", "Hedges_d")]))

### Save the data
# write.csv(data_he, '/Users/XingYu/Desktop/data_full.csv', row.names = FALSE)

Plotting

We used the random-effects model to estimate the effect size and used bootstrap procedure to estimate confidence intervals, which is more appropriate for ecological meta-analysis (Nakagawa & Santos, 2012).

We first tested the overall effect, and then analyzed livestock effects on different taxon groups and ecosystem attributes.

We did further assessment of indicators related to reproduction and forest carbon stock (plant biomass, litter carbon, and soil carbon), as they are critical for forest regeneration and the carbon cycle.

We also classified soil function indicators into hydrologic functions, greenhouse gas emissions, and soil nutrients. Soil is generally a source of CO2 and N2O emission but a sink of CH4 (Tang et al., 2019). We coined the CH4 uptake data to unify the direction of soil impact on greenhouse gas emissions.

Categorical Moderators

meta-analysis on different ways of data grouping.

Impacts on Different Taxon

library(metafor)
library(ggplot2)
library(dplyr)

### Remove rows with NA, -Inf, or Inf in the Hedges_d and Var_Hedges_d columns
data_full <- data_he %>%
  filter(!is.na(Hedges_d) & !is.infinite(Hedges_d) & !is.na(Var_Hedges_d) & !is.infinite(Var_Hedges_d))

### Create a list to store results for each Taxon
results <- list()

### Loop over each Taxon to fit the random-effects model and extract relevant statistics
for (taxon in unique(data_full$Taxon)) {
  ## Subset the data for the current Taxon
  subset_data <- subset(data_full, Taxon == taxon)
  
  ## Check if the subset has any rows left after filtering
  if (nrow(subset_data) > 0) {
    ## Fit the random-effects model using rma.uni
    model <- rma.uni(yi = Hedges_d, vi = Var_Hedges_d, data = subset_data, method = "REML")
    
    ## Store the results in a data frame
    results[[taxon]] <- data.frame(
      Taxon = taxon,
      mean_effect = model$b[1],
      ci_lower = model$ci.lb[1],
      ci_upper = model$ci.ub[1],
      p_value = ifelse(model$pval[1] < 0.05, "p < 0.05", "p > 0.05")
    )
  } else {
    ## If no valid data points, skip this taxon or handle accordingly
    cat("Skipping taxon:", taxon, "due to lack of valid data points.\n")
  }
}

Skipping taxon: NA due to lack of valid data points.

### Combine the results for all Taxon groups into a single data frame
data_taxon <- do.call(rbind, results)

### Drop the Unclassified plants
data_taxon <- data_taxon %>% 
  filter(Taxon != "Unclassified plants")

### Define the desired order and new names for Taxon
new_order <- c("Soil", "Invertebrates", "Amphibians & Reptiles", 
               "Small Mammals", "Medium & Large Mammals", "Birds", "Ground Plants", 
               "Understory Plants", "Trees")

### Create a lookup table to map old names to new names
name_mapping <- c("Soil" = "Soil", 
                  "Invertebrates" = "Invertebrates", 
                  "Amphi_reptiles" = "Amphibians & Reptiles", 
                  "Small mammals" = "Small Mammals", 
                  "M_L mammals" = "Medium & Large Mammals", 
                  "Birds" = "Birds", 
                  "Ground plants" = "Ground Plants", 
                  "Understory plants" = "Understory Plants", 
                  "Trees" = "Trees")

### Apply the new names and order
data_taxon$Taxon <- factor(data_taxon$Taxon, 
                           levels = names(name_mapping), 
                           labels = new_order)

### Plot using ggplot2
ggplot(data_taxon, aes(x = mean_effect, y = Taxon, fill = p_value)) +
  ## Bar plot
  geom_bar(stat = "identity", aes(fill = p_value), color = "black", width = 0.6) +
  ## Add error bar
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height = 0.2, color = "black") +
  scale_fill_manual(values = c("p < 0.05" = "gray", "p > 0.05" = "white")) +
  labs(x = "Hedges' d", y = NULL) +
  theme_test() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  ## Legend setting
  theme(
    legend.position = c(0.9, 0.9), # Position the legend in the top-right corner within the plot
    legend.title = element_blank(), # Remove legend title
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8),
    axis.text.y = element_text(color = "black")
  )

Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
3.5.0.
ℹ Please use the `legend.position.inside` argument of `theme()` instead.

Impacts on Key Ecosystem Attributes

### Define the target attributes
target_attributes <- c("Richness", "Abundance", "Structure", "Function")

### Filter data to include rows with the target attributes in either column
data_at <- data_full %>%
  filter((Key.ecosystem.attributes %in% target_attributes | Abundance.and.richness %in% target_attributes) & Plots_grazed >= 3)

### Create a list to store results for each attribute
results <- list()

### Loop over each unique attribute to fit the random-effects model and extract relevant statistics
for (attribute in target_attributes) {
  ## Subset the data for the current attribute in either column
  subset_data <- subset(data_at, Key.ecosystem.attributes == attribute | Abundance.and.richness == attribute)
  
  ## Check if the subset has any rows left after filtering
  if (nrow(subset_data) > 0) {
    ## Fit the random-effects model using rma.uni
    model <- rma.uni(yi = Hedges_d, vi = Var_Hedges_d, data = subset_data, method = "REML")
    
    ## Store the results in a data frame
    results[[attribute]] <- data.frame(
      Attribute = attribute,
      mean_effect = model$b[1],
      ci_lower = model$ci.lb[1],
      ci_upper = model$ci.ub[1],
      p_value = ifelse(model$pval[1] < 0.05, "p < 0.05", "p > 0.05")
    )
  } else {
    cat("Skipping attribute:", attribute, "due to lack of valid data points.\n")
  }
}

### Combine the results for all attributes into a single data frame
data_attr <- do.call(rbind, results)

### Define the order for the attributes
data_attr$Attribute <- factor(data_attr$Attribute,
                              levels = c("Richness", "Abundance", "Structure", "Function"))

### Plot using ggplot2
ggplot(data_attr, aes(x = Attribute, y = mean_effect, fill = p_value)) +
  ## Plot the bar
  geom_bar(stat = "identity", color = "black", width = 0.6) +
  ## Plot the error bar
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.15, color = "black") +
  scale_fill_manual(values = c("p < 0.05" = "gray", "p > 0.05" = "white")) +
  labs(x = NULL, y = "Hedges' d") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_test() +
  theme(
    legend.position = c(0.9, 0.9),
    legend.title = element_blank(),
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 10),
    axis.text.x = element_text(color = "black", size = 14),
    axis.text.y = element_text(color = "black", size = 10),
    axis.title.y = element_text(size = 14)
  )

Caterpillar Plots (Grazing impacts on Trees)

### Filter data for the specified Taxon
taxon_of_interest <- "Trees"
data_cat <- data_full %>%
  filter(Taxon == taxon_of_interest)

### Fit a random-effects model to get summary statistics
model <- rma(yi = Hedges_d, vi = Var_Hedges_d, data = data_cat, method = "REML")

### Add the predicted effect size and confidence intervals from the model to the data frame
data_cat$pred <- predict(model)$pred
data_cat$ci_lower <- predict(model)$ci.lb
data_cat$ci_upper <- predict(model)$ci.ub

# write.csv(data_cat, '/Users/XingYu/Desktop/data_caterpillar.csv', row.names = FALSE)

### Create the caterpillar plot
ggplot(data_cat, aes(x = Hedges_d, y = reorder(NO, Hedges_d))) +
  geom_point(size = 1) +
  geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), height = 0.2, color = "gray") +  # Add missing "+"
  geom_vline(aes(xintercept = pred), linetype = "dashed", color = "black", size = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "#3C3D37", size = 0.6) +
  xlim(-10, 10) +
  labs(x = "Hedges' d") +
  theme_test() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_blank(), # Hide y-axis text
    axis.ticks.y = element_blank(), # Hide y-axis ticks
    axis.title.y = element_blank() # Hide y-axis title
  )

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Warning: Removed 14 rows containing missing values or values outside the scale range
(`geom_point()`).

Spatial Autocorrelation (didn’t test)

As spatial autocorrelation could impact the results, we first examined whether this issue existed in our study using Moran’s I.

As the result did not showed a significant spatial autocorrelation (p = 0.26), we did not include the spatial correlation structure in our random-effects model.

Publication Bias

We examined the potential publication bias in our dataset in three steps.

We used the funnel plot to show the relationship between standardized mean differences and inverse standard errors (Eldridge et al., 2020). Significant values indicated potential publication bias.
We tested the funnel asymmetry with modified Egger regression (Egger et al., 1997).
The “trim and fill” test was applied to examine asymmetry in funnel plots and impute ‘missing’ effect sizes to make the funnel plot symmetrical (Jennions & Møller, 2002). The results are shown in Appendix S4.