Pulmonary Function in Wind Instrumentalists vs. Controls

Author

Sarah Morris

Code
# Load necessary libraries
library(metafor)
library(dplyr)
library(ggplot2)

# Read the data
spiro_data <- read.csv("../data/spiro_data_01.04.25.csv")

# Clean the data: select required columns and remove rows with missing values
spiro_data_clean <- spiro_data %>% 
  select(studyID, ESID, author, year, PFT, n_exp, mn_exp, std_exp, n_ctl, mn_ctl, std_ctl) %>% 
  filter(!is.na(std_exp) & !is.na(std_ctl) &
         !is.na(n_exp) & !is.na(mn_exp) &
         !is.na(n_ctl) & !is.na(mn_ctl))

# Convert ESID to factor and sort by ESID
spiro_data_clean$ESID <- as.factor(spiro_data_clean$ESID)
spiro_data_clean <- spiro_data_clean %>% arrange(ESID)

# Calculate effect sizes (Hedges' g)
es <- escalc(measure = "SMD", 
             n1i = spiro_data_clean$n_exp, 
             m1i = spiro_data_clean$mn_exp, 
             sd1i = spiro_data_clean$std_exp,
             n2i = spiro_data_clean$n_ctl, 
             m2i = spiro_data_clean$mn_ctl, 
             sd2i = spiro_data_clean$std_ctl,
             data = spiro_data_clean)

# Create custom labels (adding outcome from PFT)
es$slab <- paste(spiro_data_clean$author, spiro_data_clean$year, "\nOutcome:", spiro_data_clean$PFT)

# Run the multi-level meta-analysis with ESID nested within studyID 
res_mv <- rma.mv(yi, vi, random = ~ 1 | studyID/ESID, data = es, method = "REML")

# Print summary of meta-analysis
summary_result <- summary(res_mv)
print(summary_result)

Multivariate Meta-Analysis Model (k = 111; method: REML)

   logLik   Deviance        AIC        BIC       AICc   
-389.3753   778.7505   784.7505   792.8520   784.9769   

Variance Components:

            estim    sqrt  nlvls  fixed        factor 
sigma^2.1  0.2744  0.5238     20     no       studyID 
sigma^2.2  0.4620  0.6797     35     no  studyID/ESID 

Test for Heterogeneity:
Q(df = 110) = 1916.9697, p-val < .0001

Model Results:

estimate      se    zval    pval    ci.lb   ci.ub    
  0.2116  0.1715  1.2340  0.2172  -0.1245  0.5477    

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Code
# Determine custom row positions with gaps between ESID groups
# and add space for subtitles and summary statistics
grp_counts <- table(spiro_data_clean$ESID)
grp_names <- names(grp_counts)
n_groups <- length(grp_counts)

# We need extra space:
# - 2 rows at the top for padding (extra space under column labels)
# - 2 rows for each group (title + summary)
# - 1 gap row between each group
extra_top_padding <- 2  # Increased from 1 to 2
extra_rows_per_group <- 2  # 1 for title + 1 for summary
total_extra_rows <- extra_top_padding + (extra_rows_per_group * n_groups) + (n_groups - 1)
total_rows <- sum(grp_counts) + total_extra_rows

# Define ESID group titles
esid_titles <- c(
  "1" = "Expiratory Flow Outcomes",
  "2" = "Lung Capacity Outcomes",
  "3" = "Cardiopulmonary Outcomes",
  "4" = "Ventilatory Outcomes"
)

# Calculate row positions for each study, title, and summary
row_positions <- numeric(nrow(es))
title_positions <- numeric(n_groups)
summary_positions <- numeric(n_groups)
gap_positions <- numeric(n_groups - 1)

current_row <- extra_top_padding + 1  # Start after top padding

for (i in 1:n_groups) {
  # Position for group title
  title_positions[i] <- current_row
  current_row <- current_row + 1
  
  # Position for group summary (right after the title)
  summary_positions[i] <- current_row
  current_row <- current_row + 1
  
  # Positions for studies in this group
  group_indices <- which(spiro_data_clean$ESID == grp_names[i])
  group_size <- length(group_indices)
  row_positions[group_indices] <- current_row:(current_row + group_size - 1)
  current_row <- current_row + group_size
  
  # Add gap after each group except the last
  if (i < n_groups) {
    gap_positions[i] <- current_row - 0.5
    current_row <- current_row + 1
  }
}

# Calculate dynamic height based on number of rows
row_height_inches <- 0.4
dynamic_height <- total_rows * row_height_inches + 3  # Add 3 inches for margins and title

# Calculate group-specific meta-analysis results
group_results <- list()
for (i in 1:n_groups) {
  group_indices <- which(spiro_data_clean$ESID == grp_names[i])
  if (length(group_indices) > 1) {
    # Only run meta-analysis if there's more than one study in the group
    group_data <- es[group_indices, ]
    group_res <- try(rma.mv(yi, vi, random = ~ 1 | studyID, data = group_data, method = "REML"), silent = TRUE)
    
    if (!inherits(group_res, "try-error")) {
      group_results[[i]] <- group_res
    } else {
      # Fallback to simpler model if the multilevel model fails
      group_res <- try(rma(yi, vi, data = group_data, method = "REML"), silent = TRUE)
      if (!inherits(group_res, "try-error")) {
        group_results[[i]] <- group_res
      } else {
        group_results[[i]] <- NULL
      }
    }
  } else if (length(group_indices) == 1) {
    # For single study, just store the effect size and variance
    group_results[[i]] <- list(
      b = es$yi[group_indices],
      se = sqrt(es$vi[group_indices]),
      ci.lb = es$yi[group_indices] - 1.96 * sqrt(es$vi[group_indices]),
      ci.ub = es$yi[group_indices] + 1.96 * sqrt(es$vi[group_indices])
    )
  } else {
    group_results[[i]] <- NULL
  }
}

# Print group-specific results
for (i in 1:n_groups) {
  if (!is.null(group_results[[i]])) {
    if (is.list(group_results[[i]]) && !is.null(group_results[[i]]$b)) {
      cat("ESID", grp_names[i], ":", esid_titles[grp_names[i]], "\
")
      est <- sprintf("%.3f", group_results[[i]]$b)
      ci_l <- sprintf("%.3f", group_results[[i]]$ci.lb)
      ci_u <- sprintf("%.3f", group_results[[i]]$ci.ub)
      cat("Group Summary: ", est, " [", ci_l, ", ", ci_u, "]\
\
")
    }
  }
}
ESID 1 : Expiratory Flow Outcomes 
Group Summary:  0.059  [ -0.198 ,  0.316 ]

ESID 2 : Lung Capacity Outcomes 
Group Summary:  0.017  [ -0.658 ,  0.691 ]

ESID 3 : Cardiopulmonary Outcomes 
Group Summary:  0.251  [ -0.370 ,  0.871 ]

ESID 4 : Ventilatory Outcomes 
Group Summary:  0.769  [ -0.289 ,  1.827 ]
Code
# Extract meta-analysis statistics for annotation
mv_sum <- summary(res_mv)
overall_est <- sprintf("%.3f", mv_sum$b)
overall_se <- sprintf("%.3f", mv_sum$se)
ci_lb <- sprintf("%.3f", mv_sum$ci.lb)
ci_ub <- sprintf("%.3f", mv_sum$ci.ub)
tau2 <- sprintf("%.3f", sum(mv_sum$sigma2))
# I² calculation - this is an approximation for multilevel models
I2 <- round(100 * sum(mv_sum$sigma2) / (sum(mv_sum$sigma2) + mean(mv_sum$vi)), 1)

# Compose the meta-analysis text annotation (centered)
meta_text <- paste0("Overall Estimate: ", overall_est, " (SE=", overall_se, 
                    ", 95% CI: [", ci_lb, ", ", ci_ub, "])     ",
                    "Tau² = ", tau2, ", I² = ", I2, "%")

# Print overall meta-analysis statistics
cat("Overall Meta-analysis Statistics:\
")
Overall Meta-analysis Statistics:
Code
cat(meta_text, "\n\n")
Overall Estimate: 0.212 (SE=0.171, 95% CI: [-0.124, 0.548])     Tau² = 0.736, I² = 91.2% 
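
For reference, a more commonly used approximation of I² for multilevel models, based on the "typical" sampling variance of Higgins and Thompson, can be computed directly from the fitted model. The sketch below is an alternative to the mean(vi) shortcut above; I2_alt is an illustrative name.

Code
# Alternative multilevel I² approximation (sketch), Higgins & Thompson style
W <- diag(1 / es$vi)
X <- model.matrix(res_mv)
P <- W - W %*% X %*% solve(t(X) %*% W %*% X) %*% t(X) %*% W
I2_alt <- 100 * sum(res_mv$sigma2) /
  (sum(res_mv$sigma2) + (res_mv$k - res_mv$p) / sum(diag(P)))
round(I2_alt, 1)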
Code
# Set up the plotting area with adjusted margins
oldpar <- par(mar = c(6, 4, 2, 1))  # Increase bottom margin for meta text

# Create the forest plot with dynamic sizing
# First save as PDF
pdf("forest_plot_final.pdf", width = 10, height = dynamic_height)

# Create the basic forest plot
forest(res_mv,
       slab = es$slab,
       xlab = "Effect Size (Hedges' g)",
       main = "Forest Plot for Multi-level Meta-analysis\
(with ESID grouping and PFT outcome)",
       cex = 0.8,           # Text size for study labels
       cex.lab = 1.0,       # Label size
       cex.axis = 0.9,      # Axis text size
       rows = row_positions,
       ylim = c(0, total_rows + 1)
)

# Add horizontal lines to separate ESID groups
if(length(gap_positions) > 0) {
  abline(h = gap_positions, lty = 2, col = "gray")
}

# Add ESID group titles and summaries
for (i in 1:n_groups) {
  # Add group title
  text(-6, title_positions[i], paste("ESID", grp_names[i], ":", esid_titles[grp_names[i]]), 
       cex = 1.1, font = 2, adj = 0)
  
  # Add group summary statistics in red under the title
  if (!is.null(group_results[[i]])) {
    if (is.list(group_results[[i]]) && !is.null(group_results[[i]]$b)) {
      est <- sprintf("%.3f", group_results[[i]]$b)
      ci_l <- sprintf("%.3f", group_results[[i]]$ci.lb)
      ci_u <- sprintf("%.3f", group_results[[i]]$ci.ub)
      summary_text <- paste0("Group Summary: ", est, " [", ci_l, ", ", ci_u, "]")
      text(-6, summary_positions[i], summary_text, col = "red", cex = 0.9, adj = 0)
    }
  }
}

# Add meta-analysis statistics text at the bottom (centered)
mtext(meta_text, side = 1, line = 4, cex = 0.8, adj = 0.5)

dev.off()
png 
  2 
Code
# Also save as PNG for easier viewing
png("forest_plot_final2.png", width = 10*150, height = dynamic_height*150, res = 150)

# Create the basic forest plot
forest(res_mv,
       slab = es$slab,
       xlab = "Effect Size (Hedges' g)",
       main = "Forest Plot for Multi-level Meta-analysis\
(with ESID grouping and PFT outcome)",
       cex = 0.8,           # Text size for study labels
       cex.lab = 1.0,       # Label size
       cex.axis = 0.9,      # Axis text size
       rows = row_positions,
       ylim = c(0, total_rows + 1)
)

# Add horizontal lines to separate ESID groups
if(length(gap_positions) > 0) {
  abline(h = gap_positions, lty = 2, col = "gray")
}

# Add ESID group titles and summaries
for (i in 1:n_groups) {
  # Add group title
  text(-6, title_positions[i], paste("ESID", grp_names[i], ":", esid_titles[grp_names[i]]), 
       cex = 1.1, font = 2, adj = 0)
  
  # Add group summary statistics in red under the title
  if (!is.null(group_results[[i]])) {
    if (is.list(group_results[[i]]) && !is.null(group_results[[i]]$b)) {
      est <- sprintf("%.3f", group_results[[i]]$b)
      ci_l <- sprintf("%.3f", group_results[[i]]$ci.lb)
      ci_u <- sprintf("%.3f", group_results[[i]]$ci.ub)
      summary_text <- paste0("Group Summary: ", est, " [", ci_l, ", ", ci_u, "]")
      text(-6, summary_positions[i], summary_text, col = "red", cex = 0.9, adj = 0)
    }
  }
}

# Add meta-analysis statistics text at the bottom (centered)
mtext(meta_text, side = 1, line = 4, cex = 0.8, adj = 0.5)

dev.off()
png 
  2 
Code
# Create the funnel plot
pdf("funnel_plot_final.pdf", width = 8, height = 8)
funnel(res_mv, 
       main = "Funnel Plot for Publication Bias Assessment", 
       cex = 1.2, 
       cex.axis = 1.2)
dev.off()
png 
  2 
Code
png("funnel_plot_final.png", width = 8*150, height = 8*150, res = 150)
funnel(res_mv, 
       main = "Funnel Plot for Publication Bias Assessment", 
       cex = 1.2, 
       cex.axis = 1.2)
dev.off()
png 
  2 
Code
# Display the forest plot in the R graphics device
# Create the basic forest plot
forest(res_mv,
       slab = es$slab,
       xlab = "Effect Size (Hedges' g)",
       main = "Forest Plot for Multi-level Meta-analysis\
(with ESID grouping and PFT outcome)",
       cex = 0.8,           # Text size for study labels
       cex.lab = 1.0,       # Label size
       cex.axis = 0.9,      # Axis text size
       rows = row_positions,
       ylim = c(0, total_rows + 1)
)

# Add horizontal lines to separate ESID groups
if(length(gap_positions) > 0) {
  abline(h = gap_positions, lty = 2, col = "gray")
}

# Add ESID group titles and summaries
for (i in 1:n_groups) {
  # Add group title
  text(-6, title_positions[i], paste("ESID", grp_names[i], ":", esid_titles[grp_names[i]]), 
       cex = 1.1, font = 2, adj = 0)
  
  # Add group summary statistics in red under the title
  if (!is.null(group_results[[i]])) {
    if (is.list(group_results[[i]]) && !is.null(group_results[[i]]$b)) {
      est <- sprintf("%.3f", group_results[[i]]$b)
      ci_l <- sprintf("%.3f", group_results[[i]]$ci.lb)
      ci_u <- sprintf("%.3f", group_results[[i]]$ci.ub)
      summary_text <- paste0("Group Summary: ", est, " [", ci_l, ", ", ci_u, "]")
      text(-6, summary_positions[i], summary_text, col = "red", cex = 0.9, adj = 0)
    }
  }
}

# Add meta-analysis statistics text at the bottom (centered)
mtext(meta_text, side = 1, line = 4, cex = 0.8, adj = 0.5)

Code
# Display the funnel plot in the R graphics device
funnel(res_mv, 
       main = "Funnel Plot for Publication Bias Assessment", 
       cex = 1.2, 
       cex.axis = 1.2)

Code
# Restore original graphical parameters
par(oldpar)

# Print confirmation of saved files
print("Final files saved:")
[1] "Final files saved:"
Code
print("1. forest_plot_final.pdf")
[1] "1. forest_plot_final.pdf"
Code
print("2. forest_plot_final.png")
[1] "2. forest_plot_final.png"
Code
print("3. funnel_plot_final.pdf")
[1] "3. funnel_plot_final.pdf"
Code
print("4. funnel_plot_final.png")
[1] "4. funnel_plot_final.png"

1 Overview of Analysis Structure

This output presents the results of a multilevel meta-analysis of 111 effect sizes (k = 111) nested within 20 studies. The analysis employed Restricted Maximum Likelihood (REML) estimation, which is appropriate for estimating variance components in multilevel models.

Assumptions

  • REML provides less biased estimates of variance components than ML

  • Underlying random effects approximately follow normal distributions

  • Sample sizes are sufficient for asymptotic properties of REML to hold

Significance

REML generally produces better estimates of variance components in multilevel models, especially with smaller numbers of higher-level units (in this case, 20 studies). This leads to more accurate standard errors and confidence intervals for the overall effect.
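
As a quick sensitivity check, the same model can be refit with maximum likelihood (ML) and the variance components compared against the REML estimates. This is only a sketch reusing the res_mv specification above; res_ml is an illustrative object name.

Code
# Sensitivity check (sketch): refit with ML and compare variance components
res_ml <- rma.mv(yi, vi, random = ~ 1 | studyID/ESID, data = es, method = "ML")
round(cbind(REML = res_mv$sigma2, ML = res_ml$sigma2), 4)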

2 Model Fit Statistics

The output provides several model fit indices:

  • Log-likelihood: -389.3753

  • Deviance: 778.7505

  • AIC (Akaike Information Criterion): 784.7505

  • BIC (Bayesian Information Criterion): 792.8520

  • AICc (Corrected AIC): 784.9769

These statistics are useful for comparing this model with alternative specifications if available, but on their own, they primarily serve as baseline values for model fit.
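
One natural comparison, sketched below, is against a reduced model that drops the within-study (ESID) level; anova() then reports a likelihood ratio test alongside the fit statistics of both models. res_study_only is an illustrative object name.

Code
# Sketch: is the within-study (ESID) level needed? Compare against a reduced model.
res_study_only <- rma.mv(yi, vi, random = ~ 1 | studyID, data = es, method = "REML")
anova(res_mv, res_study_only)  # likelihood ratio test plus AIC/BIC for both fits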

3 Variance Components Analysis

The model identifies two levels of nested random effects:

  1. Level 1 (sigma^2.1): Study-level variance

    • Estimated variance: 0.2744

    • Standard deviation: 0.5238

    • Number of studies: 20

  2. Level 2 (sigma^2.2): Effect size-level variance within studies

    • Estimated variance: 0.4620

    • Standard deviation: 0.6797

    • Number of unique study-by-outcome (studyID/ESID) combinations: 35

This indicates substantial variability both between different studies (Level 1) and between different effect sizes within the same study (Level 2). The larger variance component at Level 2 (0.4620 > 0.2744) suggests that there is more heterogeneity between effect sizes within studies than between different studies.

Purpose

Two levels of variance were modeled:

  1. Study-level variance (σ²₁ = 0.2744)

  2. Effect-size-level variance (σ²₂ = 0.4620)

Assumptions

  • Variability exists both between studies and within studies

  • These sources of variability can be separated and quantified

  • Variances are constant within each level

Significance

The analysis reveals that “the larger variance component at Level 2 (0.4620 > 0.2744) suggests that there is more heterogeneity between effect sizes within studies than between different studies.” This is crucial information suggesting that differences in outcomes or methods within studies create more variability than differences between separate research teams or study designs.
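
To make this split concrete, each variance component can be expressed as a share of the total heterogeneity estimated by the model. A minimal sketch (total_het is an illustrative name):

Code
# Share of total heterogeneity attributable to each level
total_het <- sum(res_mv$sigma2)
round(100 * res_mv$sigma2 / total_het, 1)
# With the estimates above: roughly 37% between studies, 63% within studies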

4 Heterogeneity Assessment

Purpose

The Q-test evaluates whether the observed variability in effect sizes exceeds what would be expected from sampling error alone.

Assumptions

  • Under the null hypothesis of homogeneity, Q follows a chi-square distribution with k-1 degrees of freedom

  • Effect sizes are normally distributed around the true effect

  • Sampling variances are known (not estimated)

Result

The Q-test for heterogeneity is highly significant:

  • Q(df = 110) = 1916.97, p < 0.0001

This indicates substantial heterogeneity in effect sizes across the dataset. The very large Q statistic (1916.97) relative to its degrees of freedom (110) suggests that the variability among effect sizes is far greater than what would be expected by sampling error alone. This justifies the use of a multilevel model that can account for this heterogeneity.
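
For reference, the Q statistic and p-value reported above are stored in the fitted model object and can be retrieved directly (a minimal sketch):

Code
# Q-test components stored on the rma.mv object
res_mv$QE    # test statistic (1916.97 above)
res_mv$QEp   # corresponding p-value (< .0001 above)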

5 Overall Effect Size

The overall pooled effect size estimate is:

  • Estimate: 0.2116

  • Standard Error: 0.1715

  • z-value: 1.2340

  • p-value: 0.2172

  • 95% Confidence Interval: [-0.1245, 0.5477]

This effect size is positive but not statistically significant (p = 0.2172), with a confidence interval that includes zero. The magnitude of the effect (0.2116) would typically be considered small to medium according to conventional benchmarks for standardized mean differences (the effect sizes here are Hedges' g).
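
Given the heterogeneity discussed next, a 95% prediction interval usefully complements this confidence interval by describing the range of true effects expected across comparable settings. A minimal sketch using the fitted model:

Code
# pi.lb / pi.ub in the output are the bounds of the 95% prediction interval
predict(res_mv, digits = 3)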

6 Interpretation and Implications

  1. Effect Size Magnitude and Significance The estimated overall effect of 0.2116 suggests a small to medium positive effect, but it is not statistically significant as the confidence interval includes zero. This indicates that, across all studies, we cannot confidently conclude that there is a non-zero effect.

  2. Heterogeneity The significant Q-test and substantial variance components indicate considerable heterogeneity both between and within studies. This suggests the presence of moderating variables that might explain this variability. In some contexts or under certain conditions, the effect might be larger and significant, while in others it might be negligible or even negative.

  3. Hierarchical Structure The multilevel structure reveals that the 111 effect sizes were derived from 20 different studies, grouped into 35 unique study-by-outcome (ESID) combinations. This nesting structure is important to account for, as effect sizes from the same study are likely to be more similar to each other than to effect sizes from different studies.

  4. Research Implications The substantial heterogeneity suggests the need for moderator analyses to identify factors that influence the magnitude of the effect (a sketch of such a model follows this list). These moderators could include:

    • Study characteristics (e.g., design, quality, sample size)

    • Participant characteristics (e.g., age, gender, clinical status)

    • Intervention or exposure characteristics

    • Outcome measurement methods

  5. Practical Implications The non-significant overall effect coupled with high heterogeneity suggests that the effectiveness of the intervention or the strength of the relationship being studied may be context-dependent. Practitioners should be cautious about generalizing these findings and should consider potential moderating factors relevant to their specific context.

  6. Methodological Considerations The multilevel approach appropriately accounts for the dependency structure in the data, providing a more accurate estimate of the overall effect and its uncertainty compared to traditional meta-analytic methods that assume independence of effect sizes.
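
A moderator (meta-regression) model of the kind suggested in point 4 can be specified by adding a mods term to the same multilevel structure. This is only a sketch: it reuses the PFT column already present in the data as an illustrative moderator, and res_mod is an assumed object name.

Code
# Illustrative meta-regression: does the PFT outcome type explain heterogeneity?
res_mod <- rma.mv(yi, vi, mods = ~ factor(PFT),
                  random = ~ 1 | studyID/ESID, data = es, method = "REML")
summary(res_mod)  # QM is the omnibus test of the moderator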

7 Conclusion

This multilevel meta-analysis reveals a small to medium positive effect (0.2116) that is not statistically significant. The high heterogeneity between and within studies suggests that the effect varies considerably across different contexts and conditions. Further investigation into potential moderators would be valuable for understanding when and for whom the effect is most pronounced. The multilevel modeling approach appropriately accounts for the complex dependency structure in the data, enhancing the validity of the findings.