Description

This report presents a the meta-analysis of ideal cardiovascular health (CVH) metrics across African populations using real study data. The analysis synthesizes data from multiple studies to estimate pooled prevalence rates for each component of the American Heart Association’s Life’s Simple 7 (LS7) framework. The report includes data cleaning, descriptive statistics, meta-analytic models, and visualizations. ### Load Required Packages

# Clear environment
rm(list = ls())
# Load necessary packages
library(tidyverse)
library(meta)
library(metafor)
library(ggplot2)
library(janitor)
library(kableExtra)
library(forestplot)
library(patchwork)
library(DT)
library(readxl)
# Set seed for reproducibility
set.seed(123)

Load and Clean Data

# Load the dataset
cvh_data <- read_excel('/Users/jamesoguta/Documents/James Oguta/Manuscripts/Systematic Reviews/LS7 Systematic Review/Data Analysis/LS7 Review Data Analysis/Extracted Data.xlsx', sheet = "Sheet1") %>% 
  clean_names()

# Display basic information
cat("Dataset dimensions:", dim(cvh_data), "\n")
## Dataset dimensions: 19 54
cat("Number of studies:", nrow(cvh_data), "\n")
## Number of studies: 19
cat("Variables:", ncol(cvh_data), "\n")
## Variables: 54
# Display first few rows
head(cvh_data) %>% 
  select(1:10) %>%  # Show first 10 columns for readability
  datatable(options = list(scrollX = TRUE))

Data Cleaning and Processing

# Create clean dataset for analysis
clean_cvh <- cvh_data %>%
  # Select and rename key variables
  select(
    study_id = study_id,
    authors = author_s,
    study_title,
    year = year_of_publication,
    country,
    region = region_west_east_central_southern_northern,
    survey_year,
    study_design,
    sample_size = sample_size_n,
    sample_clean,
    sex_female = sex_distribution_percent_female,
    
    # Prevalence metrics
    diet_prev = prevalence_of_ideal_diet_percent,
    smoking_prev = prevalence_of_ideal_smoking_percent,
    pa_prev = prevalence_of_ideal_physical_activity_percent,
    bmi_prev = prevalence_of_ideal_bmi_percent,
    bp_prev = prevalence_of_ideal_blood_pressure_percent,
    lipids_prev = prevalence_of_ideal_blood_lipids_cholesterol_percent,
    glucose_prev = prevalence_of_ideal_blood_glucose_percent,
    
    # Overall CVH prevalence
    ideal_cvh_prev = prevalence_ideal_cvh_percent,
    intermediate_cvh_prev = prevalence_intermediate_cvh_percent,
    poor_cvh_prev = prevalence_poor_cvh_percent,
    
    # Additional study characteristics
    mean_median_cvh_score = mean_median_cvh_score_sd_iqr,
    risk_of_bias_judgment,
    fidelity_tag = fidelity_tag_original_ls7_adapted_ls7_partial_ls7_le8
  ) %>%
  # Clean and transform variables
  mutate(
    # Convert percentages to proportions and handle character values
    across(ends_with("_prev"), ~ {
      # Convert to character, remove % signs, then to numeric
      clean_val <- as.character(.) %>%
        str_replace_all("%", "") %>%
        str_replace_all("<", "") %>%
        str_replace_all(">", "") %>%
        str_trim()
      
      # Convert to numeric and handle ranges (take first value)
      clean_val <- str_split(clean_val, "-", simplify = TRUE)[,1]
      clean_val <- str_split(clean_val, "–", simplify = TRUE)[,1]
      
      num_val <- as.numeric(clean_val)
      ifelse(!is.na(num_val) & num_val > 1, num_val / 100, num_val)
    }),
    
    # Handle sample size
    sample_size = ifelse(is.na(sample_clean) | sample_clean == "" | sample_clean == "NA", 
                         as.numeric(sample_size), 
                         as.numeric(sample_clean)),
    
    # Clean year variables
    year = as.numeric(year),
    survey_year = as.numeric(survey_year),
    
    # Create study identifier
    study_label = paste0(str_extract(authors, "^[^,]+"), " (", year, ")"),
    
    # Simplify region classification
    region_simple = case_when(
      str_detect(region, "East") ~ "Eastern",
      str_detect(region, "West") ~ "Western", 
      str_detect(region, "South") ~ "Southern",
      str_detect(region, "North") ~ "Northern",
      str_detect(region, "Central") ~ "Central",
      TRUE ~ "Other"
    )
  ) %>%
  # Remove studies with no prevalence data
  filter(!(is.na(diet_prev) & is.na(smoking_prev) & is.na(pa_prev) & 
             is.na(bmi_prev) & is.na(bp_prev) & is.na(lipids_prev) & 
             is.na(glucose_prev) & is.na(ideal_cvh_prev)))

cat("Data cleaning complete. Final dataset:", nrow(clean_cvh), "studies\n")
## Data cleaning complete. Final dataset: 14 studies

Descriptive Analysis

Study Characteristics Summary

cat("\nPerforming descriptive analysis...\n")
## 
## Performing descriptive analysis...
# Summary of study characteristics
study_summary <- clean_cvh %>%
  summarise(
    n_studies = n(),
    total_participants = sum(sample_size, na.rm = TRUE),
    mean_sample_size = mean(sample_size, na.rm = TRUE),
    sd_sample_size = sd(sample_size, na.rm = TRUE),
    year_range = paste(min(year, na.rm = TRUE), max(year, na.rm = TRUE), sep = " - "),
    countries_studied = n_distinct(country),
    regions_represented = n_distinct(region_simple)
  )

kable(study_summary, caption = "Overall Study Characteristics") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Overall Study Characteristics
n_studies total_participants mean_sample_size sd_sample_size year_range countries_studied regions_represented
14 51190 3656.429 2230.284 2018 - 2025 9 3
# Studies by region
region_summary <- clean_cvh %>%
  count(region_simple) %>%
  arrange(desc(n))

cat("\nStudies by region:\n")
## 
## Studies by region:
kable(region_summary, caption = "Studies by Geographic Region") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Studies by Geographic Region
region_simple n
Eastern 5
Southern 5
Western 4
# Studies by study design
design_summary <- clean_cvh %>%
  count(study_design) %>%
  arrange(desc(n))
cat("\nStudies by design:\n")
## 
## Studies by design:
kable(design_summary, caption = "Studies by Design") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Studies by Design
study_design n
Cross-sectional 8
Secondary analysis of WHO STEPS (cross-sectional) 2
Cross-sectional analysis based on data from a prospective cohort study 1
Cross-sectional population surveys 1
Multi-centre cross-sectional (2012–2015) 1
Prospective cohort 1

Prevalence Summary

# Summary statistics for prevalence metrics
prevalence_stats <- clean_cvh %>%
  select(ends_with("_prev")) %>%
  pivot_longer(everything(), names_to = "metric", values_to = "prevalence") %>%
  mutate(metric = str_to_upper(str_remove(metric, "_prev"))) %>%
  group_by(metric) %>%
  summarise(
    n_studies = sum(!is.na(prevalence)),
    mean = mean(prevalence, na.rm = TRUE),
    sd = sd(prevalence, na.rm = TRUE),
    median = median(prevalence, na.rm = TRUE),
    q25 = quantile(prevalence, 0.25, na.rm = TRUE),
    q75 = quantile(prevalence, 0.75, na.rm = TRUE)
  ) %>%
  mutate(across(where(is.numeric), ~round(., 3)))

cat("\nPrevalence statistics:\n")
## 
## Prevalence statistics:
kable(prevalence_stats, caption = "Descriptive Statistics for CVH Metrics") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Descriptive Statistics for CVH Metrics
metric n_studies mean sd median q25 q75
BMI 10 0.740 0.121 0.746 0.691 0.809
BP 10 0.418 0.129 0.396 0.316 0.468
DIET 8 0.129 0.074 0.138 0.099 0.171
GLUCOSE 10 0.884 0.080 0.897 0.828 0.944
IDEAL_CVH 11 0.496 0.226 0.519 0.334 0.638
INTERMEDIATE_CVH 10 0.518 0.229 0.534 0.385 0.700
LIPIDS 10 0.872 0.096 0.889 0.840 0.943
PA 12 0.798 0.209 0.856 0.729 0.919
POOR_CVH 4 0.064 0.070 0.041 0.017 0.087
SMOKING 3 0.858 0.126 0.930 0.822 0.931

Data Visualization

Descriptive Plots

#| fig.width: 12
#| fig.height: 6

cat("\nCreating descriptive plots...\n")
## 
## Creating descriptive plots...
# Distribution of studies by year
year_summary <- clean_cvh %>%
  count(year) %>%
  arrange(year)

p1 <- ggplot(year_summary, aes(x = factor(year), y = n)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  labs(title = "Studies by Publication Year",
       x = "Publication Year", y = "Number of Studies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Studies by region
p2 <- ggplot(region_summary, aes(x = reorder(region_simple, n), y = n)) +
  geom_col(fill = "darkorange", alpha = 0.8) +
  coord_flip() +
  labs(title = "Studies by Geographic Region", x = "", y = "Number of Studies") +
  theme_minimal()

# Combine plots
descriptive_plots <- p1 + p2
print(descriptive_plots)

Distribution of Prevalence Metrics

#| fig.width: 10
#| fig.height: 6

# Distribution of prevalence metrics
prevalence_long <- clean_cvh %>%
  select(study_label, ends_with("_prev")) %>%
  pivot_longer(cols = -study_label, names_to = "metric", values_to = "prevalence") %>%
  mutate(metric = str_to_upper(str_remove(metric, "_prev")))

p3 <- ggplot(prevalence_long, aes(x = reorder(metric, prevalence, na.rm = TRUE), y = prevalence)) +
  geom_boxplot(fill = "lightblue", alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.6, size = 1) +
  labs(title = "Distribution of Ideal CVH Metric Prevalence",
       x = "CVH Metric", y = "Prevalence") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_minimal()

print(p3)

### Meta-Analysis preparation

cat("\nPreparing data for meta-analysis...\n")
## 
## Preparing data for meta-analysis...
# Function to prepare meta-analysis data for each metric
prepare_meta_data <- function(metric) {
  clean_cvh %>%
    filter(!is.na(!!sym(metric)) & !is.na(sample_size)) %>%
    mutate(
      # Calculate number of events
      events = round(!!sym(metric) * sample_size),
      # Ensure events don't exceed sample size
      events = pmin(events, sample_size),
      # Handle zero events
      events = ifelse(events == 0, 0.5, events),
      # Calculate standard error for proportions
      se = sqrt((!!sym(metric) * (1 - !!sym(metric))) / sample_size),
      variance = se^2
    ) %>%
    select(study_label, country, region_simple, year, sample_size, 
           prevalence = !!sym(metric), events, se, variance)
}

# Prepare data for each metric
metrics <- c("diet_prev", "smoking_prev", "pa_prev", "bmi_prev", 
             "bp_prev", "lipids_prev", "glucose_prev", "ideal_cvh_prev")

meta_datasets <- map(metrics, prepare_meta_data)
names(meta_datasets) <- metrics

# Display number of studies available for each metric
studies_per_metric <- map_int(meta_datasets, nrow)
metric_summary <- data.frame(
  Metric = str_to_upper(str_remove(metrics, "_prev")),
  Studies = studies_per_metric,
  Total_Participants = map_dbl(meta_datasets, ~sum(.x$sample_size, na.rm = TRUE))
)

cat("\nStudies available for meta-analysis:\n")
## 
## Studies available for meta-analysis:
kable(metric_summary, caption = "Studies Available for Meta-Analysis by Metric") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Studies Available for Meta-Analysis by Metric
Metric Studies Total_Participants
diet_prev DIET 8 25855
smoking_prev SMOKING 3 19606
pa_prev PA 12 43862
bmi_prev BMI 10 40835
bp_prev BP 10 39853
lipids_prev LIPIDS 10 39853
glucose_prev GLUCOSE 10 39853
ideal_cvh_prev IDEAL_CVH 11 41433

Meta-Analysis Models

cat("\nPerforming meta-analysis...\n")
## 
## Performing meta-analysis...
# Function to perform meta-analysis with error handling
perform_meta_analysis <- function(metric_data, metric_name) {
  if (nrow(metric_data) < 2) {
    cat("Insufficient studies for", metric_name, "- only", nrow(metric_data), "study available\n")
    return(NULL)
  }
  
  tryCatch({
    meta_result <- metaprop(
      event = events,
      n = sample_size,
      studlab = study_label,
      data = metric_data,
      sm = "PLOGIT",
      method.tau = "ML",
      prediction = TRUE,
      title = paste("Prevalence of Ideal", metric_name)
    )
    return(meta_result)
  }, error = function(e) {
    cat("Error in meta-analysis for", metric_name, ":", e$message, "\n")
    return(NULL)
  })
}

# Perform meta-analysis for each metric
meta_results <- list()
for (i in seq_along(metrics)) {
  metric_name <- str_to_upper(str_remove(metrics[i], "_prev"))
  meta_results[[metrics[i]]] <- perform_meta_analysis(
    meta_datasets[[metrics[i]]], 
    metric_name
  )
}

Meta-Analysis Results

cat("\nCompiling meta-analysis results...\n")
## 
## Compiling meta-analysis results...
# Create summary table of meta-analysis results
summary_data <- map_dfr(metrics, function(metric) {
  result <- meta_results[[metric]]
  if (is.null(result)) {
    return(data.frame(
      Metric = str_to_upper(str_remove(metric, "_prev")),
      Studies = NA,
      Total_N = NA,
      Prevalence = NA,
      CI_lower = NA,
      CI_upper = NA,
      I2 = NA,
      Tau2 = NA
    ))
  }
  
  data.frame(
    Metric = str_to_upper(str_remove(metric, "_prev")),
    Studies = result$k,
    Total_N = sum(result$n),
    Prevalence = round(exp(result$TE.random) / (1 + exp(result$TE.random)), 3),
    CI_lower = round(exp(result$lower.random) / (1 + exp(result$lower.random)), 3),
    CI_upper = round(exp(result$upper.random) / (1 + exp(result$upper.random)), 3),
    I2 = round(result$I2, 1),
    Tau2 = round(result$tau2, 4)
  )
})

# Format for display
formatted_summary <- summary_data %>%
  mutate(
    Prevalence_CI = ifelse(is.na(Prevalence), "Insufficient data",
                           sprintf("%.1f%% (%.1f-%.1f)", 
                                   Prevalence * 100, CI_lower * 100, CI_upper * 100)),
    I2 = ifelse(is.na(I2), "", sprintf("%.1f%%", I2)),
    Studies_N = ifelse(is.na(Studies), "", 
                       paste0(Studies, " (", format(Total_N, big.mark = ","), ")"))
  ) %>%
  select(Metric, Studies_N, Prevalence_CI, I2, Tau2)

cat("\nMETA-ANALYSIS RESULTS SUMMARY:\n")
## 
## META-ANALYSIS RESULTS SUMMARY:
kable(formatted_summary, caption = "Pooled Prevalence Estimates from Random-Effects Meta-Analysis") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(which(!is.na(summary_data$Prevalence)), bold = FALSE)
Pooled Prevalence Estimates from Random-Effects Meta-Analysis
Metric Studies_N Prevalence_CI I2 Tau2
DIET 8 (25,855) 7.0% (1.7-24.2) 1.0% 4.2749
SMOKING 3 (19,606) 88.5% (75.6-95.0) 1.0% 0.6366
PA 12 (43,862) 85.4% (73.2-92.6) 1.0% 1.7937
BMI 10 (40,835) 75.9% (67.4-82.8) 1.0% 0.4626
BP 10 (39,853) 41.3% (33.8-49.3) 1.0% 0.2660
LIPIDS 10 (39,853) 89.4% (84.0-93.2) 1.0% 0.5901
GLUCOSE 10 (39,853) 91.6% (84.4-95.6) 1.0% 1.2659
IDEAL_CVH 11 (41,433) 50.2% (35.0-65.4) 1.0% 1.1311

Visualization of Meta-Analysis Results

Summary Forest Plot

cat("\nCreating meta-analysis visualizations...\n")
## 
## Creating meta-analysis visualizations...
# Summary forest plot
plot_data <- summary_data %>%
  filter(!is.na(Prevalence))

if (nrow(plot_data) > 0) {
  summary_forest <- ggplot(plot_data, aes(x = Prevalence, y = reorder(Metric, Prevalence))) +
    geom_point(aes(size = Studies), color = "blue", alpha = 0.8) +
    geom_errorbarh(aes(xmin = CI_lower, xmax = CI_upper), height = 0.2, 
                   color = "blue", alpha = 0.7, size = 1) +
    geom_vline(xintercept = 0.5, linetype = "dashed", color = "red", alpha = 0.5) +
    geom_vline(xintercept = mean(plot_data$Prevalence, na.rm = TRUE), 
               linetype = "dotted", color = "gray", size = 1) +
    labs(title = "Pooled Prevalence of Ideal CVH Metrics in African Populations",
         subtitle = "Random Effects Meta-Analysis with 95% Confidence Intervals",
         x = "Pooled Prevalence (95% CI)", 
         y = "CVH Metric",
         size = "Number of\nStudies") +
    scale_x_continuous(labels = scales::percent, limits = c(0, 1)) +
    theme_minimal() +
    theme(panel.grid.major = element_line(color = "grey90"),
          panel.grid.minor = element_blank(),
          plot.title = element_text(face = "bold", size = 14),
          plot.subtitle = element_text(size = 12),
          axis.text = element_text(size = 10))
  
  print(summary_forest)
}

### Individual Forest Plots for Each Metric

# Note: Individual forest plots are created but not displayed inline to save space
cat("\nCreating individual forest plots...\n")
## 
## Creating individual forest plots...
# Create individual forest plots for metrics with sufficient studies
for (metric in metrics) {
  if (!is.null(meta_results[[metric]]) && meta_results[[metric]]$k >= 3) {
    metric_name <- str_to_upper(str_remove(metric, "_prev"))
    
    png(paste0("forest_plot_", metric, ".png"), 
        width = 12, height = 8, units = "in", res = 300)
    
    forest(meta_results[[metric]],
           leftcols = c("studlab", "event", "n"),
           leftlabs = c("Study", "Events", "Total"),
           xlab = "Prevalence",
           smlab = "Prevalence",
           col.square = "blue",
           col.diamond = "red",
           col.diamond.lines = "darkred",
           print.tau2 = TRUE,
           print.I2 = TRUE,
           mlab = "RE Model")
    
    grid.text(paste("Forest Plot: Ideal", metric_name, "Prevalence"), 
              x = 0.5, y = 0.95, just = "center",
              gp = gpar(cex = 1.2, fontface = "bold"))
    
    dev.off()
    cat("Created: forest_plot_", metric, ".png\n", sep = "")
  }
}
## Created: forest_plot_diet_prev.png
## Created: forest_plot_smoking_prev.png
## Created: forest_plot_pa_prev.png
## Created: forest_plot_bmi_prev.png
## Created: forest_plot_bp_prev.png
## Created: forest_plot_lipids_prev.png
## Created: forest_plot_glucose_prev.png
## Created: forest_plot_ideal_cvh_prev.png

Subgroup Analyses

#| cat("\nPerforming subgroup analysis...\n")

# Subgroup analysis for ideal CVH by region
if (n_distinct(meta_datasets$ideal_cvh_prev$region_simple) > 1 && 
    nrow(meta_datasets$ideal_cvh_prev) >= 3) {
  
  subgroup_ideal <- metaprop(
    event = events,
    n = sample_size,
    studlab = study_label,
    data = meta_datasets$ideal_cvh_prev,
    subgroup = region_simple,
    sm = "PLOGIT",
    method.tau = "ML"
  )
  
  cat("Subgroup Analysis Results (Ideal CVH by Region):\n")
  print(subgroup_ideal)
  
  # Create subgroup forest plot
  png("subgroup_analysis_region.png", width = 12, height = 10, units = "in", res = 300)
  
  forest(subgroup_ideal,
         leftcols = c("studlab", "event", "n"),
         leftlabs = c("Study", "Events", "Total"),
         xlab = "Prevalence",
         smlab = "Prevalence")
  
  grid.text("Subgroup Analysis: Ideal CVH by Geographic Region", 
            x = 0.5, y = 0.97, just = "center",
            gp = gpar(cex = 1.2, fontface = "bold"))
  
  dev.off()
  cat("Created: subgroup_analysis_region.png\n")
} else {
  cat("Insufficient data for subgroup analysis by region\n")
}
## Subgroup Analysis Results (Ideal CVH by Region):
## Number of studies: k = 11
## Number of observations: o = 41433
## Number of events: e = 23748
## 
##                      proportion           95%-CI
## Common effect model      0.5732 [0.5684; 0.5779]
## Random effects model     0.5023 [0.3498; 0.6543]
## 
## Quantifying heterogeneity (with 95%-CIs):
##  tau^2 = 1.1311; tau = 1.0636; I^2 = 99.9% [99.8%; 99.9%]; H = 26.56 [25.11; 28.09]
## 
## Test of heterogeneity:
##             Q d.f. p-value
##  Wald 7052.86   10       0
##  LRT  9995.27   10       0
## 
## Results for subgroups (common effect model):
##                            k proportion           95%-CI       Q   I^2
## region_simple = Southern   4     0.5573 [0.5470; 0.5677]  800.67 99.6%
## region_simple = Eastern    5     0.6755 [0.6697; 0.6812] 3233.68 99.9%
## region_simple = Western    2     0.2432 [0.2335; 0.2531]  312.66 99.7%
## 
## Test for subgroup differences (common effect model):
##                      Q d.f. p-value
## Between groups 3836.89    2       0
## 
## Results for subgroups (random effects model):
##                            k proportion           95%-CI  tau^2    tau
## region_simple = Southern   4     0.5468 [0.4047; 0.6816] 0.3401 0.5832
## region_simple = Eastern    5     0.5856 [0.3336; 0.7996] 1.3997 1.1831
## region_simple = Western    2     0.2332 [0.1311; 0.3800] 0.2540 0.5040
## 
## Test for subgroup differences (random effects model):
##                    Q d.f. p-value
## Between groups 10.42    2  0.0055
## 
## Details of meta-analysis methods:
## - Random intercept logistic regression model
## - Maximum-likelihood estimator for tau^2
## - Calculation of I^2 based on Q
## - Logit transformation
## Created: subgroup_analysis_region.png

Publication Bias Assessment

#| eval: false
#| warning: false

cat("\nAssessing publication bias...\n")
## 
## Assessing publication bias...
# Function to assess publication bias
assess_publication_bias <- function(meta_result, metric_name) {
  if (is.null(meta_result) || meta_result$k < 3) {
    cat("Insufficient studies for publication bias assessment (", metric_name, ")\n")
    return(NULL)
  }
  
  # Funnel plot
  png(paste0("funnel_plot_", metric_name, ".png"), 
      width = 8, height = 6, units = "in", res = 300)
  
  funnel(meta_result)
  grid.text(paste("Funnel Plot:", metric_name), 
            x = 0.5, y = 0.95, just = "center",
            gp = gpar(cex = 1.2, fontface = "bold"))
  
  dev.off()
  
  # Egger's test
  egger_test <- metabias(meta_result, method.bias = "linreg", k.min = 3)
  
  cat(paste0("\nPublication Bias Assessment - ", metric_name, ":\n"))
  print(egger_test)
  
  return(egger_test)
}

# Assess publication bias for each metric
publication_bias_results <- list()
for (metric in metrics) {
  if (!is.null(meta_results[[metric]]) && meta_results[[metric]]$k >= 3) {
    metric_name <- str_to_upper(str_remove(metric, "_prev"))
    publication_bias_results[[metric]] <- assess_publication_bias(
      meta_results[[metric]], metric_name)
  }
}
## 
## Publication Bias Assessment - DIET:
## Review:     Prevalence of Ideal DIET
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = -1.94, df = 6, p-value = 0.1010
## Bias estimate: -8.4850 (SE = 4.3833)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 43.1118)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - SMOKING:
## Review:     Prevalence of Ideal SMOKING
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = 1.99, df = 1, p-value = 0.2965
## Bias estimate: 45.8350 (SE = 23.0372)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 285.8802)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - PA:
## Review:     Prevalence of Ideal PA
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = 0.83, df = 10, p-value = 0.4262
## Bias estimate: 13.0668 (SE = 15.7545)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 503.3980)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - BMI:
## Review:     Prevalence of Ideal BMI
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = 1.66, df = 8, p-value = 0.1348
## Bias estimate: 26.7982 (SE = 16.1121)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 241.3698)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - BP:
## Review:     Prevalence of Ideal BP
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = -1.28, df = 8, p-value = 0.2371
## Bias estimate: -22.1929 (SE = 17.3647)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 268.2065)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - LIPIDS:
## Review:     Prevalence of Ideal LIPIDS
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = 1.87, df = 8, p-value = 0.0978
## Bias estimate: 24.8998 (SE = 13.2881)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 206.7203)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - GLUCOSE:
## Review:     Prevalence of Ideal GLUCOSE
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = 2.90, df = 8, p-value = 0.0198
## Bias estimate: 17.5766 (SE = 6.0562)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 81.0211)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ
## 
## Publication Bias Assessment - IDEAL_CVH:
## Review:     Prevalence of Ideal IDEAL_CVH
## 
## Linear regression test of funnel plot asymmetry
## 
## Test result: t = -0.99, df = 9, p-value = 0.3475
## Bias estimate: -26.3915 (SE = 26.6244)
## 
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 706.5167)
## - predictor: standard error
## - weight:    inverse variance
## - reference: Egger et al. (1997), BMJ

Risk of Bias and Fidelity Analysis

# Risk of bias assessment
rob_summary <- clean_cvh %>%
  count(risk_of_bias_judgment) %>%
  mutate(percentage = n / sum(n) * 100)

if (nrow(rob_summary) > 0) {
  cat("\nRisk of Bias Summary:\n")
  kable(rob_summary, caption = "Risk of Bias Distribution") %>%
    kable_styling(bootstrap_options = c("striped", "hover"))
  
  rob_plot <- ggplot(rob_summary, aes(x = reorder(risk_of_bias_judgment, -n), y = n)) +
    geom_col(fill = "coral", alpha = 0.8) +
    labs(title = "Distribution of Risk of Bias Judgments",
         x = "Risk of Bias Judgment", 
         y = "Number of Studies") +
    theme_minimal()
  
  print(rob_plot)
}
## 
## Risk of Bias Summary:

# Fidelity to LS7 framework
fidelity_summary <- clean_cvh %>%
  count(fidelity_tag) %>%
  mutate(percentage = n / sum(n) * 100)

if (nrow(fidelity_summary) > 0) {
  cat("\nLS7 Fidelity Summary:\n")
  kable(fidelity_summary, caption = "LS7 Framework Fidelity") %>%
    kable_styling(bootstrap_options = c("striped", "hover"))
  
  fidelity_plot <- ggplot(fidelity_summary, aes(x = reorder(fidelity_tag, -n), y = n)) +
    geom_col(fill = "lightgreen", alpha = 0.8) +
    labs(title = "Fidelity to LS7 Framework Across Studies",
         x = "Fidelity Category", 
         y = "Number of Studies") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  print(fidelity_plot)
}
## 
## LS7 Fidelity Summary:

### Correlation Analysis

cat("\nPerforming correlation analysis...\n")
## 
## Performing correlation analysis...
# Correlation matrix between metrics
correlation_data <- clean_cvh %>%
  select(ends_with("_prev")) %>%
  cor(use = "pairwise.complete.obs")

correlation_long <- as.data.frame(correlation_data) %>%
  rownames_to_column("metric1") %>%
  pivot_longer(cols = -metric1, names_to = "metric2", values_to = "correlation") %>%
  mutate(
    metric1 = str_to_upper(str_remove(metric1, "_prev")),
    metric2 = str_to_upper(str_remove(metric2, "_prev"))
  )

correlation_plot <- ggplot(correlation_long, aes(x = metric1, y = metric2, fill = correlation)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(correlation, 2)), color = "black", size = 3.5, fontface = "bold") +
  scale_fill_gradient2(low = "#2166AC", high = "#B2182B", mid = "white", 
                       midpoint = 0, limits = c(-1, 1),
                       name = "Correlation") +
  labs(title = "Correlation Matrix of Ideal CVH Metrics",
       x = "", y = "") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
        axis.text.y = element_text(face = "bold"),
        plot.title = element_text(face = "bold", size = 14))

print(correlation_plot)