# Number of studies per publication year, ordered chronologically
year_summary <- arrange(count(clean_cvh, year), year)

# Bar chart of study counts; year is converted to a factor so every year
# gets its own discrete tick on the x axis
p1 <- ggplot(data = year_summary, mapping = aes(x = factor(year), y = n)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  labs(
    title = "Studies by Publication Year",
    x = "Publication Year",
    y = "Number of Studies"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Horizontal bar chart of study counts per region, bars ordered by count.
# `region_summary` is not built in this section; it is expected to provide
# `region_simple` and `n` columns.
p2 <- ggplot(
  data = region_summary,
  mapping = aes(x = reorder(region_simple, n), y = n)
) +
  geom_col(fill = "darkorange", alpha = 0.8) +
  coord_flip() +
  labs(
    title = "Studies by Geographic Region",
    x = "",
    y = "Number of Studies"
  ) +
  theme_minimal()

# Combine both panels with `+` and draw them
descriptive_plots <- p1 + p2
print(descriptive_plots)
### Distribution of Prevalence Metrics
#| fig.width: 10
#| fig.height: 6
# Long-format table of prevalence values: one row per study x metric.
# Metric labels are the column names with "_prev" removed, upper-cased.
prevalence_long <- clean_cvh %>%
  select(study_label, ends_with("_prev")) %>%
  pivot_longer(
    cols = -study_label,
    names_to = "metric",
    values_to = "prevalence"
  ) %>%
  mutate(metric = metric %>% str_remove("_prev") %>% str_to_upper())

# Box plot per metric with the individual study values jittered on top.
# Box-plot outlier markers are hidden because every point is already drawn
# by geom_jitter(). Metrics are ordered by mean prevalence (NAs ignored).
p3 <- ggplot(
  data = prevalence_long,
  mapping = aes(
    x = reorder(metric, prevalence, na.rm = TRUE),
    y = prevalence
  )
) +
  geom_boxplot(fill = "lightblue", alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.6, size = 1) +
  labs(
    title = "Distribution of Ideal CVH Metric Prevalence",
    x = "CVH Metric",
    y = "Prevalence"
  ) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_minimal()
print(p3)
### Meta-Analysis Preparation
cat("\nPreparing data for meta-analysis...\n")
##
## Preparing data for meta-analysis...
# Build the per-study input table for pooling one prevalence metric.
#
# Args:
#   metric: single string naming a prevalence column of `data`
#           (e.g. "diet_prev"); values are used as proportions.
#   data:   study-level data frame containing `metric`, `sample_size`,
#           `study_label`, `country`, `region_simple` and `year`.
#           Defaults to the global `clean_cvh`, so existing one-argument
#           calls keep working.
#
# Returns:
#   The rows of `data` where both the metric and `sample_size` are present,
#   reduced to the identifying columns plus `prevalence`, `events`, `se`
#   and `variance`.
#
# Raises:
#   An error if `metric` is not a single string or is not a column of `data`.
#   A warning if any prevalence lies outside [0, 1].
prepare_meta_data <- function(metric, data = clean_cvh) {
  stopifnot(is.character(metric), length(metric) == 1L)
  if (!metric %in% names(data)) {
    stop("Column '", metric, "' not found in data", call. = FALSE)
  }

  # se = sqrt(p * (1 - p) / n) is only defined for p in [0, 1]; values outside
  # that range would otherwise surface only as NaN standard errors.
  values <- data[[metric]]
  if (is.numeric(values) && any(values < 0 | values > 1, na.rm = TRUE)) {
    warning(
      "Column '", metric, "' has values outside [0, 1]; ",
      "standard errors will be NaN for those studies",
      call. = FALSE
    )
  }

  # Resolve the column symbol once, in the function environment, so it can
  # never be confused with a column that happens to be called `metric`.
  metric_col <- sym(metric)

  data %>%
    filter(!is.na(!!metric_col) & !is.na(sample_size)) %>%
    mutate(
      # Number of events implied by the reported proportion
      events = round(!!metric_col * sample_size),
      # Rounding must never yield more events than participants
      events = pmin(events, sample_size),
      # Zero events are replaced by 0.5.
      # NOTE(review): this produces non-integer counts and leaves
      # `sample_size` unchanged; confirm the downstream pooling model accepts
      # fractional events or applies its own continuity correction instead.
      events = ifelse(events == 0, 0.5, events),
      # Standard error of a proportion, from the reported prevalence
      se = sqrt((!!metric_col * (1 - !!metric_col)) / sample_size),
      variance = se^2
    ) %>%
    select(
      study_label, country, region_simple, year, sample_size,
      prevalence = !!metric_col, events, se, variance
    )
}
# Prevalence columns to pool; one prepared dataset per metric, keyed by name
metrics <- c(
  "diet_prev", "smoking_prev", "pa_prev", "bmi_prev",
  "bp_prev", "lipids_prev", "glucose_prev", "ideal_cvh_prev"
)
meta_datasets <- setNames(map(metrics, prepare_meta_data), metrics)

# Per-metric overview: number of contributing studies and summed sample size
studies_per_metric <- map_int(meta_datasets, nrow)
metric_summary <- data.frame(
  Metric = str_to_upper(str_remove(metrics, "_prev")),
  Studies = studies_per_metric,
  Total_Participants = map_dbl(
    meta_datasets,
    function(dataset) sum(dataset$sample_size, na.rm = TRUE)
  )
)
cat("\nStudies available for meta-analysis:\n")
##
## Studies available for meta-analysis:
# `metric_summary` inherits the metric column names as row names from the
# named vectors it is built from. Drop them in the rendered table, otherwise
# they appear as a redundant, unlabeled first column next to `Metric`.
kable(
  metric_summary,
  row.names = FALSE,
  caption = "Studies Available for Meta-Analysis by Metric"
) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Studies Available for Meta-Analysis by Metric
|
|
Metric
|
Studies
|
Total_Participants
|
|
diet_prev
|
DIET
|
8
|
25855
|
|
smoking_prev
|
SMOKING
|
3
|
19606
|
|
pa_prev
|
PA
|
12
|
43862
|
|
bmi_prev
|
BMI
|
10
|
40835
|
|
bp_prev
|
BP
|
10
|
39853
|
|
lipids_prev
|
LIPIDS
|
10
|
39853
|
|
glucose_prev
|
GLUCOSE
|
10
|
39853
|
|
ideal_cvh_prev
|
IDEAL_CVH
|
11
|
41433
|
### Summary Forest Plot
cat("\nCreating meta-analysis visualizations...\n")
##
## Creating meta-analysis visualizations...
# Summary forest plot: one pooled estimate per metric.
# `summary_data` is built outside this section; only rows that have a pooled
# prevalence are plotted.
plot_data <- filter(summary_data, !is.na(Prevalence))

if (nrow(plot_data) > 0) {
  summary_forest <- ggplot(
    data = plot_data,
    mapping = aes(x = Prevalence, y = reorder(Metric, Prevalence))
  ) +
    # Point estimate, sized by the number of contributing studies
    geom_point(aes(size = Studies), color = "blue", alpha = 0.8) +
    # Horizontal confidence interval
    geom_errorbarh(
      aes(xmin = CI_lower, xmax = CI_upper),
      height = 0.2, color = "blue", alpha = 0.7, size = 1
    ) +
    # Reference lines: 50% (dashed red) and the mean pooled prevalence
    # across the plotted metrics (dotted gray)
    geom_vline(
      xintercept = 0.5,
      linetype = "dashed", color = "red", alpha = 0.5
    ) +
    geom_vline(
      xintercept = mean(plot_data$Prevalence, na.rm = TRUE),
      linetype = "dotted", color = "gray", size = 1
    ) +
    labs(
      title = "Pooled Prevalence of Ideal CVH Metrics in African Populations",
      subtitle = "Random Effects Meta-Analysis with 95% Confidence Intervals",
      x = "Pooled Prevalence (95% CI)",
      y = "CVH Metric",
      size = "Number of\nStudies"
    ) +
    scale_x_continuous(labels = scales::percent, limits = c(0, 1)) +
    theme_minimal() +
    theme(
      panel.grid.major = element_line(color = "grey90"),
      panel.grid.minor = element_blank(),
      plot.title = element_text(face = "bold", size = 14),
      plot.subtitle = element_text(size = 12),
      axis.text = element_text(size = 10)
    )
  print(summary_forest)
}
### Individual Forest Plots for Each Metric
# Note: Individual forest plots are created but not displayed inline to save space
cat("\nCreating individual forest plots...\n")
##
## Creating individual forest plots...
# Write one forest plot PNG per metric whose fitted model (looked up in
# `meta_results`, built outside this section) pools at least three studies.
for (metric in metrics) {
  if (!is.null(meta_results[[metric]]) && meta_results[[metric]]$k >= 3) {
    metric_name <- str_to_upper(str_remove(metric, "_prev"))
    png(paste0("forest_plot_", metric, ".png"),
        width = 12, height = 8, units = "in", res = 300)
    # Always close the PNG device, even when plotting fails; otherwise the
    # device stays open and later plots are silently drawn into this file.
    tryCatch(
      {
        forest(meta_results[[metric]],
               leftcols = c("studlab", "event", "n"),
               leftlabs = c("Study", "Events", "Total"),
               xlab = "Prevalence",
               smlab = "Prevalence",
               col.square = "blue",
               col.diamond = "red",
               col.diamond.lines = "darkred",
               print.tau2 = TRUE,
               print.I2 = TRUE,
               mlab = "RE Model")
        # Title drawn on top of the finished plot, centred near the top edge
        grid.text(paste("Forest Plot: Ideal", metric_name, "Prevalence"),
                  x = 0.5, y = 0.95, just = "center",
                  gp = gpar(cex = 1.2, fontface = "bold"))
      },
      finally = dev.off()
    )
    cat("Created: forest_plot_", metric, ".png\n", sep = "")
  }
}
## Created: forest_plot_diet_prev.png
## Created: forest_plot_smoking_prev.png
## Created: forest_plot_pa_prev.png
## Created: forest_plot_bmi_prev.png
## Created: forest_plot_bp_prev.png
## Created: forest_plot_lipids_prev.png
## Created: forest_plot_glucose_prev.png
## Created: forest_plot_ideal_cvh_prev.png
### Subgroup Analyses
cat("\nPerforming subgroup analysis...\n")
# Subgroup analysis for ideal CVH by region.
# Runs only when the ideal-CVH dataset spans more than one region and holds
# at least three studies; otherwise a notice is printed instead.
if (n_distinct(meta_datasets$ideal_cvh_prev$region_simple) > 1 &&
    nrow(meta_datasets$ideal_cvh_prev) >= 3) {
  # Pool proportions on the logit scale ("PLOGIT") with a maximum-likelihood
  # estimate of tau^2, split by region
  subgroup_ideal <- metaprop(
    event = events,
    n = sample_size,
    studlab = study_label,
    data = meta_datasets$ideal_cvh_prev,
    subgroup = region_simple,
    sm = "PLOGIT",
    method.tau = "ML"
  )
  cat("Subgroup Analysis Results (Ideal CVH by Region):\n")
  print(subgroup_ideal)

  # Create subgroup forest plot
  png("subgroup_analysis_region.png",
      width = 12, height = 10, units = "in", res = 300)
  # Always close the PNG device, even when plotting fails; otherwise the
  # device stays open and later plots are silently drawn into this file.
  tryCatch(
    {
      forest(subgroup_ideal,
             leftcols = c("studlab", "event", "n"),
             leftlabs = c("Study", "Events", "Total"),
             xlab = "Prevalence",
             smlab = "Prevalence")
      grid.text("Subgroup Analysis: Ideal CVH by Geographic Region",
                x = 0.5, y = 0.97, just = "center",
                gp = gpar(cex = 1.2, fontface = "bold"))
    },
    finally = dev.off()
  )
  cat("Created: subgroup_analysis_region.png\n")
} else {
  cat("Insufficient data for subgroup analysis by region\n")
}
## Subgroup Analysis Results (Ideal CVH by Region):
## Number of studies: k = 11
## Number of observations: o = 41433
## Number of events: e = 23748
##
## proportion 95%-CI
## Common effect model 0.5732 [0.5684; 0.5779]
## Random effects model 0.5023 [0.3498; 0.6543]
##
## Quantifying heterogeneity (with 95%-CIs):
## tau^2 = 1.1311; tau = 1.0636; I^2 = 99.9% [99.8%; 99.9%]; H = 26.56 [25.11; 28.09]
##
## Test of heterogeneity:
## Q d.f. p-value
## Wald 7052.86 10 0
## LRT 9995.27 10 0
##
## Results for subgroups (common effect model):
## k proportion 95%-CI Q I^2
## region_simple = Southern 4 0.5573 [0.5470; 0.5677] 800.67 99.6%
## region_simple = Eastern 5 0.6755 [0.6697; 0.6812] 3233.68 99.9%
## region_simple = Western 2 0.2432 [0.2335; 0.2531] 312.66 99.7%
##
## Test for subgroup differences (common effect model):
## Q d.f. p-value
## Between groups 3836.89 2 0
##
## Results for subgroups (random effects model):
## k proportion 95%-CI tau^2 tau
## region_simple = Southern 4 0.5468 [0.4047; 0.6816] 0.3401 0.5832
## region_simple = Eastern 5 0.5856 [0.3336; 0.7996] 1.3997 1.1831
## region_simple = Western 2 0.2332 [0.1311; 0.3800] 0.2540 0.5040
##
## Test for subgroup differences (random effects model):
## Q d.f. p-value
## Between groups 10.42 2 0.0055
##
## Details of meta-analysis methods:
## - Random intercept logistic regression model
## - Maximum-likelihood estimator for tau^2
## - Calculation of I^2 based on Q
## - Logit transformation
## Created: subgroup_analysis_region.png
### Publication Bias Assessment
#| eval: false
#| warning: false
cat("\nAssessing publication bias...\n")
##
## Assessing publication bias...
# Assess publication bias for one fitted meta-analysis.
#
# Writes a funnel plot to "funnel_plot_<metric_name>.png" in the working
# directory, then runs and prints a linear-regression (Egger) test of funnel
# plot asymmetry.
#
# Args:
#   meta_result: fitted meta-analysis object exposing the study count as `$k`,
#                or NULL when no model is available.
#   metric_name: label used in the file name, plot title and console output.
#
# Returns:
#   The object returned by `metabias()`, or NULL (after printing a notice)
#   when `meta_result` is NULL or pools fewer than three studies.
assess_publication_bias <- function(meta_result, metric_name) {
  if (is.null(meta_result) || meta_result$k < 3) {
    cat("Insufficient studies for publication bias assessment (", metric_name, ")\n")
    return(NULL)
  }

  # Funnel plot
  png(paste0("funnel_plot_", metric_name, ".png"),
      width = 8, height = 6, units = "in", res = 300)
  # Always close the PNG device, even when plotting fails; otherwise the
  # device stays open and later plots are silently drawn into this file.
  tryCatch(
    {
      funnel(meta_result)
      # NOTE(review): the title is drawn with grid on top of the funnel()
      # output; confirm the two graphics systems line up in the saved PNG.
      grid.text(paste("Funnel Plot:", metric_name),
                x = 0.5, y = 0.95, just = "center",
                gp = gpar(cex = 1.2, fontface = "bold"))
    },
    finally = dev.off()
  )

  # Egger's test. k.min = 3 matches the study-count guard above.
  # NOTE(review): with this few studies the test has little power; presumably
  # a deliberate choice, confirm against the analysis protocol.
  egger_test <- metabias(meta_result, method.bias = "linreg", k.min = 3)
  cat(paste0("\nPublication Bias Assessment - ", metric_name, ":\n"))
  print(egger_test)
  egger_test
}
# Run the publication-bias assessment for every metric that has a fitted
# model pooling at least three studies; results are stored by metric name.
publication_bias_results <- list()
for (metric in metrics) {
  # Skip metrics without a usable model
  if (is.null(meta_results[[metric]]) || meta_results[[metric]]$k < 3) {
    next
  }
  metric_name <- str_to_upper(str_remove(metric, "_prev"))
  publication_bias_results[[metric]] <- assess_publication_bias(
    meta_results[[metric]],
    metric_name
  )
}
##
## Publication Bias Assessment - DIET:
## Review: Prevalence of Ideal DIET
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = -1.94, df = 6, p-value = 0.1010
## Bias estimate: -8.4850 (SE = 4.3833)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 43.1118)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - SMOKING:
## Review: Prevalence of Ideal SMOKING
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.99, df = 1, p-value = 0.2965
## Bias estimate: 45.8350 (SE = 23.0372)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 285.8802)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - PA:
## Review: Prevalence of Ideal PA
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 0.83, df = 10, p-value = 0.4262
## Bias estimate: 13.0668 (SE = 15.7545)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 503.3980)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - BMI:
## Review: Prevalence of Ideal BMI
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.66, df = 8, p-value = 0.1348
## Bias estimate: 26.7982 (SE = 16.1121)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 241.3698)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - BP:
## Review: Prevalence of Ideal BP
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = -1.28, df = 8, p-value = 0.2371
## Bias estimate: -22.1929 (SE = 17.3647)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 268.2065)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - LIPIDS:
## Review: Prevalence of Ideal LIPIDS
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 1.87, df = 8, p-value = 0.0978
## Bias estimate: 24.8998 (SE = 13.2881)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 206.7203)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - GLUCOSE:
## Review: Prevalence of Ideal GLUCOSE
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = 2.90, df = 8, p-value = 0.0198
## Bias estimate: 17.5766 (SE = 6.0562)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 81.0211)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
##
## Publication Bias Assessment - IDEAL_CVH:
## Review: Prevalence of Ideal IDEAL_CVH
##
## Linear regression test of funnel plot asymmetry
##
## Test result: t = -0.99, df = 9, p-value = 0.3475
## Bias estimate: -26.3915 (SE = 26.6244)
##
## Details:
## - multiplicative residual heterogeneity variance (tau^2 = 706.5167)
## - predictor: standard error
## - weight: inverse variance
## - reference: Egger et al. (1997), BMJ
### Temporal Trends
cat("\nPerforming additional analyses...\n")
##
## Performing additional analyses...
# Temporal trends: studies that report ideal CVH prevalence, oldest first
temporal_data <- arrange(filter(clean_cvh, !is.na(ideal_cvh_prev)), year)

if (nrow(temporal_data) > 1) {
  # One point per study (size = sample size, colour = region) with a single
  # overall linear fit and its confidence band
  trend_plot <- ggplot(
    data = temporal_data,
    mapping = aes(x = year, y = ideal_cvh_prev)
  ) +
    geom_point(aes(size = sample_size, color = region_simple), alpha = 0.7) +
    geom_smooth(
      method = "lm", se = TRUE,
      color = "darkred", fill = "pink", alpha = 0.3
    ) +
    labs(
      title = "Temporal Trends in Ideal CVH Prevalence",
      subtitle = "Individual Studies with Linear Trend Line",
      x = "Publication Year",
      y = "Ideal CVH Prevalence",
      size = "Sample Size",
      color = "Region"
    ) +
    scale_y_continuous(labels = scales::percent) +
    scale_color_brewer(palette = "Set1") +
    theme_minimal() +
    theme(plot.title = element_text(face = "bold", size = 14))
  print(trend_plot)

  # Rank-based (Spearman) test for a monotonic trend over publication year
  trend_test <- cor.test(
    temporal_data$year,
    temporal_data$ideal_cvh_prev,
    method = "spearman"
  )
  cat("Spearman correlation between year and ideal CVH prevalence:\n")
  cat("rho =", round(trend_test$estimate, 3),
      "p-value =", round(trend_test$p.value, 4), "\n")
}

## Spearman correlation between year and ideal CVH prevalence:
## rho = 0.19 p-value = 0.5752
### Risk of Bias and Fidelity Analysis
# Risk of bias assessment: number and percentage of studies per judgment
rob_summary <- clean_cvh %>%
  count(risk_of_bias_judgment) %>%
  mutate(percentage = n / sum(n) * 100)

# Table. The kable must be the final value of its own top-level expression:
# a value produced in the middle of a `{ }` block is discarded, so the table
# would never be rendered.
if (nrow(rob_summary) > 0) {
  cat("\nRisk of Bias Summary:\n")
  kable(rob_summary, caption = "Risk of Bias Distribution") %>%
    kable_styling(bootstrap_options = c("striped", "hover"))
}

# Bar chart of the same counts, most frequent judgment first
if (nrow(rob_summary) > 0) {
  rob_plot <- ggplot(rob_summary, aes(x = reorder(risk_of_bias_judgment, -n), y = n)) +
    geom_col(fill = "coral", alpha = 0.8) +
    labs(title = "Distribution of Risk of Bias Judgments",
         x = "Risk of Bias Judgment",
         y = "Number of Studies") +
    theme_minimal()
  print(rob_plot)
}
##
## Risk of Bias Summary:

# Fidelity to LS7 framework: number and percentage of studies per fidelity tag
fidelity_summary <- clean_cvh %>%
  count(fidelity_tag) %>%
  mutate(percentage = n / sum(n) * 100)

# Table. The kable must be the final value of its own top-level expression:
# a value produced in the middle of a `{ }` block is discarded, so the table
# would never be rendered.
if (nrow(fidelity_summary) > 0) {
  cat("\nLS7 Fidelity Summary:\n")
  kable(fidelity_summary, caption = "LS7 Framework Fidelity") %>%
    kable_styling(bootstrap_options = c("striped", "hover"))
}

# Bar chart of the same counts, most frequent category first
if (nrow(fidelity_summary) > 0) {
  fidelity_plot <- ggplot(fidelity_summary, aes(x = reorder(fidelity_tag, -n), y = n)) +
    geom_col(fill = "lightgreen", alpha = 0.8) +
    labs(title = "Fidelity to LS7 Framework Across Studies",
         x = "Fidelity Category",
         y = "Number of Studies") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  print(fidelity_plot)
}
##
## LS7 Fidelity Summary:
### Correlation Analysis
cat("\nPerforming correlation analysis...\n")
##
## Performing correlation analysis...
# Pairwise correlations between the prevalence columns; each pair uses the
# studies that report both metrics
correlation_data <- cor(
  select(clean_cvh, ends_with("_prev")),
  use = "pairwise.complete.obs"
)

# Long format for plotting: one row per (metric1, metric2) cell, with labels
# stripped of "_prev" and upper-cased
correlation_long <- correlation_data %>%
  as.data.frame() %>%
  rownames_to_column("metric1") %>%
  pivot_longer(
    cols = -metric1,
    names_to = "metric2",
    values_to = "correlation"
  ) %>%
  mutate(
    metric1 = metric1 %>% str_remove("_prev") %>% str_to_upper(),
    metric2 = metric2 %>% str_remove("_prev") %>% str_to_upper()
  )

# Heatmap with the rounded coefficient printed in each tile; the fill scale
# is fixed to [-1, 1] and centred on zero
correlation_plot <- ggplot(
  data = correlation_long,
  mapping = aes(x = metric1, y = metric2, fill = correlation)
) +
  geom_tile(color = "white") +
  geom_text(
    aes(label = round(correlation, 2)),
    color = "black", size = 3.5, fontface = "bold"
  ) +
  scale_fill_gradient2(
    low = "#2166AC", high = "#B2182B", mid = "white",
    midpoint = 0, limits = c(-1, 1),
    name = "Correlation"
  ) +
  labs(
    title = "Correlation Matrix of Ideal CVH Metrics",
    x = "",
    y = ""
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14)
  )
print(correlation_plot)
