This tutorial demonstrates how to create custom forest plots in R using the forestploter and grid packages. It defines relative risks (RRs) and confidence intervals for three groups (Student, Professional, Freelance) across four gestational age categories. Data is formatted into structured data frames, and individual forest plots are created for each group, with customized aesthetics using forest_theme.

A combined data frame is then generated to display all groups in a unified forest plot, enabling direct comparisons. The x-axis represents RRs, with reference lines and arrows indicating “Lower risk” and “Higher risk.” This script provides a streamlined approach to visualizing effect sizes and confidence intervals across multiple categories in a clear and customizable format.

# Define the estimates, lower, and upper bounds for each category
estimates <- list(
  Student = c(0.97, 0.96, 0.95, 0.95),
  Professional = c(0.87, 0.85, 0.75, 0.56),
  Freelance = c(0.90, 0.88, 0.68, 0.47)
)
lower_bounds <- list(
  Student = c(0.95, 0.92, 0.90, 0.87),
  Professional = c(0.86, 0.81, 0.71, 0.50),
  Freelance = c(0.86, 0.78, 0.58, 0.34)
)
upper_bounds <- list(
  Student = c(0.98, 1.01, 1.01, 1.04),
  Professional = c(0.89, 0.89, 0.80, 0.62),
  Freelance = c(0.94, 0.99, 0.79, 0.65)
)

# Create the plot data frame
plot_data <- data.frame(
  Category = c(
    "Late preterm (34-36 w)",
    "Moderately preterm (32-33 w)",
    "Very preterm (28-31 w)",
    "Extremely preterm (24-27 w)"
  ),
  Student = estimates$Student,
  Professional = estimates$Professional,
  Freelance = estimates$Freelance,
  Student_Lower = lower_bounds$Student,
  Student_Upper = upper_bounds$Student,
  Professional_Lower = lower_bounds$Professional,
  Professional_Upper = upper_bounds$Professional,
  Freelance_Lower = lower_bounds$Freelance,
  Freelance_Upper = upper_bounds$Freelance
)

# Reorganize the dataframe to match the format of df
plot_data_transformed <- data.frame(
  Model = plot_data$Category,
  est = plot_data$Student,
  se = NA,  # If standard errors are not provided, leave it as NA
  low = plot_data$Student_Lower,
  hi = plot_data$Student_Upper
)
names(plot_data_transformed)[1] <- "Group: Non-Professional"

plot_data_transformed$` ` <- paste(rep(" ", 20), collapse = " ")

# Combine estimates and confidence intervals into a single column
plot_data_transformed$`RR (95% CI)` <- sprintf("%.3f (%.3f to %.3f)",
                                   plot_data$Student,
                                   plot_data$Student_Lower,
                                   plot_data$Student_Upper)

# Define theme
tm <- forest_theme(base_size = 10,
                   # Confidence interval point shape, line type/color/width
                   ci_pch = 15,
                   ci_col = "#762a83",
                   ci_fill = "black",
                   ci_alpha = 0.8,
                   ci_lty = 1,
                   ci_lwd = 1.5,
                   ci_Theight = 0.2, # Set a T end at the end of CI 
                   # Reference line width/type/color
                   refline_lwd = gpar(lwd = 1, lty = "dashed", col = "grey20"),
                   # Vertical line width/type/color
                   vertline_lwd = 1,
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # Change summary color for filling and borders
                   summary_fill = "#4575b4",
                   summary_col = "#4575b4",
                   # Footnote font size/face/color
                   footnote_gp = gpar(cex = 0.6, fontface = "italic", col = "blue"))

# Draw the forest plot
forest(plot_data_transformed[,c(1,6:7)],
       est = plot_data_transformed$est,
       lower = plot_data_transformed$low, 
       upper = plot_data_transformed$hi,
       #sizes = df$se,
       ci_column = 2,
       ref_line = 1,
       arrow_lab = c("Lower risk", "Higher risk"),
       xlim = c(0.83, 1.05),
       ticks_at = c(0.9, 1),
       theme=tm)

# Reorganize the dataframe to match the format of df
plot_data_transformed <- data.frame(
  Model = plot_data$Category,
  est = plot_data$Professional,
  se = NA,  # If standard errors are not provided, leave it as NA
  low = plot_data$Professional_Lower,
  hi = plot_data$Professional_Upper
)
names(plot_data_transformed)[1] <- "Group: Professional"

plot_data_transformed$` ` <- paste(rep(" ", 20), collapse = " ")

# Combine estimates and confidence intervals into a single column
plot_data_transformed$`RR (95% CI)` <- sprintf("%.3f (%.3f to %.3f)",
                                   plot_data$Professional,
                                   plot_data$Professional_Lower,
                                   plot_data$Professional_Upper)

# Define theme
tm <- forest_theme(base_size = 10,
                   # Confidence interval point shape, line type/color/width
                   ci_pch = 15,
                   ci_col = "#762a83",
                   ci_fill = "black",
                   ci_alpha = 0.8,
                   ci_lty = 1,
                   ci_lwd = 1.5,
                   ci_Theight = 0.2, # Set a T end at the end of CI 
                   # Reference line width/type/color
                   refline_lwd = gpar(lwd = 1, lty = "dashed", col = "grey20"),
                   # Vertical line width/type/color
                   vertline_lwd = 1,
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # Change summary color for filling and borders
                   summary_fill = "#4575b4",
                   summary_col = "#4575b4",
                   # Footnote font size/face/color
                   footnote_gp = gpar(cex = 0.6, fontface = "italic", col = "blue"))

# Draw the forest plot
forest(plot_data_transformed[,c(1,6:7)],
       est = plot_data_transformed$est,
       lower = plot_data_transformed$low, 
       upper = plot_data_transformed$hi,
       #sizes = df$se,
       ci_column = 2,
       ref_line = 1,
       arrow_lab = c("Lower risk", "Higher risk"),
       xlim = c(0.45, 1),
       ticks_at = c(0.6, 0.8, 1),
       theme=tm)

# Reorganize the dataframe to match the format of df
plot_data_transformed <- data.frame(
  Model = plot_data$Category,
  est = plot_data$Freelance,
  se = NA,  # If standard errors are not provided, leave it as NA
  low = plot_data$Freelance_Lower,
  hi = plot_data$Freelance_Upper
)
names(plot_data_transformed)[1] <- "Group: Freelance"

plot_data_transformed$` ` <- paste(rep(" ", 20), collapse = " ")

# Combine estimates and confidence intervals into a single column
plot_data_transformed$`RR (95% CI)` <- sprintf("%.3f (%.3f to %.3f)",
                                   plot_data$Freelance,
                                   plot_data$Freelance_Lower,
                                   plot_data$Freelance_Upper)

# Define theme
tm <- forest_theme(base_size = 10,
                   # Confidence interval point shape, line type/color/width
                   ci_pch = 15,
                   ci_col = "#762a83",
                   ci_fill = "black",
                   ci_alpha = 0.8,
                   ci_lty = 1,
                   ci_lwd = 1.5,
                   ci_Theight = 0.2, # Set a T end at the end of CI 
                   # Reference line width/type/color
                   refline_lwd = gpar(lwd = 1, lty = "dashed", col = "grey20"),
                   # Vertical line width/type/color
                   vertline_lwd = 1,
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # Change summary color for filling and borders
                   summary_fill = "#4575b4",
                   summary_col = "#4575b4",
                   # Footnote font size/face/color
                   footnote_gp = gpar(cex = 0.6, fontface = "italic", col = "blue"))

# Draw the forest plot
forest(plot_data_transformed[,c(1,6:7)],
       est = plot_data_transformed$est,
       lower = plot_data_transformed$low, 
       upper = plot_data_transformed$hi,
       #sizes = df$se,
       ci_column = 2,
       ref_line = 1,
       arrow_lab = c("Lower risk", "Higher risk"),
       xlim = c(0.3, 1),
       ticks_at = c(0.5, 0.75, 1),
       theme=tm)

# Combine all data into a single dataframe
combined_data <- data.frame(
  "Group" = rep(c("Non-Professional", "Professional", "Freelance"), each = 4),
  "Gestational Age Category" = rep(
    c("Late preterm (34-36 w)",
      "Moderately preterm (32-33 w)",
      "Very preterm (28-31 w)",
      "Extremely preterm (24-27 w)"), 
    3
  ),
  "Estimate" = c(
  # Non-Professional
  0.97, 0.96, 0.95, 0.95,
  # Professional
  0.87, 0.85, 0.75, 0.56,
  # Freelance
  0.90, 0.88, 0.68, 0.47
),
  "SE" = NA,
  "Lower" = c(
  # Non-Professional
  0.95, 0.92, 0.90, 0.87,
  # Professional
  0.86, 0.81, 0.71, 0.50,
  # Freelance
  0.86, 0.78, 0.58, 0.34
),
  "Upper" = c(
  # Non-Professional
  0.98, 1.01, 1.01, 1.04,
  # Professional
  0.89, 0.89, 0.80, 0.62,
  # Freelance
  0.94, 0.99, 0.79, 0.65
)
)
combined_data$` ` <- paste(rep(" ", 20), collapse = " ")
# Create a column for RR (95% CI)
combined_data$`RR (95% CI)` <- sprintf("%.3f (%.3f to %.3f)",
                                       combined_data$Estimate,
                                       combined_data$Lower,
                                       combined_data$Upper)

names(combined_data)[2] <- "Gestational Age Category"

# Define theme
tm <- forest_theme(base_size = 10,
                   # Confidence interval point shape, line type/color/width
                   ci_pch = 15,
                   ci_col = "#762a83",
                   ci_fill = "black",
                   ci_alpha = 0.8,
                   ci_lty = 1,
                   ci_lwd = 1.5,
                   ci_Theight = 0.2, # Set a T end at the end of CI 
                   # Reference line width/type/color
                   refline_lwd = gpar(lwd = 1, lty = "dashed", col = "grey20"),
                   # Vertical line width/type/color
                   vertline_lwd = 1,
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # Change summary color for filling and borders
                   summary_fill = "#4575b4",
                   summary_col = "#4575b4",
                   # Footnote font size/face/color
                   footnote_gp = gpar(cex = 0.6, fontface = "italic", col = "blue"))


# Draw the forest plot with rearranged columns
forest(combined_data[, c(1:2,7:8)],
       est = combined_data$Estimate,
       lower = combined_data$Lower, 
       upper = combined_data$Upper,
       ci_column = 3,
       ref_line = 1,
       arrow_lab = c("Lower risk", "Higher risk"),
       xlim = c(0.3, 1.1),
       ticks_at = c(0.5, 0.75, 1, 1.1),
       theme = tm,
       legend_title = "Group")