Data Preparation Function
#' Prepare Data for Manhattan Plot
#'
#' Extracts and formats the required columns for fastman_gg plotting
#'
#' @param stats Data frame containing gene-level statistics
#' @param point_label Character, name of column containing locus labels
#' @param uninfomative_labels Character vector, labels to exclude from annotation
#'
#' @return Data frame with columns: gene, SNP, CHR, BP, P, and point_label
get_plot_data <- function(stats,
point_label = "locus_symbol",
uninfomative_labels = NA) {
stats <- stats %>%
mutate(SNP = paste0("S", CHR, "_", BP)) %>%
dplyr::select(gene, SNP, CHR, BP, P, all_of(point_label)) %>%
distinct() %>%
arrange(CHR, BP)
# Check if uninfomative_labels has any non-NA values
if (length(uninfomative_labels) > 0 && any(!is.na(uninfomative_labels))) {
# Set to empty string so points are plotted but not labeled
stats[[point_label]][stats[[point_label]] %in% uninfomative_labels] <- ""
}
stats
}
Locus Label Annotation Function
#' Add Locus Labels to Manhattan Plot
#'
#' Adds ggrepel text annotations for top SNPs to an existing Manhattan plot
#'
#' @param p ggplot object (Manhattan plot)
#' @param m Data frame with CHR, BP, P, and label column
#' @param transform_coordinates Function that transforms CHR, BP to BPn
#' @param top_n Integer, number of top loci to annotate
#' @param maxnegLogP Maximum -log10(P) value for y-axis positioning
#' @param label_column Character, name of column to use for labels
#' @param text_size Numeric, size of annotation text
#' @param segment_color Character, color of connecting segments
#'
#' @return ggplot object with added annotations
add_locus_labels <- function(p,
m,
transform_coordinates,
top_n,
maxnegLogP,
label_column = "locus_symbol",
text_size = 5,
segment_color = "black") {
if (is.null(top_n) || top_n <= 0) {
return(p)
}
# Calculate BPn for all points using transformer
m_with_coords <- m %>%
mutate(
BPn = transform_coordinates(CHR, BP),
logP = -log10(P)
)
# Get top N SNPs with non-empty labels
top_snps <- m_with_coords %>%
filter(!is.na(.data[[label_column]]) & trimws(.data[[label_column]]) != "") %>%
arrange(P) %>%
head(top_n)
if (nrow(top_snps) > 0) {
p <- p +
ggrepel::geom_text_repel(
data = top_snps,
aes(x = BPn, y = logP, label = .data[[label_column]]),
inherit.aes = FALSE,
ylim = c(maxnegLogP, NA),
segment.color = segment_color,
segment.size = 0.3,
min.segment.length = 0,
size = text_size,
box.padding = 0.5,
point.padding = 0.3,
force = 1.5,
max.overlaps = Inf,
fontface = "italic"
)
}
p
}
Main Manhattan Plot Function
#' Create Manhattan Plot using fastman_gg
#'
#' Creates a Manhattan plot with optional ggrepel annotations for top loci
#'
#' @param m Data frame formatted by get_plot_data(), must contain:
#' - CHR: chromosome number
#' - BP: base pair position
#' - P: p-value
#' - SNP: SNP identifier
#' - locus_symbol: gene/locus labels for annotation (optional)
#' @param title Character string for plot title (supports markdown)
#' @param top_n Integer, number of top SNPs to annotate with ggrepel.
#' Only SNPs with non-empty label values are considered.
#' If NULL or 0, no annotation is added
#' @param label_column Character, name of column to use for labels
#' @param ... Additional arguments passed to fastman_gg()
#'
#' @return ggplot object with optional annotations
#'
#' @examples
#' # Basic plot
#' plot_manhattan_gg(m = to_plot, title = "My Analysis")
#'
#' # With annotations
#' plot_manhattan_gg(m = to_plot, title = "My Analysis", top_n = 5)
#'
#' # Add vertical lines
#' p <- plot_manhattan_gg(m = to_plot, title = "My Analysis", top_n = 3)
#' transform <- get_transformer(to_plot)
#' p + geom_vline(xintercept = transform(4, 172e6), linetype = "dashed")
plot_manhattan_gg <- function(m,
title,
top_n = NULL,
label_column = "locus_symbol",
...) {
neglogFDR <- expression(-log[10](italic(FDR)))
FDR_thresh <- -log10(0.05)
maxnegLogP <- -log10(min(m$P[m$P > 0]))
# Get coordinate transformer
transform_coordinates <- get_transformer(m)
# Create base Manhattan plot
p <- fastman_gg(
m = m,
snp = "SNP",
maxP = maxnegLogP,
genomewideline = FDR_thresh,
suggestiveline = NULL,
ylab = neglogFDR,
xlab = "",
...
) +
ggtitle(title) +
theme_classic(base_size = 20) +
scale_y_continuous(expand = expansion(mult = c(0.01, 0.25))) +
theme(
plot.title = element_markdown(hjust = 1, size = 25, face = "bold"),
axis.text.x = element_text(angle = 0, hjust = 0.5),
panel.grid.major.y = element_line(color = "grey90", linewidth = 0.2),
legend.position = "none"
)
# Add locus labels if requested
p <- add_locus_labels(
p = p,
m = m,
transform_coordinates = transform_coordinates,
top_n = top_n,
maxnegLogP = maxnegLogP,
label_column = label_column
)
p
}