本文档演示如何用 ggplot2 + gghalves 复现 Nature Communications 图中常见的“云雨图(raincloud plot)”效果:半小提琴(云)+ 箱线(体)+ 抖点(雨)。
关键词:半小提琴geom_half_violin()、半箱线geom_half_boxplot()、抖点geom_jitter()、自定义顺序与配色、白填充箱线。
# 建议的基础包
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("gghalves",  quietly = TRUE)) install.packages("gghalves")
library(tidyverse)
library(gghalves)
# Fix the RNG seed so the simulated data below are reproducible.
set.seed(20250923)

# Simulate 150 values per species (each a mix of three normal components),
# then reshape to long format: one row per observation, with the source
# column name in `Species` and the simulated number in `value`.
df <- data.frame(
  subtills   = c(rnorm(100, 14, 1.3), rnorm(20, 18, 3.2), rnorm(30, 11, 2.5)),
  cereus     = c(rnorm(60,  11, 1.6), rnorm(40,  8.5, 1.2), rnorm(50,  5.5, 1.2)),
  megaterium = c(rnorm(130, 6.5, 1.1), rnorm(10,  9.0, 2.2), rnorm(10,  4.5, 0.8)),
  circulans  = c(rnorm(100, 4.2, 0.9), rnorm(40,  2.6, 0.6), rnorm(10,  1.3, 0.4))
) %>%
  # pivot_longer() replaces the superseded gather() and names the output
  # columns directly, so no follow-up rename() is needed. The result is a
  # tibble rather than a base data.frame.
  tidyr::pivot_longer(
    cols = dplyr::everything(),
    names_to = "Species",
    values_to = "value"
  ) %>%
  # pivot_longer() interleaves the source columns row by row; ordering by
  # first appearance puts the rows back species by species, as gather() did.
  dplyr::arrange(match(Species, unique(Species)))

# Quick look at the first rows of the long table.
head(df)
默认顺序可能与论文图不一致。我们可自定义排序:
# Left-to-right species order wanted on the x axis; re-levelling the factor
# with these levels replaces the default ordering.
variable <- c("subtills", "cereus", "megaterium", "circulans")
df$Species <- factor(df$Species, levels = variable)
自定义调色板(与示例中的色调保持一致):
# Colour palette as a named character vector, keyed by species name
# (in order: desaturated blue, salmon, green, sand).
pal <- setNames(
  c("#5E81AC", "#D08770", "#A3BE8C", "#EBCB8B"),
  c("subtills", "cereus", "megaterium", "circulans")
)
# Baseline for comparison: one full violin per species with a narrow,
# white-filled box plot drawn on top of it.
ggplot(data = df, mapping = aes(Species, value, fill = Species)) +
  geom_violin(trim = FALSE, linewidth = 0.3, alpha = 0.9) +
  geom_boxplot(
    # A constant fill here overrides the inherited fill = Species mapping.
    fill = "white", width = 0.18, linewidth = 0.4,
    outlier.shape = 21, outlier.size = 0.8
  ) +
  coord_cartesian(ylim = c(0, 25)) +
  scale_fill_manual(values = pal) +
  labs(y = "NO. of BGCs/genome", x = NULL) +
  theme_test(base_size = 15) +
  theme(
    axis.text.x = element_text(face = "italic"),
    axis.text = element_text(color = "black"),
    legend.position = "none"
  )
思路:
- 半小提琴和半箱线放在右侧(side = "r");
- 抖点整体向左偏移约 0.2,形成“云雨”分列(注意:geom_jitter() 不能同时指定 position 与 width/height,因此不能直接叠加 position_nudge(),需要改为偏移 x 值);
- 箱线白填充、轮廓着色以匹配小提琴颜色;
- 适度加 alpha,避免遮挡。
# Raincloud plot on a discrete x axis: half violin ("cloud") and half box
# plot ("body") to the right of each species position, jittered raw points
# ("rain") to the left.
ggplot(df, aes(x = Species, y = value)) +
  # Cloud: right-hand half violin.
  gghalves::geom_half_violin(
    aes(fill = Species, color = Species),
    side = "r", alpha = 0.9, linewidth = 0.3, trim = FALSE
  ) +
  # Body: right-hand half box plot, white fill with a species-coloured outline.
  gghalves::geom_half_boxplot(
    aes(color = Species),
    side = "r", width = 0.2, outlier.shape = NA,
    position = position_nudge(x = 0.15),  # nudged right of the species position
    fill = "white", linewidth = 0.5
  ) +
  # Rain: jittered points shifted to the left of the species position.
  # geom_jitter() cannot take a nudge in addition to width/height, so the
  # offset is applied to x itself; a discrete position scale accepts
  # continuous values once an earlier layer has established it.
  # factor() keeps an existing level order and also covers a character column.
  geom_jitter(
    aes(x = as.numeric(factor(Species)) - 0.2, color = Species),
    width = 0.12, height = 0, size = 1.4, alpha = 0.75
  ) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  # Zoom without dropping data from the density / box statistics.
  coord_cartesian(ylim = c(0, 25)) +
  labs(x = NULL, y = "NO. of BGCs/genome") +
  theme_test(base_size = 15) +
  theme(
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(face = "italic")
  )
如需更精细地控制各图层的位置,也可以采用“把因子转成数值 + 手动偏移”的做法来微调,下面给出对应实现:
# Raincloud plot on a continuous x axis: each species is placed at its
# integer factor code and every layer is offset by hand.
x_num <- as.numeric(df$Species)
ggplot(df) +
  # Cloud: right-hand half violin, offset 0.10 to the right.
  gghalves::geom_half_violin(
    aes(x = x_num + 0.10, y = value, fill = Species, color = Species),
    side = "r", alpha = 0.9, linewidth = 0.3, trim = FALSE
  ) +
  # Body: right-hand half box plot with white fill, same offset as the violin.
  gghalves::geom_half_boxplot(
    aes(x = x_num + 0.10, y = value, color = Species),
    side = "r", width = 0.18, outlier.shape = NA, fill = "white", linewidth = 0.5
  ) +
  # Rain: jittered points offset 0.18 to the left. height = 0 keeps the
  # y values exact, matching the discrete-axis version of this plot.
  geom_jitter(
    aes(x = x_num - 0.18, y = value, color = Species),
    width = 0.08, height = 0, size = 1.4, alpha = 0.75
  ) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal, guide = "none") +
  # Restore the species names as tick labels. Breaks are derived from the
  # factor levels so breaks and labels always have the same length, even if
  # a level has no rows in the data.
  scale_x_continuous(
    breaks = seq_along(levels(df$Species)),
    labels = levels(df$Species)
  ) +
  coord_cartesian(ylim = c(0, 25)) +
  labs(x = NULL, y = "NO. of BGCs/genome") +
  theme_test(base_size = 15) +
  theme(
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(face = "italic")
  )
几点提示:
- 白填充箱线:把 fill = "white" 放在 geom_half_boxplot()(或 geom_boxplot())里,同时使用 aes(color = Species) 让边框着色,达到“彩色边 + 白色内部”的审稿友好风格。
- 关于 cex:在 ggplot2 里,请用 size(点/文本大小)、linewidth(线宽)代替。
- 限制 y 轴范围:用 coord_cartesian(ylim = ...),而非 scale_y_continuous(limits = ...);后者会在统计计算之前剔除范围外的数据,从而改变小提琴和箱线的统计结果。
- 隐藏图例:可在 scale_*_manual()
里加 guide = "none"。
# Compact version of the raincloud plot, stored in `p` so it can be saved.
p <- ggplot(df, aes(x = Species, y = value)) +
  gghalves::geom_half_violin(
    aes(fill = Species, color = Species),
    side = "r", alpha = 0.9, linewidth = 0.3
  ) +
  gghalves::geom_half_boxplot(
    aes(color = Species),
    side = "r", width = 0.2, outlier.shape = NA,
    position = position_nudge(x = 0.08), fill = "white", linewidth = 0.5
  ) +
  # geom_jitter() raises an error when `position` is supplied together with
  # `width`/`height`, and position_nudge() would not jitter anyway. The left
  # shift is therefore applied to x, and the jitter is set via width/height.
  # factor() keeps an existing level order and also covers a character column.
  geom_jitter(
    aes(x = as.numeric(factor(Species)) - 0.18, color = Species),
    width = 0.08, height = 0, size = 1.4, alpha = 0.75
  ) +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  coord_cartesian(ylim = c(0, 25)) +
  labs(x = NULL, y = "NO. of BGCs/genome") +
  theme_test(base_size = 15) +
  theme(
    legend.position = "none",
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(face = "italic")
  )

# Export the figure: a 300 dpi PNG and a PDF written with the cairo_pdf device.
ggsave("raincloud_demo.png", p, width = 8, height = 5, dpi = 300)
ggsave("raincloud_demo.pdf", p, width = 8, height = 5, device = cairo_pdf)
补充:
- 这种组合(半小提琴 + 箱线 + 抖点)就是常说的 雨云图 /
云雨图(raincloud plot)。
- 也可考虑
ggdist::stat_halfeye()、ggbeeswarm::geom_quasirandom()
等扩展实现,视美观与性能取舍。更多请关注xhs:飞高高啦