本文档把 Data2.xlsx放在你代码里写的路径即可:~/Documents/小红书/蜜蜂图/Data2.xlsx
如果运行时出现 “Removed xxx rows …” 或 ggplot2 的 size 提示,属于预期(数据有 NA、以及新版 ggplot2 的提醒),这里保持你的原样。


1) 环境与主题(原样)

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)

theme_set(theme_classic(base_size = 16))
pkgs <- c("readxl","reshape2","ggplot2","ggsignif","rstatix","tidyverse","ggpubr")
need <- setdiff(pkgs, rownames(installed.packages()))
if (length(need)) install.packages(need, repos = "https://cloud.r-project.org")

library(readxl)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggplot2)
library(ggsignif)
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(tidyverse)
library(ggpubr)

theme_set(theme_classic(base_size = 16))

笔记: - 这里加载了绘图与统计相关包;theme_set() 统一设定了经典主题。


2) 读取数据 + 宽转长 + 因子与主题(原样)

data_path <- "~/Documents/小红书/蜜蜂图/Data2.xlsx"
exprSet <- as.data.frame(read_excel(data_path, sheet = 1))

# 宽转长
exprSet2 <- reshape2::melt(exprSet)  # -> Group / variable / value
## Using Group as id variables
head(exprSet2)
##   Group variable     value
## 1    NS  Smoking 1.1438213
## 2    NS  Smoking 0.5760341
## 3    NS  Smoking 0.8769649
## 4    NS  Smoking 0.5391636
## 5    NS  Smoking 0.4813568
## 6    NS  Smoking 0.6038886
# Group variable     value
# 1    NS  Smoking 0.4837348
# 2    NS  Smoking 0.2165572
# 3    NS  Smoking 0.5785105
# 4    NS  Smoking 0.7888895
# 5    NS  Smoking 0.5367586
# 6    NS  Smoking 0.4482413
exprSet2$Group <- factor(exprSet2$Group,
                         levels = c("NS","SMK","NS+abx","SMK+abx"),
                         labels = c("Non-\\nSMK","SMK","Non-\\nSMK+\\nabx","SMK+\\nabx"))       
colnames(exprSet2) <- c("Group2","Group","Value")
library(ggplot2)
mytheme <- theme_classic() +
  theme(legend.title = element_blank(),
        legend.text = element_text(size = 20),
        legend.position = "top",
        plot.margin = margin(t = 5,  # 顶部边缘距离
                             r = 10,  # 右边边缘距离
                             b = 5,  # 底部边缘距离
                             l = 5), # 左边边缘距离
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 20))

笔记: - 此处保持你的因子设置顺序(先对 exprSet2$Group 设定标签,再统一改列名)。
- 若后面想避免 NA 提示,可在这里手动 exprSet2 <- na.omit(exprSet2);本 Rmd 不做改动。


3) 先画背景,再画主体(原样)

library(ggplot2)
p1 <- ggplot(exprSet2,aes(x = Group,y = Value)) +
  scale_y_continuous(expand = c(0,0),limits = c(-1,4),breaks = seq(-1,4,1)) + 
  labs(x = NULL,y = "Weight Change(%)",title = NULL) + 
  mytheme 
p2 <- p1 + 
  annotate("rect", xmin = 1.5, xmax = Inf, fill = "grey",
           ymin = -1, ymax = Inf, alpha = .9) 

p3 <- p2 + 
  # 添加散点
  geom_point(aes(fill = Group2),
             show.legend = F,color = "black",
             position = position_jitterdodge(seed = 12345,
                                             dodge.width = 0.8,
                                             jitter.width = 0.15),
             shape = 21,size = 4)

p4 <- p3 + 
  # 均值\\中位数横线
  geom_crossbar(aes(fill = Group2),
                stat = "summary",
                fun = median,
                position = position_dodge(width = 0.8),
                size = 0.5,width = 0.5,
                color = 'black')
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# stat_summary(fun = median,
# position = position_dodge(width = 0.8),
# geom = "crossbar",
# size = 0.5,width = 0.5,
# color = 'black')

p5 <- p4 + 
  # 修改颜色
  scale_color_manual(values = c("#4d97cd","#db6968","#008343","#e8c559")) +
  scale_fill_manual(values = c("#4d97cd","#db6968","#008343","#e8c559")) + 
  # 添加额外水平线
  geom_hline(yintercept = 0,
             size = 1,color = "black",lty = "dashed")

p5
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 291 rows containing missing values or values outside the scale range
## (`geom_point()`).

笔记: - 背景矩形放在最前,避免遮挡点与横线。
- position_jitterdodge() 同时实现分组错位 + 组内抖动。
- 中位数横线用 geom_crossbar(stat="summary", fun=median)


4) 显著性:批量+手工(原样)

library(rstatix)
library(tidyverse)
stat.test <- exprSet2 %>% 
  group_by(Group) %>% 
  t_test(Value ~ Group2) %>%
  adjust_pvalue() %>% 
  add_significance("p.adj") %>% 
  add_xy_position(x = "Group")
data.frame(stat.test)
##        Group   .y.            group1            group2 n1 n2   statistic
## 1    Smoking Value        Non-\\nSMK               SMK 37 38  10.4405208
## 2    Smoking Value        Non-\\nSMK Non-\\nSMK+\\nabx 37 40  -2.3915685
## 3    Smoking Value        Non-\\nSMK        SMK+\\nabx 37 39  13.5021618
## 4    Smoking Value               SMK Non-\\nSMK+\\nabx 38 40 -12.3976418
## 5    Smoking Value               SMK        SMK+\\nabx 38 39   2.4697722
## 6    Smoking Value Non-\\nSMK+\\nabx        SMK+\\nabx 40 39  15.4467640
## 7  Cessation Value        Non-\\nSMK               SMK 33 34  -9.7945223
## 8  Cessation Value        Non-\\nSMK Non-\\nSMK+\\nabx 33 36  -6.4484627
## 9  Cessation Value        Non-\\nSMK        SMK+\\nabx 33 34  -5.6989436
## 10 Cessation Value               SMK Non-\\nSMK+\\nabx 34 36   4.9474092
## 11 Cessation Value               SMK        SMK+\\nabx 34 34   4.1635492
## 12 Cessation Value Non-\\nSMK+\\nabx        SMK+\\nabx 36 34  -0.3225305
##          df        p     p.adj p.adj.signif y.position
## 1  71.16157 5.31e-16 4.779e-15         ****   1.708640
## 2  74.99034 1.90e-02 4.800e-02            *   1.906208
## 3  73.43474 1.39e-21 1.529e-20         ****   2.103776
## 4  74.31546 9.04e-20 9.040e-19         ****   2.301344
## 5  74.58886 1.60e-02 4.800e-02            *   2.498912
## 6  76.54895 3.12e-25 3.744e-24         ****   2.696480
## 7  55.64978 1.03e-13 8.240e-13         ****   2.441640
## 8  66.79276 1.47e-08 1.029e-07         ****   2.639208
## 9  60.22328 3.85e-07 2.310e-06         ****   2.836776
## 10 55.96572 7.24e-06 3.620e-05         ****   3.034344
## 11 64.60785 9.47e-05 3.788e-04          ***   3.231912
## 12 61.08634 7.48e-01 7.480e-01           ns   3.429480
##                           groups x xmin xmax
## 1                Non-\\nSMK, SMK 1  0.7  0.9
## 2  Non-\\nSMK, Non-\\nSMK+\\nabx 1  0.7  1.1
## 3         Non-\\nSMK, SMK+\\nabx 1  0.7  1.3
## 4         SMK, Non-\\nSMK+\\nabx 1  0.9  1.1
## 5                SMK, SMK+\\nabx 1  0.9  1.3
## 6  Non-\\nSMK+\\nabx, SMK+\\nabx 1  1.1  1.3
## 7                Non-\\nSMK, SMK 2  1.7  1.9
## 8  Non-\\nSMK, Non-\\nSMK+\\nabx 2  1.7  2.1
## 9         Non-\\nSMK, SMK+\\nabx 2  1.7  2.3
## 10        SMK, Non-\\nSMK+\\nabx 2  1.9  2.1
## 11               SMK, SMK+\\nabx 2  1.9  2.3
## 12 Non-\\nSMK+\\nabx, SMK+\\nabx 2  2.1  2.3
stat.test$y.position <- stat.test$y.position - 0.5
p5 + stat_pvalue_manual(stat.test,label = "p.adj.signif",hide.ns = T,#label = "p" or label = "p.adj"
                        tip.length = 0.01,label.size = 8)
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 291 rows containing missing values or values outside the scale range
## (`geom_point()`).

tmp <- stat.test[c(1,6,7,11),]
data.frame(tmp)
##       Group   .y.            group1     group2 n1 n2 statistic       df
## 1   Smoking Value        Non-\\nSMK        SMK 37 38 10.440521 71.16157
## 2   Smoking Value Non-\\nSMK+\\nabx SMK+\\nabx 40 39 15.446764 76.54895
## 3 Cessation Value        Non-\\nSMK        SMK 33 34 -9.794522 55.64978
## 4 Cessation Value               SMK SMK+\\nabx 34 34  4.163549 64.60785
##          p     p.adj p.adj.signif y.position                        groups x
## 1 5.31e-16 4.779e-15         ****   1.208640               Non-\\nSMK, SMK 1
## 2 3.12e-25 3.744e-24         ****   2.196480 Non-\\nSMK+\\nabx, SMK+\\nabx 1
## 3 1.03e-13 8.240e-13         ****   1.941640               Non-\\nSMK, SMK 2
## 4 9.47e-05 3.788e-04          ***   2.731912               SMK, SMK+\\nabx 2
##   xmin xmax
## 1  0.7  0.9
## 2  1.1  1.3
## 3  1.7  1.9
## 4  1.9  2.3
#      Group   .y.          group1    group2 n1 n2 statistic       df        p
#1   Smoking Value       Non-\\nSMK       SMK 37 38  8.657194 52.58912 1.08e-11
#2   Smoking Value Non-\\nSMK+\\nabx SMK+\\nabx 40 39  9.813350 55.89030 9.20e-14
#3 Cessation Value       Non-\\nSMK       SMK 33 34 -9.476267 62.06300 1.13e-13
#4 Cessation Value             SMK SMK+\\nabx 34 34  5.275416 59.57884 1.94e-06
#      p.adj p.adj.signif y.position                     groups x xmin xmax
#1 8.640e-11            2.062760             Non-\\nSMK, SMK 1  0.7  0.9
#2 1.104e-12            3.627320 Non-\\nSMK+\\nabx, SMK+\\nabx 1  1.1  1.3
#3 1.130e-12            2.567760             Non-\\nSMK, SMK 2  1.7  1.9
#4 1.164e-05            3.819408             SMK, SMK+\\nabx 2  1.9  2.3
library(ggsignif)
p5 + geom_signif(annotations = tmp$p.adj.signif,
                 y_position = c(2.5,2.8,3.1,3.6),
                 xmin = tmp$xmin,
                 xmax = tmp$xmax,
                 size = 1.2,textsize = 8,
                 tip_length = 0) 
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Warning: Removed 291 rows containing missing values or values outside the scale range
## (`geom_point()`).

笔记: - 第一段用 rstatix + ggpubr 自动标注所有比较;第二段从结果中挑选 4 个比较,用 ggsignif 手工放置。
- 若想避免点位和星标距离太近,可把 stat.test$y.position 的偏移量再调大一点,或把 y_position 数组改高。


5) 输出(可选,自行添加)

若需要导出 PNG/PDF,可在末尾自行追加:

ggsave("figure_manual.png", width = 8, height = 5.6, dpi = 300)