本文档把
Data2.xlsx
放在你代码里写的路径即可:~/Documents/小红书/蜜蜂图/Data2.xlsx
。
如果运行时出现 “Removed xxx rows …” 或 ggplot2 的size
提示,属于预期(数据有 NA、以及新版 ggplot2 的提醒),这里保持你的原样。
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
theme_set(theme_classic(base_size = 16))
pkgs <- c("readxl","reshape2","ggplot2","ggsignif","rstatix","tidyverse","ggpubr")
need <- setdiff(pkgs, rownames(installed.packages()))
if (length(need)) install.packages(need, repos = "https://cloud.r-project.org")
library(readxl)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggplot2)
library(ggsignif)
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(tidyverse)
library(ggpubr)
theme_set(theme_classic(base_size = 16))
笔记: - 这里加载了绘图与统计相关包;theme_set()
统一设定了经典主题。
data_path <- "~/Documents/小红书/蜜蜂图/Data2.xlsx"
exprSet <- as.data.frame(read_excel(data_path, sheet = 1))
# 宽转长
exprSet2 <- reshape2::melt(exprSet) # -> Group / variable / value
## Using Group as id variables
head(exprSet2)
## Group variable value
## 1 NS Smoking 1.1438213
## 2 NS Smoking 0.5760341
## 3 NS Smoking 0.8769649
## 4 NS Smoking 0.5391636
## 5 NS Smoking 0.4813568
## 6 NS Smoking 0.6038886
# Group variable value
# 1 NS Smoking 0.4837348
# 2 NS Smoking 0.2165572
# 3 NS Smoking 0.5785105
# 4 NS Smoking 0.7888895
# 5 NS Smoking 0.5367586
# 6 NS Smoking 0.4482413
exprSet2$Group <- factor(exprSet2$Group,
levels = c("NS","SMK","NS+abx","SMK+abx"),
labels = c("Non-\\nSMK","SMK","Non-\\nSMK+\\nabx","SMK+\\nabx"))
colnames(exprSet2) <- c("Group2","Group","Value")
library(ggplot2)
mytheme <- theme_classic() +
theme(legend.title = element_blank(),
legend.text = element_text(size = 20),
legend.position = "top",
plot.margin = margin(t = 5, # 顶部边缘距离
r = 10, # 右边边缘距离
b = 5, # 底部边缘距离
l = 5), # 左边边缘距离
axis.text = element_text(size = 20),
axis.title = element_text(size = 20))
笔记: - 此处保持你的因子设置顺序(先对 exprSet2$Group
设定标签,再统一改列名)。
- 若后面想避免 NA 提示,可在这里手动
exprSet2 <- na.omit(exprSet2)
;本 Rmd 不做改动。
library(ggplot2)
p1 <- ggplot(exprSet2,aes(x = Group,y = Value)) +
scale_y_continuous(expand = c(0,0),limits = c(-1,4),breaks = seq(-1,4,1)) +
labs(x = NULL,y = "Weight Change(%)",title = NULL) +
mytheme
p2 <- p1 +
annotate("rect", xmin = 1.5, xmax = Inf, fill = "grey",
ymin = -1, ymax = Inf, alpha = .9)
p3 <- p2 +
# 添加散点
geom_point(aes(fill = Group2),
show.legend = F,color = "black",
position = position_jitterdodge(seed = 12345,
dodge.width = 0.8,
jitter.width = 0.15),
shape = 21,size = 4)
p4 <- p3 +
# 均值\\中位数横线
geom_crossbar(aes(fill = Group2),
stat = "summary",
fun = median,
position = position_dodge(width = 0.8),
size = 0.5,width = 0.5,
color = 'black')
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# stat_summary(fun = median,
# position = position_dodge(width = 0.8),
# geom = "crossbar",
# size = 0.5,width = 0.5,
# color = 'black')
p5 <- p4 +
# 修改颜色
scale_color_manual(values = c("#4d97cd","#db6968","#008343","#e8c559")) +
scale_fill_manual(values = c("#4d97cd","#db6968","#008343","#e8c559")) +
# 添加额外水平线
geom_hline(yintercept = 0,
size = 1,color = "black",lty = "dashed")
p5
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 291 rows containing missing values or values outside the scale range
## (`geom_point()`).
笔记: - 背景矩形放在最前,避免遮挡点与横线。
- position_jitterdodge()
同时实现分组错位 +
组内抖动。
- 中位数横线用
geom_crossbar(stat="summary", fun=median)
。
library(rstatix)
library(tidyverse)
stat.test <- exprSet2 %>%
group_by(Group) %>%
t_test(Value ~ Group2) %>%
adjust_pvalue() %>%
add_significance("p.adj") %>%
add_xy_position(x = "Group")
data.frame(stat.test)
## Group .y. group1 group2 n1 n2 statistic
## 1 Smoking Value Non-\\nSMK SMK 37 38 10.4405208
## 2 Smoking Value Non-\\nSMK Non-\\nSMK+\\nabx 37 40 -2.3915685
## 3 Smoking Value Non-\\nSMK SMK+\\nabx 37 39 13.5021618
## 4 Smoking Value SMK Non-\\nSMK+\\nabx 38 40 -12.3976418
## 5 Smoking Value SMK SMK+\\nabx 38 39 2.4697722
## 6 Smoking Value Non-\\nSMK+\\nabx SMK+\\nabx 40 39 15.4467640
## 7 Cessation Value Non-\\nSMK SMK 33 34 -9.7945223
## 8 Cessation Value Non-\\nSMK Non-\\nSMK+\\nabx 33 36 -6.4484627
## 9 Cessation Value Non-\\nSMK SMK+\\nabx 33 34 -5.6989436
## 10 Cessation Value SMK Non-\\nSMK+\\nabx 34 36 4.9474092
## 11 Cessation Value SMK SMK+\\nabx 34 34 4.1635492
## 12 Cessation Value Non-\\nSMK+\\nabx SMK+\\nabx 36 34 -0.3225305
## df p p.adj p.adj.signif y.position
## 1 71.16157 5.31e-16 4.779e-15 **** 1.708640
## 2 74.99034 1.90e-02 4.800e-02 * 1.906208
## 3 73.43474 1.39e-21 1.529e-20 **** 2.103776
## 4 74.31546 9.04e-20 9.040e-19 **** 2.301344
## 5 74.58886 1.60e-02 4.800e-02 * 2.498912
## 6 76.54895 3.12e-25 3.744e-24 **** 2.696480
## 7 55.64978 1.03e-13 8.240e-13 **** 2.441640
## 8 66.79276 1.47e-08 1.029e-07 **** 2.639208
## 9 60.22328 3.85e-07 2.310e-06 **** 2.836776
## 10 55.96572 7.24e-06 3.620e-05 **** 3.034344
## 11 64.60785 9.47e-05 3.788e-04 *** 3.231912
## 12 61.08634 7.48e-01 7.480e-01 ns 3.429480
## groups x xmin xmax
## 1 Non-\\nSMK, SMK 1 0.7 0.9
## 2 Non-\\nSMK, Non-\\nSMK+\\nabx 1 0.7 1.1
## 3 Non-\\nSMK, SMK+\\nabx 1 0.7 1.3
## 4 SMK, Non-\\nSMK+\\nabx 1 0.9 1.1
## 5 SMK, SMK+\\nabx 1 0.9 1.3
## 6 Non-\\nSMK+\\nabx, SMK+\\nabx 1 1.1 1.3
## 7 Non-\\nSMK, SMK 2 1.7 1.9
## 8 Non-\\nSMK, Non-\\nSMK+\\nabx 2 1.7 2.1
## 9 Non-\\nSMK, SMK+\\nabx 2 1.7 2.3
## 10 SMK, Non-\\nSMK+\\nabx 2 1.9 2.1
## 11 SMK, SMK+\\nabx 2 1.9 2.3
## 12 Non-\\nSMK+\\nabx, SMK+\\nabx 2 2.1 2.3
stat.test$y.position <- stat.test$y.position - 0.5
p5 + stat_pvalue_manual(stat.test,label = "p.adj.signif",hide.ns = T,#label = "p" or label = "p.adj"
tip.length = 0.01,label.size = 8)
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 291 rows containing missing values or values outside the scale range
## (`geom_point()`).
tmp <- stat.test[c(1,6,7,11),]
data.frame(tmp)
## Group .y. group1 group2 n1 n2 statistic df
## 1 Smoking Value Non-\\nSMK SMK 37 38 10.440521 71.16157
## 2 Smoking Value Non-\\nSMK+\\nabx SMK+\\nabx 40 39 15.446764 76.54895
## 3 Cessation Value Non-\\nSMK SMK 33 34 -9.794522 55.64978
## 4 Cessation Value SMK SMK+\\nabx 34 34 4.163549 64.60785
## p p.adj p.adj.signif y.position groups x
## 1 5.31e-16 4.779e-15 **** 1.208640 Non-\\nSMK, SMK 1
## 2 3.12e-25 3.744e-24 **** 2.196480 Non-\\nSMK+\\nabx, SMK+\\nabx 1
## 3 1.03e-13 8.240e-13 **** 1.941640 Non-\\nSMK, SMK 2
## 4 9.47e-05 3.788e-04 *** 2.731912 SMK, SMK+\\nabx 2
## xmin xmax
## 1 0.7 0.9
## 2 1.1 1.3
## 3 1.7 1.9
## 4 1.9 2.3
# Group .y. group1 group2 n1 n2 statistic df p
#1 Smoking Value Non-\\nSMK SMK 37 38 8.657194 52.58912 1.08e-11
#2 Smoking Value Non-\\nSMK+\\nabx SMK+\\nabx 40 39 9.813350 55.89030 9.20e-14
#3 Cessation Value Non-\\nSMK SMK 33 34 -9.476267 62.06300 1.13e-13
#4 Cessation Value SMK SMK+\\nabx 34 34 5.275416 59.57884 1.94e-06
# p.adj p.adj.signif y.position groups x xmin xmax
#1 8.640e-11 2.062760 Non-\\nSMK, SMK 1 0.7 0.9
#2 1.104e-12 3.627320 Non-\\nSMK+\\nabx, SMK+\\nabx 1 1.1 1.3
#3 1.130e-12 2.567760 Non-\\nSMK, SMK 2 1.7 1.9
#4 1.164e-05 3.819408 SMK, SMK+\\nabx 2 1.9 2.3
library(ggsignif)
p5 + geom_signif(annotations = tmp$p.adj.signif,
y_position = c(2.5,2.8,3.1,3.6),
xmin = tmp$xmin,
xmax = tmp$xmax,
size = 1.2,textsize = 8,
tip_length = 0)
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 291 rows containing non-finite outside the scale range
## (`stat_signif()`).
## Warning: Removed 291 rows containing missing values or values outside the scale range
## (`geom_point()`).
笔记: - 第一段用 rstatix + ggpubr
自动标注所有比较;第二段从结果中挑选 4 个比较,用 ggsignif
手工放置。
- 若想避免点位和星标距离太近,可把 stat.test$y.position
的偏移量再调大一点,或把 y_position
数组改高。
若需要导出 PNG/PDF,可在末尾自行追加:
ggsave("figure_manual.png", width = 8, height = 5.6, dpi = 300)