一般基础R语言开展的统计分析结果不是整洁的表现形式。Kassambara,也就是ggpubr的作者,创造了rstatix。该包基于tidy架构和理念,可以方便的计算并呈现规整的统计结果,配合ggpubr包可以华丽地制作publication ready plot。
可以用以下形式安装
if(!require(devtools)) install.packages("devtools")
devtools::install_github("kassambara/rstatix") #或者
install.packages("rstatix")
library(rstatix)
library(ggpubr)
type参数选择统计量的类型。
ToothGrowth %>%
get_summary_stats(len, type = "common")
## # A tibble: 1 x 10
## variable n min max median iqr mean sd se ci
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 len 60 4.2 33.9 19.2 12.2 18.8 7.65 0.988 1.98
ToothGrowth %>%
group_by(dose) %>%
get_summary_stats(type = "full")
## # A tibble: 3 x 14
## dose variable n min max median q1 q3 iqr mad mean sd
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.5 len 20 4.2 21.5 9.85 7.22 12.2 5.03 4.00 10.6 4.5
## 2 1 len 20 13.6 27.3 19.2 16.2 23.4 7.12 5.78 19.7 4.42
## 3 2 len 20 18.5 33.9 26.0 23.5 27.8 4.3 3.71 26.1 3.77
## # ... with 2 more variables: se <dbl>, ci <dbl>
df <- ToothGrowth %>%
mutate(dose = as.factor(dose))
hist(df$len)
df %>% ggplot(aes(len)) +
geom_histogram(boundary = 0, binwidth = 5) +
scale_x_continuous(breaks = seq(0, 35, 5)) +
scale_y_continuous(breaks = seq(0, 15, 1)) +
theme_classic2()
# {ggpubr}Density plot
ggdensity(ToothGrowth$len, fill = "lightgray")
# {ggpubr}QQ plot
ggqqplot(ToothGrowth$len)
# {rstatix}正态性检验
df %>% shapiro_test(len)
## # A tibble: 1 x 3
## variable statistic p
## <chr> <dbl> <dbl>
## 1 len 0.967 0.109
## {stats}正态性检验,结果一致
df %$% shapiro.test(len)
##
## Shapiro-Wilk normality test
##
## data: len
## W = 0.96743, p-value = 0.1091
# Shapiro Wilk normality test for one variable
df %>% group_by(supp) %>% shapiro_test(len)
## # A tibble: 2 x 4
## supp variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 OJ len 0.918 0.0236
## 2 VC len 0.966 0.428
df %>% ggplot(aes(x = 1L, y = len))+ geom_violin() + geom_jitter()
df %>% rstatix::t_test(len ~ 1, mu = 10)
## # A tibble: 1 x 7
## .y. group1 group2 n statistic df p
## * <chr> <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 len 1 null model 60 8.92 59 1.53e-12
df %>% rstatix::t_test(len ~ supp)
## # A tibble: 1 x 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 len OJ VC 30 30 1.92 55.3 0.0606
## BaseR
library(magrittr)
ToothGrowth %>%
as_tibble() %$%
t.test(len ~ supp) %>%
broom::tidy()
## # A tibble: 1 x 10
## estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.70 20.7 17.0 1.92 0.0606 55.3 -0.171 7.57
## # ... with 2 more variables: method <chr>, alternative <chr>
df %>% rstatix::t_test(len ~ supp, paired = TRUE)
## # A tibble: 1 x 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 len OJ VC 30 30 3.30 29 0.00255
df %>%
group_by(dose) %>%
t_test(data =., len ~ supp) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance("p.adj")
## # A tibble: 3 x 11
## dose .y. group1 group2 n1 n2 statistic df p p.adj
## <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.5 len OJ VC 10 10 3.17 15.0 0.00636 0.0191
## 2 1 len OJ VC 10 10 4.03 15.4 0.00104 0.00312
## 3 2 len OJ VC 10 10 -0.0461 14.0 0.964 1
## # ... with 1 more variable: p.adj.signif <chr>
# 交互比较
df %>% t_test(len ~ dose)
## # A tibble: 3 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 len 0.5 1 20 20 -6.48 38.0 1.27e- 7 2.54e- 7 ****
## 2 len 0.5 2 20 20 -11.8 36.9 4.40e-14 1.32e-13 ****
## 3 len 1 2 20 20 -4.90 37.1 1.91e- 5 1.91e- 5 ****
# 参照组比较Comparison against reference group
df %>% t_test(len ~ dose, ref.group = "0.5")
## # A tibble: 2 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 len 0.5 1 20 20 -6.48 38.0 1.27e- 7 1.27e- 7 ****
## 2 len 0.5 2 20 20 -11.8 36.9 4.40e-14 8.80e-14 ****
# 分别与整体比较 Comparison against all
df %>% t_test(len ~ dose, ref.group = "all")
## # A tibble: 3 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 len all 0.5 60 20 5.82 56.4 2.90e-7 8.70e-7 ****
## 2 len all 1 60 20 -0.660 57.5 5.12e-1 5.12e-1 ns
## 3 len all 2 60 20 -5.61 66.5 4.25e-7 8.70e-7 ****
# 作图
p <- df %>% ggboxplot(x = "supp", y = "len", fill = "supp", width = 0.5)
p
# 统计分析
stat.ttest <- df %>%
rstatix::t_test(len ~ supp)
stat.ttest
## # A tibble: 1 x 8
## .y. group1 group2 n1 n2 statistic df p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 len OJ VC 30 30 1.92 55.3 0.0606
# 添加统计结果
p + ggpubr::stat_pvalue_manual(stat.ttest,
# label = "p",
label = "T-test, p = {p}",
y.position = 35,
tip.length = 0)
# 作图
p <- df %>% ggboxplot(x = "supp", y = "len", width = 0.2,
ylim = c(0,40),
fill = "supp", facet.by = "dose")
# 统计分析
stat.ttest <- df %>% group_by(dose) %>%
rstatix::t_test(len ~ supp)
stat.ttest
## # A tibble: 3 x 9
## dose .y. group1 group2 n1 n2 statistic df p
## * <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 0.5 len OJ VC 10 10 3.17 15.0 0.00636
## 2 1 len OJ VC 10 10 4.03 15.4 0.00104
## 3 2 len OJ VC 10 10 -0.0461 14.0 0.964
# 添加统计结果
p + ggpubr::stat_pvalue_manual(stat.ttest,
# label = "p",
label = "T-test, p = {p}",
y.position = 35,
tip.length = 0)
## 还可根据各分面设置LABEL位置
stat.ttest1 <- df %>% group_by(dose) %>%
rstatix::t_test(len ~ supp) %>%
add_xy_position()
stat.ttest1
## # A tibble: 3 x 13
## dose .y. group1 group2 n1 n2 statistic df p y.position
## <fct> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 0.5 len OJ VC 10 10 3.17 15.0 0.00636 24.2
## 2 1 len OJ VC 10 10 4.03 15.4 0.00104 30.0
## 3 2 len OJ VC 10 10 -0.0461 14.0 0.964 36.6
## # ... with 3 more variables: groups <named list>, xmin <int>, xmax <int>
p + stat_pvalue_manual(stat.ttest1, tip.length = 0)
# 作图
p <- df %>%
ggboxplot(x = "dose", y = "len", fill = "dose",
width = .4, ylim = c(0,45))
p
# 统计分析
stat.ttest <- df %>%
t_test(len ~ dose)
stat.ttest
## # A tibble: 3 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 len 0.5 1 20 20 -6.48 38.0 1.27e- 7 2.54e- 7 ****
## 2 len 0.5 2 20 20 -11.8 36.9 4.40e-14 1.32e-13 ****
## 3 len 1 2 20 20 -4.90 37.1 1.91e- 5 1.91e- 5 ****
# 添加统计结果
p + stat_pvalue_manual(stat.ttest,
label = "p = {p}",
# label = "{p.adj.signif}",
y.position = c(35, 39, 43),
tip.length = 0)
还可以
com <- list(c("0.5", "1"), c("0.5", "2"), c("1", "2"))
df %>% ggboxplot(x = "dose", y = "len", fill = "dose",
width = .4, ylim = c(0,45))+
stat_compare_means(label.y = 43, method = "anova")+
stat_compare_means(comparisons = com, method = "t.test")
# T-test
stat.test <- df %>% t_test(len ~ dose, ref.group = "all")
stat.test
## # A tibble: 3 x 10
## .y. group1 group2 n1 n2 statistic df p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 len all 0.5 60 20 5.82 56.4 2.90e-7 8.70e-7 ****
## 2 len all 1 60 20 -0.660 57.5 5.12e-1 5.12e-1 ns
## 3 len all 2 60 20 -5.61 66.5 4.25e-7 8.70e-7 ****
# Box plot with horizontal mean line
ggboxplot(df, x = "dose", y = "len", fill = 'dose', width = .3) +
stat_pvalue_manual(
stat.test, label = "p.adj.signif",
y.position = 35,
remove.bracket = TRUE
) +
geom_hline(yintercept = mean(df$len), linetype = 2)
Reference:Alboukadel Kassambara (2020). rstatix: Pipe-Friendly Framework for Basic Statistical Tests. R package version 0.6.0.999. https://rpkgs.datanovia.com/rstatix/