t检验是健康数据科学中很常见的操作,那么如何对多组样本一次性批量开展t检验呢?本文介绍如何用R语言快速批量开展多组两独立样本t检验。
library(tidyverse)
library(rstatix)
以经典数据集iris为例。
# Auto-print the iris data frame to inspect the example data.
iris
任务01:对iris数据集中setosa和virginica两个物种的Sepal.Length开展两独立样本t检验(先做正态性检验和方差齐性检验)。
# Shapiro-Wilk normality test on Sepal.Length, restricted to the setosa rows.
shapiro.test(iris$Sepal.Length[iris$Species == "setosa"])
##
## Shapiro-Wilk normality test
##
## data: iris$Sepal.Length[iris$Species == "setosa"]
## W = 0.9777, p-value = 0.4595
# Shapiro-Wilk normality test on Sepal.Length, restricted to the virginica rows.
shapiro.test(iris$Sepal.Length[iris$Species == "virginica"])
##
## Shapiro-Wilk normality test
##
## data: iris$Sepal.Length[iris$Species == "virginica"]
## W = 0.97118, p-value = 0.2583
# F test comparing the Sepal.Length variance of setosa (first sample)
# with that of virginica (second sample).
var.test(iris$Sepal.Length[iris$Species == "setosa"],
iris$Sepal.Length[iris$Species == "virginica"])
##
## F test to compare two variances
##
## data: iris$Sepal.Length[iris$Species == "setosa"] and iris$Sepal.Length[iris$Species == "virginica"]
## F = 0.30729, num df = 49, denom df = 49, p-value = 6.366e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1743776 0.5414962
## sample estimates:
## ratio of variances
## 0.3072862
# Both groups pass the normality check but their variances differ,
# so Welch's t-test is used (see the "Welch Two Sample t-test" header in the
# printed result).
ttest0 <- t.test(iris$Sepal.Length[iris$Species == "setosa"],
iris$Sepal.Length[iris$Species == "virginica"])
# Auto-print the stored test result.
ttest0
##
## Welch Two Sample t-test
##
## data: iris$Sepal.Length[iris$Species == "setosa"] and iris$Sepal.Length[iris$Species == "virginica"]
## t = -15.386, df = 76.516, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.78676 -1.37724
## sample estimates:
## mean of x mean of y
## 5.006 6.588
# Convert the stored t-test result into a tidy data frame with broom.
broom::tidy(ttest0)
# Shapiro-Wilk normality check with rstatix for BOTH species being compared,
# mirroring the two base-R shapiro.test() calls for setosa and virginica.
# Keeping the two species of interest and grouping by Species yields one test
# per group.
iris %>%
  filter(Species %in% c("setosa", "virginica")) %>%
  group_by(Species) %>%
  shapiro_test(Sepal.Length)
# F test of equal Sepal.Length variances between setosa and virginica,
# using the formula interface on the filtered rows.
var.test(
  Sepal.Length ~ Species,
  data = filter(iris, Species %in% c("setosa", "virginica"))
)
##
## F test to compare two variances
##
## data: Sepal.Length by Species
## F = 0.30729, num df = 49, denom df = 49, p-value = 6.366e-05
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1743776 0.5414962
## sample estimates:
## ratio of variances
## 0.3072862
# Two-sample t-test of Sepal.Length between setosa and virginica with rstatix.
# filter() does not drop factor levels, so Species still carries the unused
# level "versicolor" after filtering; converting the grouping column to
# character leaves exactly the two groups of interest.
# (t_test() itself accepts a factor: it is run on the full Species factor
# later in this file.)
iris %>%
  filter(Species %in% c("setosa", "virginica")) %>%
  mutate(Species = as.character(Species)) %>%
  rstatix::t_test(
    Sepal.Length ~ Species
    # , detailed = TRUE
  )
任务02:对iris数据集中各物种两两之间的Sepal.Length开展两独立样本t检验(先做正态性检验和方差齐性检验)。
# Shapiro-Wilk test of Sepal.Length within each species; broom::tidy() turns
# each test result into data-frame columns so summarise() returns one table.
iris %>%
  group_by(Species) %>%
  summarise(broom::tidy(shapiro.test(Sepal.Length)))
# Build every pairwise combination of species, one column per pair.
# Column names are derived from the pair itself ("<first>_<second>") instead
# of being hard-coded, so the labels stay correct if the set or order of
# groups changes. For iris this yields setosa_versicolor, setosa_virginica
# and versicolor_virginica.
two_groups <- iris %>%
  distinct(Species) %>%
  pull(Species) %>%
  as.character() %>%
  combn(2) %>%
  as.data.frame() %>%
  set_names(map_chr(., paste, collapse = "_"))
# Pairwise variance-homogeneity (F) test: for each species pair, keep only the
# rows of those two species, run var.test() and tidy the result. The pair
# name is stored in the "group" column.
map_dfr(
  two_groups,
  function(pair) {
    iris %>%
      filter(Species %in% pair) %>%
      summarise(broom::tidy(var.test(Sepal.Length ~ Species)))
  },
  .id = "group"
)
# Pairwise t-tests without assuming equal variances (var.equal = FALSE):
# for each species pair, keep only the rows of those two species, run
# t.test() and tidy the result. The pair name is stored in the "group" column.
map_dfr(
  two_groups,
  function(pair) {
    iris %>%
      filter(Species %in% pair) %>%
      summarise(
        broom::tidy(t.test(Sepal.Length ~ Species, var.equal = FALSE))
      )
  },
  .id = "group"
)
# Per-species Shapiro-Wilk check of Sepal.Length via shapiro_test() on the
# grouped data.
shapiro_test(group_by(iris, Species), Sepal.Length)
# Build every pairwise combination of species, one column per pair.
# Column names are derived from the pair itself ("<first>_<second>") instead
# of being hard-coded, so the labels stay correct if the set or order of
# groups changes. For iris this yields setosa_versicolor, setosa_virginica
# and versicolor_virginica.
two_groups <- iris %>%
  distinct(Species) %>%
  pull(Species) %>%
  as.character() %>%
  combn(2) %>%
  as.data.frame() %>%
  set_names(map_chr(., paste, collapse = "_"))
# Variance-homogeneity test for each species pair; the pair name is stored in
# the "group" column.
map_dfr(
  two_groups,
  function(pair) {
    iris %>%
      filter(Species %in% pair) %>%
      summarise(
        # Alternative test: bartlett.test(Sepal.Length ~ Species)
        broom::tidy(var.test(Sepal.Length ~ Species))
      )
  },
  .id = "group"
)
# t-tests of Sepal.Length across all species with rstatix in a single call.
# NOTE(review): with more than two groups rstatix presumably runs all pairwise
# comparisons and adjusts the p-values; confirm against the rstatix docs.
t_test(iris, Sepal.Length ~ Species)
任务03:对iris数据集中各物种两两之间的Sepal.Length、Sepal.Width、Petal.Length及Petal.Width分别开展两独立样本t检验(先做正态性检验和方差齐性检验)。
# Reshape iris to long format: one row per flower and measurement, with the
# measurement name in "items" and its value in "cm".
# Every column except Species is pivoted; selecting by name rather than by
# position (1:4) keeps the selection correct if the column order changes.
iris_long <- iris %>%
  pivot_longer(
    cols = -Species,
    names_to = "items",
    values_to = "cm"
  )
# Shapiro-Wilk normality check of cm for every measurement/species combination.
shapiro_test(group_by(iris_long, items, Species), cm)
# Build every pairwise combination of species, one column per pair.
# Column names are derived from the pair itself ("<first>_<second>") instead
# of being hard-coded, so the labels stay correct if the set or order of
# groups changes. For iris this yields setosa_versicolor, setosa_virginica
# and versicolor_virginica.
two_groups <- iris %>%
  distinct(Species) %>%
  pull(Species) %>%
  as.character() %>%
  combn(2) %>%
  as.data.frame() %>%
  set_names(map_chr(., paste, collapse = "_"))
# For each species pair, run the variance F test separately for every
# measurement (grouped by items) and tidy the results. The pair name is
# stored in the "group" column.
map_dfr(
  two_groups,
  function(pair) {
    iris_long %>%
      filter(Species %in% pair) %>%
      group_by(items) %>%
      summarise(broom::tidy(var.test(cm ~ Species)))
  },
  .id = "group"
)
# t-tests of cm across species, run separately for each measurement (items).
t_test(group_by(iris_long, items), cm ~ Species)
1 Kassambara A. Rstatix: Pipe-friendly framework for basic statistical tests. 2021 https://CRAN.R-project.org/package=rstatix.
2 可我的家里有cy. 用Rmarkdown 写论文——解决参考文献与交叉引用. https://sspai.com/post/53998.