rm(list=ls())
# 前四个与方差分析最紧密相关
library(rstatix) # identify_outliers, and include good stats test functions
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
##
## filter
library(dplyr) # %>%
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(bruceR) # a good ANOVA related wrapper
##
## bruceR (version 0.8.9)
## BRoadly Useful Convenient and Efficient R functions
##
## Packages also loaded:
## √ dplyr √ emmeans √ ggplot2
## √ tidyr √ effectsize √ ggtext
## √ stringr √ performance √ cowplot
## √ forcats √ lmerTest √ see
## √ data.table
##
## Main functions of `bruceR`:
## cc() Describe() TTEST()
## add() Freq() MANOVA()
## .mean() Corr() EMMEANS()
## set.wd() Alpha() PROCESS()
## import() EFA() model_summary()
## print_table() CFA() lavaan_summary()
##
## https://psychbruce.github.io/bruceR/
##
## These R packages are dependencies of `bruceR` but not installed:
## pacman, lmtest, vars, phia, BayesFactor, GGally, GPArotation
## ***** Please Install All Dependencies *****
## install.packages("bruceR", dep=TRUE)
library(car) # leveneTest
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(afex) # for factorial design anova
## ************
## Welcome to afex. For support visit: http://afex.singmann.science/
## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - NEWS: emmeans() for ANOVA models now uses model = 'multivariate' as default.
## - Get and set global package options with: afex_options()
## - Set orthogonal sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************
##
## Attaching package: 'afex'
## The following object is masked from 'package:lme4':
##
## lmer
library(ggpubr) # plot in publication-ready formats
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
library(purrr) # for functions dealing with functions
##
## Attaching package: 'purrr'
## The following object is masked from 'package:car':
##
## some
## The following object is masked from 'package:data.table':
##
## transpose
数据通常有两种存储的形式,一种是长数据,一种是短数据,长数据比较普遍。长数据和短数据的对比就是,长数据还需要根据分组变量将所有的数据分组。
anova
处理的对象是DataFrame,因此数据都要使用data.frame函数转化为数据框。
score = c(3,3,4,6,6,8,5,3,5,7,8,8,8,5,8,9,8,10,8,9,7,10,10,10)
subj = rep(1:6,times=4) # 善于使用rep函数,创建标签
strategy=c(rep(1,6),rep(2,6),rep(3,6),rep(4,6))
long_data = data.frame(score,subj,strategy) # 构建dataframe
long_data
## score subj strategy
## 1 3 1 1
## 2 3 2 1
## 3 4 3 1
## 4 6 4 1
## 5 6 5 1
## 6 8 6 1
## 7 5 1 2
## 8 3 2 2
## 9 5 3 2
## 10 7 4 2
## 11 8 5 2
## 12 8 6 2
## 13 8 1 3
## 14 5 2 3
## 15 8 3 3
## 16 9 4 3
## 17 8 5 3
## 18 10 6 3
## 19 8 1 4
## 20 9 2 4
## 21 7 3 4
## 22 10 4 4
## 23 10 5 4
## 24 10 6 4
拿到数据以后,应该首先检查是否具有缺失值,是否有极端值(outlier)。
使用is.na函数,可以配合sum看全局。
sum(is.na(long_data)) # 可以直接对所有数据检验是否有缺失值,如果没有直接跳过后续步骤
## [1] 0
# 具体对每一个列检验缺失值
sum(is.na(long_data$score))
## [1] 0
sum(is.na(long_data$subj))
## [1] 0
sum(is.na(long_data$strategy))
## [1] 0
num_na=long_data %>% map(is.na) # 也可以使用 map 函数直接对三个列进行检验,但是结果会比较不直观
summary(num_na)
## Length Class Mode
## score 24 -none- logical
## subj 24 -none- logical
## strategy 24 -none- logical
outlier:值在\(Q_3+1.5IQR\)和\(Q_1-1.5IQR\)区间之外
extreme:值在\(Q_3+3IQR\)和\(Q_1-3IQR\)区间之外
#检查极端值
long_data %>% # 管道相当于把管道之前的结果传入之后函数的第一个参数
group_by(strategy) %>% # 注意group_by的列并不需要实现转化为factor类型
identify_outliers(score) # 极端值必定是在每组之内讨论的,因此必须先进行分组
## # A tibble: 2 × 5
## strategy score subj is.outlier is.extreme
## <dbl> <dbl> <int> <lgl> <lgl>
## 1 3 5 2 TRUE TRUE
## 2 3 10 6 TRUE FALSE
数据框中包含的分组变量应该转化为factor类型,并且让数据依据其分组。
虽然不是一定要用到,但会比较保险,比如leveneTest就需要依照分组变量计算,绘制盒须图等也需要合适的分组变量,更何况,分组变量本身的性质就应该是factor。
分组变量转为因子类型的方法有: *
convert_as_factor:管道友好,可以传入多个参数 *
as.factor:简单直白,注意必须是对数据框中的列操作,而不是对原始列的变量操作
# 将分组变量转化为因子
# 本例中,subj和strategy都应该转化为因子
# 方法一:使用convert_as_factor
long_data=data.frame(long_data) %>%
convert_as_factor(strategy) # 注意,convert_as_factor函数是需要接收返回值的,它并不是一种方法,否则数据没有被处理
long_data # 发现strategy转变为了因子
## score subj strategy
## 1 3 1 1
## 2 3 2 1
## 3 4 3 1
## 4 6 4 1
## 5 6 5 1
## 6 8 6 1
## 7 5 1 2
## 8 3 2 2
## 9 5 3 2
## 10 7 4 2
## 11 8 5 2
## 12 8 6 2
## 13 8 1 3
## 14 5 2 3
## 15 8 3 3
## 16 9 4 3
## 17 8 5 3
## 18 10 6 3
## 19 8 1 4
## 20 9 2 4
## 21 7 3 4
## 22 10 4 4
## 23 10 5 4
## 24 10 6 4
# convert_as_factor可以传入多个参数,可以让所有的要因子化的列都转变为因子类型!
# 方法二:as.factor
# 错误示范,对原始列的变量直接操作
subj=as.factor(subj) # 改变的只是当时构成long_data中的那一列变量,但是long_data中的subj实际上并没有更新!
long_data # 发现并没有任何改变发生
## score subj strategy
## 1 3 1 1
## 2 3 2 1
## 3 4 3 1
## 4 6 4 1
## 5 6 5 1
## 6 8 6 1
## 7 5 1 2
## 8 3 2 2
## 9 5 3 2
## 10 7 4 2
## 11 8 5 2
## 12 8 6 2
## 13 8 1 3
## 14 5 2 3
## 15 8 3 3
## 16 9 4 3
## 17 8 5 3
## 18 10 6 3
## 19 8 1 4
## 20 9 2 4
## 21 7 3 4
## 22 10 4 4
## 23 10 5 4
## 24 10 6 4
# 正确示范:修改数据框的列
long_data$subj=as.factor(long_data$subj) # 注意必须是对数据框中的列操作
long_data # 发现subj转变为了因子
## score subj strategy
## 1 3 1 1
## 2 3 2 1
## 3 4 3 1
## 4 6 4 1
## 5 6 5 1
## 6 8 6 1
## 7 5 1 2
## 8 3 2 2
## 9 5 3 2
## 10 7 4 2
## 11 8 5 2
## 12 8 6 2
## 13 8 1 3
## 14 5 2 3
## 15 8 3 3
## 16 9 4 3
## 17 8 5 3
## 18 10 6 3
## 19 8 1 4
## 20 9 2 4
## 21 7 3 4
## 22 10 4 4
## 23 10 5 4
## 24 10 6 4
综合以上介绍,convert_as_factor是最通用、最便捷的!
long_data=data.frame(long_data) %>%
convert_as_factor(strategy,subj) # 注意,convert_as_factor函数是需要接收返回值的,它并不是一种方法,否则数据没有被处理。它被接收以后,下方不会有结果展示;但如果没有变量去接收,下方会直接展示结果,但这也失去了意义(除非这之后紧跟着另一个管道函数)
long_data # 发现strategy和subj都转变为了因子
## score subj strategy
## 1 3 1 1
## 2 3 2 1
## 3 4 3 1
## 4 6 4 1
## 5 6 5 1
## 6 8 6 1
## 7 5 1 2
## 8 3 2 2
## 9 5 3 2
## 10 7 4 2
## 11 8 5 2
## 12 8 6 2
## 13 8 1 3
## 14 5 2 3
## 15 8 3 3
## 16 9 4 3
## 17 8 5 3
## 18 10 6 3
## 19 8 1 4
## 20 9 2 4
## 21 7 3 4
## 22 10 4 4
## 23 10 5 4
## 24 10 6 4
数据基本无误或排除特殊情况后,根据anova的类型,检验需要的前提。比如正态性假设,方差同质假设、球形假设等。
long_data %>%
group_by(strategy) %>%
shapiro_test(score) # 注意这里的正态性检验的函数是shapiro_test,而不是shapiro.test
## # A tibble: 4 × 4
## strategy variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 1 score 0.896 0.352
## 2 2 score 0.896 0.352
## 3 3 score 0.873 0.238
## 4 4 score 0.831 0.110
leveneTest: 输入公式、输入数据levene_test:管道友好,输入公式注意,分组变量需要调整为因子类型,这一点建议在数据的基本处理阶段就要完成!
# leveneTest
leveneTest(score~strategy,data=long_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.9786 0.4226
## 20
# levene_test (%>%)
long_data %>%
convert_as_factor(strategy) %>% # 如果前期没有将strategy转化为因子类型,此处需要加上这条语句
levene_test(score~strategy)
## # A tibble: 1 × 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 3 20 0.979 0.423
# 也可以对比发现两种方法最终得到了一样的结果
mauchly.test:第一个参数传入线性回归模型,第二个参数传入公式
matrix_data = matrix(long_data$score,nrow=6,ncol=4) # 首先需要将数据转化为矩阵,其中行表示每个被试(subject),列表示不同的条件(treatment)
mlmfit = lm(matrix_data ~ 1) # 变量1~变量2,代表的是变量1根据变量2分组,如变量2是常数1,那么会返回一个拟合的线性模型的输出
mauchly.test(mlmfit,X=~1) # 第二个参数指定为X=~1,代表进行协方差同质性检验(testing for compound symmetry in the untransformed covariance matrix)
##
## Mauchly's test of sphericity
## Contrasts orthogonal to
## ~1
##
##
## data: SSD matrix from lm(formula = matrix_data ~ 1)
## W = 0.55102, p-value = 0.8239
# 输出的p-value越大越好,认为球形假设成立
aov,anova等 R 语言原生函数MANOVA:bruceR包中的强大函数aov(formula, data = NULL)
aov函数可以直接对两列数据进行方差分析,且注意第二列数据应该为因子类型(分组变量)。如果使用数据框,那么formula中的变量即为数据框的列名称。
aov函数返回的是组内和组间的和方、自由度,也即最基本的方差分析的结果。这一结果通常用变量承接,可以认为是一种模型,可以用于后续的分析,比如传入summary,anova函数中去。
Residuals指的是组内。
fit_model = aov(score ~ strategy,long_data) # 前一个为因变量,后一个为分组变量
fit_model
## Call:
## aov(formula = score ~ strategy, data = long_data)
##
## Terms:
## strategy Residuals
## Sum of Squares 60 62
## Deg. of Freedom 3 20
##
## Residual standard error: 1.760682
## Estimated effects may be unbalanced
anova(object, ...)
object:an object containing the results returned by a
model fitting function (e.g., lm or
glm).anova(object, ...)中可以包含一个或者多个这样的对象。“When
given a single argument it produces a table which tests whether the
model terms are significant. When given a sequence of objects, anova
tests the models against one another in the order specified.”
anova(object, ...)计算的是方差分析表,在原先aov得到的模型的基础上计算均方、F值、p值。
anova(fit_model)
## Analysis of Variance Table
##
## Response: score
## Df Sum Sq Mean Sq F value Pr(>F)
## strategy 3 60 20.0 6.4516 0.003116 **
## Residuals 20 62 3.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary也能返回一样的方差分析表。
“The function summary.lm computes and returns a list of summary statistics of the fitted linear model given in object”
summary(fit_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## strategy 3 60 20.0 6.452 0.00312 **
## Residuals 20 62 3.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MANOVA只需要提供数据,确定自变量,申明组内和组间的关系三步即可。返回的结果包括方差分析,效应量(gerenral
& partial, 90%CI)。
MANOVA(data,subID = NULL,dv = NULL,dvs = NULL,dvs.pattern = NULL,between = NULL,within = NULL,covariate = NULL,ss.type = "III",sph.correction = "none",aov.include = FALSE,digits = 3,nsmall = digits,file = NULL)
data:可以传入宽数据或长数据subID:被试的
ID,长数据需要传入,这在长数据中也即列的名称;对于宽数据不需要传入这一参数dv:自变量
dv即为因变量dv只有在被试间设计可用,对于被试内或者混合设计,要用dvs或dvs.patterndvs:重复测量方差分析。只适用于宽数据(被试内或混合设计)。传入方式有两种
start:stop:声明变量的范围dvs.pattern:如果使用了dvs.,那么要传入正则表达式明确变量名的样式between:between-subjects
factor(s),如有多个分组变量,将它们包含在向量中within:within-subjects
factor(s),如有多个分组变量,将它们包含在向量中sph.correction:违背球形假设的纠正。只适用于多余三个水平的重复测量方差分析。通常设置为GG其中,between和within需要至少明确一个量!如果需要进行重复测量(消除被试个体差异),就用within指定;否则用between指定。
长数据和宽数据的比较:可以认为长数据包含了组间的信息(自变量),也包含了组内的信息(被试个体)。长数据的优点是简单,可以承载任意维度的信息,但宽数据承载的信息有限,可以把向MANOVA传参的过程理解为将长数据转变为宽数据的过程。长数据需要明确subID,相当于依据其将数据重新整合,把被试
ID
作为首列;长数据还要明确within变量,程序再根据within变量将数据拆分成多列。这样之后长数据变得和宽数据“等价”了。此外,系统需要明白每一行或每一列数据的意义并转化,因此每一列都必须被指明含义。
长数据
repeat_fit <- MANOVA(long_data,
subID = "subj", # 明确这一列指代的是被试编号
dv = "score", # 因变量
within = "strategy", # strategy作为within-subject的变量,说明进行重复测量方差检验
sph.correction = "GG") # greenhouse-geisser correction
##
## * Data are aggregated to mean (across items/trials)
## if there are >=2 observations per subject and cell.
## You may use Linear Mixed Model to analyze the data,
## e.g., with subjects and items as level-2 clusters.
##
## ====== ANOVA (Within-Subjects Design) ======
##
## Descriptives:
## ───────────────────────────
## "strategy" Mean S.D. n
## ───────────────────────────
## strategy1 5.000 (2.000) 6
## strategy2 6.000 (2.000) 6
## strategy3 8.000 (1.673) 6
## strategy4 9.000 (1.265) 6
## ───────────────────────────
## Total sample size: N = 6
##
## ANOVA Table:
## Dependent variable(s): score
## Between-subjects factor(s): –
## Within-subjects factor(s): strategy
## Covariate(s): –
## ─────────────────────────────────────────────────────────────────────────────
## MS MSE df1 df2 F p η²p [90% CI of η²p] η²G
## ─────────────────────────────────────────────────────────────────────────────
## strategy 27.551 1.286 2.178 10.889 21.429 <.001 *** .811 [.558, .894] .492
## ─────────────────────────────────────────────────────────────────────────────
## Sphericity correction method: GG (Greenhouse-Geisser)
## MSE = mean square error (the residual variance of the linear model)
## η²p = partial eta-squared = SS / (SS + SSE) = F * df1 / (F * df1 + df2)
## ω²p = partial omega-squared = (F - 1) * df1 / (F * df1 + df2 + 1)
## η²G = generalized eta-squared (see Olejnik & Algina, 2003)
## Cohen’s f² = η²p / (1 - η²p)
##
## Levene’s Test for Homogeneity of Variance:
## No between-subjects factors. No need to do the Levene’s test.
##
## Mauchly’s Test of Sphericity:
## ───────────────────────────────
## Mauchly's W p
## ───────────────────────────────
## strategy 0.5510 .824
## ───────────────────────────────
宽数据
wide_data = data.frame(s1 = c(3,3,4,6,6,8), s2 = c(5,3,5,7,8,8), s3=c(8,5,8,9,8,10), s4 =c(8,9,7,10,10,10))
repeated_fit=MANOVA(wide_data,
dvs = 's1:s4',
dvs.pattern = 's(.)',
within='s',
sph.correction = 'GG')
##
## Note:
## dvs="s1:s4" is matched to variables:
## s1, s2, s3, s4
##
## ====== ANOVA (Within-Subjects Design) ======
##
## Descriptives:
## ────────────────────
## "s" Mean S.D. n
## ────────────────────
## s1 5.000 (2.000) 6
## s2 6.000 (2.000) 6
## s3 8.000 (1.673) 6
## s4 9.000 (1.265) 6
## ────────────────────
## Total sample size: N = 6
##
## ANOVA Table:
## Dependent variable(s): s1, s2, s3, s4
## Between-subjects factor(s): –
## Within-subjects factor(s): s
## Covariate(s): –
## ──────────────────────────────────────────────────────────────────────
## MS MSE df1 df2 F p η²p [90% CI of η²p] η²G
## ──────────────────────────────────────────────────────────────────────
## s 27.551 1.286 2.178 10.889 21.429 <.001 *** .811 [.558, .894] .492
## ──────────────────────────────────────────────────────────────────────
## Sphericity correction method: GG (Greenhouse-Geisser)
## MSE = mean square error (the residual variance of the linear model)
## η²p = partial eta-squared = SS / (SS + SSE) = F * df1 / (F * df1 + df2)
## ω²p = partial omega-squared = (F - 1) * df1 / (F * df1 + df2 + 1)
## η²G = generalized eta-squared (see Olejnik & Algina, 2003)
## Cohen’s f² = η²p / (1 - η²p)
##
## Levene’s Test for Homogeneity of Variance:
## No between-subjects factors. No need to do the Levene’s test.
##
## Mauchly’s Test of Sphericity:
## ────────────────────────
## Mauchly's W p
## ────────────────────────
## s 0.5510 .824
## ────────────────────────
between-subject design
尝试将上述的long_data中的subj一列去除,这相当于不知道我们三种strategy下用的是同样的一批被试,因此我们只能进行被试间检验(one-way
ANOVA)
between_data=data.frame(score,strategy) %>% convert_as_factor(strategy) # 注:MANOVA 传参时,between 变量可以不是因子类型,照样可以得到一样的结果
between_fit=MANOVA(between_data,
dv='score',
between='strategy')
##
## ====== ANOVA (Between-Subjects Design) ======
##
## Descriptives:
## ───────────────────────────
## "strategy" Mean S.D. n
## ───────────────────────────
## strategy1 5.000 (2.000) 6
## strategy2 6.000 (2.000) 6
## strategy3 8.000 (1.673) 6
## strategy4 9.000 (1.265) 6
## ───────────────────────────
## Total sample size: N = 24
##
## ANOVA Table:
## Dependent variable(s): score
## Between-subjects factor(s): strategy
## Within-subjects factor(s): –
## Covariate(s): –
## ───────────────────────────────────────────────────────────────────────
## MS MSE df1 df2 F p η²p [90% CI of η²p] η²G
## ───────────────────────────────────────────────────────────────────────
## strategy 20.000 3.100 3 20 6.452 .003 ** .492 [.164, .656] .492
## ───────────────────────────────────────────────────────────────────────
## MSE = mean square error (the residual variance of the linear model)
## η²p = partial eta-squared = SS / (SS + SSE) = F * df1 / (F * df1 + df2)
## ω²p = partial omega-squared = (F - 1) * df1 / (F * df1 + df2 + 1)
## η²G = generalized eta-squared (see Olejnik & Algina, 2003)
## Cohen’s f² = η²p / (1 - η²p)
##
## Levene’s Test for Homogeneity of Variance:
## ───────────────────────────────────────
## Levene’s F df1 df2 p
## ───────────────────────────────────────
## DV: score 1.067 3 20 .385
## ───────────────────────────────────────
将被试间方差分析(one-way ANOVA)和被试内方差分析(repeated-measured ANOVA)的结果相互比较,发现后者更显著。有意思的是,当用单因素方差分析时,得到的效应量是\(\eta^2\),而使用重复测量方差分析时,得到的效应量应为\(partial\) \(\eta^2\),这在两种检验的结果都得到了验证。
如果 ANOVA 的结果显著,那么需要紧接着进行事后检验,判定是哪两组的差异显著。事后检验较为保守,即使 ANOVA 显著,事后检验也未必得到显著的结果。
EMMEANS(model,effect=NULL,p.adjust='bonferroni')
向其中传入由MANOVA得到的方差分析模型,设置待检验的效应(通常为分组变量),以及调整方式即可。
EMMEANS能够返回效应量(partial \(\eta^2\),Cohen’s
d,及其置信区间),也能返回事后检验的结果。
调整方式默认为p.adjust='bonferroni',也可以设置为’tukey’,‘scheffe’。
# p.adjust='tukey'
EMMEANS(between_fit,effect = 'strategy',p.adjust = 'tukey')
## ------ EMMEANS (effect = "strategy") ------
##
## Joint Tests of "strategy":
## ─────────────────────────────────────────────────────
## Effect df1 df2 F p η²p [90% CI of η²p]
## ─────────────────────────────────────────────────────
## strategy 3 20 6.452 .003 ** .492 [.164, .656]
## ─────────────────────────────────────────────────────
## Note. Simple effects of repeated measures with 3 or more levels
## are different from the results obtained with SPSS MANOVA syntax.
##
## Estimated Marginal Means of "strategy":
## ─────────────────────────────────────────
## "strategy" Mean [95% CI of Mean] S.E.
## ─────────────────────────────────────────
## strategy1 5.000 [3.501, 6.499] (0.719)
## strategy2 6.000 [4.501, 7.499] (0.719)
## strategy3 8.000 [6.501, 9.499] (0.719)
## strategy4 9.000 [7.501, 10.499] (0.719)
## ─────────────────────────────────────────
##
## Pairwise Comparisons of "strategy":
## ──────────────────────────────────────────────────────────────────────────────────
## Contrast Estimate S.E. df t p Cohen’s d [95% CI of d]
## ──────────────────────────────────────────────────────────────────────────────────
## strategy2 - strategy1 1.000 (1.017) 20 0.984 .760 0.568 [-1.048, 2.184]
## strategy3 - strategy1 3.000 (1.017) 20 2.951 .036 * 1.704 [ 0.088, 3.320]
## strategy3 - strategy2 2.000 (1.017) 20 1.967 .233 1.136 [-0.480, 2.752]
## strategy4 - strategy1 4.000 (1.017) 20 3.935 .004 ** 2.272 [ 0.656, 3.888]
## strategy4 - strategy2 3.000 (1.017) 20 2.951 .036 * 1.704 [ 0.088, 3.320]
## strategy4 - strategy3 1.000 (1.017) 20 0.984 .760 0.568 [-1.048, 2.184]
## ──────────────────────────────────────────────────────────────────────────────────
## Pooled SD for computing Cohen’s d: 1.761
## P-value adjustment: Tukey method for comparing a family of 4 estimates.
##
## Disclaimer:
## By default, pooled SD is Root Mean Square Error (RMSE).
## There is much disagreement on how to compute Cohen’s d.
## You are completely responsible for setting `sd.pooled`.
## You might also use `effectsize::t_to_d()` to compute d.
也可以调整p.adjust='scheffe',结果会更难显著。
EMMEANS(between_fit,effect = 'strategy',p.adjust = 'scheffe')
## ------ EMMEANS (effect = "strategy") ------
##
## Joint Tests of "strategy":
## ─────────────────────────────────────────────────────
## Effect df1 df2 F p η²p [90% CI of η²p]
## ─────────────────────────────────────────────────────
## strategy 3 20 6.452 .003 ** .492 [.164, .656]
## ─────────────────────────────────────────────────────
## Note. Simple effects of repeated measures with 3 or more levels
## are different from the results obtained with SPSS MANOVA syntax.
##
## Estimated Marginal Means of "strategy":
## ─────────────────────────────────────────
## "strategy" Mean [95% CI of Mean] S.E.
## ─────────────────────────────────────────
## strategy1 5.000 [3.501, 6.499] (0.719)
## strategy2 6.000 [4.501, 7.499] (0.719)
## strategy3 8.000 [6.501, 9.499] (0.719)
## strategy4 9.000 [7.501, 10.499] (0.719)
## ─────────────────────────────────────────
##
## Pairwise Comparisons of "strategy":
## ──────────────────────────────────────────────────────────────────────────────────
## Contrast Estimate S.E. df t p Cohen’s d [95% CI of d]
## ──────────────────────────────────────────────────────────────────────────────────
## strategy2 - strategy1 1.000 (1.017) 20 0.984 .809 0.568 [-1.192, 2.328]
## strategy3 - strategy1 3.000 (1.017) 20 2.951 .060 . 1.704 [-0.056, 3.464]
## strategy3 - strategy2 2.000 (1.017) 20 1.967 .305 1.136 [-0.624, 2.896]
## strategy4 - strategy1 4.000 (1.017) 20 3.935 .008 ** 2.272 [ 0.512, 4.032]
## strategy4 - strategy2 3.000 (1.017) 20 2.951 .060 . 1.704 [-0.056, 3.464]
## strategy4 - strategy3 1.000 (1.017) 20 0.984 .809 0.568 [-1.192, 2.328]
## ──────────────────────────────────────────────────────────────────────────────────
## Pooled SD for computing Cohen’s d: 1.761
## P-value adjustment: Scheffe method with rank 3.
##
## Disclaimer:
## By default, pooled SD is Root Mean Square Error (RMSE).
## There is much disagreement on how to compute Cohen’s d.
## You are completely responsible for setting `sd.pooled`.
## You might also use `effectsize::t_to_d()` to compute d.
TukeyHSD(x, conf.level = 0.95, ...),来自原生stats包。
其中x是拟合模型,这里通常适用aov拟合的模型。conf.level可以设定置信区间的大小,通常和显著性水平相对应。
diff是两组的均值差异,p adj是调整后的p-value。
TukeyHSD类可以用print显示结果,也可以用plot展现事后两两比较的图,图上呈现了均值差异和置信区间,非常直观。
tfm=TukeyHSD(fit_model)
print(tfm)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = score ~ strategy, data = long_data)
##
## $strategy
## diff lwr upr p adj
## 2-1 1 -1.8452027 3.845203 0.7601658
## 3-1 3 0.1547973 5.845203 0.0364863
## 4-1 4 1.1547973 6.845203 0.0041906
## 3-2 2 -0.8452027 4.845203 0.2331646
## 4-2 3 0.1547973 5.845203 0.0364863
## 4-3 1 -1.8452027 3.845203 0.7601658
plot(tfm)
tukey_hsd(x, formula, ...),来自rstatix包。
tukey_hsd(x, formula, ...)是管道友好的。
其中x为aov,lm,data.frame等包含着formula中的变量的数据类型,formula为num~group,其中num为数值变量,group为分类变量。如果传入了方差分析模型aov,那么就不用再输入formula,因为formula在aov中就已经体现。
tukey_hsd(x, formula, ...)能返回一个tibble data
frame,反馈所有的结果(呈现效果更美观)。
# 直接传入 aov 模型
tukey_hsd(fit_model)
## # A tibble: 6 × 9
## term group1 group2 null.value estimate conf.low conf.high p.adj p.adj.…¹
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 strategy 1 2 0 1.00 -1.85 3.85 0.76 ns
## 2 strategy 1 3 0 3 0.155 5.85 0.0365 *
## 3 strategy 1 4 0 4 1.15 6.85 0.00419 **
## 4 strategy 2 3 0 2 -0.845 4.85 0.233 ns
## 5 strategy 2 4 0 3 0.155 5.85 0.0365 *
## 6 strategy 3 4 0 1 -1.85 3.85 0.76 ns
## # … with abbreviated variable name ¹p.adj.signif
# 使用管道
long_data %>% tukey_hsd(score~strategy)
## # A tibble: 6 × 9
## term group1 group2 null.value estimate conf.low conf.high p.adj p.adj.…¹
## * <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 strategy 1 2 0 1.00 -1.85 3.85 0.76 ns
## 2 strategy 1 3 0 3 0.155 5.85 0.0365 *
## 3 strategy 1 4 0 4 1.15 6.85 0.00419 **
## 4 strategy 2 3 0 2 -0.845 4.85 0.233 ns
## 5 strategy 2 4 0 3 0.155 5.85 0.0365 *
## 6 strategy 3 4 0 1 -1.85 3.85 0.76 ns
## # … with abbreviated variable name ¹p.adj.signif
在进行方差分析之前,可以先
绘制箱型图 大概比较一下各组数据。纵轴上可以比较均值,宽度可以大致看出方差是否同质。
绘制QQ图 大致查看正态性假设是否成立
boxplotggboxplot# 传统方法
boxplot(score~strategy,long_data,varwidth=T) # 设置varwidth=T,可以由箱型图的宽度看出各组数据的方差大致情况,如果宽度相近,也反映了方差可能同质。
# ggboxplot,画出来的效果可能更美观
ggboxplot(long_data, x = "strategy", y = "score", add = "point")
要根据不同的组别画出QQ plot,因此使用ggqqplot函数
ggqqplot函数:第一个参数传入数据框,第二个参数传入要绘制的变量,facet.by传入的是分组变量
ggqqplot(long_data, "score", facet.by = "strategy")