Designing Experiments and Analyzing Data: A Model Comparison Perspective (3rd edition) by Maxwell, Delaney, & Kelley

Information about the book is available at https://designingexperiments.com

R Code and Instructions to Accompany Chapter 5

These notes build on Chapters 1, 3, and 4, where we discussed operational aspects of R, detailed the use of many functions, and showed how to install packages. Here, we skip details that have already been covered in the prior R Code and Instructions.

Bonferroni critical value (for t and F)

C <- 3            # number of contrasts in the family
df1 <- 1          # numerator degrees of freedom
df2 <- 40         # denominator (error) degrees of freedom
alpha_FW <- .05   # desired family-wise Type I error rate
alpha_PC <- alpha_FW/C  # per-comparison Type I error rate

# Critical value for t.
qt(1-alpha_PC/2, df=df2)
## [1] 2.498856
# Critical value for F
qf(1-alpha_PC, df1=df1, df2=df2)
## [1] 6.24428
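
Because the numerator degrees of freedom equal 1, the critical F value is simply the square of the critical t value; a quick check:

# Squaring the critical t recovers the critical F when df1 = 1.
qt(1-alpha_PC/2, df=df2)^2
## [1] 6.24428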

Tukey critical value. Notice that the family-wise Type I error rate is not halved across tails here; the studentized range statistic is inherently one-sided when examining mean differences. Further, notice that we have to convert the critical q value into a critical t or F value.

alpha_FW <- .05
a <- 4
df <- 40

# Critical q value:
qtukey(p=1-alpha_FW, df=df, nmeans = a)
## [1] 3.790685
# Tukey corrected critical t value (divide q by the square root of 2); the same conversion is used in the Games-Howell procedure
qtukey(p=1-alpha_FW, df=df, nmeans = a)/sqrt(2)
## [1] 2.680419
# Tukey corrected critical F value:
(qtukey(p=1-alpha_FW, df=df, nmeans = a)^2)/2
## [1] 7.184648
# Table 5.6 (Critical F values with Tukey correction)
qtukey(p=1-.05, df=12, nmeans = 2)^2/2
## [1] 4.747225
qtukey(p=1-.05, df=12, nmeans = 3)^2/2
## [1] 7.117496
qtukey(p=1-.05, df=12, nmeans = 4)^2/2
## [1] 8.814374
qtukey(p=1-.05, df=12, nmeans = 5)^2/2
## [1] 10.15972
qtukey(p=1-.05, df=12, nmeans = 6)^2/2
## [1] 11.28235
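
As a consistency check, when only a = 2 means are compared the Tukey critical F reduces to the ordinary critical F with one numerator degree of freedom:

# With a = 2 means, q^2/2 equals the usual critical F with df1 = 1
# (compare with the first Table 5.6 entry above).
qf(1-.05, df1=1, df2=12)
## [1] 4.747225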

Load the AMCP package to use the data from Table 5.4 of Chapter 5.

library(AMCP)
data(chapter_5_table_4)
chapter_5_table_4
##    group sbp
## 1      1  84
## 2      1  95
## 3      1  93
## 4      1 104
## 5      1  99
## 6      1 106
## 7      2  81
## 8      2  84
## 9      2  92
## 10     2 101
## 11     2  80
## 12     2 108
## 13     3  98
## 14     3  95
## 15     3  86
## 16     3  87
## 17     3  94
## 18     3 101
## 19     4  91
## 20     4  78
## 21     4  85
## 22     4  80
## 23     4  81
## 24     4  76
ANOVA.Object <- aov(sbp~as.factor(group), data=chapter_5_table_4)
ANOVA_Result <- anova(ANOVA.Object)
ANOVA_Result
## Analysis of Variance Table
## 
## Response: sbp
##                  Df  Sum Sq Mean Sq F value  Pr(>F)  
## as.factor(group)  3  744.79 248.264  3.7985 0.02637 *
## Residuals        20 1307.17  65.358                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MS.W <- ANOVA_Result$"Mean Sq"[2]
MS.W
## [1] 65.35833
df1 <- ANOVA_Result$"Df"[1]
df2 <- ANOVA_Result$"Df"[2]

Get the group descriptive statistics (this is one approach; see the Chapter 4 R Accompaniment for other approaches).

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Summary.5.4 <- chapter_5_table_4 %>% 
  group_by(group) %>%
  summarize(
    mean = mean(sbp),
    sd = sd(sbp),
    s2 = var(sbp),
    n = n()
  )
Summary.5.4
## # A tibble: 4 × 5
##   group     mean        sd        s2     n
##   <int>    <dbl>     <dbl>     <dbl> <int>
## 1     1 96.83333  8.035339  64.56667     6
## 2     2 91.00000 11.489125 132.00000     6
## 3     3 93.50000  5.958188  35.50000     6
## 4     4 81.83333  5.419102  29.36667     6

Questions of interest mapped onto the contrast coefficients

C1 <- c(1, -1, 0, 0)        # Group 1 vs. Group 2
C2 <- c(1, 0, -1, 0)        # Group 1 vs. Group 3
C3 <- c(0, 1, -1, 0)        # Group 2 vs. Group 3
C4 <- c(1/3, 1/3, 1/3, -1)  # average of Groups 1-3 vs. Group 4
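
A legitimate contrast requires that the c-weights sum to zero; a quick check:

# Each contrast's weights must sum to zero (rounding guards against
# floating-point error in the thirds).
round(sapply(list(C1, C2, C3, C4), sum), 10)
## [1] 0 0 0 0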

Uncorrected confidence intervals using the MBESS package.

library(MBESS)

ci.c(means=Summary.5.4$mean, c.weights=C1, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] -3.903025
## 
## $Contrast
## [1] 5.833333
## 
## $Upper.Conf.Limit.Contrast
## [1] 15.56969
ci.c(means=Summary.5.4$mean, c.weights=C2, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] -6.403025
## 
## $Contrast
## [1] 3.333333
## 
## $Upper.Conf.Limit.Contrast
## [1] 13.06969
ci.c(means=Summary.5.4$mean, c.weights=C3, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] -12.23636
## 
## $Contrast
## [1] -2.5
## 
## $Upper.Conf.Limit.Contrast
## [1] 7.236358
ci.c(means=Summary.5.4$mean, c.weights=C4, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] 3.994741
## 
## $Contrast
## [1] 11.94444
## 
## $Upper.Conf.Limit.Contrast
## [1] 19.89415

Another approach for forming confidence intervals for the specified \(c\)-weight contrasts is given as follows.

# Contrast estimates (unstandardized effects)
Psi.1 <- sum(Summary.5.4$mean*C1)
Psi.2 <- sum(Summary.5.4$mean*C2)
Psi.3 <- sum(Summary.5.4$mean*C3)
Psi.4 <- sum(Summary.5.4$mean*C4)

# Standard error (assuming homogeneity of variance)
SE.1 <- sqrt(MS.W*sum(C1^2/Summary.5.4$n))
SE.2 <- sqrt(MS.W*sum(C2^2/Summary.5.4$n))
SE.3 <- sqrt(MS.W*sum(C3^2/Summary.5.4$n))
SE.4 <- sqrt(MS.W*sum(C4^2/Summary.5.4$n))

# Multiplier (which changes based on the approach taken); here, 
# w is used without correction for multiplicity. Note that df1=1 for a 
# single contrast, so w is simply the usual two-sided critical t value.
w <- sqrt(qf(1-.05, df1=1, df2=df2))

# The uncorrected margins of error are given as
MOE1 <- SE.1*w
MOE2 <- SE.2*w
MOE3 <- SE.3*w
MOE4 <- SE.4*w

# The uncorrected confidence intervals are given as 
# (these reproduce the ci.c() intervals shown above)
c(Psi.1 - MOE1, Psi.1 + MOE1)
c(Psi.2 - MOE2, Psi.2 + MOE2)
c(Psi.3 - MOE3, Psi.3 + MOE3)
c(Psi.4 - MOE4, Psi.4 + MOE4)
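
Because the same three steps repeat for every contrast, a small helper function can reduce the repetition. This is a minimal sketch (contrast.ci is our own name, not a function from any package):

# Sketch of a helper: confidence interval for a contrast of means,
# assuming homogeneity of variance; w is whatever multiplier is
# appropriate for the chosen approach (uncorrected, Bonferroni, Scheffe).
contrast.ci <- function(c.weights, means, n, MS.W, w) {
  Psi <- sum(means * c.weights)             # contrast estimate
  SE <- sqrt(MS.W * sum(c.weights^2 / n))   # pooled standard error
  c(lower = Psi - w*SE, upper = Psi + w*SE)
}

# Example: the uncorrected interval for the first contrast.
contrast.ci(C1, Summary.5.4$mean, Summary.5.4$n, MS.W, w)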

Now, using the above approach, we correct for multiplicity by using the Bonferroni correction, which modifies \(w\).

# Multiplier (which changes based on the approach taken); here, 
# w is corrected by dividing the desired family-wise Type I error rate by 4.
w_B <- sqrt(qf(1-.05/4, df1=1, df2=df2))

# The Bonferroni-corrected margins of error are given as
MOE1_B <- SE.1*w_B
MOE2_B <- SE.2*w_B
MOE3_B <- SE.3*w_B
MOE4_B <- SE.4*w_B

# The Bonferroni-corrected confidence intervals are given as
c(Psi.1 - MOE1_B, Psi.1 + MOE1_B)
## [1] -6.976206 18.642873
c(Psi.2 - MOE2_B, Psi.2 + MOE2_B)
## [1] -9.476206 16.142873
c(Psi.3 - MOE3_B, Psi.3 + MOE3_B)
## [1] -15.30954  10.30954
c(Psi.4 - MOE4_B, Psi.4 + MOE4_B)
## [1]  1.485499 22.403390
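
The ci.c() function can produce these Bonferroni-corrected intervals directly by raising the confidence level so that the per-comparison alpha is .05/4 (assuming the installed version of MBESS supports the conf.level argument):

# Bonferroni-corrected interval for the first contrast via ci.c().
ci.c(means=Summary.5.4$mean, c.weights=C1, n=Summary.5.4$n,
     N=sum(Summary.5.4$n), s.anova=sqrt(MS.W), conf.level=1-.05/4)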

Now, we correct for multiplicity with Bonferroni and do not assume homogeneity of variance.

# When assuming heterogeneous variances (e.g., see Chapter 4, Equation 34)
df.C1 <- (sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C1^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C2 <- (sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C2^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C3 <- (sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C3^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C4 <- (sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C4^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
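
It can be informative to inspect these contrast-specific error df; each is at most the pooled error df of 20 and will generally be smaller when the group variances differ:

# Contrast-specific error df (compare with the pooled error df of 20).
c(df.C1, df.C2, df.C3, df.C4)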

# Multiplier (which changes based on the approach taken); here, 
# w is corrected by dividing the desired family-wise Type I error rate by 4.
w_B1 <- sqrt(qf(1-.05/4, df1=1, df2=df.C1))
w_B2 <- sqrt(qf(1-.05/4, df1=1, df2=df.C2))
w_B3 <- sqrt(qf(1-.05/4, df1=1, df2=df.C3))
w_B4 <- sqrt(qf(1-.05/4, df1=1, df2=df.C4))

# Standard error (not assuming homogeneity of variance; here we add "h" for heterogeneous). 
SE.1h <- sqrt(sum(C1^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.2h <- sqrt(sum(C2^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.3h <- sqrt(sum(C3^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.4h <- sqrt(sum(C4^2*(Summary.5.4$s2/Summary.5.4$n)))


# The Bonferroni-corrected margins of error now pair each heterogeneous
# standard error with its contrast-specific multiplier
MOE1_Bh <- SE.1h*w_B1
MOE2_Bh <- SE.2h*w_B2
MOE3_Bh <- SE.3h*w_B3
MOE4_Bh <- SE.4h*w_B4

# The Bonferroni-corrected confidence intervals are given as
c(Psi.1 - MOE1_Bh, Psi.1 + MOE1_Bh)
c(Psi.2 - MOE2_Bh, Psi.2 + MOE2_Bh)
c(Psi.3 - MOE3_Bh, Psi.3 + MOE3_Bh)
c(Psi.4 - MOE4_Bh, Psi.4 + MOE4_Bh)

The Tukey correction, for all pairwise comparisons, is simple to implement in R with the TukeyHSD() function. The function works by providing it a fitted aov() object. This replicates Table 5.7.

TukeyHSD(ANOVA.Object)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sbp ~ as.factor(group), data = chapter_5_table_4)
## 
## $`as.factor(group)`
##           diff       lwr       upr     p adj
## 2-1  -5.833333 -18.89753  7.230868 0.6038383
## 3-1  -3.333333 -16.39753  9.730868 0.8903064
## 4-1 -15.000000 -28.06420 -1.935799 0.0208593
## 3-2   2.500000 -10.56420 15.564201 0.9493084
## 4-2  -9.166667 -22.23087  3.897534 0.2345096
## 4-3 -11.666667 -24.73087  1.397534 0.0905811
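
The numeric results can also be extracted from the returned object for further use; the relevant list component is named after the model term:

# Extract the matrix of pairwise results (columns diff, lwr, upr, p adj).
Tukey.Result <- TukeyHSD(ANOVA.Object)
Tukey.Result$`as.factor(group)`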

Additionally, a plot of the Tukey confidence intervals can be obtained using the plot() function.

plot(TukeyHSD(ANOVA.Object))

Critical value from the Scheffé approach with a=4 (df numerator = 3) and df denominator = 30.

3*qf(.95, 3, 30)
## [1] 8.766832
# Or, more generally (not run), with a groups and N total observations
# (a-1)*qf(1-alpha, df1=a-1, df2=N-a)
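
For the chapter data (a = 4 groups and N = 24, so 20 denominator df), the analogous critical value is computed the same way:

# Scheffe critical value for the chapter_5_table_4 data (not run).
# (4-1)*qf(1-.05, df1=4-1, df2=24-4)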

Consider again the ANOVA previously performed (above) on the data from chapter_5_table_4. We repeat the ANOVA here for completeness.

ANOVA.Object <- aov(sbp~as.factor(group), data=chapter_5_table_4)
ANOVA_Result <- anova(ANOVA.Object)
MS.W <- ANOVA_Result$"Mean Sq"[2]
df1 <- ANOVA_Result$"Df"[1]
df2 <- ANOVA_Result$"Df"[2]

df1
## [1] 3
df2
## [1] 20
ANOVA_Result
## Analysis of Variance Table
## 
## Response: sbp
##                  Df  Sum Sq Mean Sq F value  Pr(>F)  
## as.factor(group)  3  744.79 248.264  3.7985 0.02637 *
## Residuals        20 1307.17  65.358                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MS.W
## [1] 65.35833

Now, we take the objects defined above and perform the contrasts.

# c-weights of interest that map the questions onto the statistical framework
C1 <- c(1, -1, 0, 0)
C2 <- c(1, 0, -1, 0)
C3 <- c(0, 1, -1, 0)
C4 <- c(1/3, 1/3, 1/3, -1)

# Here we use a vector of ordered (Groups 1, 2, 3, and 4) means. 
# Note, however, that earlier we used "Summary.5.4$mean" to extract the means. 
Means <- c(96.83333, 91.00000, 93.50000, 81.83333)

Psi.1 <- sum(Means*C1)
Psi.2 <- sum(Means*C2)
Psi.3 <- sum(Means*C3)
Psi.4 <- sum(Means*C4)

# Multiplier under the assumption of homogeneous variances. 
w_S <- sqrt(df1*qf(1-.05, df1=df1, df2=df2))

# Here we use a vector of ordered (Groups 1, 2, 3, and 4) sample sizes.
# Note, however, that earlier we used "Summary.5.4$n" 
# to extract the sample sizes.

n.values <- c(6, 6, 6, 6)

# Standard error (assuming homogeneity of variance)
SE.1 <- sqrt(MS.W*sum(C1^2/n.values))
SE.2 <- sqrt(MS.W*sum(C2^2/n.values))
SE.3 <- sqrt(MS.W*sum(C3^2/n.values))
SE.4 <- sqrt(MS.W*sum(C4^2/n.values))

MOE1_S <- w_S*SE.1
MOE2_S <- w_S*SE.2
MOE3_S <- w_S*SE.3
MOE4_S <- w_S*SE.4

# Results for contrasts 1-4
c(Psi.1 - MOE1_S, Psi.1 + MOE1_S)
## [1] -8.397119 20.063779
c(Psi.2 - MOE2_S, Psi.2 + MOE2_S)
## [1] -10.89712  17.56378
c(Psi.3 - MOE3_S, Psi.3 + MOE3_S)
## [1] -16.73045  11.73045
c(Psi.4 - MOE4_S, Psi.4 + MOE4_S)
## [1]  0.3253335 23.5635598
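
These intervals can also be reproduced with the contrast.ci() helper sketched earlier by supplying the Scheffé multiplier:

# Scheffe interval for the complex contrast via the helper defined above.
contrast.ci(C4, Means, n.values, MS.W, w_S)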

Now, not assuming homogeneous variances, we have the following.

# When assuming heterogeneous variances (e.g., see Chapter 4, Equation 34); repeated from above.
df.C1 <- (sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C1^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C2 <- (sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C2^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C3 <- (sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C3^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C4 <- (sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C4^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))


# We now have a new multiplier for use with confidence intervals from the Scheffé perspective
w_S1 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C1))
w_S2 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C2))
w_S3 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C3))
w_S4 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C4))

# Standard error (not assuming homogeneity of variance; here we add "h" for heterogeneous); repeated
SE.1h <- sqrt(sum(C1^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.2h <- sqrt(sum(C2^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.3h <- sqrt(sum(C3^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.4h <- sqrt(sum(C4^2*(Summary.5.4$s2/Summary.5.4$n)))

MOE1_Sh <- w_S1*SE.1h
MOE2_Sh <- w_S2*SE.2h
MOE3_Sh <- w_S3*SE.3h
MOE4_Sh <- w_S4*SE.4h

# Results for contrasts 1-4, not assuming homogeneous variances
c(Psi.1 - MOE1_Sh, Psi.1 + MOE1_Sh)
## [1] -13.67423  25.34089
c(Psi.2 - MOE2_Sh, Psi.2 + MOE2_Sh)
## [1] -10.50013  17.16679
c(Psi.3 - MOE3_Sh, Psi.3 + MOE3_Sh)
## [1] -21.23757  16.23757
c(Psi.4 - MOE4_Sh, Psi.4 + MOE4_Sh)
## [1]  2.275611 21.613283

The multcomp package can be used for Dunnett's approach.

library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
data(chapter_5_table_4)

# Here we recode the groups as -1, -2, -3, and -4, so that "group 4" becomes 
# "group -4" and is therefore the lowest factor level; Dunnett's procedure 
# then treats group 4 as the reference group, which is what we want. 
chapter_5_table_4$Group.Switched.Order <- as.factor(chapter_5_table_4$group*-1) 

ANOVA.Object <- aov(sbp~Group.Switched.Order, data=chapter_5_table_4) # Refit with the recoded factor
Results.Dunnet <- glht(ANOVA.Object, linfct=mcp(Group.Switched.Order="Dunnett"))
summary(Results.Dunnet)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Dunnett Contrasts
## 
## 
## Fit: aov(formula = sbp ~ Group.Switched.Order, data = chapter_5_table_4)
## 
## Linear Hypotheses:
##              Estimate Std. Error t value Pr(>|t|)  
## -3 - -4 == 0   11.667      4.668   2.500   0.0546 .
## -2 - -4 == 0    9.167      4.668   1.964   0.1528  
## -1 - -4 == 0   15.000      4.668   3.214   0.0117 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
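
Simultaneous confidence intervals for the Dunnett comparisons can be obtained from the same object with the confint() function:

# Simultaneous 95% confidence intervals for the Dunnett contrasts.
confint(Results.Dunnet)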