# Designing Experiments and Analyzing Data: A Model Comparison Perspective (3rd edition) by Maxwell, Delaney, & Kelley

## Information about the book is available at https://designingexperiments.com

### R Code and Instructions to Accompany Chapter 5

These notes build on Chapter 1, 3, and 4, where we discuss some operational aspects of R and detail the use of many functions and how to install packages. Here, we skip details that have already been discussed in the prior R Code and Instructions.

Bonferroni critical value (for t and F)

C <- 3
df1 <- 1
df2 <- 40
alpha_FW <- .05
alpha_PC <- alpha_FW/C

# Critical value for t.
qt(1-alpha_PC/2, df=df2)
##  2.498856
# Critical value for F
qf(1-alpha_PC, df1=df1, df2=df2)
##  6.24428

Tukey critical value. Notice, here, that the family-wise Type I error rate is not divided for each tail, as it is inherently one-sided when examining mean differences. Further, notice that we have to convert the critical q value into a critical t or F value.

alpha_FW <- .05
a <- 4
df <- 40

# Critical q value:
qtukey(p=1-alpha_FW, df=df, nmeans = a)
##  3.790685
# Tukey corrected critical t value (divide by the square root of 2) (Games-Howell procedure)
qtukey(p=1-alpha_FW, df=df, nmeans = a)/sqrt(2)
##  2.680419
# Tukey corrected critical F value:
(qtukey(p=1-alpha_FW, df=df, nmeans = a)^2)/2
##  7.184648
# Table 5.6 (Critical F values with Tukey correction)
qtukey(p=1-.05, df=12, nmeans = 2)^2/2
##  4.747225
qtukey(p=1-.05, df=12, nmeans = 3)^2/2
##  7.117496
qtukey(p=1-.05, df=12, nmeans = 4)^2/2
##  8.814374
qtukey(p=1-.05, df=12, nmeans = 5)^2/2
##  10.15972
qtukey(p=1-.05, df=12, nmeans = 6)^2/2
##  11.28235

Load the AMCP package to use the data from Table 4, Chapter 5.

library(AMCP)
data(chapter_5_table_4)
chapter_5_table_4
##    group sbp
## 1      1  84
## 2      1  95
## 3      1  93
## 4      1 104
## 5      1  99
## 6      1 106
## 7      2  81
## 8      2  84
## 9      2  92
## 10     2 101
## 11     2  80
## 12     2 108
## 13     3  98
## 14     3  95
## 15     3  86
## 16     3  87
## 17     3  94
## 18     3 101
## 19     4  91
## 20     4  78
## 21     4  85
## 22     4  80
## 23     4  81
## 24     4  76
ANOVA.Object <- aov(sbp~as.factor(group), data=chapter_5_table_4)
ANOVA_Result <- anova(ANOVA.Object)
ANOVA_Result
## Analysis of Variance Table
##
## Response: sbp
##                  Df  Sum Sq Mean Sq F value  Pr(>F)
## as.factor(group)  3  744.79 248.264  3.7985 0.02637 *
## Residuals        20 1307.17  65.358
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MS.W <- ANOVA_Result$"Mean Sq" MS.W ##  65.35833 df1 <- ANOVA_Result$"Df"
df2 <- ANOVA_Result$"Df" Get the group descriptive statistics (this is one appraoch; see the Chapter 4 R Accompaniment for other approaches). library(dplyr) ## ## Attaching package: 'dplyr' ## The following objects are masked from 'package:stats': ## ## filter, lag ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union Summary.5.4 <- chapter_5_table_4 %>% group_by(group) %>% summarize( mean=mean(sbp), sd=sd(sbp), s2=var(sbp), n=n() ) Summary.5.4 ## # A tibble: 4 × 5 ## group mean sd s2 n ## <int> <dbl> <dbl> <dbl> <int> ## 1 1 96.83333 8.035339 64.56667 6 ## 2 2 91.00000 11.489125 132.00000 6 ## 3 3 93.50000 5.958188 35.50000 6 ## 4 4 81.83333 5.419102 29.36667 6 Questions of interest mapped onto the contrast coefficients C1 <- c(1, -1, 0, 0) C2 <- c(1, 0, -1, 0) C3 <- c(0, 1, -1, 0) C4 <- c(1/3, 1/3, 1/3, -1) Uncorrected confidence intervals using MBESS package. library(MBESS) ci.c(means=Summary.5.4$mean, c.weights=C1, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast ##  -3.903025 ## ##$Contrast
##  5.833333
##
## $Upper.Conf.Limit.Contrast ##  15.56969 ci.c(means=Summary.5.4$mean, c.weights=C2, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast ##  -6.403025 ## ##$Contrast
##  3.333333
##
## $Upper.Conf.Limit.Contrast ##  13.06969 ci.c(means=Summary.5.4$mean, c.weights=C3, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast ##  -12.23636 ## ##$Contrast
##  -2.5
##
## $Upper.Conf.Limit.Contrast ##  7.236358 ci.c(means=Summary.5.4$mean, c.weights=C4, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast ##  3.994741 ## ##$Contrast
##  11.94444
##
## $Upper.Conf.Limit.Contrast ##  19.89415 Another approch for forming contrasts for the specified $$c$$-weights is given as follows. # Effect size Psi.1 <- sum(Summary.5.4$mean*C1)
Psi.2 <- sum(Summary.5.4$mean*C2) Psi.3 <- sum(Summary.5.4$mean*C3)
Psi.4 <- sum(Summary.5.4$mean*C4) # Standard error (assuming homogeneity of variance) SE.1 <- sqrt(MS.W*sum(C1^2/Summary.5.4$n))
SE.2 <- sqrt(MS.W*sum(C2^2/Summary.5.4$n)) SE.3 <- sqrt(MS.W*sum(C3^2/Summary.5.4$n))
SE.4 <- sqrt(MS.W*sum(C4^2/Summary.5.4$n)) # Multiplier (which changes based on the appraoch taken); here, # w is used without correction for multiplicity. w <- sqrt(qf(1-.05, df1=df1, df2=df2)) # The uncorrected margins of error are given as MOE1 <- SE.1*w MOE2 <- SE.2*w MOE3 <- SE.3*w MOE4 <- SE.4*w # The uncorrected confidence intervals are given as c(Psi.1 - MOE1, Psi.1 + MOE1) ##  -2.38262 14.04929 c(Psi.2 - MOE2, Psi.2 + MOE2) ##  -4.88262 11.54929 c(Psi.3 - MOE3, Psi.3 + MOE3) ##  -10.715954 5.715954 c(Psi.4 - MOE4, Psi.4 + MOE4) ##  5.236146 18.652743 Now, using the above approach, we correct for multiplicity by using the Bonferroni correction, which modified $$w$$. # Multiplier (which changes based on the appraoch taken); here, # w is corrected by diving the desired family-wise Type I error rate by 4. w_B <- sqrt(qf(1-.05/4, df1=1, df2=df2)) # The uncorrected margins of error are given as MOE1_B <- SE.1*w_B MOE2_B <- SE.2*w_B MOE3_B <- SE.3*w_B MOE4_B <- SE.4*w_B # The uncorrected confidence intervals are given as c(Psi.1 - MOE1_B, Psi.1 + MOE1_B) ##  -6.976206 18.642873 c(Psi.2 - MOE2_B, Psi.2 + MOE2_B) ##  -9.476206 16.142873 c(Psi.3 - MOE3_B, Psi.3 + MOE3_B) ##  -15.30954 10.30954 c(Psi.4 - MOE4_B, Psi.4 + MOE4_B) ##  1.485499 22.403390 Now, we correct for multiplicity with Bonferroni and do not assume homogeneity of variance. # When assumming heterogenious variances (e.g., see Chapter 4, Equation 34) df.C1 <- (sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C1^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C2 <- (sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C2^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1)) df.C3 <- (sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C3^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C4 <- (sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C4^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1)) # Multiplier (which changes based on the appraoch taken); here, #w is corrected by diving the desired family-wise Type I error rate by 4. w_B1 <- sqrt(qf(1-.05/4, df1=1, df2=df.C1)) w_B2 <- sqrt(qf(1-.05/4, df1=1, df2=df.C2)) w_B3 <- sqrt(qf(1-.05/4, df1=1, df2=df.C3)) w_B4 <- sqrt(qf(1-.05/4, df1=1, df2=df.C4)) # Standard error (not assuming homogeneity of variance; here we add "h" for heterogenious). SE.1h <- sqrt(sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))) SE.2h <- sqrt(sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))) SE.3h <- sqrt(sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))) SE.4h <- sqrt(sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))) # The uncorrected margins of error are given as MOE1_Bh <- SE.1h*w_B MOE2_Bh <- SE.2h*w_B MOE3_Bh <- SE.3h*w_B MOE4_Bh <- SE.4h*w_B # The uncorrected confidence intervals are given as c(Psi.1 - MOE1_Bh, Psi.1 + MOE1_Bh) ##  -9.874742 21.541409 c(Psi.2 - MOE2_Bh, Psi.2 + MOE2_Bh) ##  -7.87427 14.54094 c(Psi.3 - MOE3_Bh, Psi.3 + MOE3_Bh) ##  -17.00025 12.00025 c(Psi.4 - MOE4_Bh, Psi.4 + MOE4_Bh) ##  3.62397 20.26492 The Tukey correction, for all pairwise comparisons, is simple to immplement in R with the TukeyHSD() function. The function works be providing a fitted aov() object. This replicates Table 5.7. TukeyHSD(ANOVA.Object) ## Tukey multiple comparisons of means ## 95% family-wise confidence level ## ## Fit: aov(formula = sbp ~ as.factor(group), data = chapter_5_table_4) ## ##$as.factor(group)
##           diff       lwr       upr     p adj
## 2-1  -5.833333 -18.89753  7.230868 0.6038383
## 3-1  -3.333333 -16.39753  9.730868 0.8903064
## 4-1 -15.000000 -28.06420 -1.935799 0.0208593
## 3-2   2.500000 -10.56420 15.564201 0.9493084
## 4-2  -9.166667 -22.23087  3.897534 0.2345096
## 4-3 -11.666667 -24.73087  1.397534 0.0905811

Addiontally, a plot for the Tukey confidence intervals can be obtained using the plot() function.

plot(TukeyHSD(ANOVA.Object)) Critical value from the Scheffe approach with a=4 (df numerator = 3) and df denominator = 30.

3*qf(.95, 3, 30)
##  8.766832
# Or, more generally (not run)
# (a-1)*qf(1-alpha, (a-1), (N-a-1))

Consider again the ANOVA previously performed (above) on the data from chapter_5_table_4. We repeat the ANOVA here for completeness.

ANOVA.Object <- aov(sbp~as.factor(group), data=chapter_5_table_4)
ANOVA_Result <- anova(ANOVA.Object)
MS.W <- ANOVA_Result$"Mean Sq" df1 <- ANOVA_Result$"Df"
df2 <- ANOVA_Result$"Df" df1 ##  3 df2 ##  20 ANOVA_Result ## Analysis of Variance Table ## ## Response: sbp ## Df Sum Sq Mean Sq F value Pr(>F) ## as.factor(group) 3 744.79 248.264 3.7985 0.02637 * ## Residuals 20 1307.17 65.358 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 MS.W ##  65.35833 Now, we take the objects defined above and perform the contrasts. # c-weights of interest that map the questions onto the statistical framework C1 <- c(1, -1, 0, 0) C2 <- c(1, 0, -1, 0) C3 <- c(0, 1, -1, 0) C4 <- c(1/3, 1/3, 1/3, -1) # Here we use a vector of ordered (Groups 1, 2, 3, and 4) means. # Note, however, that earlier we used we used "Summary.5.4$mean" to extract the means.
Means <- c(96.83333, 91.00000, 93.50000, 81.83333)

Psi.1 <- sum(Means*C1)
Psi.2 <- sum(Means*C2)
Psi.3 <- sum(Means*C3)
Psi.4 <- sum(Means*C4)

# Multiplier under the assumption of homogeneous various.
w_S <- sqrt(df1*qf(1-.05, df1=df1, df2=df2))

# Here we use a vector of ordered (Groups 1, 2, 3, and 4) sample sizes.
# Note, however, that earlier we used we used "Summary.5.4$n" # to extract the sample sizes n.values <- c(6, 6, 6, 6) # Standard error (assuming homogeneity of variance) SE.1 <- sqrt(MS.W*sum(C1^2/n.values)) SE.2 <- sqrt(MS.W*sum(C2^2/n.values)) SE.3 <- sqrt(MS.W*sum(C3^2/n.values)) SE.4 <- sqrt(MS.W*sum(C4^2/n.values)) MOE1_S <- w_S*SE.1 MOE2_S <- w_S*SE.2 MOE3_S <- w_S*SE.3 MOE4_S <- w_S*SE.4 # Results for contrast 1-4 c(Psi.1 - MOE1_S, Psi.1 + MOE1_S) ##  -8.397119 20.063779 c(Psi.2 - MOE2_S, Psi.2 + MOE2_S) ##  -10.89712 17.56378 c(Psi.3 - MOE3_S, Psi.3 + MOE3_S) ##  -16.73045 11.73045 c(Psi.4 - MOE4_S, Psi.4 + MOE4_S) ##  0.3253335 23.5635598 Now, not assumming homogeneous variances, we have the following. # When assumming heterogenious variances (e.g., see Chapter 4, Equation 34); From above, repeated. df.C1 <- (sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C1^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C2 <- (sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C2^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1)) df.C3 <- (sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C3^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C4 <- (sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C4^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1)) # We now have a new multiplier for use with confidence intervals from the Scheffe perspective w_S1 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C1)) w_S2 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C2)) w_S3 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C3)) w_S4 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C4)) # Standard error (not assuming homogeneity of variance; here we add "h" for heterogenious); repeated SE.1h <- sqrt(sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))) SE.2h <- sqrt(sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))) SE.3h <- sqrt(sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))) SE.4h <- sqrt(sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))) MOE1_Sh <- w_S1*SE.1h MOE2_Sh <- w_S2*SE.2h MOE3_Sh <- w_S3*SE.3h MOE4_Sh <- w_S4*SE.4h # Results for contrast 1-4 assuming heterogenious variances c(Psi.1 - MOE1_Sh, Psi.1 + MOE1_Sh) ##  -13.67423 25.34089 c(Psi.2 - MOE2_Sh, Psi.2 + MOE2_Sh) ##  -10.50013 17.16679 c(Psi.3 - MOE3_Sh, Psi.3 + MOE3_Sh) ##  -21.23757 16.23757 c(Psi.4 - MOE4_Sh, Psi.4 + MOE4_Sh) ##  2.275611 21.613283 The multcomp package can be used for Dunnett’s appraoch. library(multcomp) ## Loading required package: mvtnorm ## Loading required package: survival ## Loading required package: TH.data ## Loading required package: MASS ## ## Attaching package: 'MASS' ## The following object is masked from 'package:dplyr': ## ## select ## ## Attaching package: 'TH.data' ## The following object is masked from 'package:MASS': ## ## geyser data(chapter_5_table_4) #Here we redef that if the groups were defined -1, -2, -3, and -4 # (so that "group 4"" is now "group = -4" and thus the smallest), we obtain the results we want. chapter_5_table_4$Group.Switched.Order <- as.factor(chapter_5_table_4\$group*-1)

ANOVA.Object <- aov(sbp~Group.Switched.Order, data=chapter_5_table_4) # Repeated
Results.Dunnet <- glht(ANOVA.Object, linfct=mcp(Group.Switched.Order="Dunnett"))
summary(Results.Dunnet)
##
##   Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Dunnett Contrasts
##
##
## Fit: aov(formula = sbp ~ Group.Switched.Order, data = chapter_5_table_4)
##
## Linear Hypotheses:
##              Estimate Std. Error t value Pr(>|t|)
## -3 - -4 == 0   11.667      4.668   2.500   0.0546 .
## -2 - -4 == 0    9.167      4.668   1.964   0.1528
## -1 - -4 == 0   15.000      4.668   3.214   0.0117 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)