These notes build on Chapter 1, 3, and 4, where we discuss some operational aspects of R and detail the use of many functions and how to install packages. Here, we skip details that have already been discussed in the prior R Code and Instructions.
Bonferroni critical value (for t and F)
C <- 3
df1 <- 1
df2 <- 40
alpha_FW <- .05
alpha_PC <- alpha_FW/C
# Critical value for t.
qt(1-alpha_PC/2, df=df2)
## [1] 2.498856
# Critical value for F
qf(1-alpha_PC, df1=df1, df2=df2)
## [1] 6.24428
Tukey critical value. Notice, here, that the family-wise Type I error rate is not divided for each tail, as it is inherently one-sided when examining mean differences. Further, notice that we have to convert the critical q value into a critical t or F value.
alpha_FW <- .05
a <- 4
df <- 40
# Critical q value:
qtukey(p=1-alpha_FW, df=df, nmeans = a)
## [1] 3.790685
# Tukey corrected critical t value (divide by the square root of 2) (Games-Howell procedure)
qtukey(p=1-alpha_FW, df=df, nmeans = a)/sqrt(2)
## [1] 2.680419
# Tukey corrected critical F value:
(qtukey(p=1-alpha_FW, df=df, nmeans = a)^2)/2
## [1] 7.184648
# Table 5.6 (Critical F values with Tukey correction)
qtukey(p=1-.05, df=12, nmeans = 2)^2/2
## [1] 4.747225
qtukey(p=1-.05, df=12, nmeans = 3)^2/2
## [1] 7.117496
qtukey(p=1-.05, df=12, nmeans = 4)^2/2
## [1] 8.814374
qtukey(p=1-.05, df=12, nmeans = 5)^2/2
## [1] 10.15972
qtukey(p=1-.05, df=12, nmeans = 6)^2/2
## [1] 11.28235
Load the AMCP package to use the data from Table 4, Chapter 5.
library(AMCP)
data(chapter_5_table_4)
chapter_5_table_4
## group sbp
## 1 1 84
## 2 1 95
## 3 1 93
## 4 1 104
## 5 1 99
## 6 1 106
## 7 2 81
## 8 2 84
## 9 2 92
## 10 2 101
## 11 2 80
## 12 2 108
## 13 3 98
## 14 3 95
## 15 3 86
## 16 3 87
## 17 3 94
## 18 3 101
## 19 4 91
## 20 4 78
## 21 4 85
## 22 4 80
## 23 4 81
## 24 4 76
ANOVA.Object <- aov(sbp~as.factor(group), data=chapter_5_table_4)
ANOVA_Result <- anova(ANOVA.Object)
ANOVA_Result
## Analysis of Variance Table
##
## Response: sbp
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(group) 3 744.79 248.264 3.7985 0.02637 *
## Residuals 20 1307.17 65.358
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MS.W <- ANOVA_Result$"Mean Sq"[2]
MS.W
## [1] 65.35833
df1 <- ANOVA_Result$"Df"[1]
df2 <- ANOVA_Result$"Df"[2]
Get the group descriptive statistics (this is one appraoch; see the Chapter 4 R Accompaniment for other approaches).
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Summary.5.4 <- chapter_5_table_4 %>%
group_by(group) %>%
summarize(
mean=mean(sbp),
sd=sd(sbp),
s2=var(sbp),
n=n()
)
Summary.5.4
## # A tibble: 4 × 5
## group mean sd s2 n
## <int> <dbl> <dbl> <dbl> <int>
## 1 1 96.83333 8.035339 64.56667 6
## 2 2 91.00000 11.489125 132.00000 6
## 3 3 93.50000 5.958188 35.50000 6
## 4 4 81.83333 5.419102 29.36667 6
Questions of interest mapped onto the contrast coefficients
C1 <- c(1, -1, 0, 0)
C2 <- c(1, 0, -1, 0)
C3 <- c(0, 1, -1, 0)
C4 <- c(1/3, 1/3, 1/3, -1)
Uncorrected confidence intervals using MBESS package.
library(MBESS)
ci.c(means=Summary.5.4$mean, c.weights=C1, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] -3.903025
##
## $Contrast
## [1] 5.833333
##
## $Upper.Conf.Limit.Contrast
## [1] 15.56969
ci.c(means=Summary.5.4$mean, c.weights=C2, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] -6.403025
##
## $Contrast
## [1] 3.333333
##
## $Upper.Conf.Limit.Contrast
## [1] 13.06969
ci.c(means=Summary.5.4$mean, c.weights=C3, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] -12.23636
##
## $Contrast
## [1] -2.5
##
## $Upper.Conf.Limit.Contrast
## [1] 7.236358
ci.c(means=Summary.5.4$mean, c.weights=C4, n=Summary.5.4$n, N=sum(Summary.5.4$n), s.anova=sqrt(MS.W))
## $Lower.Conf.Limit.Contrast
## [1] 3.994741
##
## $Contrast
## [1] 11.94444
##
## $Upper.Conf.Limit.Contrast
## [1] 19.89415
Another approch for forming contrasts for the specified \(c\)-weights is given as follows.
# Effect size
Psi.1 <- sum(Summary.5.4$mean*C1)
Psi.2 <- sum(Summary.5.4$mean*C2)
Psi.3 <- sum(Summary.5.4$mean*C3)
Psi.4 <- sum(Summary.5.4$mean*C4)
# Standard error (assuming homogeneity of variance)
SE.1 <- sqrt(MS.W*sum(C1^2/Summary.5.4$n))
SE.2 <- sqrt(MS.W*sum(C2^2/Summary.5.4$n))
SE.3 <- sqrt(MS.W*sum(C3^2/Summary.5.4$n))
SE.4 <- sqrt(MS.W*sum(C4^2/Summary.5.4$n))
# Multiplier (which changes based on the appraoch taken); here,
# w is used without correction for multiplicity.
w <- sqrt(qf(1-.05, df1=df1, df2=df2))
# The uncorrected margins of error are given as
MOE1 <- SE.1*w
MOE2 <- SE.2*w
MOE3 <- SE.3*w
MOE4 <- SE.4*w
# The uncorrected confidence intervals are given as
c(Psi.1 - MOE1, Psi.1 + MOE1)
## [1] -2.38262 14.04929
c(Psi.2 - MOE2, Psi.2 + MOE2)
## [1] -4.88262 11.54929
c(Psi.3 - MOE3, Psi.3 + MOE3)
## [1] -10.715954 5.715954
c(Psi.4 - MOE4, Psi.4 + MOE4)
## [1] 5.236146 18.652743
Now, using the above approach, we correct for multiplicity by using the Bonferroni correction, which modified \(w\).
# Multiplier (which changes based on the appraoch taken); here,
# w is corrected by diving the desired family-wise Type I error rate by 4.
w_B <- sqrt(qf(1-.05/4, df1=1, df2=df2))
# The uncorrected margins of error are given as
MOE1_B <- SE.1*w_B
MOE2_B <- SE.2*w_B
MOE3_B <- SE.3*w_B
MOE4_B <- SE.4*w_B
# The uncorrected confidence intervals are given as
c(Psi.1 - MOE1_B, Psi.1 + MOE1_B)
## [1] -6.976206 18.642873
c(Psi.2 - MOE2_B, Psi.2 + MOE2_B)
## [1] -9.476206 16.142873
c(Psi.3 - MOE3_B, Psi.3 + MOE3_B)
## [1] -15.30954 10.30954
c(Psi.4 - MOE4_B, Psi.4 + MOE4_B)
## [1] 1.485499 22.403390
Now, we correct for multiplicity with Bonferroni and do not assume homogeneity of variance.
# When assumming heterogenious variances (e.g., see Chapter 4, Equation 34)
df.C1 <- (sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C1^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C2 <- (sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C2^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C3 <- (sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C3^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C4 <- (sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C4^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
# Multiplier (which changes based on the appraoch taken); here,
#w is corrected by diving the desired family-wise Type I error rate by 4.
w_B1 <- sqrt(qf(1-.05/4, df1=1, df2=df.C1))
w_B2 <- sqrt(qf(1-.05/4, df1=1, df2=df.C2))
w_B3 <- sqrt(qf(1-.05/4, df1=1, df2=df.C3))
w_B4 <- sqrt(qf(1-.05/4, df1=1, df2=df.C4))
# Standard error (not assuming homogeneity of variance; here we add "h" for heterogenious).
SE.1h <- sqrt(sum(C1^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.2h <- sqrt(sum(C2^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.3h <- sqrt(sum(C3^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.4h <- sqrt(sum(C4^2*(Summary.5.4$s2/Summary.5.4$n)))
# The uncorrected margins of error are given as
MOE1_Bh <- SE.1h*w_B
MOE2_Bh <- SE.2h*w_B
MOE3_Bh <- SE.3h*w_B
MOE4_Bh <- SE.4h*w_B
# The uncorrected confidence intervals are given as
c(Psi.1 - MOE1_Bh, Psi.1 + MOE1_Bh)
## [1] -9.874742 21.541409
c(Psi.2 - MOE2_Bh, Psi.2 + MOE2_Bh)
## [1] -7.87427 14.54094
c(Psi.3 - MOE3_Bh, Psi.3 + MOE3_Bh)
## [1] -17.00025 12.00025
c(Psi.4 - MOE4_Bh, Psi.4 + MOE4_Bh)
## [1] 3.62397 20.26492
The Tukey correction, for all pairwise comparisons, is simple to immplement in R with the TukeyHSD() function. The function works be providing a fitted aov() object. This replicates Table 5.7.
TukeyHSD(ANOVA.Object)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = sbp ~ as.factor(group), data = chapter_5_table_4)
##
## $`as.factor(group)`
## diff lwr upr p adj
## 2-1 -5.833333 -18.89753 7.230868 0.6038383
## 3-1 -3.333333 -16.39753 9.730868 0.8903064
## 4-1 -15.000000 -28.06420 -1.935799 0.0208593
## 3-2 2.500000 -10.56420 15.564201 0.9493084
## 4-2 -9.166667 -22.23087 3.897534 0.2345096
## 4-3 -11.666667 -24.73087 1.397534 0.0905811
Addiontally, a plot for the Tukey confidence intervals can be obtained using the plot() function.
plot(TukeyHSD(ANOVA.Object))
Critical value from the Scheffe approach with a=4 (df numerator = 3) and df denominator = 30.
3*qf(.95, 3, 30)
## [1] 8.766832
# Or, more generally (not run)
# (a-1)*qf(1-alpha, (a-1), (N-a-1))
Consider again the ANOVA previously performed (above) on the data from chapter_5_table_4. We repeat the ANOVA here for completeness.
ANOVA.Object <- aov(sbp~as.factor(group), data=chapter_5_table_4)
ANOVA_Result <- anova(ANOVA.Object)
MS.W <- ANOVA_Result$"Mean Sq"[2]
df1 <- ANOVA_Result$"Df"[1]
df2 <- ANOVA_Result$"Df"[2]
df1
## [1] 3
df2
## [1] 20
ANOVA_Result
## Analysis of Variance Table
##
## Response: sbp
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(group) 3 744.79 248.264 3.7985 0.02637 *
## Residuals 20 1307.17 65.358
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
MS.W
## [1] 65.35833
Now, we take the objects defined above and perform the contrasts.
# c-weights of interest that map the questions onto the statistical framework
C1 <- c(1, -1, 0, 0)
C2 <- c(1, 0, -1, 0)
C3 <- c(0, 1, -1, 0)
C4 <- c(1/3, 1/3, 1/3, -1)
# Here we use a vector of ordered (Groups 1, 2, 3, and 4) means.
# Note, however, that earlier we used we used "Summary.5.4$mean" to extract the means.
Means <- c(96.83333, 91.00000, 93.50000, 81.83333)
Psi.1 <- sum(Means*C1)
Psi.2 <- sum(Means*C2)
Psi.3 <- sum(Means*C3)
Psi.4 <- sum(Means*C4)
# Multiplier under the assumption of homogeneous various.
w_S <- sqrt(df1*qf(1-.05, df1=df1, df2=df2))
# Here we use a vector of ordered (Groups 1, 2, 3, and 4) sample sizes.
# Note, however, that earlier we used we used "Summary.5.4$n"
# to extract the sample sizes
n.values <- c(6, 6, 6, 6)
# Standard error (assuming homogeneity of variance)
SE.1 <- sqrt(MS.W*sum(C1^2/n.values))
SE.2 <- sqrt(MS.W*sum(C2^2/n.values))
SE.3 <- sqrt(MS.W*sum(C3^2/n.values))
SE.4 <- sqrt(MS.W*sum(C4^2/n.values))
MOE1_S <- w_S*SE.1
MOE2_S <- w_S*SE.2
MOE3_S <- w_S*SE.3
MOE4_S <- w_S*SE.4
# Results for contrast 1-4
c(Psi.1 - MOE1_S, Psi.1 + MOE1_S)
## [1] -8.397119 20.063779
c(Psi.2 - MOE2_S, Psi.2 + MOE2_S)
## [1] -10.89712 17.56378
c(Psi.3 - MOE3_S, Psi.3 + MOE3_S)
## [1] -16.73045 11.73045
c(Psi.4 - MOE4_S, Psi.4 + MOE4_S)
## [1] 0.3253335 23.5635598
Now, not assumming homogeneous variances, we have the following.
# When assumming heterogenious variances (e.g., see Chapter 4, Equation 34); From above, repeated.
df.C1 <- (sum(C1^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C1^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C2 <- (sum(C2^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C2^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C3 <- (sum(C3^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C3^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
df.C4 <- (sum(C4^2*(Summary.5.4$s2/Summary.5.4$n))^2) / sum(((C4^2*(Summary.5.4$s2/Summary.5.4$n))^2)/(Summary.5.4$n-1))
# We now have a new multiplier for use with confidence intervals from the Scheffe perspective
w_S1 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C1))
w_S2 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C2))
w_S3 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C3))
w_S4 <- sqrt(df1*qf(1-.05, df1=df1, df2=df.C4))
# Standard error (not assuming homogeneity of variance; here we add "h" for heterogenious); repeated
SE.1h <- sqrt(sum(C1^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.2h <- sqrt(sum(C2^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.3h <- sqrt(sum(C3^2*(Summary.5.4$s2/Summary.5.4$n)))
SE.4h <- sqrt(sum(C4^2*(Summary.5.4$s2/Summary.5.4$n)))
MOE1_Sh <- w_S1*SE.1h
MOE2_Sh <- w_S2*SE.2h
MOE3_Sh <- w_S3*SE.3h
MOE4_Sh <- w_S4*SE.4h
# Results for contrast 1-4 assuming heterogenious variances
c(Psi.1 - MOE1_Sh, Psi.1 + MOE1_Sh)
## [1] -13.67423 25.34089
c(Psi.2 - MOE2_Sh, Psi.2 + MOE2_Sh)
## [1] -10.50013 17.16679
c(Psi.3 - MOE3_Sh, Psi.3 + MOE3_Sh)
## [1] -21.23757 16.23757
c(Psi.4 - MOE4_Sh, Psi.4 + MOE4_Sh)
## [1] 2.275611 21.613283
The multcomp package can be used for Dunnett’s appraoch.
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
data(chapter_5_table_4)
#Here we redef that if the groups were defined -1, -2, -3, and -4
# (so that "group 4"" is now "group = -4" and thus the smallest), we obtain the results we want.
chapter_5_table_4$Group.Switched.Order <- as.factor(chapter_5_table_4$group*-1)
ANOVA.Object <- aov(sbp~Group.Switched.Order, data=chapter_5_table_4) # Repeated
Results.Dunnet <- glht(ANOVA.Object, linfct=mcp(Group.Switched.Order="Dunnett"))
summary(Results.Dunnet)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Dunnett Contrasts
##
##
## Fit: aov(formula = sbp ~ Group.Switched.Order, data = chapter_5_table_4)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## -3 - -4 == 0 11.667 4.668 2.500 0.0546 .
## -2 - -4 == 0 9.167 4.668 1.964 0.1528
## -1 - -4 == 0 15.000 4.668 3.214 0.0117 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)