Nested Designs in R

Example 1

days <- c(240, 206, 217, 225, 110, 118, 103, 95, 56, 60, 68, 58, 71, 53, 68, 
    57, 47, 52, 31, 49, 37, 33, 40, 45)
type <- factor(c("A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
    "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B"))
years <- factor(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 
    6, 6, 6, 6))
years2 <- factor(c(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 
    2, 3, 3, 3, 3))  # This is the way the data is coded for problems in the book

Box plots for each teacher can be appropiate, as they are the experimental unit.

boxplot(days ~ type)

plot of chunk tplots

boxplot(days ~ years)

plot of chunk tplots

boxplot(days ~ type:years2)

plot of chunk tplots

library(lattice)
dotplot(days ~ years2 | type)

plot of chunk tplots

What if we do it correctly?

res1 <- lm(days ~ type + type/years)
anova(res1)
## Analysis of Variance Table
## 
## Response: days
##            Df Sum Sq Mean Sq F value  Pr(>F)    
## type        1  39447   39447     458 3.0e-14 ***
## type:years  4  56577   14144     164 6.7e-14 ***
## Residuals  18   1550      86                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res2 <- lm(days ~ type + type/years2)
anova(res2)
## Analysis of Variance Table
## 
## Response: days
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## type         1  39447   39447     458 3.0e-14 ***
## type:years2  4  56577   14144     164 6.7e-14 ***
## Residuals   18   1550      86                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Which schools are different?

TukeyHSD(aov(res1), "type")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = res1)
## 
## $type
##       diff    lwr    upr p adj
## B-A -81.08 -89.04 -73.12     0
TukeyHSD(aov(res2), "type")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = res2)
## 
## $type
##       diff    lwr    upr p adj
## B-A -81.08 -89.04 -73.12     0

Do we want all the teacher comparisons? Probably not.

TukeyHSD(aov(res1), "type:years")  #Yikes
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = res1)
## 
## $`type:years`
##            diff     lwr       upr  p adj
## B:1-A:1      NA      NA        NA     NA
## A:2-A:1 -115.50 -139.94  -91.0629 0.0000
## B:2-A:1      NA      NA        NA     NA
## A:3-A:1 -161.50 -185.94 -137.0629 0.0000
## B:3-A:1      NA      NA        NA     NA
## A:4-A:1      NA      NA        NA     NA
## B:4-A:1 -159.75 -184.19 -135.3129 0.0000
## A:5-A:1      NA      NA        NA     NA
## B:5-A:1 -177.25 -201.69 -152.8129 0.0000
## A:6-A:1      NA      NA        NA     NA
## B:6-A:1 -183.25 -207.69 -158.8129 0.0000
## A:2-B:1      NA      NA        NA     NA
## B:2-B:1      NA      NA        NA     NA
## A:3-B:1      NA      NA        NA     NA
## B:3-B:1      NA      NA        NA     NA
## A:4-B:1      NA      NA        NA     NA
## B:4-B:1      NA      NA        NA     NA
## A:5-B:1      NA      NA        NA     NA
## B:5-B:1      NA      NA        NA     NA
## A:6-B:1      NA      NA        NA     NA
## B:6-B:1      NA      NA        NA     NA
## B:2-A:2      NA      NA        NA     NA
## A:3-A:2  -46.00  -70.44  -21.5629 0.0001
## B:3-A:2      NA      NA        NA     NA
## A:4-A:2      NA      NA        NA     NA
## B:4-A:2  -44.25  -68.69  -19.8129 0.0001
## A:5-A:2      NA      NA        NA     NA
## B:5-A:2  -61.75  -86.19  -37.3129 0.0000
## A:6-A:2      NA      NA        NA     NA
## B:6-A:2  -67.75  -92.19  -43.3129 0.0000
## A:3-B:2      NA      NA        NA     NA
## B:3-B:2      NA      NA        NA     NA
## A:4-B:2      NA      NA        NA     NA
## B:4-B:2      NA      NA        NA     NA
## A:5-B:2      NA      NA        NA     NA
## B:5-B:2      NA      NA        NA     NA
## A:6-B:2      NA      NA        NA     NA
## B:6-B:2      NA      NA        NA     NA
## B:3-A:3      NA      NA        NA     NA
## A:4-A:3      NA      NA        NA     NA
## B:4-A:3    1.75  -22.69   26.1871 1.0000
## A:5-A:3      NA      NA        NA     NA
## B:5-A:3  -15.75  -40.19    8.6871 0.4513
## A:6-A:3      NA      NA        NA     NA
## B:6-A:3  -21.75  -46.19    2.6871 0.1078
## A:4-B:3      NA      NA        NA     NA
## B:4-B:3      NA      NA        NA     NA
## A:5-B:3      NA      NA        NA     NA
## B:5-B:3      NA      NA        NA     NA
## A:6-B:3      NA      NA        NA     NA
## B:6-B:3      NA      NA        NA     NA
## B:4-A:4      NA      NA        NA     NA
## A:5-A:4      NA      NA        NA     NA
## B:5-A:4      NA      NA        NA     NA
## A:6-A:4      NA      NA        NA     NA
## B:6-A:4      NA      NA        NA     NA
## A:5-B:4      NA      NA        NA     NA
## B:5-B:4  -17.50  -41.94    6.9371 0.3143
## A:6-B:4      NA      NA        NA     NA
## B:6-B:4  -23.50  -47.94    0.9371 0.0657
## B:5-A:5      NA      NA        NA     NA
## A:6-A:5      NA      NA        NA     NA
## B:6-A:5      NA      NA        NA     NA
## A:6-B:5      NA      NA        NA     NA
## B:6-B:5   -6.00  -30.44   18.4371 0.9979
## B:6-A:6      NA      NA        NA     NA
TukeyHSD(aov(res2), "school:years2")
## Error: 'which' specified no factors
contrast(res2, list(school = "A", teacher = "1"), list(school = "A", teacher = "2"))  #Bummer
## Error: could not find function "contrast"
library(lsmeans)
## Warning: package 'lsmeans' was built under R version 2.15.3
lsmeans(res2, pairwise ~ type:years2)
## $`type:years2 lsmeans`
##  type years2 lsmean   SE df lower.CL upper.CL
##     A      1 222.00 4.64 18   212.25   231.75
##     B      1  62.25 4.64 18    52.50    72.00
##     A      2 106.50 4.64 18    96.75   116.25
##     B      2  44.75 4.64 18    35.00    54.50
##     A      3  60.50 4.64 18    50.75    70.25
##     B      3  38.75 4.64 18    29.00    48.50
## 
## $`type:years2 pairwise differences`
##             estimate    SE df t.ratio p.value
## A, 1 - B, 1   159.75 6.562 18 24.3440 0.00000
## A, 1 - A, 2   115.50 6.562 18 17.6008 0.00000
## A, 1 - B, 2   177.25 6.562 18 27.0108 0.00000
## A, 1 - A, 3   161.50 6.562 18 24.6106 0.00000
## A, 1 - B, 3   183.25 6.562 18 27.9251 0.00000
## B, 1 - A, 2   -44.25 6.562 18 -6.7432 0.00003
## B, 1 - B, 2    17.50 6.562 18  2.6668 0.13141
## B, 1 - A, 3     1.75 6.562 18  0.2667 0.99978
## B, 1 - B, 3    23.50 6.562 18  3.5811 0.02212
## A, 2 - B, 2    61.75 6.562 18  9.4100 0.00000
## A, 2 - A, 3    46.00 6.562 18  7.0098 0.00002
## A, 2 - B, 3    67.75 6.562 18 10.3243 0.00000
## B, 2 - A, 3   -15.75 6.562 18 -2.4001 0.20777
## B, 2 - B, 3     6.00 6.562 18  0.9143 0.93774
## A, 3 - B, 3    21.75 6.562 18  3.3144 0.03809
##     p values are adjusted using the tukey method for 6 means

Using lsmeans gives the same results as TukeyHSD. However, you can get p-values that are not adjusted.

lsmeans(res2, pairwise ~ type:years2, adjust = "none")
## $`type:years2 lsmeans`
##  type years2 lsmean   SE df lower.CL upper.CL
##     A      1 222.00 4.64 18   212.25   231.75
##     B      1  62.25 4.64 18    52.50    72.00
##     A      2 106.50 4.64 18    96.75   116.25
##     B      2  44.75 4.64 18    35.00    54.50
##     A      3  60.50 4.64 18    50.75    70.25
##     B      3  38.75 4.64 18    29.00    48.50
## 
## $`type:years2 pairwise differences`
##             estimate    SE df t.ratio p.value
## A, 1 - B, 1   159.75 6.562 18 24.3440 0.00000
## A, 1 - A, 2   115.50 6.562 18 17.6008 0.00000
## A, 1 - B, 2   177.25 6.562 18 27.0108 0.00000
## A, 1 - A, 3   161.50 6.562 18 24.6106 0.00000
## A, 1 - B, 3   183.25 6.562 18 27.9251 0.00000
## B, 1 - A, 2   -44.25 6.562 18 -6.7432 0.00000
## B, 1 - B, 2    17.50 6.562 18  2.6668 0.01572
## B, 1 - A, 3     1.75 6.562 18  0.2667 0.79275
## B, 1 - B, 3    23.50 6.562 18  3.5811 0.00214
## A, 2 - B, 2    61.75 6.562 18  9.4100 0.00000
## A, 2 - A, 3    46.00 6.562 18  7.0098 0.00000
## A, 2 - B, 3    67.75 6.562 18 10.3243 0.00000
## B, 2 - A, 3   -15.75 6.562 18 -2.4001 0.02742
## B, 2 - B, 3     6.00 6.562 18  0.9143 0.37263
## A, 3 - B, 3    21.75 6.562 18  3.3144 0.00386
##     p values are not adjusted

Since we are only intrested in the three of the pairwise comparisons, the Bonferonni adjusted p-value is \( \frac{\alpha}{g} \), where g is the number of comparisons. Since g=3, any adjusted p-value less than .017 is significant. Therefore they are all significant. In the end, you can use the TukeyHSD and just look at the interesting contrasts.