Recipe 7: Resampling

Recipes for the Design of Experiments

Zoe Konrad

Rensselaer Polytechnic Institute

Fall 2014 v1

1. Setting

System under test

Heights of singers in the NY Choral Society in 1979. Self-report, to the nearest inch.

x <- read.csv("~/Desktop/Zoe/Recipe 7.csv")
attach(x)
str(x)
## 'data.frame':    130 obs. of  2 variables:
##  $ Height: int  64 62 66 65 60 61 65 66 65 63 ...
##  $ Sing  : Factor w/ 4 levels "Alto","Bass",..: 3 3 3 3 3 3 3 3 3 3 ...

Factors and Levels

The factor is voice parts with four levels: alto, bass, soprano, and tenor.

plot(Sing)

plot of chunk unnamed-chunk-2

Response variables

The response variable is height is inches.

summary(Height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    60.0    65.0    66.0    67.1    70.0    76.0

2. (Experimental) Design

3. (Statistical) Analysis

Exploratory Data Analysis Graphics

plot(Height~Sing)

plot of chunk unnamed-chunk-4

Testing

Describe

model <- aov(Height~Sing)
anova(model)
## Analysis of Variance Table
## 
## Response: Height
##            Df Sum Sq Mean Sq F value Pr(>F)    
## Sing        3    957     319    44.7 <2e-16 ***
## Residuals 126    899       7                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimation (of Parameters)

Describe

tukey <- TukeyHSD(aov(Height~Sing))
tukey
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Height ~ Sing)
## 
## $Sing
##                  diff    lwr     upr  p adj
## Bass-Alto      5.8643  4.214  7.5149 0.0000
## Soprano-Alto  -0.1678 -1.787  1.4513 0.9931
## Tenor-Alto     4.2643  2.315  6.2134 0.0000
## Soprano-Bass  -6.0321 -7.639 -4.4249 0.0000
## Tenor-Bass    -1.6000 -3.539  0.3393 0.1438
## Tenor-Soprano  4.4321  2.520  6.3445 0.0000
plot(tukey)

plot of chunk unnamed-chunk-6

aggregate(Height, by=list(Sing), FUN=mean)
##   Group.1     x
## 1    Alto 64.89
## 2    Bass 70.75
## 3 Soprano 64.72
## 4   Tenor 69.15

Diagnostics/Model Adequacy Checking

Describe

qqnorm(residuals(model)) ; qqline(residuals(model))

plot of chunk unnamed-chunk-7

Resampling

with(x,tapply(Height,Sing,mean))
##    Alto    Bass Soprano   Tenor 
##   64.89   70.75   64.72   69.15
with(x,tapply(Height,Sing,var))
##    Alto    Bass Soprano   Tenor 
##   7.810   5.907   6.050  10.345
with(x,tapply(Height,Sing,length))
##    Alto    Bass Soprano   Tenor 
##      35      36      39      20
summary(aov(Height~Sing))
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Sing          3    957     319    44.7 <2e-16 ***
## Residuals   126    899       7                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
meanstar = mean(Height)
sdstar = sqrt(7.1)
simsing = Sing
R = 10000  # 10,000 iterations
Fstar1 = numeric(R)
for (i in 1:R) {
  groupA = rnorm(35, mean=meanstar, sd=sdstar)
  groupB = rnorm(36, mean=meanstar, sd=sdstar)
  groupC = rnorm(39, mean=meanstar, sd=sdstar)
  groupD = rnorm(20, mean=meanstar, sd=sdstar)
  simheight = c(groupA,groupB,groupC,groupD)
  simdata = data.frame(simheight,simsing)
  Fstar1[i] = oneway.test(simheight~simsing, var.equal=T, data=simdata)$statistic
}
hist(Fstar1, ylim=c(0,1), xlim=c(0, 8), prob=T)
z=seq(.25,6,.25)
points(z,y=df(z,3,127),type="b",col="red")

plot of chunk unnamed-chunk-8

meanstar = with(x, tapply(Height,Sing,mean))
grpA = Height[Sing=="Alto"] - meanstar[1]
grpB = Height[Sing=="Bass"] - meanstar[2]
grpC = Height[Sing=="Soprano"] - meanstar[3]
grpD = Height[Sing=="Tenor"] - meanstar[4]
simsing2 = Sing
R = 10000
Fstar2 = numeric(R)
for (i in 1:R) {
  group1 = sample(grpA, size=35, replace=T)
  group2 = sample(grpB, size=36, replace=T)
  group3 = sample(grpC, size=39, replace=T)
  group4 = sample(grpD, size=20, replace=T)
  simheight2 = c(group1,group2,group3,group4)
  simdata2 = data.frame(simheight2,simsing2)
  Fstar2[i] = oneway.test(simheight2~simsing2, var.equal=T, data=simdata2)$statistic
}

hist(Fstar2, ylim=c(0,1), xlim=c(0, 8), prob=T)
z=seq(.25,6,.25)
points(z,y=df(z,3,127),type="b",col="red")

plot of chunk unnamed-chunk-9

print(realFstar<-oneway.test(Height~Sing, var.equal=T, data=x)$statistic)
##    F 
## 44.7
qf(.95,3,127)
## [1] 2.676
quantile(Fstar2,.95)
##   95% 
## 2.587
mean(Fstar2>=realFstar)
## [1] 0
anova(aov(simheight2~simsing2, data=simdata2))
## Analysis of Variance Table
## 
## Response: simheight2
##            Df Sum Sq Mean Sq F value Pr(>F)
## simsing2    3      3    1.01    0.15   0.93
## Residuals 126    870    6.90

4. References to the literature