Heights of singers in the NY Choral Society in 1979. Self-report, to the nearest inch.
x <- read.csv("~/Desktop/Zoe/Recipe 7.csv")
attach(x)
str(x)
## 'data.frame': 130 obs. of 2 variables:
## $ Height: int 64 62 66 65 60 61 65 66 65 63 ...
## $ Sing : Factor w/ 4 levels "Alto","Bass",..: 3 3 3 3 3 3 3 3 3 3 ...
The factor is voice parts with four levels: alto, bass, soprano, and tenor.
plot(Sing)
The response variable is height is inches.
summary(Height)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 60.0 65.0 66.0 67.1 70.0 76.0
plot(Height~Sing)
Describe
model <- aov(Height~Sing)
anova(model)
## Analysis of Variance Table
##
## Response: Height
## Df Sum Sq Mean Sq F value Pr(>F)
## Sing 3 957 319 44.7 <2e-16 ***
## Residuals 126 899 7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Describe
tukey <- TukeyHSD(aov(Height~Sing))
tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Height ~ Sing)
##
## $Sing
## diff lwr upr p adj
## Bass-Alto 5.8643 4.214 7.5149 0.0000
## Soprano-Alto -0.1678 -1.787 1.4513 0.9931
## Tenor-Alto 4.2643 2.315 6.2134 0.0000
## Soprano-Bass -6.0321 -7.639 -4.4249 0.0000
## Tenor-Bass -1.6000 -3.539 0.3393 0.1438
## Tenor-Soprano 4.4321 2.520 6.3445 0.0000
plot(tukey)
aggregate(Height, by=list(Sing), FUN=mean)
## Group.1 x
## 1 Alto 64.89
## 2 Bass 70.75
## 3 Soprano 64.72
## 4 Tenor 69.15
Describe
qqnorm(residuals(model)) ; qqline(residuals(model))
with(x,tapply(Height,Sing,mean))
## Alto Bass Soprano Tenor
## 64.89 70.75 64.72 69.15
with(x,tapply(Height,Sing,var))
## Alto Bass Soprano Tenor
## 7.810 5.907 6.050 10.345
with(x,tapply(Height,Sing,length))
## Alto Bass Soprano Tenor
## 35 36 39 20
summary(aov(Height~Sing))
## Df Sum Sq Mean Sq F value Pr(>F)
## Sing 3 957 319 44.7 <2e-16 ***
## Residuals 126 899 7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
meanstar = mean(Height)
sdstar = sqrt(7.1)
simsing = Sing
R = 10000 # 10,000 iterations
Fstar1 = numeric(R)
for (i in 1:R) {
groupA = rnorm(35, mean=meanstar, sd=sdstar)
groupB = rnorm(36, mean=meanstar, sd=sdstar)
groupC = rnorm(39, mean=meanstar, sd=sdstar)
groupD = rnorm(20, mean=meanstar, sd=sdstar)
simheight = c(groupA,groupB,groupC,groupD)
simdata = data.frame(simheight,simsing)
Fstar1[i] = oneway.test(simheight~simsing, var.equal=T, data=simdata)$statistic
}
hist(Fstar1, ylim=c(0,1), xlim=c(0, 8), prob=T)
z=seq(.25,6,.25)
points(z,y=df(z,3,127),type="b",col="red")
meanstar = with(x, tapply(Height,Sing,mean))
grpA = Height[Sing=="Alto"] - meanstar[1]
grpB = Height[Sing=="Bass"] - meanstar[2]
grpC = Height[Sing=="Soprano"] - meanstar[3]
grpD = Height[Sing=="Tenor"] - meanstar[4]
simsing2 = Sing
R = 10000
Fstar2 = numeric(R)
for (i in 1:R) {
group1 = sample(grpA, size=35, replace=T)
group2 = sample(grpB, size=36, replace=T)
group3 = sample(grpC, size=39, replace=T)
group4 = sample(grpD, size=20, replace=T)
simheight2 = c(group1,group2,group3,group4)
simdata2 = data.frame(simheight2,simsing2)
Fstar2[i] = oneway.test(simheight2~simsing2, var.equal=T, data=simdata2)$statistic
}
hist(Fstar2, ylim=c(0,1), xlim=c(0, 8), prob=T)
z=seq(.25,6,.25)
points(z,y=df(z,3,127),type="b",col="red")
print(realFstar<-oneway.test(Height~Sing, var.equal=T, data=x)$statistic)
## F
## 44.7
qf(.95,3,127)
## [1] 2.676
quantile(Fstar2,.95)
## 95%
## 2.587
mean(Fstar2>=realFstar)
## [1] 0
anova(aov(simheight2~simsing2, data=simdata2))
## Analysis of Variance Table
##
## Response: simheight2
## Df Sum Sq Mean Sq F value Pr(>F)
## simsing2 3 3 1.01 0.15 0.93
## Residuals 126 870 6.90