######################################################
# Stat 430 - Assignment 3
#
# Sarah Rathwell - 301084687
#
# Objective - Investigate blocked designs.
######################################################
rm(list=ls())
library(car)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: splines
## Loading required package: TH.data
######################################################
cat(" Investigate blocked designs.\n\n",
"Last modification:",date(),'\n')
## Investigate blocked designs.
##
## Last modification: Fri Sep 9 11:59:50 2016
######################################################
# 1
# Investigate brick strength by cooling level, blocked by kiln
# Create data table for cooling effects, brick strength, and kiln
brick.mat <- matrix(c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,
3,10,5,4,8,4,12,10,6,9,10,15,13,9,15,8,
14,7,9,13,5,5,5,5,5,10,10,10,10,10,15,15,
15,15,15,20,20,20,20,20),ncol=3,byrow=F)
colnames(brick.mat) <- c("Kiln","Strength","Cool.Temp")
brick <- as.data.frame(brick.mat,row.names=NULL)
brick
## Kiln Strength Cool.Temp
## 1 1 3 5
## 2 2 10 5
## 3 3 5 5
## 4 4 4 5
## 5 5 8 5
## 6 1 4 10
## 7 2 12 10
## 8 3 10 10
## 9 4 6 10
## 10 5 9 10
## 11 1 10 15
## 12 2 15 15
## 13 3 13 15
## 14 4 9 15
## 15 5 15 15
## 16 1 8 20
## 17 2 14 20
## 18 3 7 20
## 19 4 9 20
## 20 5 13 20
# Check if design is balanced and input matches given table
xtabs( ~ Cool.Temp+Kiln, data=brick)
## Kiln
## Cool.Temp 1 2 3 4 5
## 5 1 1 1 1 1
## 10 1 1 1 1 1
## 15 1 1 1 1 1
## 20 1 1 1 1 1
xtabs(Strength ~ Cool.Temp+Kiln, data=brick)
## Kiln
## Cool.Temp 1 2 3 4 5
## 5 3 10 5 4 8
## 10 4 12 10 6 9
## 15 10 15 13 9 15
## 20 8 14 7 9 13
# Ensure temp and kiln is read as factor
brick$Cool.Temp <- factor(brick$Cool.Temp)
brick$Kiln <- factor(brick$Kiln)
str(brick)
## 'data.frame': 20 obs. of 3 variables:
## $ Kiln : Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ Strength : num 3 10 5 4 8 4 12 10 6 9 ...
## $ Cool.Temp: Factor w/ 4 levels "5","10","15",..: 1 1 1 1 1 2 2 2 2 2 ...
# 1a
# Visual analysis of mean strength over temp
plot(Strength ~ Cool.Temp, data=brick, main="Brick Strength by Temp",
range=0, notch=T)
## Warning in bxp(structure(list(stats = structure(c(3, 4, 5, 8, 10, 4, 6, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
stripchart(Strength ~ Cool.Temp, data=brick, add=T, vertical=T, method='jitter',
jitter=.1)

cat(c("There is some visual evidence for a difference of means, meritting further analysis"))
## There is some visual evidence for a difference of means, meritting further analysis
# 1b
# Investigate difference of means over cooling temps
cat(c("H0: Brick strength means equal among cooling tems, HA: At least one mean different"))
## H0: Brick strength means equal among cooling tems, HA: At least one mean different
# Check blocks for interactions for ANOVA assumptions
interaction.plot(brick$Cool.Temp, brick$Kiln, brick$Strength, ylab="Strength", xlab="Temp",
main="Brick strength over cooling temp", trace.label="Kiln")

# Note assumption of block additivity does not appear violated; there are no clear
# block interactions
# Fit data for one way ANOVA
brick.fit <- aov(Strength ~ Kiln + Cool.Temp, data=brick)
Anova(brick.fit, type=3)
## Anova Table (Type III tests)
##
## Response: Strength
## Sum Sq Df F value Pr(>F)
## (Intercept) 23.256 1 12.348 0.00427 **
## Kiln 122.200 4 16.221 8.765e-05 ***
## Cool.Temp 112.400 3 19.894 5.974e-05 ***
## Residuals 22.600 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Note that because design is balanced, order of blocks/treatment in model
# does not matter {block+trt and trt+block will yield same results}
cat(c(" An ANOVA for the fitted model shows strong evidence for a difference in\n",
"mean strength over various cooling temps (F=19.9, p>>0.001); we reject H0"))
## An ANOVA for the fitted model shows strong evidence for a difference in
## mean strength over various cooling temps (F=19.9, p>>0.001); we reject H0
# 1c
# Check assumptions of normality for Anova
par(mfrow=c(2,2))
plot(brick.fit)

layout(1)
cat(c(" Our residuals appear to be randomply scattered and approximately\n",
"normally distributed. There is no evidence that assumptions are violated."))
## Our residuals appear to be randomply scattered and approximately
## normally distributed. There is no evidence that assumptions are violated.
# 1d
# Found evidence for difference of means; continue with multcomp analysis
fit.tuk <- glht(brick.fit, linfct=mcp(Cool.Temp='Tukey'))
fit.tuk.cld <- cld(fit.tuk)
summary(fit.tuk)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = Strength ~ Kiln + Cool.Temp, data = brick)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 10 - 5 == 0 2.2000 0.8679 2.535 0.10429
## 15 - 5 == 0 6.4000 0.8679 7.374 < 0.001 ***
## 20 - 5 == 0 4.2000 0.8679 4.839 0.00202 **
## 15 - 10 == 0 4.2000 0.8679 4.839 0.00179 **
## 20 - 10 == 0 2.0000 0.8679 2.304 0.15144
## 20 - 15 == 0 -2.2000 0.8679 -2.535 0.10431
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
fit.tuk.cld
## 5 10 15 20
## "a" "ab" "c" "bc"
plot(TukeyHSD(brick.fit, which='Cool.Temp', ordered=T))

cat(c(" We can see that there is evidence for differences in mean brick strength\n",
"between cooling temps 5-20, 5-15, and 10-15."))
## We can see that there is evidence for differences in mean brick strength
## between cooling temps 5-20, 5-15, and 10-15.
######################################################
# 2
# Investigate the coffee yields by planting density, blocked by location
# Create data table for yield, density, and location
coffee.mat <- matrix(c(1,1,1,1,2,2,2,2,3,3,3,3,185,90,70,0,185,
90,70,0,185,90,70,0,3107,2092,2329,2017,
1531,2101,1519,1766,2167,2428,2160,1967),
ncol=3, byrow=F)
colnames(coffee.mat) <- c("Location","Density","Yield")
coffee <- as.data.frame(coffee.mat,row.names=NULL)
coffee
## Location Density Yield
## 1 1 185 3107
## 2 1 90 2092
## 3 1 70 2329
## 4 1 0 2017
## 5 2 185 1531
## 6 2 90 2101
## 7 2 70 1519
## 8 2 0 1766
## 9 3 185 2167
## 10 3 90 2428
## 11 3 70 2160
## 12 3 0 1967
# Check if design is balanced and input matches given table
xtabs( ~ Location+Density, data=coffee)
## Density
## Location 0 70 90 185
## 1 1 1 1 1
## 2 1 1 1 1
## 3 1 1 1 1
xtabs(Yield ~ Location+Density, data=coffee)
## Density
## Location 0 70 90 185
## 1 2017 2329 2092 3107
## 2 1766 1519 2101 1531
## 3 1967 2160 2428 2167
# Set treatment/block to factor
coffee$Location <- factor(coffee$Location)
coffee$Density <- factor(coffee$Density)
str(coffee)
## 'data.frame': 12 obs. of 3 variables:
## $ Location: Factor w/ 3 levels "1","2","3": 1 1 1 1 2 2 2 2 3 3 ...
## $ Density : Factor w/ 4 levels "0","70","90",..: 4 3 2 1 4 3 2 1 4 3 ...
## $ Yield : num 3107 2092 2329 2017 1531 ...
# 2a
# Investigate difference of means over densities
cat(c("H0: Coffee yield means equal among tree densities, HA: At least one mean different"))
## H0: Coffee yield means equal among tree densities, HA: At least one mean different
plot(Yield ~ Density, data=coffee, main="Coffee yield by density",
range=0, notch=T)
## Warning in bxp(structure(list(stats = structure(c(1766, 1866.5, 1967,
## 1992, : some notches went outside hinges ('box'): maybe set notch=FALSE
stripchart(Yield ~ Density, data=coffee, add=T, vertical=T, method='jitter',
jitter=.1)

# Not much visual evidence for difference of means, but variation over densities seems extreme
interaction.plot(coffee$Density, coffee$Location, coffee$Yield, ylab="Yield", xlab="Density",
main="Coffee yield over Location", trace.label="Location")

# Locations 2 and 3 seem to be roughly parallel, something odd happening in location 1
# Possible violation with assumption of homogenous variances or block/trt interaction
# Useful to take a visual inspection of block variance for possible further analysis,
# althogh difficult to ascertain with such a small sample size
plot(Yield ~ Location, data=coffee, main="Coffee yield by block",
range=0, notch=T)
## Warning in bxp(structure(list(stats = structure(c(2017, 2054.5, 2210.5, :
## some notches went outside hinges ('box'): maybe set notch=FALSE
stripchart(Yield ~ Location, data=coffee, add=T, vertical=T, method='jitter',
jitter=.1)

# Possible outlier in location 1, would be worth investigation in actual study
#Fit data for one way ANOVA; pay special attention to residuals plots
coffee.fit <- lm(Yield ~ Location+Density, data=coffee)
Anova(coffee.fit, type=3)
## Anova Table (Type III tests)
##
## Response: Yield
## Sum Sq Df F value Pr(>F)
## (Intercept) 9717436 1 70.4506 0.0001557 ***
## Location 903478 2 3.2751 0.1092718
## Density 248589 3 0.6007 0.6378225
## Residuals 827596 6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(coffee.fit)

layout(1)
cat(c(" Our ANOVA showed no evidence that mean coffee yield varies among planting densities\n",
"(F=0.6, p=0.64); we do not reject H0. The residual plots show no clear violations of our\n",
"assumptions although there is potentially one outlier at location 1/density 185."))
## Our ANOVA showed no evidence that mean coffee yield varies among planting densities
## (F=0.6, p=0.64); we do not reject H0. The residual plots show no clear violations of our
## assumptions although there is potentially one outlier at location 1/density 185.
# 2b
# Perform analysis as if study was a CRE (as opposed to RCB)
# Same hypothesis, model does not include blocking variable
coffee.fit2 <- lm(Yield ~ Density, data=coffee)
Anova(coffee.fit2)
## Anova Table (Type II tests)
##
## Response: Yield
## Sum Sq Df F value Pr(>F)
## Density 248589 3 0.3829 0.7683
## Residuals 1731074 8
par(mfrow=c(2,2))
plot(coffee.fit2)

layout(1)
cat(c(" When treated as a CRD without blocks, a one way ANOVA still did not find evidence for\n",
"a difference in mean yield over the densities (F=0.3, p=0.8). Note our F statistic has decreased\n",
"through our deletion of the blocking variable. Our residual plots do show some possible\n",
"devience from normality; the possible outlier still appears to be possibly skewing the data."))
## When treated as a CRD without blocks, a one way ANOVA still did not find evidence for
## a difference in mean yield over the densities (F=0.3, p=0.8). Note our F statistic has decreased
## through our deletion of the blocking variable. Our residual plots do show some possible
## devience from normality; the possible outlier still appears to be possibly skewing the data.
# Test the effect of outlier on CRD ANOVA
coffee.out <- coffee
coffee.out[1,3] <- NA
coffee.fit.out <- lm(Yield ~ Density, data=coffee.out)
Anova(coffee.fit.out)
## Anova Table (Type II tests)
##
## Response: Yield
## Sum Sq Df F value Pr(>F)
## Density 194465 3 0.6712 0.5963
## Residuals 676031 7
# There is no difference in outcome through deleting our extreme value
# There is no reason for us to continue with multcomp testing
######################################################
# 3a
cat(c(" The type of design shown is a Latin Square Design; the nuisance variability relates\n",
"to location (i.e. rows and columns --> east/west and north/south regions)"))
## The type of design shown is a Latin Square Design; the nuisance variability relates
## to location (i.e. rows and columns --> east/west and north/south regions)
# 3b
# Analyze the data to see if there is evidence for difference of means over treatments
# H0: Mean grain yields are equal for all treatments, HA: at least one mean different, alpha=0.05
# Construct a data frame for the yield by three levels
Row <- c(rep(1,1),rep(2,1),rep(3,1),rep(4,1),rep(5,1))
Column <- c(rep(1,5),rep(2,5),rep(3,5),rep(4,5),rep(5,5))
Treatment <- c("D","O","SS","S","C","SS","C","S","O","D","O","SS","D","C","S",
"C","S","O","D","SS","S","D","C","SS","O")
Yield <- c(72.2,36.4,71.5,68.9,82,55.4,46.9,55.6,53.2,81,36.6,46.8,71.6,69.8,
76,67.9,54.9,67.5,79.6,87.9,73,68.5,78.4,77.2,70.9)
grain <- data.frame(Row, Column, Treatment, Yield)
grain
## Row Column Treatment Yield
## 1 1 1 D 72.2
## 2 2 1 O 36.4
## 3 3 1 SS 71.5
## 4 4 1 S 68.9
## 5 5 1 C 82.0
## 6 1 2 SS 55.4
## 7 2 2 C 46.9
## 8 3 2 S 55.6
## 9 4 2 O 53.2
## 10 5 2 D 81.0
## 11 1 3 O 36.6
## 12 2 3 SS 46.8
## 13 3 3 D 71.6
## 14 4 3 C 69.8
## 15 5 3 S 76.0
## 16 1 4 C 67.9
## 17 2 4 S 54.9
## 18 3 4 O 67.5
## 19 4 4 D 79.6
## 20 5 4 SS 87.9
## 21 1 5 S 73.0
## 22 2 5 D 68.5
## 23 3 5 C 78.4
## 24 4 5 SS 77.2
## 25 5 5 O 70.9
xtabs(Yield~Treatment+Row, data=grain)
## Row
## Treatment 1 2 3 4 5
## C 67.9 46.9 78.4 69.8 82.0
## D 72.2 68.5 71.6 79.6 81.0
## O 36.6 36.4 67.5 53.2 70.9
## S 73.0 54.9 55.6 68.9 76.0
## SS 55.4 46.8 71.5 77.2 87.9
xtabs(Yield~Treatment+Column, data=grain)
## Column
## Treatment 1 2 3 4 5
## C 82.0 46.9 69.8 67.9 78.4
## D 72.2 81.0 71.6 79.6 68.5
## O 36.4 53.2 36.6 67.5 70.9
## S 68.9 55.6 76.0 54.9 73.0
## SS 71.5 55.4 46.8 87.9 77.2
# Ensure row and column are factors
grain$Row <- factor(grain$Row, ordered=T)
grain$Column <- factor(grain$Column, ordered=T)
str(grain)
## 'data.frame': 25 obs. of 4 variables:
## $ Row : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 2 3 4 5 1 2 3 4 5 ...
## $ Column : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Treatment: Factor w/ 5 levels "C","D","O","S",..: 2 3 5 4 1 5 1 4 3 2 ...
## $ Yield : num 72.2 36.4 71.5 68.9 82 55.4 46.9 55.6 53.2 81 ...
# Visually investigate means by treatment
plot(Yield ~ Treatment, data=grain, main="Grain yield by treatment",
range=0, notch=T)
## Warning in bxp(structure(list(stats = structure(c(46.9, 67.9, 69.8, 78.4,
## : some notches went outside hinges ('box'): maybe set notch=FALSE
stripchart(Yield ~ Treatment, data=grain, add=T, vertical=T, method='jitter',
jitter=.1)

# Check for any block interactions
interaction.plot(grain$Treatment, grain$Row, grain$Yield, ylab="Yield", xlab="Treatment",
main="Grain yield over Row", trace.label="Row")

interaction.plot(grain$Treatment, grain$Column, grain$Yield, ylab="Yield", xlab="Treatment",
main="Grain yield over Column", trace.label="Column")

# The row profile plots seem to be a little nicer than the columns but you can still
# see some general trends in the col plot with no areas of clear interaction
# Note in both, row/col 5 seem to behave differently, something happening around that
# border of the field?
# Fit an model for ANOVA
grain.fit <- aov(Yield ~ Row+Column+Treatment, data=grain)
Anova(grain.fit, type=3)
## Anova Table (Type III tests)
##
## Response: Yield
## Sum Sq Df F value Pr(>F)
## (Intercept) 23805.0 1 1413.772 8.048e-14 ***
## Row 2326.4 4 34.541 1.698e-06 ***
## Column 901.4 4 13.383 0.0002225 ***
## Treatment 1284.5 4 19.072 3.900e-05 ***
## Residuals 202.1 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(grain.fit)

layout(1)
cat(c(" An ANOVA on our fitted model shows strong evidence for a difference of mean grain\n",
"yields by fertilizer treatment type (F=19, p<<0.001). The plots of risidual values\n",
"do not show any clear violations of our ANOVA assumptions."))
## An ANOVA on our fitted model shows strong evidence for a difference of mean grain
## yields by fertilizer treatment type (F=19, p<<0.001). The plots of risidual values
## do not show any clear violations of our ANOVA assumptions.
# As we have found evidence of difference, the next step is multiple comparisons
grain.tuk <- glht(grain.fit, linfct=mcp(Treatment='Tukey'))
grain.tuk.cld <- cld(grain.tuk)
summary(grain.tuk)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = Yield ~ Row + Column + Treatment, data = grain)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## D - C == 0 5.580 2.595 2.150 0.26160
## O - C == 0 -16.080 2.595 -6.196 < 0.001 ***
## S - C == 0 -3.320 2.595 -1.279 0.70793
## SS - C == 0 -1.240 2.595 -0.478 0.98802
## O - D == 0 -21.660 2.595 -8.346 < 0.001 ***
## S - D == 0 -8.900 2.595 -3.429 0.03311 *
## SS - D == 0 -6.820 2.595 -2.628 0.12645
## S - O == 0 12.760 2.595 4.917 0.00266 **
## SS - O == 0 14.840 2.595 5.718 < 0.001 ***
## SS - S == 0 2.080 2.595 0.801 0.92520
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
grain.tuk.cld
## C D O S SS
## "bc" "c" "a" "b" "bc"
plot(TukeyHSD(grain.fit, which='Treatment', ordered=T))

cat(c(" A multiple comparison analysis using Tukey's method shows differences in mean grain\n",
"yields between treatments O-C, O-D, O-S, O-SS, and D-S at a 95% confidence level. In\n",
"general, our findings are that there are differences in yield between fertilizer and no\n",
"fertalizer, but minimal differences between the fertilizers (with the excpetion of S and D)."))
## A multiple comparison analysis using Tukey's method shows differences in mean grain
## yields between treatments O-C, O-D, O-S, O-SS, and D-S at a 95% confidence level. In
## general, our findings are that there are differences in yield between fertilizer and no
## fertalizer, but minimal differences between the fertilizers (with the excpetion of S and D).