Assignment3.R

######################################################
# Stat 430 - Assignment 3
#
# Sarah Rathwell - 301084687
#
# Objective - Investigate blocked designs.
######################################################
rm(list=ls())
library(car)
library(multcomp)

## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: splines
## Loading required package: TH.data

######################################################
cat(" Investigate blocked designs.\n\n",
    "Last modification:",date(),'\n')

##  Investigate blocked designs.
## 
##  Last modification: Fri Sep  9 11:59:50 2016

######################################################
# 1
# Investigate brick strength by cooling level, blocked by kiln

# Create data table for cooling effects, brick strength, and kiln
brick.mat           <- matrix(c(1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,1,2,3,4,5,
                                3,10,5,4,8,4,12,10,6,9,10,15,13,9,15,8,
                                14,7,9,13,5,5,5,5,5,10,10,10,10,10,15,15,
                                15,15,15,20,20,20,20,20),ncol=3,byrow=F)
colnames(brick.mat) <- c("Kiln","Strength","Cool.Temp")
brick               <- as.data.frame(brick.mat,row.names=NULL)
brick

##    Kiln Strength Cool.Temp
## 1     1        3         5
## 2     2       10         5
## 3     3        5         5
## 4     4        4         5
## 5     5        8         5
## 6     1        4        10
## 7     2       12        10
## 8     3       10        10
## 9     4        6        10
## 10    5        9        10
## 11    1       10        15
## 12    2       15        15
## 13    3       13        15
## 14    4        9        15
## 15    5       15        15
## 16    1        8        20
## 17    2       14        20
## 18    3        7        20
## 19    4        9        20
## 20    5       13        20

# Check if design is balanced and input matches given table
xtabs( ~ Cool.Temp+Kiln, data=brick)

##          Kiln
## Cool.Temp 1 2 3 4 5
##        5  1 1 1 1 1
##        10 1 1 1 1 1
##        15 1 1 1 1 1
##        20 1 1 1 1 1

xtabs(Strength ~ Cool.Temp+Kiln, data=brick)

##          Kiln
## Cool.Temp  1  2  3  4  5
##        5   3 10  5  4  8
##        10  4 12 10  6  9
##        15 10 15 13  9 15
##        20  8 14  7  9 13

# Ensure temp and kiln is read as factor
brick$Cool.Temp <- factor(brick$Cool.Temp)
brick$Kiln      <- factor(brick$Kiln)
str(brick)

## 'data.frame':    20 obs. of  3 variables:
##  $ Kiln     : Factor w/ 5 levels "1","2","3","4",..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ Strength : num  3 10 5 4 8 4 12 10 6 9 ...
##  $ Cool.Temp: Factor w/ 4 levels "5","10","15",..: 1 1 1 1 1 2 2 2 2 2 ...

# 1a
# Visual analysis of mean strength over temp
plot(Strength ~ Cool.Temp, data=brick, main="Brick Strength by Temp",
     range=0, notch=T)

## Warning in bxp(structure(list(stats = structure(c(3, 4, 5, 8, 10, 4, 6, :
## some notches went outside hinges ('box'): maybe set notch=FALSE

stripchart(Strength ~ Cool.Temp, data=brick, add=T, vertical=T, method='jitter',
           jitter=.1)

cat(c("There is some visual evidence for a difference of means, meritting further analysis"))

## There is some visual evidence for a difference of means, meritting further analysis

# 1b
# Investigate difference of means over cooling temps 
cat(c("H0: Brick strength means equal among cooling tems, HA: At least one mean different"))

## H0: Brick strength means equal among cooling tems, HA: At least one mean different

# Check blocks for interactions for ANOVA assumptions
interaction.plot(brick$Cool.Temp, brick$Kiln, brick$Strength, ylab="Strength", xlab="Temp",
                 main="Brick strength over cooling temp", trace.label="Kiln")

# Note assumption of block additivity does not appear violated; there are no clear
# block interactions

# Fit data for one way ANOVA
brick.fit <- aov(Strength ~ Kiln + Cool.Temp, data=brick)
Anova(brick.fit, type=3)

## Anova Table (Type III tests)
## 
## Response: Strength
##              Sum Sq Df F value    Pr(>F)    
## (Intercept)  23.256  1  12.348   0.00427 ** 
## Kiln        122.200  4  16.221 8.765e-05 ***
## Cool.Temp   112.400  3  19.894 5.974e-05 ***
## Residuals    22.600 12                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Note that because design is balanced, order of blocks/treatment in model
# does not matter {block+trt and trt+block will yield same results}
cat(c(" An ANOVA for the fitted model shows strong evidence for a difference in\n",
      "mean strength over various cooling temps (F=19.9, p>>0.001); we reject H0"))

##  An ANOVA for the fitted model shows strong evidence for a difference in
##  mean strength over various cooling temps (F=19.9, p>>0.001); we reject H0

# 1c
# Check assumptions of normality for Anova
par(mfrow=c(2,2))
plot(brick.fit)

layout(1)
cat(c(" Our residuals appear to be randomply scattered and approximately\n",
      "normally distributed. There is no evidence that assumptions are violated."))

##  Our residuals appear to be randomply scattered and approximately
##  normally distributed. There is no evidence that assumptions are violated.

# 1d
# Found evidence for difference of means; continue with multcomp analysis
fit.tuk     <- glht(brick.fit, linfct=mcp(Cool.Temp='Tukey'))
fit.tuk.cld <- cld(fit.tuk)
summary(fit.tuk)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = Strength ~ Kiln + Cool.Temp, data = brick)
## 
## Linear Hypotheses:
##              Estimate Std. Error t value Pr(>|t|)    
## 10 - 5 == 0    2.2000     0.8679   2.535  0.10429    
## 15 - 5 == 0    6.4000     0.8679   7.374  < 0.001 ***
## 20 - 5 == 0    4.2000     0.8679   4.839  0.00202 ** 
## 15 - 10 == 0   4.2000     0.8679   4.839  0.00179 ** 
## 20 - 10 == 0   2.0000     0.8679   2.304  0.15144    
## 20 - 15 == 0  -2.2000     0.8679  -2.535  0.10431    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

fit.tuk.cld

##    5   10   15   20 
##  "a" "ab"  "c" "bc"

plot(TukeyHSD(brick.fit, which='Cool.Temp', ordered=T))

cat(c(" We can see that there is evidence for differences in mean brick strength\n",
    "between cooling temps 5-20, 5-15, and 10-15."))

##  We can see that there is evidence for differences in mean brick strength
##  between cooling temps 5-20, 5-15, and 10-15.

######################################################
# 2
# Investigate the coffee yields by planting density, blocked by location

# Create data table for yield, density, and location
coffee.mat           <- matrix(c(1,1,1,1,2,2,2,2,3,3,3,3,185,90,70,0,185,
                                 90,70,0,185,90,70,0,3107,2092,2329,2017,
                                 1531,2101,1519,1766,2167,2428,2160,1967),
                               ncol=3, byrow=F)
colnames(coffee.mat) <- c("Location","Density","Yield")
coffee               <- as.data.frame(coffee.mat,row.names=NULL)
coffee

##    Location Density Yield
## 1         1     185  3107
## 2         1      90  2092
## 3         1      70  2329
## 4         1       0  2017
## 5         2     185  1531
## 6         2      90  2101
## 7         2      70  1519
## 8         2       0  1766
## 9         3     185  2167
## 10        3      90  2428
## 11        3      70  2160
## 12        3       0  1967

# Check if design is balanced and input matches given table
xtabs( ~ Location+Density, data=coffee)

##         Density
## Location 0 70 90 185
##        1 1  1  1   1
##        2 1  1  1   1
##        3 1  1  1   1

xtabs(Yield ~ Location+Density, data=coffee)

##         Density
## Location    0   70   90  185
##        1 2017 2329 2092 3107
##        2 1766 1519 2101 1531
##        3 1967 2160 2428 2167

# Set treatment/block to factor
coffee$Location <- factor(coffee$Location)
coffee$Density  <- factor(coffee$Density)
str(coffee)

## 'data.frame':    12 obs. of  3 variables:
##  $ Location: Factor w/ 3 levels "1","2","3": 1 1 1 1 2 2 2 2 3 3 ...
##  $ Density : Factor w/ 4 levels "0","70","90",..: 4 3 2 1 4 3 2 1 4 3 ...
##  $ Yield   : num  3107 2092 2329 2017 1531 ...

# 2a
# Investigate difference of means over densities
cat(c("H0: Coffee yield means equal among tree densities, HA: At least one mean different"))

## H0: Coffee yield means equal among tree densities, HA: At least one mean different

plot(Yield ~ Density, data=coffee, main="Coffee yield by density",
     range=0, notch=T)

## Warning in bxp(structure(list(stats = structure(c(1766, 1866.5, 1967,
## 1992, : some notches went outside hinges ('box'): maybe set notch=FALSE

stripchart(Yield ~ Density, data=coffee, add=T, vertical=T, method='jitter',
           jitter=.1)

# Not much visual evidence for difference of means, but variation over densities seems extreme

interaction.plot(coffee$Density, coffee$Location, coffee$Yield, ylab="Yield", xlab="Density",
                 main="Coffee yield over Location", trace.label="Location")

# Locations 2 and 3 seem to be roughly parallel, something odd happening in location 1
# Possible violation with assumption of homogenous variances or block/trt interaction

# Useful to take a visual inspection of block variance for possible further analysis,
# althogh difficult to ascertain with such a small sample size
plot(Yield ~ Location, data=coffee, main="Coffee yield by block",
     range=0, notch=T)

## Warning in bxp(structure(list(stats = structure(c(2017, 2054.5, 2210.5, :
## some notches went outside hinges ('box'): maybe set notch=FALSE

stripchart(Yield ~ Location, data=coffee, add=T, vertical=T, method='jitter',
           jitter=.1)

# Possible outlier in location 1, would be worth investigation in actual study

#Fit data for one way ANOVA; pay special attention to residuals plots
coffee.fit <- lm(Yield ~ Location+Density, data=coffee)
Anova(coffee.fit, type=3)

## Anova Table (Type III tests)
## 
## Response: Yield
##              Sum Sq Df F value    Pr(>F)    
## (Intercept) 9717436  1 70.4506 0.0001557 ***
## Location     903478  2  3.2751 0.1092718    
## Density      248589  3  0.6007 0.6378225    
## Residuals    827596  6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

par(mfrow=c(2,2))
plot(coffee.fit)

layout(1)
cat(c(" Our ANOVA showed no evidence that mean coffee yield varies among planting densities\n",
      "(F=0.6, p=0.64); we do not reject H0. The residual plots show no clear violations of our\n",
      "assumptions although there is potentially one outlier at location 1/density 185."))

##  Our ANOVA showed no evidence that mean coffee yield varies among planting densities
##  (F=0.6, p=0.64); we do not reject H0. The residual plots show no clear violations of our
##  assumptions although there is potentially one outlier at location 1/density 185.

# 2b
# Perform analysis as if study was a CRE (as opposed to RCB)
# Same hypothesis, model does not include blocking variable
coffee.fit2 <- lm(Yield ~ Density, data=coffee)
Anova(coffee.fit2)

## Anova Table (Type II tests)
## 
## Response: Yield
##            Sum Sq Df F value Pr(>F)
## Density    248589  3  0.3829 0.7683
## Residuals 1731074  8

par(mfrow=c(2,2))
plot(coffee.fit2)

layout(1)
cat(c(" When treated as a CRD without blocks, a one way ANOVA still did not find evidence for\n",
      "a difference in mean yield over the densities (F=0.3, p=0.8). Note our F statistic has decreased\n",
      "through our deletion of the blocking variable. Our residual plots do show some possible\n",
      "devience from normality; the possible outlier still appears to be possibly skewing the data."))

##  When treated as a CRD without blocks, a one way ANOVA still did not find evidence for
##  a difference in mean yield over the densities (F=0.3, p=0.8). Note our F statistic has decreased
##  through our deletion of the blocking variable. Our residual plots do show some possible
##  devience from normality; the possible outlier still appears to be possibly skewing the data.

# Test the effect of outlier on CRD ANOVA
coffee.out      <- coffee
coffee.out[1,3] <- NA
coffee.fit.out  <- lm(Yield ~ Density, data=coffee.out)
Anova(coffee.fit.out)

## Anova Table (Type II tests)
## 
## Response: Yield
##           Sum Sq Df F value Pr(>F)
## Density   194465  3  0.6712 0.5963
## Residuals 676031  7

# There is no difference in outcome through deleting our extreme value
# There is no reason for us to continue with multcomp testing
######################################################
# 3a
cat(c(" The type of design shown is a Latin Square Design; the nuisance variability relates\n",
      "to location (i.e. rows and columns --> east/west and north/south regions)"))

##  The type of design shown is a Latin Square Design; the nuisance variability relates
##  to location (i.e. rows and columns --> east/west and north/south regions)

# 3b
# Analyze the data to see if there is evidence for difference of means over treatments
# H0: Mean grain yields are equal for all treatments, HA: at least one mean different, alpha=0.05
# Construct a data frame for the yield by three levels
Row       <- c(rep(1,1),rep(2,1),rep(3,1),rep(4,1),rep(5,1))
Column    <- c(rep(1,5),rep(2,5),rep(3,5),rep(4,5),rep(5,5))
Treatment <- c("D","O","SS","S","C","SS","C","S","O","D","O","SS","D","C","S",
               "C","S","O","D","SS","S","D","C","SS","O")
Yield     <- c(72.2,36.4,71.5,68.9,82,55.4,46.9,55.6,53.2,81,36.6,46.8,71.6,69.8,
               76,67.9,54.9,67.5,79.6,87.9,73,68.5,78.4,77.2,70.9)
grain     <- data.frame(Row, Column, Treatment, Yield)
grain

##    Row Column Treatment Yield
## 1    1      1         D  72.2
## 2    2      1         O  36.4
## 3    3      1        SS  71.5
## 4    4      1         S  68.9
## 5    5      1         C  82.0
## 6    1      2        SS  55.4
## 7    2      2         C  46.9
## 8    3      2         S  55.6
## 9    4      2         O  53.2
## 10   5      2         D  81.0
## 11   1      3         O  36.6
## 12   2      3        SS  46.8
## 13   3      3         D  71.6
## 14   4      3         C  69.8
## 15   5      3         S  76.0
## 16   1      4         C  67.9
## 17   2      4         S  54.9
## 18   3      4         O  67.5
## 19   4      4         D  79.6
## 20   5      4        SS  87.9
## 21   1      5         S  73.0
## 22   2      5         D  68.5
## 23   3      5         C  78.4
## 24   4      5        SS  77.2
## 25   5      5         O  70.9

xtabs(Yield~Treatment+Row, data=grain)

##          Row
## Treatment    1    2    3    4    5
##        C  67.9 46.9 78.4 69.8 82.0
##        D  72.2 68.5 71.6 79.6 81.0
##        O  36.6 36.4 67.5 53.2 70.9
##        S  73.0 54.9 55.6 68.9 76.0
##        SS 55.4 46.8 71.5 77.2 87.9

xtabs(Yield~Treatment+Column, data=grain)

##          Column
## Treatment    1    2    3    4    5
##        C  82.0 46.9 69.8 67.9 78.4
##        D  72.2 81.0 71.6 79.6 68.5
##        O  36.4 53.2 36.6 67.5 70.9
##        S  68.9 55.6 76.0 54.9 73.0
##        SS 71.5 55.4 46.8 87.9 77.2

# Ensure row and column are factors
grain$Row    <- factor(grain$Row, ordered=T)
grain$Column <- factor(grain$Column, ordered=T)
str(grain)

## 'data.frame':    25 obs. of  4 variables:
##  $ Row      : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 2 3 4 5 1 2 3 4 5 ...
##  $ Column   : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 1 1 1 1 1 2 2 2 2 2 ...
##  $ Treatment: Factor w/ 5 levels "C","D","O","S",..: 2 3 5 4 1 5 1 4 3 2 ...
##  $ Yield    : num  72.2 36.4 71.5 68.9 82 55.4 46.9 55.6 53.2 81 ...

# Visually investigate means by treatment
plot(Yield ~ Treatment, data=grain, main="Grain yield by treatment",
     range=0, notch=T)

## Warning in bxp(structure(list(stats = structure(c(46.9, 67.9, 69.8, 78.4,
## : some notches went outside hinges ('box'): maybe set notch=FALSE

stripchart(Yield ~ Treatment, data=grain, add=T, vertical=T, method='jitter',
           jitter=.1)

# Check for any block interactions
interaction.plot(grain$Treatment, grain$Row, grain$Yield, ylab="Yield", xlab="Treatment",
                 main="Grain yield over Row", trace.label="Row")

interaction.plot(grain$Treatment, grain$Column, grain$Yield, ylab="Yield", xlab="Treatment",
                 main="Grain yield over Column", trace.label="Column")

# The row profile plots seem to be a little nicer than the columns but you can still
# see some general trends in the col plot with no areas of clear interaction
# Note in both, row/col 5 seem to behave differently, something happening around that
# border of the field?

# Fit an model for ANOVA
grain.fit <- aov(Yield ~ Row+Column+Treatment, data=grain)
Anova(grain.fit, type=3)

## Anova Table (Type III tests)
## 
## Response: Yield
##              Sum Sq Df  F value    Pr(>F)    
## (Intercept) 23805.0  1 1413.772 8.048e-14 ***
## Row          2326.4  4   34.541 1.698e-06 ***
## Column        901.4  4   13.383 0.0002225 ***
## Treatment    1284.5  4   19.072 3.900e-05 ***
## Residuals     202.1 12                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

par(mfrow=c(2,2))
plot(grain.fit)

layout(1)
cat(c(" An ANOVA on our fitted model shows strong evidence for a difference of mean grain\n",
      "yields by fertilizer treatment type (F=19, p<<0.001). The plots of risidual values\n",
      "do not show any clear violations of our ANOVA assumptions."))

##  An ANOVA on our fitted model shows strong evidence for a difference of mean grain
##  yields by fertilizer treatment type (F=19, p<<0.001). The plots of risidual values
##  do not show any clear violations of our ANOVA assumptions.

# As we have found evidence of difference, the next step is multiple comparisons
grain.tuk     <- glht(grain.fit, linfct=mcp(Treatment='Tukey'))
grain.tuk.cld <- cld(grain.tuk)
summary(grain.tuk)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = Yield ~ Row + Column + Treatment, data = grain)
## 
## Linear Hypotheses:
##             Estimate Std. Error t value Pr(>|t|)    
## D - C == 0     5.580      2.595   2.150  0.26160    
## O - C == 0   -16.080      2.595  -6.196  < 0.001 ***
## S - C == 0    -3.320      2.595  -1.279  0.70793    
## SS - C == 0   -1.240      2.595  -0.478  0.98802    
## O - D == 0   -21.660      2.595  -8.346  < 0.001 ***
## S - D == 0    -8.900      2.595  -3.429  0.03311 *  
## SS - D == 0   -6.820      2.595  -2.628  0.12645    
## S - O == 0    12.760      2.595   4.917  0.00266 ** 
## SS - O == 0   14.840      2.595   5.718  < 0.001 ***
## SS - S == 0    2.080      2.595   0.801  0.92520    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

grain.tuk.cld

##    C    D    O    S   SS 
## "bc"  "c"  "a"  "b" "bc"

plot(TukeyHSD(grain.fit, which='Treatment', ordered=T))

cat(c(" A multiple comparison analysis using Tukey's method shows differences in mean grain\n",
      "yields between treatments O-C, O-D, O-S, O-SS, and D-S at a 95% confidence level. In\n",
      "general, our findings are that there are differences in yield between fertilizer and no\n",
      "fertalizer, but minimal differences between the fertilizers (with the excpetion of S and D)."))

##  A multiple comparison analysis using Tukey's method shows differences in mean grain
##  yields between treatments O-C, O-D, O-S, O-SS, and D-S at a 95% confidence level. In
##  general, our findings are that there are differences in yield between fertilizer and no
##  fertalizer, but minimal differences between the fertilizers (with the excpetion of S and D).

Assignment3.R

Skynet

Fri Sep 9 11:59:49 2016