######################################################
# Stat 430 - Project
#
# Sarah Rathwell - 301084687
#
# Objective - Investigate the effects of temperature
# and lime juice on avocado oxydization
######################################################
rm(list=ls())
options(width=200)
library(ggplot2)
library(gridExtra)
## Loading required package: grid
library(plyr)
library(DoE.base)
## Loading required package: conf.design
##
## Attaching package: 'conf.design'
##
## The following object is masked from 'package:plyr':
##
## join
##
##
## Attaching package: 'DoE.base'
##
## The following objects are masked from 'package:stats':
##
## aov, lm
##
## The following object is masked from 'package:graphics':
##
## plot.design
######################################################
cat(" Investigate the effects of temperature and lime
juice on avocado oxidyzation.\n\n", "Last modification:
",date(),'\n')
## Investigate the effects of temperature and lime
## juice on avocado oxidyzation.
##
## Last modification:
## Wed Dec 3 10:11:54 2014
######################################################
#
# Pilot Study
#
# Attempt to allocate effective sample size to maximize
# power.
#
# Enter data from pilot run of 2 replicates
Lime <- rep(c(-1,1),4)
Temp <- rep(c(rep(-1,2),rep(1,2)),2)
LiTe <- Lime*Temp
Avo <- c(rep(1,4),rep(2,4))
Col <-c(.475,.409,.545,.313,.591,.429,.477,.417)
A.df <- data.frame(Lime,Temp,LiTe,Avo,Col)
A.df
## Lime Temp LiTe Avo Col
## 1 -1 -1 1 1 0.475
## 2 1 -1 -1 1 0.409
## 3 -1 1 -1 1 0.545
## 4 1 1 1 1 0.313
## 5 -1 -1 1 2 0.591
## 6 1 -1 -1 2 0.429
## 7 -1 1 -1 2 0.477
## 8 1 1 1 2 0.417
# Run an ANOVA on main effects/interaction
A.fit <-aov(Col~Lime*Temp,data=A.df)
anova(A.fit)
## Analysis of Variance Table
##
## Response: Col
## Df Sum Sq Mean Sq F value Pr(>F)
## Lime 1 0.033800 0.033800 9.2299 0.03847 *
## Temp 1 0.002888 0.002888 0.7886 0.42469
## Lime:Temp 1 0.000512 0.000512 0.1398 0.72744
## Residuals 4 0.014648 0.003662
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimate effects
A.mat <- as.matrix(A.df[,-5])
A.ef <- (t(A.mat)%*%Col)/(length(Col)/2)
A.ef
## [,1]
## Lime -0.1300
## Temp -0.0380
## LiTe -0.0160
## Avo 1.3925
# Estimate variance
sigma.hat <- (anova(A.fit)$"Mean Sq"[4])
sigma.hat
## [1] 0.003662
# Run basic optimization of sample size
n.opt <- 1.645^2*2*sigma.hat/.05^2
n.opt
## [1] 7.927571
# Inputting data into OPC for fixed effects model
# [appendix V] similarly showed an optimal choice of
# ~10 units.
######################################################
#
# Main Project
#
avo.df <- read.csv("AvoData.csv", header=T)
str(avo.df)
## 'data.frame': 36 obs. of 4 variables:
## $ Lime : int 1 1 -1 -1 1 1 -1 -1 1 1 ...
## $ Temp : int 1 -1 1 -1 1 -1 1 -1 1 -1 ...
## $ Block: int 1 1 1 1 2 2 2 2 3 3 ...
## $ Oxy : num 0.463 0.152 0.813 0.438 0.25 0.046 0.75 0.519 0.167 0.183 ...
avo.df[1:10,]
## Lime Temp Block Oxy
## 1 1 1 1 0.463
## 2 1 -1 1 0.152
## 3 -1 1 1 0.813
## 4 -1 -1 1 0.438
## 5 1 1 2 0.250
## 6 1 -1 2 0.046
## 7 -1 1 2 0.750
## 8 -1 -1 2 0.519
## 9 1 1 3 0.167
## 10 1 -1 3 0.183
# look at sample distributions
li.plot <- ggplot(avo.df, aes(factor(Lime),Oxy))+
geom_boxplot()+
theme_bw()+
ylab("Oxidized Proportion")+
scale_x_discrete(name="Lime", breaks=c("-1","1"),
labels=c("No", "Yes"))+
ggtitle("Sample Oxydization by Lime Level")
te.plot <- ggplot(avo.df, aes(factor(Temp),Oxy))+
geom_boxplot()+
theme_bw()+
ylab("Oxidized Proportion")+
scale_x_discrete(name="Temp", breaks=c("-1","1"),
labels=c("Low", "High"))+
ggtitle("Sample Oxydization by Temp Level")
grid.arrange(li.plot, te.plot)

# check interactions between factors
avo.means <- ddply(avo.df, .(Lime, Temp), summarize, Means=mean(Oxy))
li.te.plot <- ggplot(avo.df, aes(factor(Lime), Oxy, color=factor(Temp)))+
geom_boxplot()+
geom_point(data=avo.means, aes(y=Means))+
geom_line(data=avo.means, aes(y=Means, group=factor(Temp)))+
theme_bw()+
ylab("Oxidized Proportion")+
scale_colour_discrete(name="Temp", breaks=c("-1","1"),
labels=c("Low","High"))+
scale_x_discrete(name="Lime", breaks=c("-1","1"),
labels=c("No", "Yes"))+
ggtitle("Interaction Between Lime-Temp Factors")
li.te.plot

# check interactions between blocks and each factor
bl.li.int <- ddply(avo.df, .(Lime, Block), summarize, val=mean(Oxy))
bl.plot1 <- ggplot(avo.df, aes(factor(Lime), Oxy, color=factor(Block)))+
geom_point(data=bl.li.int, aes(y=val))+
geom_line(data=bl.li.int, aes(y=val, group=factor(Block)))+
theme_bw()+
ylab("Oxidized Proportion")+
scale_colour_discrete(name="Block")+
scale_x_discrete(name="Lime", breaks=c("-1","1"),
labels=c("No", "Yes"))+
ggtitle("Interaction Between Block-Lime")
bl.te.int <- ddply(avo.df, .(Temp, Block), summarize, val=mean(Oxy))
bl.plot2 <- ggplot(avo.df, aes(factor(Temp), Oxy, color=factor(Block)))+
geom_point(data=bl.te.int, aes(y=val))+
geom_line(data=bl.te.int, aes(y=val, group=factor(Block)))+
theme_bw()+
ylab("Oxidized Proportion")+
scale_colour_discrete(name="Block")+
scale_x_discrete(name="Temp", breaks=c("-1","1"),
labels=c("Low", "High"))+
ggtitle("Interaction Between Block-Temp")
grid.arrange(bl.plot1, bl.plot2)

# run an ANOVA for model:
# y(ijk) = mu + lime(i) + temp(j) + lime*temp(ij) + block(k) + e(ijk)
avo.fit <- lm(Oxy ~ Block + Lime*Temp, data=avo.df)
anova(avo.fit)
## Analysis of Variance Table
##
## Response: Oxy
## Df Sum Sq Mean Sq F value Pr(>F)
## Block 1 0.00814 0.00814 0.9792 0.3300581
## Lime 1 1.63243 1.63243 196.2920 5.910e-15 ***
## Temp 1 0.31062 0.31062 37.3506 8.938e-07 ***
## Lime:Temp 1 0.13764 0.13764 16.5507 0.0003021 ***
## Residuals 31 0.25781 0.00832
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# run model without block (no evidence for variance b/w avos)
avo.fit2 <- lm(Oxy ~ Lime*Temp, data=avo.df)
summary(avo.fit2)
##
## Call:
## lm.default(formula = Oxy ~ Lime * Temp, data = avo.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16311 -0.05611 -0.01133 0.03111 0.19178
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.45311 0.01519 29.822 < 2e-16 ***
## Lime -0.21294 0.01519 -14.015 3.31e-15 ***
## Temp 0.09289 0.01519 6.114 7.82e-07 ***
## Lime:Temp -0.06183 0.01519 -4.070 0.000288 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09116 on 32 degrees of freedom
## Multiple R-squared: 0.8867, Adjusted R-squared: 0.876
## F-statistic: 83.45 on 3 and 32 DF, p-value: 3.233e-15
anova(avo.fit2)
## Analysis of Variance Table
##
## Response: Oxy
## Df Sum Sq Mean Sq F value Pr(>F)
## Lime 1 1.63243 1.63243 196.420 3.306e-15 ***
## Temp 1 0.31062 0.31062 37.375 7.818e-07 ***
## Lime:Temp 1 0.13764 0.13764 16.561 0.0002879 ***
## Residuals 32 0.26595 0.00831
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# check model
par(mfrow=c(2,2))
plot(avo.fit2)
## hat values (leverages) are all = 0.1111111
## and there are no factor predictors; no plot no. 5

par(mfrow=c(1,1))
# check multiple comparisons for levels
avo.df$Lime.f <- ifelse(factor(avo.df$Lime)==1, avo.df$Lime.f<-'Y', avo.df$Lime.f<-'N')
avo.df$Temp.f <- ifelse(factor(avo.df$Temp)==1, avo.df$Temp.f<-'H', avo.df$Temp.f<-'L')
avo.df$Lime.f <- factor(avo.df$Lime.f)
avo.df$Temp.f <- factor(avo.df$Temp.f)
avo.aov <- aov(Oxy ~ Lime.f*Temp.f, data=avo.df)
avo.tuk <- TukeyHSD(avo.aov, ordered=T)
avo.tuk
## Tukey multiple comparisons of means
## 95% family-wise confidence level
## factor levels have been ordered
##
## Fit: aov.default(formula = Oxy ~ Lime.f * Temp.f, data = avo.df)
##
## $Lime.f
## diff lwr upr p adj
## N-Y 0.4258889 0.3639903 0.4877875 0
##
## $Temp.f
## diff lwr upr p adj
## H-L 0.1857778 0.1238792 0.2476763 8e-07
##
## $`Lime.f:Temp.f`
## diff lwr upr p adj
## Y:H-Y:L 0.06211111 -0.05432447 0.1785467 0.4814294
## N:L-Y:L 0.30222222 0.18578664 0.4186578 0.0000003
## N:H-Y:L 0.61166667 0.49523108 0.7281023 0.0000000
## N:L-Y:H 0.24011111 0.12367553 0.3565467 0.0000207
## N:H-Y:H 0.54955556 0.43311997 0.6659911 0.0000000
## N:H-N:L 0.30944444 0.19300886 0.4258800 0.0000002
plot(avo.tuk, sub="Lime: (Y)es/(N)o Temp: (H)igh/(L)ow")



# try a location-dispersion model with blocks as replicates
# already know the est means; need variance
# find oxydization log(var)'s for each effect grouping
# fit and plot linear model effects
# estimate variance by effects (95%CI)
# disp
avo.disp <- ddply(avo.df, .(Lime,Temp), summarize, Disp=log(var(Oxy)))
avo.disp.fit <- lm(Disp ~ Temp*Lime, data=avo.disp)
summary(avo.disp.fit)
##
## Call:
## lm.default(formula = Disp ~ Temp * Lime, data = avo.disp)
##
## Residuals:
## ALL 4 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.824494 NA NA NA
## Temp 0.193464 NA NA NA
## Lime 0.176180 NA NA NA
## Temp:Lime 0.007362 NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 3 and 0 DF, p-value: NA
disp.coef <- avo.disp.fit$coefficient
avo.disp.est <- exp(disp.coef[1] - disp.coef[2] + disp.coef[3])
avo.disp.est
## (Intercept)
## 0.007893007
# predict mean proportion of oxy when minimized. Recall that the interaction
# factor can be ignored when Lime is at highest level (see tukey HSD)
mean.coef <- avo.fit2$coefficient
avo.mean.est <- mean.coef[1] + mean.coef[2] - mean.coef[3]
avo.mean.est
## (Intercept)
## 0.1472778