######################################################
# Stat 430 - Project
#
# Sarah Rathwell - 301084687
#
# Objective - Investigate the effects of temperature 
#             and lime juice on avocado oxydization
######################################################
rm(list=ls())
options(width=200)
library(ggplot2)
library(gridExtra)
## Loading required package: grid
library(plyr)
library(DoE.base)
## Loading required package: conf.design
## 
## Attaching package: 'conf.design'
## 
## The following object is masked from 'package:plyr':
## 
##     join
## 
## 
## Attaching package: 'DoE.base'
## 
## The following objects are masked from 'package:stats':
## 
##     aov, lm
## 
## The following object is masked from 'package:graphics':
## 
##     plot.design
######################################################
cat(" Investigate the effects of temperature and lime
    juice on avocado oxidyzation.\n\n", "Last modification:
    ",date(),'\n')
##  Investigate the effects of temperature and lime
##     juice on avocado oxidyzation.
## 
##  Last modification:
##      Wed Dec  3 10:11:54 2014
######################################################
#
# Pilot Study
#
# Attempt to allocate effective sample size to maximize
# power.
#
# Enter data from pilot run of 2 replicates
Lime <- rep(c(-1,1),4) 
Temp <- rep(c(rep(-1,2),rep(1,2)),2)
LiTe <- Lime*Temp
Avo  <- c(rep(1,4),rep(2,4))
Col  <-c(.475,.409,.545,.313,.591,.429,.477,.417)
A.df <- data.frame(Lime,Temp,LiTe,Avo,Col)  
A.df
##   Lime Temp LiTe Avo   Col
## 1   -1   -1    1   1 0.475
## 2    1   -1   -1   1 0.409
## 3   -1    1   -1   1 0.545
## 4    1    1    1   1 0.313
## 5   -1   -1    1   2 0.591
## 6    1   -1   -1   2 0.429
## 7   -1    1   -1   2 0.477
## 8    1    1    1   2 0.417
# Run an ANOVA on main effects/interaction
A.fit <-aov(Col~Lime*Temp,data=A.df)
anova(A.fit)
## Analysis of Variance Table
## 
## Response: Col
##           Df   Sum Sq  Mean Sq F value  Pr(>F)  
## Lime       1 0.033800 0.033800  9.2299 0.03847 *
## Temp       1 0.002888 0.002888  0.7886 0.42469  
## Lime:Temp  1 0.000512 0.000512  0.1398 0.72744  
## Residuals  4 0.014648 0.003662                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimate effects
A.mat <- as.matrix(A.df[,-5])
A.ef  <- (t(A.mat)%*%Col)/(length(Col)/2)
A.ef
##         [,1]
## Lime -0.1300
## Temp -0.0380
## LiTe -0.0160
## Avo   1.3925
# Estimate variance
sigma.hat <- (anova(A.fit)$"Mean Sq"[4])
sigma.hat
## [1] 0.003662
# Run basic optimization of sample size
n.opt <- 1.645^2*2*sigma.hat/.05^2
n.opt
## [1] 7.927571
# Inputting data into OPC for fixed effects model 
# [appendix V] similarly showed an optimal choice of
# ~10 units.
######################################################
#
# Main Project
#
avo.df <- read.csv("AvoData.csv", header=T)
str(avo.df)
## 'data.frame':    36 obs. of  4 variables:
##  $ Lime : int  1 1 -1 -1 1 1 -1 -1 1 1 ...
##  $ Temp : int  1 -1 1 -1 1 -1 1 -1 1 -1 ...
##  $ Block: int  1 1 1 1 2 2 2 2 3 3 ...
##  $ Oxy  : num  0.463 0.152 0.813 0.438 0.25 0.046 0.75 0.519 0.167 0.183 ...
avo.df[1:10,]
##    Lime Temp Block   Oxy
## 1     1    1     1 0.463
## 2     1   -1     1 0.152
## 3    -1    1     1 0.813
## 4    -1   -1     1 0.438
## 5     1    1     2 0.250
## 6     1   -1     2 0.046
## 7    -1    1     2 0.750
## 8    -1   -1     2 0.519
## 9     1    1     3 0.167
## 10    1   -1     3 0.183
# look at sample distributions
li.plot <- ggplot(avo.df, aes(factor(Lime),Oxy))+
            geom_boxplot()+
            theme_bw()+
            ylab("Oxidized Proportion")+
            scale_x_discrete(name="Lime", breaks=c("-1","1"), 
                             labels=c("No", "Yes"))+
            ggtitle("Sample Oxydization by Lime Level")

te.plot <- ggplot(avo.df, aes(factor(Temp),Oxy))+
            geom_boxplot()+
            theme_bw()+
            ylab("Oxidized Proportion")+
            scale_x_discrete(name="Temp", breaks=c("-1","1"), 
                             labels=c("Low", "High"))+
            ggtitle("Sample Oxydization by Temp Level")

grid.arrange(li.plot, te.plot)

# check interactions between factors
avo.means  <- ddply(avo.df, .(Lime, Temp), summarize, Means=mean(Oxy))
li.te.plot <- ggplot(avo.df, aes(factor(Lime), Oxy, color=factor(Temp)))+
               geom_boxplot()+
               geom_point(data=avo.means, aes(y=Means))+
               geom_line(data=avo.means, aes(y=Means, group=factor(Temp)))+
               theme_bw()+
               ylab("Oxidized Proportion")+
               scale_colour_discrete(name="Temp", breaks=c("-1","1"), 
                                     labels=c("Low","High"))+
               scale_x_discrete(name="Lime", breaks=c("-1","1"), 
                                labels=c("No", "Yes"))+
              ggtitle("Interaction Between Lime-Temp Factors")
li.te.plot

# check interactions between blocks and each factor
bl.li.int <- ddply(avo.df, .(Lime, Block), summarize, val=mean(Oxy))
bl.plot1  <- ggplot(avo.df, aes(factor(Lime), Oxy, color=factor(Block)))+
              geom_point(data=bl.li.int, aes(y=val))+
              geom_line(data=bl.li.int, aes(y=val, group=factor(Block)))+
              theme_bw()+
              ylab("Oxidized Proportion")+
              scale_colour_discrete(name="Block")+
              scale_x_discrete(name="Lime", breaks=c("-1","1"), 
                               labels=c("No", "Yes"))+
              ggtitle("Interaction Between Block-Lime")

bl.te.int <- ddply(avo.df, .(Temp, Block), summarize, val=mean(Oxy))
bl.plot2  <- ggplot(avo.df, aes(factor(Temp), Oxy, color=factor(Block)))+
              geom_point(data=bl.te.int, aes(y=val))+
              geom_line(data=bl.te.int, aes(y=val, group=factor(Block)))+
              theme_bw()+
              ylab("Oxidized Proportion")+
              scale_colour_discrete(name="Block")+
              scale_x_discrete(name="Temp", breaks=c("-1","1"), 
                               labels=c("Low", "High"))+
              ggtitle("Interaction Between Block-Temp")

grid.arrange(bl.plot1, bl.plot2)

# run an ANOVA for model:
# y(ijk) = mu + lime(i) + temp(j) + lime*temp(ij) + block(k) + e(ijk)
avo.fit <- lm(Oxy ~ Block + Lime*Temp, data=avo.df)
anova(avo.fit)
## Analysis of Variance Table
## 
## Response: Oxy
##           Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Block      1 0.00814 0.00814   0.9792 0.3300581    
## Lime       1 1.63243 1.63243 196.2920 5.910e-15 ***
## Temp       1 0.31062 0.31062  37.3506 8.938e-07 ***
## Lime:Temp  1 0.13764 0.13764  16.5507 0.0003021 ***
## Residuals 31 0.25781 0.00832                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# run model without block (no evidence for variance b/w avos)
avo.fit2 <- lm(Oxy ~ Lime*Temp, data=avo.df)
summary(avo.fit2)
## 
## Call:
## lm.default(formula = Oxy ~ Lime * Temp, data = avo.df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16311 -0.05611 -0.01133  0.03111  0.19178 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.45311    0.01519  29.822  < 2e-16 ***
## Lime        -0.21294    0.01519 -14.015 3.31e-15 ***
## Temp         0.09289    0.01519   6.114 7.82e-07 ***
## Lime:Temp   -0.06183    0.01519  -4.070 0.000288 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09116 on 32 degrees of freedom
## Multiple R-squared:  0.8867, Adjusted R-squared:  0.876 
## F-statistic: 83.45 on 3 and 32 DF,  p-value: 3.233e-15
anova(avo.fit2)
## Analysis of Variance Table
## 
## Response: Oxy
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Lime       1 1.63243 1.63243 196.420 3.306e-15 ***
## Temp       1 0.31062 0.31062  37.375 7.818e-07 ***
## Lime:Temp  1 0.13764 0.13764  16.561 0.0002879 ***
## Residuals 32 0.26595 0.00831                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# check model
par(mfrow=c(2,2))
plot(avo.fit2)
## hat values (leverages) are all = 0.1111111
##  and there are no factor predictors; no plot no. 5

par(mfrow=c(1,1))

# check multiple comparisons for levels

avo.df$Lime.f <- ifelse(factor(avo.df$Lime)==1, avo.df$Lime.f<-'Y', avo.df$Lime.f<-'N')
avo.df$Temp.f <- ifelse(factor(avo.df$Temp)==1, avo.df$Temp.f<-'H', avo.df$Temp.f<-'L')

avo.df$Lime.f <- factor(avo.df$Lime.f)
avo.df$Temp.f <- factor(avo.df$Temp.f)

avo.aov <- aov(Oxy ~ Lime.f*Temp.f, data=avo.df)
avo.tuk <- TukeyHSD(avo.aov, ordered=T)
avo.tuk
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
##     factor levels have been ordered
## 
## Fit: aov.default(formula = Oxy ~ Lime.f * Temp.f, data = avo.df)
## 
## $Lime.f
##          diff       lwr       upr p adj
## N-Y 0.4258889 0.3639903 0.4877875     0
## 
## $Temp.f
##          diff       lwr       upr p adj
## H-L 0.1857778 0.1238792 0.2476763 8e-07
## 
## $`Lime.f:Temp.f`
##               diff         lwr       upr     p adj
## Y:H-Y:L 0.06211111 -0.05432447 0.1785467 0.4814294
## N:L-Y:L 0.30222222  0.18578664 0.4186578 0.0000003
## N:H-Y:L 0.61166667  0.49523108 0.7281023 0.0000000
## N:L-Y:H 0.24011111  0.12367553 0.3565467 0.0000207
## N:H-Y:H 0.54955556  0.43311997 0.6659911 0.0000000
## N:H-N:L 0.30944444  0.19300886 0.4258800 0.0000002
plot(avo.tuk, sub="Lime: (Y)es/(N)o     Temp: (H)igh/(L)ow")

# try a location-dispersion model with blocks as replicates
# already know the est means; need variance
# find oxydization log(var)'s for each effect grouping
# fit and plot linear model effects
# estimate variance by effects (95%CI)

# disp
avo.disp  <- ddply(avo.df, .(Lime,Temp), summarize, Disp=log(var(Oxy)))

avo.disp.fit  <- lm(Disp ~ Temp*Lime, data=avo.disp)
summary(avo.disp.fit)
## 
## Call:
## lm.default(formula = Disp ~ Temp * Lime, data = avo.disp)
## 
## Residuals:
## ALL 4 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.824494         NA      NA       NA
## Temp         0.193464         NA      NA       NA
## Lime         0.176180         NA      NA       NA
## Temp:Lime    0.007362         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 3 and 0 DF,  p-value: NA
disp.coef    <- avo.disp.fit$coefficient
avo.disp.est <- exp(disp.coef[1] - disp.coef[2] + disp.coef[3])
avo.disp.est
## (Intercept) 
## 0.007893007
# predict mean proportion of oxy when minimized. Recall that the interaction
# factor can be ignored when Lime is at highest level (see tukey HSD)
mean.coef    <- avo.fit2$coefficient
avo.mean.est <- mean.coef[1] + mean.coef[2] - mean.coef[3]
avo.mean.est
## (Intercept) 
##   0.1472778