######################################################
#STAT 430 - Assignment 04
#
#Sarah Rathwell - 301084687
#
#Objective: Investigate factorial designs
######################################################
options(width=200)
rm(list=ls())
library(ggplot2)
library(plyr)
library(car)
library(doBy)
## Loading required package: survival
## Loading required package: splines
######################################################
cat(" Investigate factorial designs.\n\n", 
    "Last modification:",date(), '\n')
##  Investigate factorial designs.
## 
##  Last modification: Mon Nov 24 10:25:56 2014
######################################################
# 2
# build and check data set
Glass    <-c(rep(1,9),rep(2,9),rep(3,9))
Temp     <-c(rep(c(rep(100,3),rep(125,3),rep(150,3)),3))
Light    <-c(58,56.8,57,107,106.7,106.5,129.2,128,128.6,
             55,53,57.9,107,103.5,105,117.8,116.2,109.9,
             54.6,57.5,59.9,106.5,107.3,108.6,101.7,105.4,103.9)
Glass.df <-data.frame(Glass,Temp,Light)
Glass.df
##    Glass Temp Light
## 1      1  100  58.0
## 2      1  100  56.8
## 3      1  100  57.0
## 4      1  125 107.0
## 5      1  125 106.7
## 6      1  125 106.5
## 7      1  150 129.2
## 8      1  150 128.0
## 9      1  150 128.6
## 10     2  100  55.0
## 11     2  100  53.0
## 12     2  100  57.9
## 13     2  125 107.0
## 14     2  125 103.5
## 15     2  125 105.0
## 16     2  150 117.8
## 17     2  150 116.2
## 18     2  150 109.9
## 19     3  100  54.6
## 20     3  100  57.5
## 21     3  100  59.9
## 22     3  125 106.5
## 23     3  125 107.3
## 24     3  125 108.6
## 25     3  150 101.7
## 26     3  150 105.4
## 27     3  150 103.9
xtabs(~Glass+Temp,data=Glass.df)
##      Temp
## Glass 100 125 150
##     1   3   3   3
##     2   3   3   3
##     3   3   3   3
# set factors
Glass.df$Glass <-as.factor(Glass.df$Glass)
Glass.df$Temp  <-as.factor(Glass.df$Temp)
str(Glass.df)
## 'data.frame':    27 obs. of  3 variables:
##  $ Glass: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 2 ...
##  $ Temp : Factor w/ 3 levels "100","125","150": 1 1 1 2 2 2 3 3 3 1 ...
##  $ Light: num  58 56.8 57 107 106.7 ...
# visually analyze sample data w/factor interaction
GlassInt <-ddply(Glass.df,.(Glass,Temp),summarise,val=mean(Light))
ggplot(Glass.df,aes(Glass,Light,color=Temp))+
  geom_boxplot()+
  geom_point(data=GlassInt,aes(y=val))+
  geom_line(data=GlassInt,aes(y=val,group=Temp))+
  theme_bw()+
  ggtitle("Summary of Light Over Glass Type & Temp")

# There appears to be some variation at the two higher temps over glass 
# types; more so at the highest level of temp; interaction between glass
# and temp at highest level

# a
# Fit an ANOVA for model y(ijk)=mu+T(i)+B(j)+(TB)(ij)+E(ijk); i,j,k={1:3}, 
# T~Glass, B~Temp
# Test hypothesis H0: T1=T2=T3=0; B1=B2=B3=0, HA: at least one T,B not 0

Glass.fit <-aov(Light~Glass*Temp,data=Glass.df,contrasts=list(Glass=contr.sum,Temp=contr.sum))
anova(Glass.fit)
## Analysis of Variance Table
## 
## Response: Light
##            Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Glass       2   310.9   155.4   35.814 5.315e-07 ***
## Temp        2 18142.5  9071.2 2089.966 < 2.2e-16 ***
## Glass:Temp  4   642.4   160.6   37.002 1.869e-08 ***
## Residuals  18    78.1     4.3                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# At alpha=0.5, there is strong evidence that temperature effects mean light 
# measurements (F = 2090, p<<.001), that glass effects mean light measurements 
# (F = 35.8, p<<.001), and that there is an interaction between temp and glass
# type which effects mean light measurements (F = 37, p<<.001)

# b
# Estimate main effects and interaction
summary.lm(Glass.fit)
## 
## Call:
## aov(formula = Light ~ Glass * Temp, data = Glass.df, contrasts = list(Glass = contr.sum, 
##     Temp = contr.sum))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7333 -0.5333 -0.0333  0.9333  3.1667 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   92.90741    0.40094 231.723  < 2e-16 ***
## Glass1         4.62593    0.56702   8.158 1.85e-07 ***
## Glass2        -1.20741    0.56702  -2.129   0.0473 *  
## Temp1        -36.27407    0.56702 -63.973  < 2e-16 ***
## Temp2         13.54815    0.56702  23.894 4.38e-15 ***
## Glass1:Temp1  -3.99259    0.80188  -4.979 9.72e-05 ***
## Glass2:Temp1  -0.12593    0.80188  -0.157   0.8770    
## Glass1:Temp2  -4.34815    0.80188  -5.422 3.76e-05 ***
## Glass2:Temp2  -0.08148    0.80188  -0.102   0.9202    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.083 on 18 degrees of freedom
## Multiple R-squared:  0.9959, Adjusted R-squared:  0.9941 
## F-statistic: 549.9 on 8 and 18 DF,  p-value: < 2.2e-16
# Our fitted model estimates are as follows:
# **Ti=mean(yi..)-mu, Bj=mean(y.j.)-mu, (TB)ij=mean(yij.)-mean(yi..)-mean(y.j.)+mu
# Mu = 92.9 (se:0.40)
# T1 = 4.6, T2 = -1.2 , T3 = -3.4 (se:0.57)
# B1 = -36.3, B2 = 13.6, B3 = 22.7 (se:0.57)
# TB11 = -4.0, TB12 = -4.3, TB13 = 8.3, TB21 = -0.1, TB22 = -0.1, TB23 = 0.2,  
# TB31 = 4.1, TB32 = 4.5, TB33 = -8.5 (se:0.80)  

# c
# check model assumptions via residuals
par(mfrow=c(2,2))
plot(Glass.fit)

# Note the leverage graph plots against glass; the plot against temp is different in 
# scatter along X but identical along the standardized residuals.
# We see some possible indication that our assumption of normality is incorrect. There 
# is possible evidence for an extraneous value (obs 18) which may warrent more 
# examination. Our risiduals however, do look approximately randomly scattered over the 
# fitted values.

# d
# analyse multiple comparisons for treatment differences
popMeans(Glass.fit,eff=c("Glass","Temp"))
##    estimate       se df    t.stat      p.value Glass Temp
## 1  57.26667 1.202826 18  47.61010 2.176569e-20     1  100
## 2  55.30000 1.202826 18  45.97507 4.062909e-20     2  100
## 3  57.33333 1.202826 18  47.66553 2.131794e-20     3  100
## 4 106.73333 1.202826 18  88.73548 3.100968e-25     1  125
## 5 105.16667 1.202826 18  87.43299 4.044254e-25     2  125
## 6 107.46667 1.202826 18  89.34516 2.742116e-25     3  125
## 7 128.60000 1.202826 18 106.91489 1.089425e-26     1  150
## 8 114.63333 1.202826 18  95.30335 8.598630e-26     2  150
## 9 103.66667 1.202826 18  86.18593 5.234627e-25     3  150
par(mfrow=c(1,1))
Glass.hsd <-TukeyHSD(Glass.fit,"Glass:Temp")
Glass.hsd
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Light ~ Glass * Temp, data = Glass.df, contrasts = list(Glass = contr.sum, Temp = contr.sum))
## 
## $`Glass:Temp`
##                     diff        lwr        upr     p adj
## 2:100-1:100  -1.96666667  -7.926921   3.993587 0.9561247
## 3:100-1:100   0.06666667  -5.893587   6.026921 1.0000000
## 1:125-1:100  49.46666667  43.506413  55.426921 0.0000000
## 2:125-1:100  47.90000000  41.939746  53.860254 0.0000000
## 3:125-1:100  50.20000000  44.239746  56.160254 0.0000000
## 1:150-1:100  71.33333333  65.373079  77.293587 0.0000000
## 2:150-1:100  57.36666667  51.406413  63.326921 0.0000000
## 3:150-1:100  46.40000000  40.439746  52.360254 0.0000000
## 3:100-2:100   2.03333333  -3.926921   7.993587 0.9474195
## 1:125-2:100  51.43333333  45.473079  57.393587 0.0000000
## 2:125-2:100  49.86666667  43.906413  55.826921 0.0000000
## 3:125-2:100  52.16666667  46.206413  58.126921 0.0000000
## 1:150-2:100  73.30000000  67.339746  79.260254 0.0000000
## 2:150-2:100  59.33333333  53.373079  65.293587 0.0000000
## 3:150-2:100  48.36666667  42.406413  54.326921 0.0000000
## 1:125-3:100  49.40000000  43.439746  55.360254 0.0000000
## 2:125-3:100  47.83333333  41.873079  53.793587 0.0000000
## 3:125-3:100  50.13333333  44.173079  56.093587 0.0000000
## 1:150-3:100  71.26666667  65.306413  77.226921 0.0000000
## 2:150-3:100  57.30000000  51.339746  63.260254 0.0000000
## 3:150-3:100  46.33333333  40.373079  52.293587 0.0000000
## 2:125-1:125  -1.56666667  -7.526921   4.393587 0.9885531
## 3:125-1:125   0.73333333  -5.226921   6.693587 0.9999463
## 1:150-1:125  21.86666667  15.906413  27.826921 0.0000000
## 2:150-1:125   7.90000000   1.939746  13.860254 0.0048999
## 3:150-1:125  -3.06666667  -9.026921   2.893587 0.6801842
## 3:125-2:125   2.30000000  -3.660254   8.260254 0.9014798
## 1:150-2:125  23.43333333  17.473079  29.393587 0.0000000
## 2:150-2:125   9.46666667   3.506413  15.426921 0.0007355
## 3:150-2:125  -1.50000000  -7.460254   4.460254 0.9913133
## 1:150-3:125  21.13333333  15.173079  27.093587 0.0000000
## 2:150-3:125   7.16666667   1.206413  13.126921 0.0119568
## 3:150-3:125  -3.80000000  -9.760254   2.160254 0.4260519
## 2:150-1:150 -13.96666667 -19.926921  -8.006413 0.0000050
## 3:150-1:150 -24.93333333 -30.893587 -18.973079 0.0000000
## 3:150-2:150 -10.96666667 -16.926921  -5.006413 0.0001271
plot(Glass.hsd)

# Most of the pairwise interaction terms are significant. We see no difference in mean light   
# by glass @ low and med temp. Also, we see no difference in the mean light @ high temp and  
# med temp for glass type 3. While there is a difference in means between high temp glass  
# type 2 and all types at lower temps, at the mid temp level, it is only significant at a 
# 95% level; if we were to increase our conf level, these would not show significance.

# Conclusions
#
# If our intent is to maximize light output, the best choice would be to impliment higher
# temperatures. If it is optimal to run at temp = 150, the best choice would be glass 
# type 1, or 2 if the savings overwrote the light increase. If 150 is not optimal or we 
# do not require such a high light output, choosing either temp = 100 or 125 would mean  
# that we could probably get away with choosing the cheapest glass level, regardless of 
# what that may be.
######################################################
# 4
# build and check data set
Density  <-c(rep(c(185,90,70,0),4))
Location <-c(rep(1,4),rep(2,4),rep(3,4),rep(4,4))
Yield    <-c(NA,2092,2329,2017,1531,NA,1819,1766,2167,2160,NA,1967,1245,1260,1523,NA)
Trees.df <-data.frame(Density,Location,Yield)
Trees.df
##    Density Location Yield
## 1      185        1    NA
## 2       90        1  2092
## 3       70        1  2329
## 4        0        1  2017
## 5      185        2  1531
## 6       90        2    NA
## 7       70        2  1819
## 8        0        2  1766
## 9      185        3  2167
## 10      90        3  2160
## 11      70        3    NA
## 12       0        3  1967
## 13     185        4  1245
## 14      90        4  1260
## 15      70        4  1523
## 16       0        4    NA
xtabs(Yield~Location+Density,data=Trees.df)
##         Density
## Location    0   70   90  185
##        1 2017 2329 2092    0
##        2 1766 1819    0 1531
##        3 1967    0 2160 2167
##        4    0 1523 1260 1245
# set factors
Trees.df$Density  <-as.factor(Trees.df$Density)
Trees.df$Location <-as.factor(Trees.df$Location)
str(Trees.df)
## 'data.frame':    16 obs. of  3 variables:
##  $ Density : Factor w/ 4 levels "0","70","90",..: 4 3 2 1 4 3 2 1 4 3 ...
##  $ Location: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 2 2 2 2 3 3 ...
##  $ Yield   : num  NA 2092 2329 2017 1531 ...
# a
# Setting treatments as our Density levels, and blocks as Location, we have the following
# sample statistics:
# a = 4, b = 4, k = 3, r = 3, lambda = 2 (we can see all these from our data table)

# b
# Investigate effect of density on yield
# Model: y(ij)=mu+T(i)+B(j)+E(ij), i,j={1:4}
# H0: T1=T2=T3=T4=0 , Ha: at least one != 0
# visually check data
TreesInt <-ddply(Trees.df,.(Density,Location),summarise,val=mean(Yield))
ggplot(Trees.df,aes(Density,Yield,color=Location))+
  geom_point(data=TreesInt[complete.cases(TreesInt),],aes(y=val))+
  geom_line(data=TreesInt[complete.cases(TreesInt),],aes(y=val,group=Location))+
  theme_bw()+
  ggtitle("Summary of Block Mean Yield Over Treatment")

# We see some possible evidence of interaction at location 1/2, although difficult
# to say for sure as we are missing observations from density levels. Locations 
# 2:4 look approximately parallel. We also see what looks like a possible decrease
# in yield as density increases.

# Test hypothesis
Trees.fit <-aov(Yield~Location+Density,data=Trees.df)
Anova(Trees.fit,type=3)
## Anova Table (Type III tests)
## 
## Response: Yield
##              Sum Sq Df  F value    Pr(>F)    
## (Intercept) 7175322  1 630.1909 1.872e-06 ***
## Location    1258563  3  36.8455 0.0007813 ***
## Density      117768  3   3.4477 0.1080670    
## Residuals     56930  5                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# We did not find any evidence for a difference in mean yields by density 
# (F = 3.45, p = 0.1). There is no need to continue with multiple comparisons
# after this. 

# c
# Test assumptions for ANOVA
par(mfrow=c(2,2))
plot(Trees.fit)

# Both our residuals and qq plot show no major violations of our assumptions,
# although one of our observations (8) is a little farther outside the general
# realm of where we would expect the residuals to land.
layout(1,1)
##############################################