######################################################
#STAT 430 - Assignment 04
#
#Sarah Rathwell - 301084687
#
#Objective: Investigate factorial designs
######################################################
options(width=200)
rm(list=ls())
library(ggplot2)
library(plyr)
library(car)
library(doBy)
## Loading required package: survival
## Loading required package: splines
######################################################
cat(" Investigate factorial designs.\n\n",
"Last modification:",date(), '\n')
## Investigate factorial designs.
##
## Last modification: Mon Nov 24 10:25:56 2014
######################################################
# 2
# build and check data set
Glass <-c(rep(1,9),rep(2,9),rep(3,9))
Temp <-c(rep(c(rep(100,3),rep(125,3),rep(150,3)),3))
Light <-c(58,56.8,57,107,106.7,106.5,129.2,128,128.6,
55,53,57.9,107,103.5,105,117.8,116.2,109.9,
54.6,57.5,59.9,106.5,107.3,108.6,101.7,105.4,103.9)
Glass.df <-data.frame(Glass,Temp,Light)
Glass.df
## Glass Temp Light
## 1 1 100 58.0
## 2 1 100 56.8
## 3 1 100 57.0
## 4 1 125 107.0
## 5 1 125 106.7
## 6 1 125 106.5
## 7 1 150 129.2
## 8 1 150 128.0
## 9 1 150 128.6
## 10 2 100 55.0
## 11 2 100 53.0
## 12 2 100 57.9
## 13 2 125 107.0
## 14 2 125 103.5
## 15 2 125 105.0
## 16 2 150 117.8
## 17 2 150 116.2
## 18 2 150 109.9
## 19 3 100 54.6
## 20 3 100 57.5
## 21 3 100 59.9
## 22 3 125 106.5
## 23 3 125 107.3
## 24 3 125 108.6
## 25 3 150 101.7
## 26 3 150 105.4
## 27 3 150 103.9
xtabs(~Glass+Temp,data=Glass.df)
## Temp
## Glass 100 125 150
## 1 3 3 3
## 2 3 3 3
## 3 3 3 3
# set factors
Glass.df$Glass <-as.factor(Glass.df$Glass)
Glass.df$Temp <-as.factor(Glass.df$Temp)
str(Glass.df)
## 'data.frame': 27 obs. of 3 variables:
## $ Glass: Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 2 ...
## $ Temp : Factor w/ 3 levels "100","125","150": 1 1 1 2 2 2 3 3 3 1 ...
## $ Light: num 58 56.8 57 107 106.7 ...
# visually analyze sample data w/factor interaction
GlassInt <-ddply(Glass.df,.(Glass,Temp),summarise,val=mean(Light))
ggplot(Glass.df,aes(Glass,Light,color=Temp))+
geom_boxplot()+
geom_point(data=GlassInt,aes(y=val))+
geom_line(data=GlassInt,aes(y=val,group=Temp))+
theme_bw()+
ggtitle("Summary of Light Over Glass Type & Temp")

# There appears to be some variation at the two higher temps over glass
# types; more so at the highest level of temp; interaction between glass
# and temp at highest level
# a
# Fit an ANOVA for model y(ijk)=mu+T(i)+B(j)+(TB)(ij)+E(ijk); i,j,k={1:3},
# T~Glass, B~Temp
# Test hypothesis H0: T1=T2=T3=0; B1=B2=B3=0, HA: at least one T,B not 0
Glass.fit <-aov(Light~Glass*Temp,data=Glass.df,contrasts=list(Glass=contr.sum,Temp=contr.sum))
anova(Glass.fit)
## Analysis of Variance Table
##
## Response: Light
## Df Sum Sq Mean Sq F value Pr(>F)
## Glass 2 310.9 155.4 35.814 5.315e-07 ***
## Temp 2 18142.5 9071.2 2089.966 < 2.2e-16 ***
## Glass:Temp 4 642.4 160.6 37.002 1.869e-08 ***
## Residuals 18 78.1 4.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# At alpha=0.5, there is strong evidence that temperature effects mean light
# measurements (F = 2090, p<<.001), that glass effects mean light measurements
# (F = 35.8, p<<.001), and that there is an interaction between temp and glass
# type which effects mean light measurements (F = 37, p<<.001)
# b
# Estimate main effects and interaction
summary.lm(Glass.fit)
##
## Call:
## aov(formula = Light ~ Glass * Temp, data = Glass.df, contrasts = list(Glass = contr.sum,
## Temp = contr.sum))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.7333 -0.5333 -0.0333 0.9333 3.1667
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 92.90741 0.40094 231.723 < 2e-16 ***
## Glass1 4.62593 0.56702 8.158 1.85e-07 ***
## Glass2 -1.20741 0.56702 -2.129 0.0473 *
## Temp1 -36.27407 0.56702 -63.973 < 2e-16 ***
## Temp2 13.54815 0.56702 23.894 4.38e-15 ***
## Glass1:Temp1 -3.99259 0.80188 -4.979 9.72e-05 ***
## Glass2:Temp1 -0.12593 0.80188 -0.157 0.8770
## Glass1:Temp2 -4.34815 0.80188 -5.422 3.76e-05 ***
## Glass2:Temp2 -0.08148 0.80188 -0.102 0.9202
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.083 on 18 degrees of freedom
## Multiple R-squared: 0.9959, Adjusted R-squared: 0.9941
## F-statistic: 549.9 on 8 and 18 DF, p-value: < 2.2e-16
# Our fitted model estimates are as follows:
# **Ti=mean(yi..)-mu, Bj=mean(y.j.)-mu, (TB)ij=mean(yij.)-mean(yi..)-mean(y.j.)+mu
# Mu = 92.9 (se:0.40)
# T1 = 4.6, T2 = -1.2 , T3 = -3.4 (se:0.57)
# B1 = -36.3, B2 = 13.6, B3 = 22.7 (se:0.57)
# TB11 = -4.0, TB12 = -4.3, TB13 = 8.3, TB21 = -0.1, TB22 = -0.1, TB23 = 0.2,
# TB31 = 4.1, TB32 = 4.5, TB33 = -8.5 (se:0.80)
# c
# check model assumptions via residuals
par(mfrow=c(2,2))
plot(Glass.fit)

# Note the leverage graph plots against glass; the plot against temp is different in
# scatter along X but identical along the standardized residuals.
# We see some possible indication that our assumption of normality is incorrect. There
# is possible evidence for an extraneous value (obs 18) which may warrent more
# examination. Our risiduals however, do look approximately randomly scattered over the
# fitted values.
# d
# analyse multiple comparisons for treatment differences
popMeans(Glass.fit,eff=c("Glass","Temp"))
## estimate se df t.stat p.value Glass Temp
## 1 57.26667 1.202826 18 47.61010 2.176569e-20 1 100
## 2 55.30000 1.202826 18 45.97507 4.062909e-20 2 100
## 3 57.33333 1.202826 18 47.66553 2.131794e-20 3 100
## 4 106.73333 1.202826 18 88.73548 3.100968e-25 1 125
## 5 105.16667 1.202826 18 87.43299 4.044254e-25 2 125
## 6 107.46667 1.202826 18 89.34516 2.742116e-25 3 125
## 7 128.60000 1.202826 18 106.91489 1.089425e-26 1 150
## 8 114.63333 1.202826 18 95.30335 8.598630e-26 2 150
## 9 103.66667 1.202826 18 86.18593 5.234627e-25 3 150
par(mfrow=c(1,1))
Glass.hsd <-TukeyHSD(Glass.fit,"Glass:Temp")
Glass.hsd
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Light ~ Glass * Temp, data = Glass.df, contrasts = list(Glass = contr.sum, Temp = contr.sum))
##
## $`Glass:Temp`
## diff lwr upr p adj
## 2:100-1:100 -1.96666667 -7.926921 3.993587 0.9561247
## 3:100-1:100 0.06666667 -5.893587 6.026921 1.0000000
## 1:125-1:100 49.46666667 43.506413 55.426921 0.0000000
## 2:125-1:100 47.90000000 41.939746 53.860254 0.0000000
## 3:125-1:100 50.20000000 44.239746 56.160254 0.0000000
## 1:150-1:100 71.33333333 65.373079 77.293587 0.0000000
## 2:150-1:100 57.36666667 51.406413 63.326921 0.0000000
## 3:150-1:100 46.40000000 40.439746 52.360254 0.0000000
## 3:100-2:100 2.03333333 -3.926921 7.993587 0.9474195
## 1:125-2:100 51.43333333 45.473079 57.393587 0.0000000
## 2:125-2:100 49.86666667 43.906413 55.826921 0.0000000
## 3:125-2:100 52.16666667 46.206413 58.126921 0.0000000
## 1:150-2:100 73.30000000 67.339746 79.260254 0.0000000
## 2:150-2:100 59.33333333 53.373079 65.293587 0.0000000
## 3:150-2:100 48.36666667 42.406413 54.326921 0.0000000
## 1:125-3:100 49.40000000 43.439746 55.360254 0.0000000
## 2:125-3:100 47.83333333 41.873079 53.793587 0.0000000
## 3:125-3:100 50.13333333 44.173079 56.093587 0.0000000
## 1:150-3:100 71.26666667 65.306413 77.226921 0.0000000
## 2:150-3:100 57.30000000 51.339746 63.260254 0.0000000
## 3:150-3:100 46.33333333 40.373079 52.293587 0.0000000
## 2:125-1:125 -1.56666667 -7.526921 4.393587 0.9885531
## 3:125-1:125 0.73333333 -5.226921 6.693587 0.9999463
## 1:150-1:125 21.86666667 15.906413 27.826921 0.0000000
## 2:150-1:125 7.90000000 1.939746 13.860254 0.0048999
## 3:150-1:125 -3.06666667 -9.026921 2.893587 0.6801842
## 3:125-2:125 2.30000000 -3.660254 8.260254 0.9014798
## 1:150-2:125 23.43333333 17.473079 29.393587 0.0000000
## 2:150-2:125 9.46666667 3.506413 15.426921 0.0007355
## 3:150-2:125 -1.50000000 -7.460254 4.460254 0.9913133
## 1:150-3:125 21.13333333 15.173079 27.093587 0.0000000
## 2:150-3:125 7.16666667 1.206413 13.126921 0.0119568
## 3:150-3:125 -3.80000000 -9.760254 2.160254 0.4260519
## 2:150-1:150 -13.96666667 -19.926921 -8.006413 0.0000050
## 3:150-1:150 -24.93333333 -30.893587 -18.973079 0.0000000
## 3:150-2:150 -10.96666667 -16.926921 -5.006413 0.0001271
plot(Glass.hsd)

# Most of the pairwise interaction terms are significant. We see no difference in mean light
# by glass @ low and med temp. Also, we see no difference in the mean light @ high temp and
# med temp for glass type 3. While there is a difference in means between high temp glass
# type 2 and all types at lower temps, at the mid temp level, it is only significant at a
# 95% level; if we were to increase our conf level, these would not show significance.
# Conclusions
#
# If our intent is to maximize light output, the best choice would be to impliment higher
# temperatures. If it is optimal to run at temp = 150, the best choice would be glass
# type 1, or 2 if the savings overwrote the light increase. If 150 is not optimal or we
# do not require such a high light output, choosing either temp = 100 or 125 would mean
# that we could probably get away with choosing the cheapest glass level, regardless of
# what that may be.
######################################################
# 4
# build and check data set
Density <-c(rep(c(185,90,70,0),4))
Location <-c(rep(1,4),rep(2,4),rep(3,4),rep(4,4))
Yield <-c(NA,2092,2329,2017,1531,NA,1819,1766,2167,2160,NA,1967,1245,1260,1523,NA)
Trees.df <-data.frame(Density,Location,Yield)
Trees.df
## Density Location Yield
## 1 185 1 NA
## 2 90 1 2092
## 3 70 1 2329
## 4 0 1 2017
## 5 185 2 1531
## 6 90 2 NA
## 7 70 2 1819
## 8 0 2 1766
## 9 185 3 2167
## 10 90 3 2160
## 11 70 3 NA
## 12 0 3 1967
## 13 185 4 1245
## 14 90 4 1260
## 15 70 4 1523
## 16 0 4 NA
xtabs(Yield~Location+Density,data=Trees.df)
## Density
## Location 0 70 90 185
## 1 2017 2329 2092 0
## 2 1766 1819 0 1531
## 3 1967 0 2160 2167
## 4 0 1523 1260 1245
# set factors
Trees.df$Density <-as.factor(Trees.df$Density)
Trees.df$Location <-as.factor(Trees.df$Location)
str(Trees.df)
## 'data.frame': 16 obs. of 3 variables:
## $ Density : Factor w/ 4 levels "0","70","90",..: 4 3 2 1 4 3 2 1 4 3 ...
## $ Location: Factor w/ 4 levels "1","2","3","4": 1 1 1 1 2 2 2 2 3 3 ...
## $ Yield : num NA 2092 2329 2017 1531 ...
# a
# Setting treatments as our Density levels, and blocks as Location, we have the following
# sample statistics:
# a = 4, b = 4, k = 3, r = 3, lambda = 2 (we can see all these from our data table)
# b
# Investigate effect of density on yield
# Model: y(ij)=mu+T(i)+B(j)+E(ij), i,j={1:4}
# H0: T1=T2=T3=T4=0 , Ha: at least one != 0
# visually check data
TreesInt <-ddply(Trees.df,.(Density,Location),summarise,val=mean(Yield))
ggplot(Trees.df,aes(Density,Yield,color=Location))+
geom_point(data=TreesInt[complete.cases(TreesInt),],aes(y=val))+
geom_line(data=TreesInt[complete.cases(TreesInt),],aes(y=val,group=Location))+
theme_bw()+
ggtitle("Summary of Block Mean Yield Over Treatment")

# We see some possible evidence of interaction at location 1/2, although difficult
# to say for sure as we are missing observations from density levels. Locations
# 2:4 look approximately parallel. We also see what looks like a possible decrease
# in yield as density increases.
# Test hypothesis
Trees.fit <-aov(Yield~Location+Density,data=Trees.df)
Anova(Trees.fit,type=3)
## Anova Table (Type III tests)
##
## Response: Yield
## Sum Sq Df F value Pr(>F)
## (Intercept) 7175322 1 630.1909 1.872e-06 ***
## Location 1258563 3 36.8455 0.0007813 ***
## Density 117768 3 3.4477 0.1080670
## Residuals 56930 5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# We did not find any evidence for a difference in mean yields by density
# (F = 3.45, p = 0.1). There is no need to continue with multiple comparisons
# after this.
# c
# Test assumptions for ANOVA
par(mfrow=c(2,2))
plot(Trees.fit)

# Both our residuals and qq plot show no major violations of our assumptions,
# although one of our observations (8) is a little farther outside the general
# realm of where we would expect the residuals to land.
layout(1,1)
##############################################