######################################################
#STAT 430 - Assignment 07
#
#Sarah Rathwell - 301084687
#
#Objective: Investigate cross-array/response designs
######################################################
rm(list=ls())
library(DoE.base)
## Loading required package: grid
## Loading required package: conf.design
##
## Attaching package: 'DoE.base'
##
## The following objects are masked from 'package:stats':
##
## aov, lm
##
## The following object is masked from 'package:graphics':
##
## plot.design
######################################################
cat(" Investigate cross-array/response designs.\n\n",
"Last modification:",date(), '\n')
## Investigate cross-array/response designs.
##
## Last modification: Fri Sep 9 12:07:14 2016
######################################################
# 1
# (a)
# For this design, (2^(5-1) and 2^3), E = ABCD
# ==> I = ABCDE
# (b)
# A = BCDE
# B = ACDE
# C = ABDE
# D = ABCE
# AB = CDE
# AC = BDE
# AD = BCE
# BC = ADE
# BD = ACE
# CD = ABE
# ABC = DE
# ABD = CE
# ACD = BE
# BCD = AE
# ABCD = E
# (c)
# read in gear table without noise variable labels
gear.ca <- read.table("gearcrossarray.txt",header=T)
str(gear.ca)
## 'data.frame': 16 obs. of 13 variables:
## $ A : int 1 1 1 1 1 1 1 1 -1 -1 ...
## $ B : int 1 1 1 1 -1 -1 -1 -1 1 1 ...
## $ C : int 1 1 -1 -1 1 1 -1 -1 1 1 ...
## $ D : int 1 -1 1 -1 1 -1 1 -1 1 -1 ...
## $ E : int 1 -1 -1 1 -1 1 1 -1 -1 1 ...
## $ o1: num 7 13.5 3 10.5 10 6.5 5.5 4 -4 9 ...
## $ o2: num 12 14.5 11 14.5 23 22 28 14 18.5 19 ...
## $ o3: num 6.5 5.5 5.5 6.5 3.5 14.5 7.5 6.5 11.5 17.5 ...
## $ o4: num 14 17 18 17.5 23 23 28 23 26 21 ...
## $ o5: num 3 -7.5 3 3 4.5 5.5 4 9 -0.5 0.5 ...
## $ o6: num 14 15 19 14.5 25.5 18.5 27.5 25.5 13 20 ...
## $ o7: num 4 -4.5 1 9 10 8 10.5 9 0 6.5 ...
## $ o8: num 16.5 12 21 24 21 21.5 30 24.5 16.5 18 ...
gear.ca
## A B C D E o1 o2 o3 o4 o5 o6 o7 o8
## 1 1 1 1 1 1 7.0 12.0 6.5 14.0 3.0 14.0 4.0 16.5
## 2 1 1 1 -1 -1 13.5 14.5 5.5 17.0 -7.5 15.0 -4.5 12.0
## 3 1 1 -1 1 -1 3.0 11.0 5.5 18.0 3.0 19.0 1.0 21.0
## 4 1 1 -1 -1 1 10.5 14.5 6.5 17.5 3.0 14.5 9.0 24.0
## 5 1 -1 1 1 -1 10.0 23.0 3.5 23.0 4.5 25.5 10.0 21.0
## 6 1 -1 1 -1 1 6.5 22.0 14.5 23.0 5.5 18.5 8.0 21.5
## 7 1 -1 -1 1 1 5.5 28.0 7.5 28.0 4.0 27.5 10.5 30.0
## 8 1 -1 -1 -1 -1 4.0 14.0 6.5 23.0 9.0 25.5 9.0 24.5
## 9 -1 1 1 1 -1 -4.0 18.5 11.5 26.0 -0.5 13.0 0.0 16.5
## 10 -1 1 1 -1 1 9.0 19.0 17.5 21.0 0.5 20.0 6.5 18.0
## 11 -1 1 -1 1 1 17.5 20.0 10.0 23.0 6.5 21.5 0.0 26.0
## 12 -1 1 -1 -1 -1 7.0 23.5 1.0 20.0 7.0 22.5 4.0 22.5
## 13 -1 -1 1 1 1 2.5 22.0 12.0 19.5 7.0 27.5 8.5 23.5
## 14 -1 -1 1 -1 -1 24.0 26.0 14.5 27.5 7.0 22.5 13.0 22.0
## 15 -1 -1 -1 1 -1 5.5 27.0 2.5 31.0 12.5 27.0 11.5 32.5
## 16 -1 -1 -1 -1 1 11.0 21.5 12.0 27.0 16.5 29.5 16.0 28.5
gear.mat <- as.matrix(gear.ca)
# find mean and variance for each row of observations (6 per treatment)
gear.means <- rowMeans(gear.mat[,6:13])
row.vars <- function(data){
row.var=NULL
for(i in 1:nrow(data)){
row.var[i]=var(data[i,6:13])
}
return(row.var)
}
gear.vars <- row.vars(gear.mat)
gear.mv <- as.data.frame(gear.mat[,1:5])
gear.mv$mean <- gear.means
gear.mv$disp <- log(gear.vars) # take the log of the vars for a better LM fit
gear.mv
## A B C D E mean disp
## 1 1 1 1 1 1 9.6250 3.265623
## 2 1 1 1 -1 -1 8.1875 4.485371
## 3 1 1 -1 1 -1 10.1875 4.198208
## 4 1 1 -1 -1 1 12.4375 3.783276
## 5 1 -1 1 1 -1 15.0625 4.393953
## 6 1 -1 1 -1 1 14.9375 3.992203
## 7 1 -1 -1 1 1 17.6250 4.912524
## 8 1 -1 -1 -1 -1 14.4375 4.323600
## 9 -1 1 1 1 -1 10.1250 4.723429
## 10 -1 1 1 -1 1 13.9375 4.049842
## 11 -1 1 -1 1 1 15.5625 4.414042
## 12 -1 1 -1 -1 -1 13.4375 4.508059
## 13 -1 -1 1 1 1 15.3125 4.397036
## 14 -1 -1 1 -1 -1 19.5625 3.954586
## 15 -1 -1 -1 1 -1 18.6875 4.968293
## 16 -1 -1 -1 -1 1 20.2500 4.011222
# fit a linear model for the 5 treatment levels to find effects on mean
gear.mean.fit <- lm(mean ~ A*B*C*D*E, data=gear.mv)
summary(gear.mean.fit)
##
## Call:
## lm.default(formula = mean ~ A * B * C * D * E, data = gear.mv)
##
## Residuals:
## ALL 16 residuals are 0: no residual degrees of freedom!
##
## Coefficients: (16 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.434e+01 NA NA NA
## A -1.523e+00 NA NA NA
## B -2.648e+00 NA NA NA
## C -9.922e-01 NA NA NA
## D -3.125e-01 NA NA NA
## E 6.250e-01 NA NA NA
## A:B -5.469e-02 NA NA NA
## A:C 1.328e-01 NA NA NA
## B:C -2.266e-01 NA NA NA
## A:D 6.250e-01 NA NA NA
## B:D 1.344e-16 NA NA NA
## C:D -5.000e-01 NA NA NA
## A:E 2.188e-01 NA NA NA
## B:E 5.781e-01 NA NA NA
## C:E -5.156e-01 NA NA NA
## D:E -1.172e-01 NA NA NA
## A:B:C NA NA NA NA
## A:B:D NA NA NA NA
## A:C:D NA NA NA NA
## B:C:D NA NA NA NA
## A:B:E NA NA NA NA
## A:C:E NA NA NA NA
## B:C:E NA NA NA NA
## A:D:E NA NA NA NA
## B:D:E NA NA NA NA
## C:D:E NA NA NA NA
## A:B:C:D NA NA NA NA
## A:B:C:E NA NA NA NA
## A:B:D:E NA NA NA NA
## A:C:D:E NA NA NA NA
## B:C:D:E NA NA NA NA
## A:B:C:D:E NA NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 15 and 0 DF, p-value: NA
# use lsr coefficients to fit effect sizes to half normal plot
mean.coeffs <- 2*gear.mean.fit$coefficients[!is.na(gear.mean.fit$coefficients)][-1]
halfnormal(mean.coeffs, main="Plot for Mean Fit Coeffs")

sort(abs(mean.coeffs))
## B:D A:B D:E A:C A:E
## 2.688821e-16 1.093750e-01 2.343750e-01 2.656250e-01 4.375000e-01
## B:C D C:D C:E B:E
## 4.531250e-01 6.250000e-01 1.000000e+00 1.031250e+00 1.156250e+00
## A:D E C A B
## 1.250000e+00 1.250000e+00 1.984375e+00 3.046875e+00 5.296875e+00
# Note that effects B and A appear to be the strongest, although we may
# also be interested in including C. The E, D, and interaction effects do
# not appear as important.
# fit a linear model for the 5 treatement levels to find effects on dispersion
gear.disp.fit <- lm(disp ~ A*B*C*D*E, data=gear.mv)
summary(gear.disp.fit)
##
## Call:
## lm.default(formula = disp ~ A * B * C * D * E, data = gear.mv)
##
## Residuals:
## ALL 16 residuals are 0: no residual degrees of freedom!
##
## Coefficients: (16 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.273829 NA NA NA
## A -0.104484 NA NA NA
## B -0.095348 NA NA NA
## C -0.116074 NA NA NA
## D 0.135309 NA NA NA
## E -0.170608 NA NA NA
## A:B -0.140877 NA NA NA
## A:C -0.018984 NA NA NA
## B:C 0.068659 NA NA NA
## A:D -0.112077 NA NA NA
## B:D -0.163465 NA NA NA
## C:D -0.098054 NA NA NA
## A:E -0.010330 NA NA NA
## B:E -0.129677 NA NA NA
## C:E -0.060971 NA NA NA
## D:E 0.008776 NA NA NA
## A:B:C NA NA NA NA
## A:B:D NA NA NA NA
## A:C:D NA NA NA NA
## B:C:D NA NA NA NA
## A:B:E NA NA NA NA
## A:C:E NA NA NA NA
## B:C:E NA NA NA NA
## A:D:E NA NA NA NA
## B:D:E NA NA NA NA
## C:D:E NA NA NA NA
## A:B:C:D NA NA NA NA
## A:B:C:E NA NA NA NA
## A:B:D:E NA NA NA NA
## A:C:D:E NA NA NA NA
## B:C:D:E NA NA NA NA
## A:B:C:D:E NA NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 15 and 0 DF, p-value: NA
# use lsr coefficients to fit effect sizes to half normal plot
disp.coeffs <- 2*gear.disp.fit$coefficients[!is.na(gear.disp.fit$coefficients)][-1]
halfnormal(disp.coeffs, main="Plot for Dispersion Fit Coeffs")

sort(abs(disp.coeffs))
## D:E A:E A:C C:E B:C B
## 0.01755186 0.02066009 0.03796705 0.12194215 0.13731741 0.19069579
## C:D A A:D C B:E D
## 0.19610878 0.20896898 0.22415416 0.23214761 0.25935451 0.27061865
## A:B B:D E
## 0.28175466 0.32692993 0.34121656
# This plot is more difficult to read. Most effect coeffs appear to fit
# approximately on the line. I would take E as a main effect, and possibly
# the interaction B:D to help minimize our variance model.
#
#
# The model we will use for mean:
# Mean = -1.523*A - 2.648*B - 9.922*C
# The model we will use for dispersion:
# log(Var) = -0.171*E - 0.163*B*D
# (d)
# To minimize the variance for this model, we would want to choose the
# factor level E = 1. If we choose to include B:D, our choices would
# depend on whether we were looking for a specific mean. That being said,
# if we chose a positive B (as it is an effect in the mean), we would choose
# D = 1; if we chose a negative B, we would choose D = -1.
# (e)
gear.a <- read.table("geararray.txt",header=T)
str(gear.a)
## 'data.frame': 128 obs. of 9 variables:
## $ A : int 1 1 1 1 1 1 1 1 -1 -1 ...
## $ B : int 1 1 1 1 -1 -1 -1 -1 1 1 ...
## $ C : int 1 1 -1 -1 1 1 -1 -1 1 1 ...
## $ D : int 1 -1 1 -1 1 -1 1 -1 1 -1 ...
## $ E : int 1 -1 -1 1 -1 1 1 -1 -1 1 ...
## $ z1: int 1 1 1 1 1 1 1 1 1 1 ...
## $ z2: int 1 1 1 1 1 1 1 1 1 1 ...
## $ z3: int 1 1 1 1 1 1 1 1 1 1 ...
## $ y : num 7 13.5 3 10.5 10 6.5 5.5 4 -4 9 ...
gear.a[1:10,]
## A B C D E z1 z2 z3 y
## 1 1 1 1 1 1 1 1 1 7.0
## 2 1 1 1 -1 -1 1 1 1 13.5
## 3 1 1 -1 1 -1 1 1 1 3.0
## 4 1 1 -1 -1 1 1 1 1 10.5
## 5 1 -1 1 1 -1 1 1 1 10.0
## 6 1 -1 1 -1 1 1 1 1 6.5
## 7 1 -1 -1 1 1 1 1 1 5.5
## 8 1 -1 -1 -1 -1 1 1 1 4.0
## 9 -1 1 1 1 -1 1 1 1 -4.0
## 10 -1 1 1 -1 1 1 1 1 9.0
gear.a.fit <- lm(y ~ A*B*C*D*E*z1*z2*z3, data=gear.a)
gear.a.coeffs <- 2*gear.a.fit$coefficients[!is.na(gear.a.fit$coefficients)][-1]
halfnormal(gear.a.coeffs, main="Plot for Response Fit Coeffs")

sort(abs(gear.a.coeffs))
## B:z1:z2 B:C:z1:z2:z3 B:D B:E:z1 D:E:z3
## 7.302406e-16 1.831059e-15 2.776794e-15 1.562500e-02 1.562500e-02
## D:z2:z3 B:z2 z1:z2 A:z2:z3 E:z1:z2
## 3.125000e-02 4.687500e-02 6.250000e-02 7.812500e-02 7.812500e-02
## A:C:z1 C:E:z3 A:B A:D:z1:z3 C:D:z2
## 9.375000e-02 9.375000e-02 1.093750e-01 1.093750e-01 1.250000e-01
## A:C:z3 B:C:z1 C:z1:z2:z3 A:C:z1:z3 D:z2
## 1.406250e-01 1.562500e-01 1.562500e-01 1.875000e-01 1.875000e-01
## A:E:z2:z3 C:E:z2 C:z2 D:E C:D:z1:z2:z3
## 1.875000e-01 2.187500e-01 2.343750e-01 2.343750e-01 2.343750e-01
## D:E:z2:z3 B:D:z1:z3 C:z1:z2 A:C:z1:z2:z3 B:C:z2:z3
## 2.343750e-01 2.343750e-01 2.500000e-01 2.500000e-01 2.656250e-01
## E:z1 A:B:z2 B:D:z1 A:C B:E:z2
## 2.656250e-01 2.656250e-01 2.656250e-01 2.656250e-01 2.812500e-01
## A:z2 A:z1 C:D:z3 B:D:z1:z2:z3 C:E:z1:z3
## 2.968750e-01 3.125000e-01 3.125000e-01 3.281250e-01 3.281250e-01
## B:E:z1:z3 A:D:z1 C:D:z2:z3 A:E:z1 B:z1:z2:z3
## 3.281250e-01 3.281250e-01 3.437500e-01 3.593750e-01 3.750000e-01
## A:D:z2:z3 A:D:z3 A:B:z1:z3 C:z1:z3 D:z1:z2
## 3.750000e-01 4.062500e-01 4.062500e-01 4.062500e-01 4.218750e-01
## D:E:z1:z2:z3 A:E C:D:z1:z2 B:C A:D:z1:z2
## 4.375000e-01 4.375000e-01 4.531250e-01 4.531250e-01 4.531250e-01
## z2:z3 A:B:z1:z2:z3 A:z1:z2 A:E:z3 A:z3
## 4.531250e-01 4.687500e-01 4.687500e-01 4.687500e-01 4.843750e-01
## A:B:z1:z2 B:C:z1:z3 C:E:z2:z3 D:E:z1 D:z1:z3
## 5.000000e-01 5.000000e-01 5.000000e-01 5.000000e-01 5.156250e-01
## A:z1:z2:z3 D:E:z1:z2 A:z1:z3 A:B:z1 A:B:z2:z3
## 5.312500e-01 5.312500e-01 5.312500e-01 5.312500e-01 5.468750e-01
## A:E:z1:z2:z3 A:E:z2 E:z2 B:D:z2 A:C:z1:z2
## 5.468750e-01 5.625000e-01 5.625000e-01 5.625000e-01 5.625000e-01
## E:z2:z3 B:C:z3 D A:D:z2 A:D:z1:z2:z3
## 5.937500e-01 6.093750e-01 6.250000e-01 6.250000e-01 6.406250e-01
## B:z2:z3 D:z1:z2:z3 E:z1:z3 B:D:z2:z3 z1:z2:z3
## 6.406250e-01 6.406250e-01 6.718750e-01 6.875000e-01 7.187500e-01
## E:z1:z2:z3 D:z1 B:E:z1:z2:z3 B:C:z1:z2 C:E:z1:z2:z3
## 7.656250e-01 7.656250e-01 7.656250e-01 8.125000e-01 8.281250e-01
## z1 B:E:z2:z3 C:D:z1:z3 C:E:z1:z2 B:C:z2
## 8.437500e-01 8.437500e-01 8.593750e-01 8.593750e-01 8.906250e-01
## A:C:z2:z3 A:E:z1:z2 B:E:z1:z2 D:E:z1:z3 C:D
## 8.906250e-01 8.906250e-01 8.906250e-01 9.375000e-01 1.000000e+00
## E:z3 B:D:z1:z2 C:E B:E:z3 A:B:z3
## 1.000000e+00 1.015625e+00 1.031250e+00 1.062500e+00 1.140625e+00
## B:E B:z3 A:D E D:E:z2
## 1.156250e+00 1.171875e+00 1.250000e+00 1.250000e+00 1.265625e+00
## A:E:z1:z3 B:z1:z3 C:z3 z2 A:C:z2
## 1.359375e+00 1.375000e+00 1.390625e+00 1.390625e+00 1.390625e+00
## B:D:z3 C:z2:z3 C:E:z1 D:z3 z1:z3
## 1.406250e+00 1.515625e+00 1.578125e+00 1.687500e+00 1.718750e+00
## B:z1 C:D:z1 C C:z1 A
## 1.843750e+00 1.859375e+00 1.984375e+00 2.593750e+00 3.046875e+00
## B z3
## 5.296875e+00 1.439062e+01
gear.a.fit$coefficients[1]
## (Intercept)
## 14.33594
# Our fit shows Z3 as a main effect. We may also wish to include B, A, and
# C:Z1 as well.
# The response model is as follows:
# y = 14.336 + 14.391*Z3 + 5.297*B + 3.047*A + 2.594*C1*Z1
# The expected response:
# E(y) = 14.336 + 5.297*B + 3.047*A
# The variance:
# V(y) = (14.391*sigma(z3))^2 +(2.594*C*sigma(z1))^2 + sigma(e)^2
# (f)
# To minimize V(y), we would choose C = -1.
# (g)
# If we ran a design similar to our response design, but with B set to the
# the Z1 factor levels, and Z1 now = Z2 * Z3, such that I = Z1*Z2*Z3, we would
# be able to compute all the control factors, as well as all 2 level interactions
# between control and noise factors. All control-noise factors which would be
# aliased would be 3 or more level interactions, which we can generally assume
# to be less important in our analysis. If we wanted to be able to assess all possible
# control-noise variables, we would have to double the size of the study to allow
# for each observation to have a unique assortment of levels.