######################################################
#STAT 430 - Assignment 07
#
#Sarah Rathwell - 301084687
#
#Objective: Investigate cross-array/response designs
######################################################
rm(list=ls())
library(DoE.base)
## Loading required package: grid
## Loading required package: conf.design
## 
## Attaching package: 'DoE.base'
## 
## The following objects are masked from 'package:stats':
## 
##     aov, lm
## 
## The following object is masked from 'package:graphics':
## 
##     plot.design
######################################################
cat(" Investigate cross-array/response designs.\n\n", 
    "Last modification:",date(), '\n')
##  Investigate cross-array/response designs.
## 
##  Last modification: Fri Sep  9 12:07:14 2016
######################################################
# 1
# (a)
# For this design, (2^(5-1) and 2^3), E = ABCD
# ==> I = ABCDE

# (b)
# A = BCDE
# B = ACDE
# C = ABDE
# D = ABCE
# AB = CDE
# AC = BDE
# AD = BCE
# BC = ADE
# BD = ACE
# CD = ABE
# ABC = DE
# ABD = CE
# ACD = BE
# BCD = AE
# ABCD = E

# (c)
# read in gear table without noise variable labels
gear.ca <- read.table("gearcrossarray.txt",header=T)
str(gear.ca)
## 'data.frame':    16 obs. of  13 variables:
##  $ A : int  1 1 1 1 1 1 1 1 -1 -1 ...
##  $ B : int  1 1 1 1 -1 -1 -1 -1 1 1 ...
##  $ C : int  1 1 -1 -1 1 1 -1 -1 1 1 ...
##  $ D : int  1 -1 1 -1 1 -1 1 -1 1 -1 ...
##  $ E : int  1 -1 -1 1 -1 1 1 -1 -1 1 ...
##  $ o1: num  7 13.5 3 10.5 10 6.5 5.5 4 -4 9 ...
##  $ o2: num  12 14.5 11 14.5 23 22 28 14 18.5 19 ...
##  $ o3: num  6.5 5.5 5.5 6.5 3.5 14.5 7.5 6.5 11.5 17.5 ...
##  $ o4: num  14 17 18 17.5 23 23 28 23 26 21 ...
##  $ o5: num  3 -7.5 3 3 4.5 5.5 4 9 -0.5 0.5 ...
##  $ o6: num  14 15 19 14.5 25.5 18.5 27.5 25.5 13 20 ...
##  $ o7: num  4 -4.5 1 9 10 8 10.5 9 0 6.5 ...
##  $ o8: num  16.5 12 21 24 21 21.5 30 24.5 16.5 18 ...
gear.ca
##     A  B  C  D  E   o1   o2   o3   o4   o5   o6   o7   o8
## 1   1  1  1  1  1  7.0 12.0  6.5 14.0  3.0 14.0  4.0 16.5
## 2   1  1  1 -1 -1 13.5 14.5  5.5 17.0 -7.5 15.0 -4.5 12.0
## 3   1  1 -1  1 -1  3.0 11.0  5.5 18.0  3.0 19.0  1.0 21.0
## 4   1  1 -1 -1  1 10.5 14.5  6.5 17.5  3.0 14.5  9.0 24.0
## 5   1 -1  1  1 -1 10.0 23.0  3.5 23.0  4.5 25.5 10.0 21.0
## 6   1 -1  1 -1  1  6.5 22.0 14.5 23.0  5.5 18.5  8.0 21.5
## 7   1 -1 -1  1  1  5.5 28.0  7.5 28.0  4.0 27.5 10.5 30.0
## 8   1 -1 -1 -1 -1  4.0 14.0  6.5 23.0  9.0 25.5  9.0 24.5
## 9  -1  1  1  1 -1 -4.0 18.5 11.5 26.0 -0.5 13.0  0.0 16.5
## 10 -1  1  1 -1  1  9.0 19.0 17.5 21.0  0.5 20.0  6.5 18.0
## 11 -1  1 -1  1  1 17.5 20.0 10.0 23.0  6.5 21.5  0.0 26.0
## 12 -1  1 -1 -1 -1  7.0 23.5  1.0 20.0  7.0 22.5  4.0 22.5
## 13 -1 -1  1  1  1  2.5 22.0 12.0 19.5  7.0 27.5  8.5 23.5
## 14 -1 -1  1 -1 -1 24.0 26.0 14.5 27.5  7.0 22.5 13.0 22.0
## 15 -1 -1 -1  1 -1  5.5 27.0  2.5 31.0 12.5 27.0 11.5 32.5
## 16 -1 -1 -1 -1  1 11.0 21.5 12.0 27.0 16.5 29.5 16.0 28.5
gear.mat <- as.matrix(gear.ca)

# find mean and variance for each row of observations (6 per treatment)
gear.means <- rowMeans(gear.mat[,6:13])
row.vars <- function(data){
  row.var=NULL
  for(i in 1:nrow(data)){
    row.var[i]=var(data[i,6:13])
  }
  return(row.var)
}
gear.vars <- row.vars(gear.mat)
gear.mv <- as.data.frame(gear.mat[,1:5])
gear.mv$mean <- gear.means
gear.mv$disp <- log(gear.vars) # take the log of the vars for a better LM fit
gear.mv
##     A  B  C  D  E    mean     disp
## 1   1  1  1  1  1  9.6250 3.265623
## 2   1  1  1 -1 -1  8.1875 4.485371
## 3   1  1 -1  1 -1 10.1875 4.198208
## 4   1  1 -1 -1  1 12.4375 3.783276
## 5   1 -1  1  1 -1 15.0625 4.393953
## 6   1 -1  1 -1  1 14.9375 3.992203
## 7   1 -1 -1  1  1 17.6250 4.912524
## 8   1 -1 -1 -1 -1 14.4375 4.323600
## 9  -1  1  1  1 -1 10.1250 4.723429
## 10 -1  1  1 -1  1 13.9375 4.049842
## 11 -1  1 -1  1  1 15.5625 4.414042
## 12 -1  1 -1 -1 -1 13.4375 4.508059
## 13 -1 -1  1  1  1 15.3125 4.397036
## 14 -1 -1  1 -1 -1 19.5625 3.954586
## 15 -1 -1 -1  1 -1 18.6875 4.968293
## 16 -1 -1 -1 -1  1 20.2500 4.011222
# fit a linear model for the 5 treatment levels to find effects on mean
gear.mean.fit <- lm(mean ~ A*B*C*D*E, data=gear.mv)
summary(gear.mean.fit)
## 
## Call:
## lm.default(formula = mean ~ A * B * C * D * E, data = gear.mv)
## 
## Residuals:
## ALL 16 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (16 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  1.434e+01         NA      NA       NA
## A           -1.523e+00         NA      NA       NA
## B           -2.648e+00         NA      NA       NA
## C           -9.922e-01         NA      NA       NA
## D           -3.125e-01         NA      NA       NA
## E            6.250e-01         NA      NA       NA
## A:B         -5.469e-02         NA      NA       NA
## A:C          1.328e-01         NA      NA       NA
## B:C         -2.266e-01         NA      NA       NA
## A:D          6.250e-01         NA      NA       NA
## B:D          1.344e-16         NA      NA       NA
## C:D         -5.000e-01         NA      NA       NA
## A:E          2.188e-01         NA      NA       NA
## B:E          5.781e-01         NA      NA       NA
## C:E         -5.156e-01         NA      NA       NA
## D:E         -1.172e-01         NA      NA       NA
## A:B:C               NA         NA      NA       NA
## A:B:D               NA         NA      NA       NA
## A:C:D               NA         NA      NA       NA
## B:C:D               NA         NA      NA       NA
## A:B:E               NA         NA      NA       NA
## A:C:E               NA         NA      NA       NA
## B:C:E               NA         NA      NA       NA
## A:D:E               NA         NA      NA       NA
## B:D:E               NA         NA      NA       NA
## C:D:E               NA         NA      NA       NA
## A:B:C:D             NA         NA      NA       NA
## A:B:C:E             NA         NA      NA       NA
## A:B:D:E             NA         NA      NA       NA
## A:C:D:E             NA         NA      NA       NA
## B:C:D:E             NA         NA      NA       NA
## A:B:C:D:E           NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 15 and 0 DF,  p-value: NA
# use lsr coefficients to fit effect sizes to half normal plot
mean.coeffs <- 2*gear.mean.fit$coefficients[!is.na(gear.mean.fit$coefficients)][-1]
halfnormal(mean.coeffs, main="Plot for Mean Fit Coeffs")

sort(abs(mean.coeffs))
##          B:D          A:B          D:E          A:C          A:E 
## 2.688821e-16 1.093750e-01 2.343750e-01 2.656250e-01 4.375000e-01 
##          B:C            D          C:D          C:E          B:E 
## 4.531250e-01 6.250000e-01 1.000000e+00 1.031250e+00 1.156250e+00 
##          A:D            E            C            A            B 
## 1.250000e+00 1.250000e+00 1.984375e+00 3.046875e+00 5.296875e+00
# Note that effects B and A appear to be the strongest, although we may
# also be interested in including C. The E, D, and interaction effects do
# not appear as important. 

# fit a linear model for the 5 treatement levels to find effects on dispersion
gear.disp.fit <- lm(disp ~ A*B*C*D*E, data=gear.mv)
summary(gear.disp.fit)
## 
## Call:
## lm.default(formula = disp ~ A * B * C * D * E, data = gear.mv)
## 
## Residuals:
## ALL 16 residuals are 0: no residual degrees of freedom!
## 
## Coefficients: (16 not defined because of singularities)
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept)  4.273829         NA      NA       NA
## A           -0.104484         NA      NA       NA
## B           -0.095348         NA      NA       NA
## C           -0.116074         NA      NA       NA
## D            0.135309         NA      NA       NA
## E           -0.170608         NA      NA       NA
## A:B         -0.140877         NA      NA       NA
## A:C         -0.018984         NA      NA       NA
## B:C          0.068659         NA      NA       NA
## A:D         -0.112077         NA      NA       NA
## B:D         -0.163465         NA      NA       NA
## C:D         -0.098054         NA      NA       NA
## A:E         -0.010330         NA      NA       NA
## B:E         -0.129677         NA      NA       NA
## C:E         -0.060971         NA      NA       NA
## D:E          0.008776         NA      NA       NA
## A:B:C              NA         NA      NA       NA
## A:B:D              NA         NA      NA       NA
## A:C:D              NA         NA      NA       NA
## B:C:D              NA         NA      NA       NA
## A:B:E              NA         NA      NA       NA
## A:C:E              NA         NA      NA       NA
## B:C:E              NA         NA      NA       NA
## A:D:E              NA         NA      NA       NA
## B:D:E              NA         NA      NA       NA
## C:D:E              NA         NA      NA       NA
## A:B:C:D            NA         NA      NA       NA
## A:B:C:E            NA         NA      NA       NA
## A:B:D:E            NA         NA      NA       NA
## A:C:D:E            NA         NA      NA       NA
## B:C:D:E            NA         NA      NA       NA
## A:B:C:D:E          NA         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 15 and 0 DF,  p-value: NA
# use lsr coefficients to fit effect sizes to half normal plot
disp.coeffs <- 2*gear.disp.fit$coefficients[!is.na(gear.disp.fit$coefficients)][-1]
halfnormal(disp.coeffs, main="Plot for Dispersion Fit Coeffs")

sort(abs(disp.coeffs))
##        D:E        A:E        A:C        C:E        B:C          B 
## 0.01755186 0.02066009 0.03796705 0.12194215 0.13731741 0.19069579 
##        C:D          A        A:D          C        B:E          D 
## 0.19610878 0.20896898 0.22415416 0.23214761 0.25935451 0.27061865 
##        A:B        B:D          E 
## 0.28175466 0.32692993 0.34121656
# This plot is more difficult to read. Most effect coeffs appear to fit
# approximately on the line. I would take E as a main effect, and possibly
# the interaction B:D to help minimize our variance model.
#
#
# The model we will use for mean:
# Mean = -1.523*A - 2.648*B - 9.922*C
# The model we will use for dispersion:
# log(Var) = -0.171*E - 0.163*B*D

# (d)
# To minimize the variance for this model, we would want to choose the
# factor level E = 1. If we choose to include B:D, our choices would 
# depend on whether we were looking for a specific mean. That being said, 
# if we chose a positive B (as it is an effect in the mean), we would choose
# D = 1; if we chose a negative B, we would choose D = -1.

# (e)
gear.a <- read.table("geararray.txt",header=T)
str(gear.a)
## 'data.frame':    128 obs. of  9 variables:
##  $ A : int  1 1 1 1 1 1 1 1 -1 -1 ...
##  $ B : int  1 1 1 1 -1 -1 -1 -1 1 1 ...
##  $ C : int  1 1 -1 -1 1 1 -1 -1 1 1 ...
##  $ D : int  1 -1 1 -1 1 -1 1 -1 1 -1 ...
##  $ E : int  1 -1 -1 1 -1 1 1 -1 -1 1 ...
##  $ z1: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ z2: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ z3: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ y : num  7 13.5 3 10.5 10 6.5 5.5 4 -4 9 ...
gear.a[1:10,]
##     A  B  C  D  E z1 z2 z3    y
## 1   1  1  1  1  1  1  1  1  7.0
## 2   1  1  1 -1 -1  1  1  1 13.5
## 3   1  1 -1  1 -1  1  1  1  3.0
## 4   1  1 -1 -1  1  1  1  1 10.5
## 5   1 -1  1  1 -1  1  1  1 10.0
## 6   1 -1  1 -1  1  1  1  1  6.5
## 7   1 -1 -1  1  1  1  1  1  5.5
## 8   1 -1 -1 -1 -1  1  1  1  4.0
## 9  -1  1  1  1 -1  1  1  1 -4.0
## 10 -1  1  1 -1  1  1  1  1  9.0
gear.a.fit <- lm(y ~ A*B*C*D*E*z1*z2*z3, data=gear.a)
gear.a.coeffs <- 2*gear.a.fit$coefficients[!is.na(gear.a.fit$coefficients)][-1]
halfnormal(gear.a.coeffs, main="Plot for Response Fit Coeffs")

sort(abs(gear.a.coeffs))
##      B:z1:z2 B:C:z1:z2:z3          B:D       B:E:z1       D:E:z3 
## 7.302406e-16 1.831059e-15 2.776794e-15 1.562500e-02 1.562500e-02 
##      D:z2:z3         B:z2        z1:z2      A:z2:z3      E:z1:z2 
## 3.125000e-02 4.687500e-02 6.250000e-02 7.812500e-02 7.812500e-02 
##       A:C:z1       C:E:z3          A:B    A:D:z1:z3       C:D:z2 
## 9.375000e-02 9.375000e-02 1.093750e-01 1.093750e-01 1.250000e-01 
##       A:C:z3       B:C:z1   C:z1:z2:z3    A:C:z1:z3         D:z2 
## 1.406250e-01 1.562500e-01 1.562500e-01 1.875000e-01 1.875000e-01 
##    A:E:z2:z3       C:E:z2         C:z2          D:E C:D:z1:z2:z3 
## 1.875000e-01 2.187500e-01 2.343750e-01 2.343750e-01 2.343750e-01 
##    D:E:z2:z3    B:D:z1:z3      C:z1:z2 A:C:z1:z2:z3    B:C:z2:z3 
## 2.343750e-01 2.343750e-01 2.500000e-01 2.500000e-01 2.656250e-01 
##         E:z1       A:B:z2       B:D:z1          A:C       B:E:z2 
## 2.656250e-01 2.656250e-01 2.656250e-01 2.656250e-01 2.812500e-01 
##         A:z2         A:z1       C:D:z3 B:D:z1:z2:z3    C:E:z1:z3 
## 2.968750e-01 3.125000e-01 3.125000e-01 3.281250e-01 3.281250e-01 
##    B:E:z1:z3       A:D:z1    C:D:z2:z3       A:E:z1   B:z1:z2:z3 
## 3.281250e-01 3.281250e-01 3.437500e-01 3.593750e-01 3.750000e-01 
##    A:D:z2:z3       A:D:z3    A:B:z1:z3      C:z1:z3      D:z1:z2 
## 3.750000e-01 4.062500e-01 4.062500e-01 4.062500e-01 4.218750e-01 
## D:E:z1:z2:z3          A:E    C:D:z1:z2          B:C    A:D:z1:z2 
## 4.375000e-01 4.375000e-01 4.531250e-01 4.531250e-01 4.531250e-01 
##        z2:z3 A:B:z1:z2:z3      A:z1:z2       A:E:z3         A:z3 
## 4.531250e-01 4.687500e-01 4.687500e-01 4.687500e-01 4.843750e-01 
##    A:B:z1:z2    B:C:z1:z3    C:E:z2:z3       D:E:z1      D:z1:z3 
## 5.000000e-01 5.000000e-01 5.000000e-01 5.000000e-01 5.156250e-01 
##   A:z1:z2:z3    D:E:z1:z2      A:z1:z3       A:B:z1    A:B:z2:z3 
## 5.312500e-01 5.312500e-01 5.312500e-01 5.312500e-01 5.468750e-01 
## A:E:z1:z2:z3       A:E:z2         E:z2       B:D:z2    A:C:z1:z2 
## 5.468750e-01 5.625000e-01 5.625000e-01 5.625000e-01 5.625000e-01 
##      E:z2:z3       B:C:z3            D       A:D:z2 A:D:z1:z2:z3 
## 5.937500e-01 6.093750e-01 6.250000e-01 6.250000e-01 6.406250e-01 
##      B:z2:z3   D:z1:z2:z3      E:z1:z3    B:D:z2:z3     z1:z2:z3 
## 6.406250e-01 6.406250e-01 6.718750e-01 6.875000e-01 7.187500e-01 
##   E:z1:z2:z3         D:z1 B:E:z1:z2:z3    B:C:z1:z2 C:E:z1:z2:z3 
## 7.656250e-01 7.656250e-01 7.656250e-01 8.125000e-01 8.281250e-01 
##           z1    B:E:z2:z3    C:D:z1:z3    C:E:z1:z2       B:C:z2 
## 8.437500e-01 8.437500e-01 8.593750e-01 8.593750e-01 8.906250e-01 
##    A:C:z2:z3    A:E:z1:z2    B:E:z1:z2    D:E:z1:z3          C:D 
## 8.906250e-01 8.906250e-01 8.906250e-01 9.375000e-01 1.000000e+00 
##         E:z3    B:D:z1:z2          C:E       B:E:z3       A:B:z3 
## 1.000000e+00 1.015625e+00 1.031250e+00 1.062500e+00 1.140625e+00 
##          B:E         B:z3          A:D            E       D:E:z2 
## 1.156250e+00 1.171875e+00 1.250000e+00 1.250000e+00 1.265625e+00 
##    A:E:z1:z3      B:z1:z3         C:z3           z2       A:C:z2 
## 1.359375e+00 1.375000e+00 1.390625e+00 1.390625e+00 1.390625e+00 
##       B:D:z3      C:z2:z3       C:E:z1         D:z3        z1:z3 
## 1.406250e+00 1.515625e+00 1.578125e+00 1.687500e+00 1.718750e+00 
##         B:z1       C:D:z1            C         C:z1            A 
## 1.843750e+00 1.859375e+00 1.984375e+00 2.593750e+00 3.046875e+00 
##            B           z3 
## 5.296875e+00 1.439062e+01
gear.a.fit$coefficients[1]
## (Intercept) 
##    14.33594
# Our fit shows Z3 as a main effect. We may also wish to include B, A, and 
# C:Z1 as well. 

# The response model is as follows:
# y = 14.336 + 14.391*Z3 + 5.297*B + 3.047*A + 2.594*C1*Z1 
# The expected response:
# E(y) = 14.336 + 5.297*B + 3.047*A
# The variance:
# V(y) = (14.391*sigma(z3))^2 +(2.594*C*sigma(z1))^2 + sigma(e)^2

# (f)
# To minimize V(y), we would choose C = -1. 

# (g)
# If we ran a design similar to our response design, but with B set to the
# the Z1 factor levels, and Z1 now = Z2 * Z3, such that I = Z1*Z2*Z3, we would
# be able to compute all the control factors, as well as all 2 level interactions
# between control and noise factors. All control-noise factors which would be
# aliased would be 3 or more level interactions, which we can generally assume
# to be less important in our analysis. If we wanted to be able to assess all possible
# control-noise variables, we would have to double the size of the study to allow
# for each observation to have a unique assortment of levels.