######################################################
#STAT 430 - Assignment 05
#
#Sarah Rathwell - 301084687
#
#Objective: Investigate factorial designs
######################################################
options(width=200)
rm(list=ls())
library(DoE.base)
## Loading required package: grid
## Loading required package: conf.design
##
## Attaching package: 'DoE.base'
##
## The following objects are masked from 'package:stats':
##
## aov, lm
##
## The following object is masked from 'package:graphics':
##
## plot.design
library(ggplot2)
library(plyr)
##
## Attaching package: 'plyr'
##
## The following object is masked from 'package:conf.design':
##
## join
library(doBy)
## Loading required package: survival
## Loading required package: splines
######################################################
cat(" Investigate factorial designs.\n\n",
"Last modification:",date(), '\n')
## Investigate factorial designs.
##
## Last modification: Mon Nov 24 10:27:35 2014
######################################################
# 1
# build and check data set
Oil <- c(rep(-1,4),rep(1,4))
Carbon <-c(rep(c(rep(-1,2),rep(1,2)),2))
Steel <-c(rep(c(-1,1),4))
OC <-Oil*Carbon
OS <-Oil*Steel
CS <-Carbon*Steel
OCS <-Oil*Carbon*Steel
Spring <-c(67,79,61,75,59,90,52,87)
Spring.df <-data.frame(Oil,Carbon,Steel,OC,OS,CS,OCS,Spring)
Spring.df
## Oil Carbon Steel OC OS CS OCS Spring
## 1 -1 -1 -1 1 1 1 -1 67
## 2 -1 -1 1 1 -1 -1 1 79
## 3 -1 1 -1 -1 1 -1 1 61
## 4 -1 1 1 -1 -1 1 -1 75
## 5 1 -1 -1 -1 -1 1 1 59
## 6 1 -1 1 -1 1 -1 -1 90
## 7 1 1 -1 1 -1 -1 -1 52
## 8 1 1 1 1 1 1 1 87
# a
# Estimate factorial effects
# eg) Oil=mean(y+..)-mean(y-..)
mean(Spring.df$Spring[Oil==1])-mean(Spring.df$Spring[Oil==-1])
## [1] 1.5
# eg) Oil*Carbon=mean(y++.)-mean(y+..)-mean(y.+.)-mean(y--.)+mean(y-..)+mean(y.-.)
mean(Spring.df$Spring[OC==1])-mean(Spring.df$Spring[OC==-1])
## [1] 0
# Create effect matrix (X'Y)/(#obs/2)
X.mat <-as.matrix(Spring.df[,-8])
Spring.ef <-(t(X.mat)%*%Spring)/(length(Spring)/2)
Spring.ef
## [,1]
## Oil 1.5
## Carbon -5.0
## Steel 23.0
## OC 0.0
## OS 10.0
## CS 1.5
## OCS 0.5
# b
# check probability plot for factor effect size
Spring.fit <-lm(Spring~Oil*Carbon*Steel,data=Spring.df)
Spring.coeffs <-2*Spring.fit$coeff[2:8]
halfnormal(Spring.coeffs,main='Plot for Spring Coeffs')

# It appears that Steel alone and the Oil-Steel interaction are the largest
# effects.
# c
# run an ANOVA
# ensure factors are read as factors for analysis
Spring.df$Oilf <-as.factor(Spring.df$Oil)
Spring.df$Carbonf <-as.factor(Spring.df$Carbon)
Spring.df$Steelf <-as.factor(Spring.df$Steel)
Spring.aov <-aov(Spring~Oilf*Carbonf*Steelf,data=Spring.df)
anova(Spring.aov)
## Warning in anova.lm(Spring.aov): ANOVA F-tests on an essentially perfect fit are unreliable
## Analysis of Variance Table
##
## Response: Spring
## Df Sum Sq Mean Sq F value Pr(>F)
## Oilf 1 4.5 4.5
## Carbonf 1 50.0 50.0
## Steelf 1 1058.0 1058.0
## Oilf:Carbonf 1 0.0 0.0
## Oilf:Steelf 1 200.0 200.0
## Carbonf:Steelf 1 4.5 4.5
## Oilf:Carbonf:Steelf 1 0.5 0.5
## Residuals 0 0.0
# although there are no replicates, note that Steel and Oil*Steel have the largest
# sum.sq as shown in the probability plot
# d
# Fit model yijk = mu+T(Oil)i+TV(Oil*Steel)ik+Errorijk
Spring.fit2 <-lm(Spring~Oil+Oil:Steel,data=Spring.df)
summary(Spring.fit2)
##
## Call:
## lm.default(formula = Spring ~ Oil + Oil:Steel, data = Spring.df)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## -8.5 13.5 -14.5 9.5 -8.0 13.0 -15.0 10.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.250 5.275 13.507 3.98e-05 ***
## Oil 0.750 5.275 0.142 0.892
## Oil:Steel 5.000 5.275 0.948 0.387
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.92 on 5 degrees of freedom
## Multiple R-squared: 0.1552, Adjusted R-squared: -0.1827
## F-statistic: 0.4593 on 2 and 5 DF, p-value: 0.6559
# The estimates for sig coefficients are as follows:
# Mu = 71.3
# Ti = 0.8
# (TV)ik = 5.0
# e
# check residuals
par(mfrow=c(2,2))
plot(Spring.fit2)
## hat values (leverages) are all = 0.375
## and there are no factor predictors; no plot no. 5

layout(1,1)
# Clearly we have issues with this plot as there are no residuals so to speak. Without
# having any replicates, our model is based on single data points and will in effect
# look like a perfect fit in terms of residuals
# f
# Check main effects/interaction plots for largest effects (Oil*Steel)
ggplot(Spring.df,aes(Steelf,Spring,color=Oilf))+
geom_boxplot()+
geom_point(aes(y=Spring))+
geom_line(aes(y=Spring,group=Oilf))+
theme_bw()+
ggtitle("Non-Cracked Spring % by Steel Level Over Oil Level")

# From the plot we see that in order ot maximize percentage of non-cracked springs,
# our best option (from the sample) is to choose our higher steel temp (1600);
# secondary to this, we would further maximize the percentage by chosing oil temp
# 100, but only if the higher steel temp is chosen.
######################################################
# 2
# Build and check data set
A <- rep(c(rep(-1,4),rep(1,4)),5)
B <- rep(c(rep(c(rep(-1,2),rep(1,2)),2)),5)
C <- rep(c(-1,1),20)
AB <- A*B
AC <- A*C
CB <- C*B
ABC <- A*B*C
R <- c(54.6,86.2,41.4,62.8,59.6,82,43.4,65.6,73,66.2,51.2,64.8,52.8,72.8,49,65,
139.2,79.2,42.6,74.6,55.2,76.6,48.6,64.2,55.4,86,58.6,74.6,61,73.4,49.6,
60.8,52.6,82.6,58.4,64.6,61,75,55.2,77.4)
Cam.df<-data.frame(A,B,C,AB,AC,CB,ABC,R)
Cam.df[1:15,]
## A B C AB AC CB ABC R
## 1 -1 -1 -1 1 1 1 -1 54.6
## 2 -1 -1 1 1 -1 -1 1 86.2
## 3 -1 1 -1 -1 1 -1 1 41.4
## 4 -1 1 1 -1 -1 1 -1 62.8
## 5 1 -1 -1 -1 -1 1 1 59.6
## 6 1 -1 1 -1 1 -1 -1 82.0
## 7 1 1 -1 1 -1 -1 -1 43.4
## 8 1 1 1 1 1 1 1 65.6
## 9 -1 -1 -1 1 1 1 -1 73.0
## 10 -1 -1 1 1 -1 -1 1 66.2
## 11 -1 1 -1 -1 1 -1 1 51.2
## 12 -1 1 1 -1 -1 1 -1 64.8
## 13 1 -1 -1 -1 -1 1 1 52.8
## 14 1 -1 1 -1 1 -1 -1 72.8
## 15 1 1 -1 1 -1 -1 -1 49.0
# a
# compute sample mean/var for each treatment
M1 <-mean(c(Cam.df$R[1],Cam.df$R[9],Cam.df$R[17],Cam.df$R[25],Cam.df$R[33]))
M2 <-mean(c(Cam.df$R[2],Cam.df$R[10],Cam.df$R[18],Cam.df$R[26],Cam.df$R[34]))
M3 <-mean(c(Cam.df$R[3],Cam.df$R[11],Cam.df$R[19],Cam.df$R[27],Cam.df$R[35]))
M4 <-mean(c(Cam.df$R[4],Cam.df$R[12],Cam.df$R[20],Cam.df$R[28],Cam.df$R[36]))
M5 <-mean(c(Cam.df$R[5],Cam.df$R[13],Cam.df$R[21],Cam.df$R[29],Cam.df$R[37]))
M6 <-mean(c(Cam.df$R[6],Cam.df$R[14],Cam.df$R[22],Cam.df$R[30],Cam.df$R[38]))
M7 <-mean(c(Cam.df$R[7],Cam.df$R[15],Cam.df$R[23],Cam.df$R[31],Cam.df$R[39]))
M8 <-mean(c(Cam.df$R[8],Cam.df$R[16],Cam.df$R[24],Cam.df$R[32],Cam.df$R[40]))
V1 <-var(c(Cam.df$R[1],Cam.df$R[9],Cam.df$R[17],Cam.df$R[25],Cam.df$R[33]))
V2 <-var(c(Cam.df$R[2],Cam.df$R[10],Cam.df$R[18],Cam.df$R[26],Cam.df$R[34]))
V3 <-var(c(Cam.df$R[3],Cam.df$R[11],Cam.df$R[19],Cam.df$R[27],Cam.df$R[35]))
V4 <-var(c(Cam.df$R[4],Cam.df$R[12],Cam.df$R[20],Cam.df$R[28],Cam.df$R[36]))
V5 <-var(c(Cam.df$R[5],Cam.df$R[13],Cam.df$R[21],Cam.df$R[29],Cam.df$R[37]))
V6 <-var(c(Cam.df$R[6],Cam.df$R[14],Cam.df$R[22],Cam.df$R[30],Cam.df$R[38]))
V7 <-var(c(Cam.df$R[7],Cam.df$R[15],Cam.df$R[23],Cam.df$R[31],Cam.df$R[39]))
V8 <-var(c(Cam.df$R[8],Cam.df$R[16],Cam.df$R[24],Cam.df$R[32],Cam.df$R[40]))
c.mean <-c(M1,M2,M3,M4,M5,M6,M7,M8)
c.var <-c(V1,V2,V3,V4,V5,V6,V7,V8)
c.mean
## [1] 74.96 80.04 50.44 68.28 57.92 75.96 49.16 66.60
c.var
## [1] 1356.928 68.068 68.428 33.892 13.852 13.588 17.548 39.900
# b
# Estimate factorial effects
# eg) A=mean(y+...)-mean(y-...)
mean(Cam.df$R[A==1])-mean(Cam.df$R[A==-1])
## [1] -6.02
# eg)AB=mean(y++..)-mean(y+...)-mean(y.+..)-mean(y--..)+mean(y-...)+mean(y.-..)
mean(Cam.df$R[AB==1])-mean(Cam.df$R[AB==-1])
## [1] 4.54
# Create effect matrix (X'Y)/(#obs/2)
Cam.mat <-as.matrix(Cam.df)[,-8]
Cam.ef <-(t(Cam.mat)%*%R)/(length(R)/2)
Cam.ef
## [,1]
## A -6.02
## B -13.60
## C 14.60
## AB 4.54
## AC 3.14
## CB 3.04
## ABC -3.34
# c
# check probability plot for factor effect size
Cam.fit <-lm(R~A*B*C,data=Cam.df)
Cam.coeffs <-2*Cam.fit$coeff[2:8]
halfnormal(Cam.coeffs,main="Plot for Cam Coeffs")

# There is one clear coefficient higher than the others; this would coincide with
# our C factor, with B as the next largest
# d
# run an ANOVA
# ensure factors are read as factors for analysis
Cam.df$Af <-as.factor(Cam.df$A)
Cam.df$Bf <-as.factor(Cam.df$B)
Cam.df$Cf <-as.factor(Cam.df$C)
Cam.aov <-aov(R~Af*Bf*Cf, data=Cam.df)
anova(Cam.aov)
## Analysis of Variance Table
##
## Response: R
## Df Sum Sq Mean Sq F value Pr(>F)
## Af 1 362.4 362.40 1.7983 0.189359
## Bf 1 1849.6 1849.60 9.1780 0.004817 **
## Cf 1 2131.6 2131.60 10.5773 0.002699 **
## Af:Bf 1 206.1 206.12 1.0228 0.319447
## Af:Cf 1 98.6 98.60 0.4892 0.489316
## Bf:Cf 1 92.4 92.42 0.4586 0.503154
## Af:Bf:Cf 1 111.6 111.56 0.5536 0.462296
## Residuals 32 6448.8 201.53
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# As expected from part b, factors B and C are the only ones with significant
# effects (p<0.01 for both)
# e
# Fit model yijkl = mu+B(B)j+V(B)k+Errorijkl
Cam.fit2 <-lm(R~B+C,data=Cam.df)
summary(Cam.fit2)
##
## Call:
## lm.default(formula = R ~ B + C, data = Cam.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.32 -6.27 -2.02 3.28 74.28
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.420 2.224 29.416 < 2e-16 ***
## B -6.800 2.224 -3.058 0.00413 **
## C 7.300 2.224 3.282 0.00225 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.07 on 37 degrees of freedom
## Multiple R-squared: 0.3523, Adjusted R-squared: 0.3173
## F-statistic: 10.06 on 2 and 37 DF, p-value: 0.0003241
# The estimates for coefficients are as follows (se:2.25):
# Mu = 65.4
# Bj = -6.8
# Vk = 7.3
# f
# analyze residuals
par(mfrow=c(2,2))
plot(Cam.aov)

layout(1,1)
# Our qq plot appears apprixomately normal, however on all our residual plots we
# see observation 17 appears to be an extreneous value; it would merit investigation
# g
# compute dispersion effects
Cam.disp.mat <-as.matrix(Cam.df[1:8,1:7])
Cam.disp.mat
## A B C AB AC CB ABC
## 1 -1 -1 -1 1 1 1 -1
## 2 -1 -1 1 1 -1 -1 1
## 3 -1 1 -1 -1 1 -1 1
## 4 -1 1 1 -1 -1 1 -1
## 5 1 -1 -1 -1 -1 1 1
## 6 1 -1 1 -1 1 -1 -1
## 7 1 1 -1 1 -1 -1 -1
## 8 1 1 1 1 1 1 1
Cam.disp.ef <-(t(Cam.disp.mat)%*%log(c.var))/(length(log(c.var))/2)
Cam.disp.ef
## [,1]
## A -1.8483785
## B -0.5927063
## C -0.7232202
## AB 1.2495561
## AC 1.1243171
## CB 0.7826368
## ABC -0.3622973
# h
# check probability plot for dispersion effect size
Cam.disp.df <-data.frame(Cam.df[1:8,1:7],log(c.var))
Disp.fit <-lm(log.c.var.~A*B*C,data=Cam.disp.df)
Disp.coeffs <-2*Disp.fit$coeff[2:8]
halfnormal(Disp.coeffs,main="Plot for Dispersion Coeffs")

# We see one ploint far removed from others which, against our effect estimates,
# is for our A factor.
# i
# Fit model yijkl = mu+Ti(A)+Errorijkl
Disp.fit2 <-lm(log.c.var.~A,data=Cam.disp.df)
summary(Disp.fit2)
##
## Call:
## lm.default(formula = log.c.var. ~ A, data = Cam.disp.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2724 -0.5712 -0.3284 0.1231 2.4174
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8714 0.4303 8.997 0.000105 ***
## A -0.9242 0.4303 -2.148 0.075332 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.217 on 6 degrees of freedom
## Multiple R-squared: 0.4347, Adjusted R-squared: 0.3404
## F-statistic: 4.613 on 1 and 6 DF, p-value: 0.07533
# The estimates for sig dispersion coefficients are as follows:
# Mu = 3.9
# Ti = -0.9
# j
CamInt <-ddply(Cam.df,.(Bf,Cf),summarise,val=mean(R))
ggplot(Cam.df,aes(Bf,R,color=Cf))+
geom_boxplot()+
geom_point(data=CamInt,aes(y=val))+
geom_line(data=CamInt,aes(y=val,group=Cf))+
geom_hline(aes(yintercept=75))+
theme_bw()+
ggtitle("Roughness by Factors B and C")

# From the plot, and based on the information we gathered in our two regression
# models, it seems the best option for optimizing roughness at 75 would be to choose
# B and C such that our linear model y=75, and A s/t our dispersion model y is
# minimized.
# plot line of Y=75 from Cam.fit2 with regression coeffs B&C (continuous)
x.reg1 <-c(-1,1)
y.reg1 <-1.32+.93*x.reg1
plot(x.reg1,y.reg1,type='l',main="Roughness lm with factors C and B",ylab=
'C',xlab='B',xlim=c(-1,1),ylim=c(-1,1))
abline(h=1,lty=2) # add the general bounds of A and B
abline(h=-1,lty=2)
abline(v=1,lty=2)
abline(v=-1,lty=2)

# locate bounds of line given general bounds of B and C
1.32+.93*(-1) #lower bound of C (upper=1)
## [1] 0.39
(1-1.32)/.93 #upper bound of B (lower=-1)
## [1] -0.344086
# plot possible dispersion with Disp.fit2 regression coeff A (discrete)
x.reg2 <-c(-1,1)
y.reg2 <-3.9-0.9*x.reg2
plot(factor(x.reg2),y.reg2,type='h',main="Dispersion lm with factor A",ylab='LM',xlab='A')

# For our plot of Cam.fit2 setting Y=75, we see that we can maintain Y=75 with
# infinite combinations of C from [0.39,1] and B from [-1,-0.34]
# Because given our dispersion model, we see that the variance is minimized for
# A = 1
# Although our analysis did not necessarily show that B or C are significant factors
# in our dispersion model, if changes in B and C are not too expensive or time
# consuming, it may be to our benefit to include them in our attempt to minimize
# variance anyway. Looking back at our effects for dispersion, note the next highest
# is our AB then AC interactions. As both of these interactions are positive, and
# we know it is preferable to chose A=1, we would minimize variance by minimizing
# B and C. This would be done by choosing the combination (C,B)=(0.39,-1)
#
# Conclusion:
# If it does not negatively effect cost/efficiency, our most suitible option would
# be to chose the following factor combinations: A = 1, C = 0.39, B = -1.
######################################################
# 6
# enter data from Eg. 6.2 (p257) w/obs ab missing
A <-rep(c(-1,1),8)
B <-rep(c(rep(-1,2),rep(1,2)),4)
C <-rep(c(rep(-1,4),rep(1,4)),2)
D <-c(rep(-1,8),rep(1,8))
AB <-A*B
AC <-A*C
AD <-A*D
BC <-B*C
BD <-B*D
CD <-C*D
ABC <-A*B*C
ABD <-A*B*D
ACD <-A*C*D
BCD <-B*C*D
ABCD <-A*B*C*D
Fr <-c(45,71,48,NA,68,60,80,65,43,100,45,104,75,86,70,96)
P.df <-data.frame(A,B,C,D,AB,AC,AD,BC,BD,CD,ABC,ABD,ACD,BCD,ABCD,Fr)
P.df
## A B C D AB AC AD BC BD CD ABC ABD ACD BCD ABCD Fr
## 1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 -1 -1 1 45
## 2 1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 71
## 3 -1 1 -1 -1 -1 1 1 -1 -1 1 1 1 -1 1 -1 48
## 4 1 1 -1 -1 1 -1 -1 -1 -1 1 -1 -1 1 1 1 NA
## 5 -1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 1 -1 68
## 6 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 1 -1 1 1 60
## 7 -1 1 1 -1 -1 -1 1 1 -1 -1 -1 1 1 -1 1 80
## 8 1 1 1 -1 1 1 -1 1 -1 -1 1 -1 -1 -1 -1 65
## 9 -1 -1 -1 1 1 1 -1 1 -1 -1 -1 1 1 1 -1 43
## 10 1 -1 -1 1 -1 -1 1 1 -1 -1 1 -1 -1 1 1 100
## 11 -1 1 -1 1 -1 1 -1 -1 1 -1 1 -1 1 -1 1 45
## 12 1 1 -1 1 1 -1 1 -1 1 -1 -1 1 -1 -1 -1 104
## 13 -1 -1 1 1 1 -1 -1 -1 -1 1 1 1 -1 -1 1 75
## 14 1 -1 1 1 -1 1 1 -1 -1 1 -1 -1 1 -1 -1 86
## 15 -1 1 1 1 -1 -1 -1 1 1 1 -1 -1 -1 1 -1 70
## 16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 96
# estimate missing val so highest order int contrast = 0
ABCD1 <-P.df$Fr[ABCD==1] #includes NA
ABCD1 #NA is in second placeholder
## [1] 45 NA 60 80 100 45 75 96
null.val <-mean(P.df$Fr[ABCD==-1]) * length(ABCD1) - sum(ABCD1[-2])
null.val
## [1] 54
# set missing val (placeholder [4,6]) to 54
P.new.df <-P.df
P.new.df[4,16] <-null.val
P.new.df
## A B C D AB AC AD BC BD CD ABC ABD ACD BCD ABCD Fr
## 1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 -1 -1 1 45
## 2 1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 71
## 3 -1 1 -1 -1 -1 1 1 -1 -1 1 1 1 -1 1 -1 48
## 4 1 1 -1 -1 1 -1 -1 -1 -1 1 -1 -1 1 1 1 54
## 5 -1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 1 -1 68
## 6 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 1 -1 1 1 60
## 7 -1 1 1 -1 -1 -1 1 1 -1 -1 -1 1 1 -1 1 80
## 8 1 1 1 -1 1 1 -1 1 -1 -1 1 -1 -1 -1 -1 65
## 9 -1 -1 -1 1 1 1 -1 1 -1 -1 -1 1 1 1 -1 43
## 10 1 -1 -1 1 -1 -1 1 1 -1 -1 1 -1 -1 1 1 100
## 11 -1 1 -1 1 -1 1 -1 -1 1 -1 1 -1 1 -1 1 45
## 12 1 1 -1 1 1 -1 1 -1 1 -1 -1 1 -1 -1 -1 104
## 13 -1 -1 1 1 1 -1 -1 -1 -1 1 1 1 -1 -1 1 75
## 14 1 -1 1 1 -1 1 1 -1 -1 1 -1 -1 1 -1 -1 86
## 15 -1 1 1 1 -1 -1 -1 1 1 1 -1 -1 -1 1 -1 70
## 16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 96
# do analysis of effects
P.new.fit <-lm(Fr~A*B*C*D,data=P.new.df)
P.coeffs <-2*P.new.fit$coeff[2:16]
halfnormal(P.coeffs,main='Plot for P.coeffs')

# note that the results we get from estimating our missing value ab is similar to
# that of the original: our significant effects appear to be A,C,D,AC, and AD
# estimate effects
P.mat <-as.matrix(P.new.df[,1:15])
P.mat
## A B C D AB AC AD BC BD CD ABC ABD ACD BCD ABCD
## [1,] -1 -1 -1 -1 1 1 1 1 1 1 -1 -1 -1 -1 1
## [2,] 1 -1 -1 -1 -1 -1 -1 1 1 1 1 1 1 -1 -1
## [3,] -1 1 -1 -1 -1 1 1 -1 -1 1 1 1 -1 1 -1
## [4,] 1 1 -1 -1 1 -1 -1 -1 -1 1 -1 -1 1 1 1
## [5,] -1 -1 1 -1 1 -1 1 -1 1 -1 1 -1 1 1 -1
## [6,] 1 -1 1 -1 -1 1 -1 -1 1 -1 -1 1 -1 1 1
## [7,] -1 1 1 -1 -1 -1 1 1 -1 -1 -1 1 1 -1 1
## [8,] 1 1 1 -1 1 1 -1 1 -1 -1 1 -1 -1 -1 -1
## [9,] -1 -1 -1 1 1 1 -1 1 -1 -1 -1 1 1 1 -1
## [10,] 1 -1 -1 1 -1 -1 1 1 -1 -1 1 -1 -1 1 1
## [11,] -1 1 -1 1 -1 1 -1 -1 1 -1 1 -1 1 -1 1
## [12,] 1 1 -1 1 1 -1 1 -1 1 -1 -1 1 -1 -1 -1
## [13,] -1 -1 1 1 1 -1 -1 -1 -1 1 1 1 -1 -1 1
## [14,] 1 -1 1 1 -1 1 1 -1 -1 1 -1 -1 1 -1 -1
## [15,] -1 1 1 1 -1 -1 -1 1 1 1 -1 -1 -1 1 -1
## [16,] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Fr.vec <-P.new.df$Fr
P.ef <-(t(P.mat)%*%Fr.vec)/(length(Fr.vec)/2)
P.ef
## [,1]
## A 20.25
## B 1.75
## C 11.25
## D 16.00
## AB -1.25
## AC -16.75
## AD 18.00
## BC 3.75
## BD 1.00
## CD -2.50
## ABC 3.25
## ABD 5.50
## ACD -3.00
## BCD -4.00
## ABCD 0.00
# Note that our significant interactions are only slightly larger than those
# originally estimated:
# A: 20.25 vs 21.63
# C: 11.25 vs 9.88
# D: 16.00 vs 14.63
# AC: -16.75 vs -18.13
# AD: 18.00 vs 16.63