Assignment5.R

######################################################
#STAT 430 - Assignment 05
#
#Sarah Rathwell - 301084687
#
#Objective: Investigate factorial designs
######################################################
options(width=200)
rm(list=ls())
library(DoE.base)

## Loading required package: grid
## Loading required package: conf.design
## 
## Attaching package: 'DoE.base'
## 
## The following objects are masked from 'package:stats':
## 
##     aov, lm
## 
## The following object is masked from 'package:graphics':
## 
##     plot.design

library(ggplot2)
library(plyr)

## 
## Attaching package: 'plyr'
## 
## The following object is masked from 'package:conf.design':
## 
##     join

library(doBy)

## Loading required package: survival
## Loading required package: splines

######################################################
cat(" Investigate factorial designs.\n\n", 
    "Last modification:",date(), '\n')

##  Investigate factorial designs.
## 
##  Last modification: Mon Nov 24 10:27:35 2014

######################################################
# 1
# build and check data set
Oil       <- c(rep(-1,4),rep(1,4))
Carbon    <-c(rep(c(rep(-1,2),rep(1,2)),2))
Steel     <-c(rep(c(-1,1),4))
OC        <-Oil*Carbon
OS        <-Oil*Steel
CS        <-Carbon*Steel
OCS       <-Oil*Carbon*Steel
Spring    <-c(67,79,61,75,59,90,52,87)
Spring.df <-data.frame(Oil,Carbon,Steel,OC,OS,CS,OCS,Spring)
Spring.df

##   Oil Carbon Steel OC OS CS OCS Spring
## 1  -1     -1    -1  1  1  1  -1     67
## 2  -1     -1     1  1 -1 -1   1     79
## 3  -1      1    -1 -1  1 -1   1     61
## 4  -1      1     1 -1 -1  1  -1     75
## 5   1     -1    -1 -1 -1  1   1     59
## 6   1     -1     1 -1  1 -1  -1     90
## 7   1      1    -1  1 -1 -1  -1     52
## 8   1      1     1  1  1  1   1     87

# a
# Estimate factorial effects
# eg) Oil=mean(y+..)-mean(y-..)

mean(Spring.df$Spring[Oil==1])-mean(Spring.df$Spring[Oil==-1])

## [1] 1.5

# eg) Oil*Carbon=mean(y++.)-mean(y+..)-mean(y.+.)-mean(y--.)+mean(y-..)+mean(y.-.)

mean(Spring.df$Spring[OC==1])-mean(Spring.df$Spring[OC==-1])

## [1] 0

# Create effect matrix (X'Y)/(#obs/2)
X.mat     <-as.matrix(Spring.df[,-8])
Spring.ef <-(t(X.mat)%*%Spring)/(length(Spring)/2)
Spring.ef

##        [,1]
## Oil     1.5
## Carbon -5.0
## Steel  23.0
## OC      0.0
## OS     10.0
## CS      1.5
## OCS     0.5

# b
# check probability plot for factor effect size
Spring.fit    <-lm(Spring~Oil*Carbon*Steel,data=Spring.df)
Spring.coeffs <-2*Spring.fit$coeff[2:8]
halfnormal(Spring.coeffs,main='Plot for Spring Coeffs')

# It appears that Steel alone and the Oil-Steel interaction are the largest
# effects.

# c
# run an ANOVA
# ensure factors are read as factors for analysis
Spring.df$Oilf <-as.factor(Spring.df$Oil)
Spring.df$Carbonf <-as.factor(Spring.df$Carbon)
Spring.df$Steelf <-as.factor(Spring.df$Steel)

Spring.aov <-aov(Spring~Oilf*Carbonf*Steelf,data=Spring.df)
anova(Spring.aov)

## Warning in anova.lm(Spring.aov): ANOVA F-tests on an essentially perfect fit are unreliable

## Analysis of Variance Table
## 
## Response: Spring
##                     Df Sum Sq Mean Sq F value Pr(>F)
## Oilf                 1    4.5     4.5               
## Carbonf              1   50.0    50.0               
## Steelf               1 1058.0  1058.0               
## Oilf:Carbonf         1    0.0     0.0               
## Oilf:Steelf          1  200.0   200.0               
## Carbonf:Steelf       1    4.5     4.5               
## Oilf:Carbonf:Steelf  1    0.5     0.5               
## Residuals            0    0.0

# although there are no replicates, note that Steel and Oil*Steel have the largest
# sum.sq as shown in the probability plot

# d
# Fit model yijk = mu+T(Oil)i+TV(Oil*Steel)ik+Errorijk
Spring.fit2 <-lm(Spring~Oil+Oil:Steel,data=Spring.df)
summary(Spring.fit2)

## 
## Call:
## lm.default(formula = Spring ~ Oil + Oil:Steel, data = Spring.df)
## 
## Residuals:
##     1     2     3     4     5     6     7     8 
##  -8.5  13.5 -14.5   9.5  -8.0  13.0 -15.0  10.0 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   71.250      5.275  13.507 3.98e-05 ***
## Oil            0.750      5.275   0.142    0.892    
## Oil:Steel      5.000      5.275   0.948    0.387    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.92 on 5 degrees of freedom
## Multiple R-squared:  0.1552, Adjusted R-squared:  -0.1827 
## F-statistic: 0.4593 on 2 and 5 DF,  p-value: 0.6559

# The estimates for sig coefficients are as follows:
# Mu = 71.3
# Ti = 0.8
# (TV)ik = 5.0

# e
# check residuals
par(mfrow=c(2,2))
plot(Spring.fit2)

## hat values (leverages) are all = 0.375
##  and there are no factor predictors; no plot no. 5

layout(1,1)
# Clearly we have issues with this plot as there are no residuals so to speak. Without
# having any replicates, our model is based on single data points and will in effect
# look like a perfect fit in terms of residuals

# f
# Check main effects/interaction plots for largest effects (Oil*Steel)
ggplot(Spring.df,aes(Steelf,Spring,color=Oilf))+
  geom_boxplot()+
  geom_point(aes(y=Spring))+
  geom_line(aes(y=Spring,group=Oilf))+
  theme_bw()+
  ggtitle("Non-Cracked Spring % by Steel Level Over Oil Level")

# From the plot we see that in order ot maximize percentage of non-cracked springs,
# our best option (from the sample) is to choose our higher steel temp (1600);
# secondary to this, we would further maximize the percentage by chosing oil temp
# 100, but only if the higher steel temp is chosen. 
######################################################
# 2
# Build and check data set
A     <- rep(c(rep(-1,4),rep(1,4)),5)
B     <- rep(c(rep(c(rep(-1,2),rep(1,2)),2)),5)
C     <- rep(c(-1,1),20)
AB    <- A*B
AC    <- A*C
CB    <- C*B
ABC   <- A*B*C
R     <- c(54.6,86.2,41.4,62.8,59.6,82,43.4,65.6,73,66.2,51.2,64.8,52.8,72.8,49,65,
          139.2,79.2,42.6,74.6,55.2,76.6,48.6,64.2,55.4,86,58.6,74.6,61,73.4,49.6,
          60.8,52.6,82.6,58.4,64.6,61,75,55.2,77.4)
Cam.df<-data.frame(A,B,C,AB,AC,CB,ABC,R)
Cam.df[1:15,]

##     A  B  C AB AC CB ABC    R
## 1  -1 -1 -1  1  1  1  -1 54.6
## 2  -1 -1  1  1 -1 -1   1 86.2
## 3  -1  1 -1 -1  1 -1   1 41.4
## 4  -1  1  1 -1 -1  1  -1 62.8
## 5   1 -1 -1 -1 -1  1   1 59.6
## 6   1 -1  1 -1  1 -1  -1 82.0
## 7   1  1 -1  1 -1 -1  -1 43.4
## 8   1  1  1  1  1  1   1 65.6
## 9  -1 -1 -1  1  1  1  -1 73.0
## 10 -1 -1  1  1 -1 -1   1 66.2
## 11 -1  1 -1 -1  1 -1   1 51.2
## 12 -1  1  1 -1 -1  1  -1 64.8
## 13  1 -1 -1 -1 -1  1   1 52.8
## 14  1 -1  1 -1  1 -1  -1 72.8
## 15  1  1 -1  1 -1 -1  -1 49.0

# a
# compute sample mean/var for each treatment
M1 <-mean(c(Cam.df$R[1],Cam.df$R[9],Cam.df$R[17],Cam.df$R[25],Cam.df$R[33]))
M2 <-mean(c(Cam.df$R[2],Cam.df$R[10],Cam.df$R[18],Cam.df$R[26],Cam.df$R[34]))
M3 <-mean(c(Cam.df$R[3],Cam.df$R[11],Cam.df$R[19],Cam.df$R[27],Cam.df$R[35]))
M4 <-mean(c(Cam.df$R[4],Cam.df$R[12],Cam.df$R[20],Cam.df$R[28],Cam.df$R[36]))
M5 <-mean(c(Cam.df$R[5],Cam.df$R[13],Cam.df$R[21],Cam.df$R[29],Cam.df$R[37]))
M6 <-mean(c(Cam.df$R[6],Cam.df$R[14],Cam.df$R[22],Cam.df$R[30],Cam.df$R[38]))
M7 <-mean(c(Cam.df$R[7],Cam.df$R[15],Cam.df$R[23],Cam.df$R[31],Cam.df$R[39]))
M8 <-mean(c(Cam.df$R[8],Cam.df$R[16],Cam.df$R[24],Cam.df$R[32],Cam.df$R[40]))

V1 <-var(c(Cam.df$R[1],Cam.df$R[9],Cam.df$R[17],Cam.df$R[25],Cam.df$R[33]))
V2 <-var(c(Cam.df$R[2],Cam.df$R[10],Cam.df$R[18],Cam.df$R[26],Cam.df$R[34]))
V3 <-var(c(Cam.df$R[3],Cam.df$R[11],Cam.df$R[19],Cam.df$R[27],Cam.df$R[35]))
V4 <-var(c(Cam.df$R[4],Cam.df$R[12],Cam.df$R[20],Cam.df$R[28],Cam.df$R[36]))
V5 <-var(c(Cam.df$R[5],Cam.df$R[13],Cam.df$R[21],Cam.df$R[29],Cam.df$R[37]))
V6 <-var(c(Cam.df$R[6],Cam.df$R[14],Cam.df$R[22],Cam.df$R[30],Cam.df$R[38]))
V7 <-var(c(Cam.df$R[7],Cam.df$R[15],Cam.df$R[23],Cam.df$R[31],Cam.df$R[39]))
V8 <-var(c(Cam.df$R[8],Cam.df$R[16],Cam.df$R[24],Cam.df$R[32],Cam.df$R[40]))

c.mean <-c(M1,M2,M3,M4,M5,M6,M7,M8)
c.var  <-c(V1,V2,V3,V4,V5,V6,V7,V8)
c.mean

## [1] 74.96 80.04 50.44 68.28 57.92 75.96 49.16 66.60

c.var

## [1] 1356.928   68.068   68.428   33.892   13.852   13.588   17.548   39.900

# b
# Estimate factorial effects
# eg) A=mean(y+...)-mean(y-...)

mean(Cam.df$R[A==1])-mean(Cam.df$R[A==-1])

## [1] -6.02

# eg)AB=mean(y++..)-mean(y+...)-mean(y.+..)-mean(y--..)+mean(y-...)+mean(y.-..)

mean(Cam.df$R[AB==1])-mean(Cam.df$R[AB==-1])

## [1] 4.54

# Create effect matrix (X'Y)/(#obs/2)
Cam.mat <-as.matrix(Cam.df)[,-8]

Cam.ef <-(t(Cam.mat)%*%R)/(length(R)/2)
Cam.ef

##       [,1]
## A    -6.02
## B   -13.60
## C    14.60
## AB    4.54
## AC    3.14
## CB    3.04
## ABC  -3.34

# c
# check probability plot for factor effect size
Cam.fit    <-lm(R~A*B*C,data=Cam.df)
Cam.coeffs <-2*Cam.fit$coeff[2:8]
halfnormal(Cam.coeffs,main="Plot for Cam Coeffs")

# There is one clear coefficient higher than the others; this would coincide with
# our C factor, with B as the next largest

# d
# run an ANOVA
# ensure factors are read as factors for analysis
Cam.df$Af <-as.factor(Cam.df$A)
Cam.df$Bf <-as.factor(Cam.df$B)
Cam.df$Cf <-as.factor(Cam.df$C)

Cam.aov <-aov(R~Af*Bf*Cf, data=Cam.df)
anova(Cam.aov)

## Analysis of Variance Table
## 
## Response: R
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## Af         1  362.4  362.40  1.7983 0.189359   
## Bf         1 1849.6 1849.60  9.1780 0.004817 **
## Cf         1 2131.6 2131.60 10.5773 0.002699 **
## Af:Bf      1  206.1  206.12  1.0228 0.319447   
## Af:Cf      1   98.6   98.60  0.4892 0.489316   
## Bf:Cf      1   92.4   92.42  0.4586 0.503154   
## Af:Bf:Cf   1  111.6  111.56  0.5536 0.462296   
## Residuals 32 6448.8  201.53                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# As expected from part b, factors B and C are the only ones with significant
# effects (p<0.01 for both)

# e
# Fit model yijkl = mu+B(B)j+V(B)k+Errorijkl 
Cam.fit2 <-lm(R~B+C,data=Cam.df)
summary(Cam.fit2)

## 
## Call:
## lm.default(formula = R ~ B + C, data = Cam.df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -13.32  -6.27  -2.02   3.28  74.28 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   65.420      2.224  29.416  < 2e-16 ***
## B             -6.800      2.224  -3.058  0.00413 ** 
## C              7.300      2.224   3.282  0.00225 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.07 on 37 degrees of freedom
## Multiple R-squared:  0.3523, Adjusted R-squared:  0.3173 
## F-statistic: 10.06 on 2 and 37 DF,  p-value: 0.0003241

# The estimates for coefficients are as follows (se:2.25):
# Mu = 65.4
# Bj = -6.8
# Vk = 7.3

# f
# analyze residuals
par(mfrow=c(2,2))
plot(Cam.aov)

layout(1,1)
# Our qq plot appears apprixomately normal, however on all our residual plots we
# see observation 17 appears to be an extreneous value; it would merit investigation

# g
# compute dispersion effects
Cam.disp.mat <-as.matrix(Cam.df[1:8,1:7])
Cam.disp.mat

##    A  B  C AB AC CB ABC
## 1 -1 -1 -1  1  1  1  -1
## 2 -1 -1  1  1 -1 -1   1
## 3 -1  1 -1 -1  1 -1   1
## 4 -1  1  1 -1 -1  1  -1
## 5  1 -1 -1 -1 -1  1   1
## 6  1 -1  1 -1  1 -1  -1
## 7  1  1 -1  1 -1 -1  -1
## 8  1  1  1  1  1  1   1

Cam.disp.ef <-(t(Cam.disp.mat)%*%log(c.var))/(length(log(c.var))/2)
Cam.disp.ef

##           [,1]
## A   -1.8483785
## B   -0.5927063
## C   -0.7232202
## AB   1.2495561
## AC   1.1243171
## CB   0.7826368
## ABC -0.3622973

# h
# check probability plot for dispersion effect size
Cam.disp.df <-data.frame(Cam.df[1:8,1:7],log(c.var))
Disp.fit    <-lm(log.c.var.~A*B*C,data=Cam.disp.df)
Disp.coeffs <-2*Disp.fit$coeff[2:8]
halfnormal(Disp.coeffs,main="Plot for Dispersion Coeffs")

# We see one ploint far removed from others which, against our effect estimates, 
# is for our A factor.

# i
# Fit model yijkl = mu+Ti(A)+Errorijkl
Disp.fit2 <-lm(log.c.var.~A,data=Cam.disp.df)
summary(Disp.fit2)

## 
## Call:
## lm.default(formula = log.c.var. ~ A, data = Cam.disp.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2724 -0.5712 -0.3284  0.1231  2.4174 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.8714     0.4303   8.997 0.000105 ***
## A            -0.9242     0.4303  -2.148 0.075332 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.217 on 6 degrees of freedom
## Multiple R-squared:  0.4347, Adjusted R-squared:  0.3404 
## F-statistic: 4.613 on 1 and 6 DF,  p-value: 0.07533

# The estimates for sig dispersion coefficients are as follows:
# Mu = 3.9
# Ti = -0.9

# j
CamInt <-ddply(Cam.df,.(Bf,Cf),summarise,val=mean(R))
ggplot(Cam.df,aes(Bf,R,color=Cf))+
  geom_boxplot()+
  geom_point(data=CamInt,aes(y=val))+
  geom_line(data=CamInt,aes(y=val,group=Cf))+
  geom_hline(aes(yintercept=75))+
  theme_bw()+
  ggtitle("Roughness by Factors B and C")

# From the plot, and based on the information we gathered in our two regression
# models, it seems the best option for optimizing roughness at 75 would be to choose
# B and C such that our linear model y=75, and A s/t our dispersion model y is 
# minimized.

# plot line of Y=75 from Cam.fit2 with regression coeffs B&C (continuous)
x.reg1 <-c(-1,1)
y.reg1 <-1.32+.93*x.reg1
plot(x.reg1,y.reg1,type='l',main="Roughness lm with factors C and B",ylab=
       'C',xlab='B',xlim=c(-1,1),ylim=c(-1,1))
abline(h=1,lty=2) # add the general bounds of A and B
abline(h=-1,lty=2)
abline(v=1,lty=2)
abline(v=-1,lty=2)

# locate bounds of line given general bounds of B and C
1.32+.93*(-1) #lower bound of C (upper=1)

## [1] 0.39

(1-1.32)/.93  #upper bound of B (lower=-1)

## [1] -0.344086

# plot possible dispersion with Disp.fit2 regression coeff A (discrete)
x.reg2 <-c(-1,1)
y.reg2 <-3.9-0.9*x.reg2
plot(factor(x.reg2),y.reg2,type='h',main="Dispersion lm with factor A",ylab='LM',xlab='A')

# For our plot of Cam.fit2 setting Y=75, we see that we can maintain Y=75 with 
# infinite combinations of C from [0.39,1] and B from [-1,-0.34]
# Because given our dispersion model, we see that the variance is minimized for
# A = 1
# Although our analysis did not necessarily show that B or C are significant factors
# in our dispersion model, if changes in B and C are not too expensive or time 
# consuming, it may be to our benefit to include them in our attempt to minimize
# variance anyway. Looking back at our effects for dispersion, note the next highest
# is our AB then AC interactions. As both of these interactions are positive, and
# we know it is preferable to chose A=1, we would minimize variance by minimizing
# B and C. This would be done by choosing the combination (C,B)=(0.39,-1)
#
# Conclusion:
# If it does not negatively effect cost/efficiency, our most suitible option would
# be to chose the following factor combinations: A = 1, C = 0.39, B = -1. 
######################################################
# 6
# enter data from Eg. 6.2 (p257) w/obs ab missing
A    <-rep(c(-1,1),8)
B    <-rep(c(rep(-1,2),rep(1,2)),4)
C    <-rep(c(rep(-1,4),rep(1,4)),2)
D    <-c(rep(-1,8),rep(1,8))
AB   <-A*B
AC   <-A*C
AD   <-A*D
BC   <-B*C
BD   <-B*D
CD   <-C*D
ABC  <-A*B*C
ABD  <-A*B*D
ACD  <-A*C*D
BCD  <-B*C*D
ABCD <-A*B*C*D
Fr   <-c(45,71,48,NA,68,60,80,65,43,100,45,104,75,86,70,96)
P.df <-data.frame(A,B,C,D,AB,AC,AD,BC,BD,CD,ABC,ABD,ACD,BCD,ABCD,Fr)
P.df

##     A  B  C  D AB AC AD BC BD CD ABC ABD ACD BCD ABCD  Fr
## 1  -1 -1 -1 -1  1  1  1  1  1  1  -1  -1  -1  -1    1  45
## 2   1 -1 -1 -1 -1 -1 -1  1  1  1   1   1   1  -1   -1  71
## 3  -1  1 -1 -1 -1  1  1 -1 -1  1   1   1  -1   1   -1  48
## 4   1  1 -1 -1  1 -1 -1 -1 -1  1  -1  -1   1   1    1  NA
## 5  -1 -1  1 -1  1 -1  1 -1  1 -1   1  -1   1   1   -1  68
## 6   1 -1  1 -1 -1  1 -1 -1  1 -1  -1   1  -1   1    1  60
## 7  -1  1  1 -1 -1 -1  1  1 -1 -1  -1   1   1  -1    1  80
## 8   1  1  1 -1  1  1 -1  1 -1 -1   1  -1  -1  -1   -1  65
## 9  -1 -1 -1  1  1  1 -1  1 -1 -1  -1   1   1   1   -1  43
## 10  1 -1 -1  1 -1 -1  1  1 -1 -1   1  -1  -1   1    1 100
## 11 -1  1 -1  1 -1  1 -1 -1  1 -1   1  -1   1  -1    1  45
## 12  1  1 -1  1  1 -1  1 -1  1 -1  -1   1  -1  -1   -1 104
## 13 -1 -1  1  1  1 -1 -1 -1 -1  1   1   1  -1  -1    1  75
## 14  1 -1  1  1 -1  1  1 -1 -1  1  -1  -1   1  -1   -1  86
## 15 -1  1  1  1 -1 -1 -1  1  1  1  -1  -1  -1   1   -1  70
## 16  1  1  1  1  1  1  1  1  1  1   1   1   1   1    1  96

# estimate missing val so highest order int contrast = 0
ABCD1    <-P.df$Fr[ABCD==1] #includes NA
ABCD1                       #NA is in second placeholder

## [1]  45  NA  60  80 100  45  75  96

null.val <-mean(P.df$Fr[ABCD==-1]) * length(ABCD1) - sum(ABCD1[-2])
null.val

## [1] 54

# set missing val (placeholder [4,6]) to 54
P.new.df      <-P.df
P.new.df[4,16] <-null.val
P.new.df

##     A  B  C  D AB AC AD BC BD CD ABC ABD ACD BCD ABCD  Fr
## 1  -1 -1 -1 -1  1  1  1  1  1  1  -1  -1  -1  -1    1  45
## 2   1 -1 -1 -1 -1 -1 -1  1  1  1   1   1   1  -1   -1  71
## 3  -1  1 -1 -1 -1  1  1 -1 -1  1   1   1  -1   1   -1  48
## 4   1  1 -1 -1  1 -1 -1 -1 -1  1  -1  -1   1   1    1  54
## 5  -1 -1  1 -1  1 -1  1 -1  1 -1   1  -1   1   1   -1  68
## 6   1 -1  1 -1 -1  1 -1 -1  1 -1  -1   1  -1   1    1  60
## 7  -1  1  1 -1 -1 -1  1  1 -1 -1  -1   1   1  -1    1  80
## 8   1  1  1 -1  1  1 -1  1 -1 -1   1  -1  -1  -1   -1  65
## 9  -1 -1 -1  1  1  1 -1  1 -1 -1  -1   1   1   1   -1  43
## 10  1 -1 -1  1 -1 -1  1  1 -1 -1   1  -1  -1   1    1 100
## 11 -1  1 -1  1 -1  1 -1 -1  1 -1   1  -1   1  -1    1  45
## 12  1  1 -1  1  1 -1  1 -1  1 -1  -1   1  -1  -1   -1 104
## 13 -1 -1  1  1  1 -1 -1 -1 -1  1   1   1  -1  -1    1  75
## 14  1 -1  1  1 -1  1  1 -1 -1  1  -1  -1   1  -1   -1  86
## 15 -1  1  1  1 -1 -1 -1  1  1  1  -1  -1  -1   1   -1  70
## 16  1  1  1  1  1  1  1  1  1  1   1   1   1   1    1  96

# do analysis of effects
P.new.fit <-lm(Fr~A*B*C*D,data=P.new.df)
P.coeffs   <-2*P.new.fit$coeff[2:16]
halfnormal(P.coeffs,main='Plot for P.coeffs')

# note that the results we get from estimating our missing value ab is similar to
# that of the original: our significant effects appear to be A,C,D,AC, and AD

# estimate effects
P.mat  <-as.matrix(P.new.df[,1:15])
P.mat

##        A  B  C  D AB AC AD BC BD CD ABC ABD ACD BCD ABCD
##  [1,] -1 -1 -1 -1  1  1  1  1  1  1  -1  -1  -1  -1    1
##  [2,]  1 -1 -1 -1 -1 -1 -1  1  1  1   1   1   1  -1   -1
##  [3,] -1  1 -1 -1 -1  1  1 -1 -1  1   1   1  -1   1   -1
##  [4,]  1  1 -1 -1  1 -1 -1 -1 -1  1  -1  -1   1   1    1
##  [5,] -1 -1  1 -1  1 -1  1 -1  1 -1   1  -1   1   1   -1
##  [6,]  1 -1  1 -1 -1  1 -1 -1  1 -1  -1   1  -1   1    1
##  [7,] -1  1  1 -1 -1 -1  1  1 -1 -1  -1   1   1  -1    1
##  [8,]  1  1  1 -1  1  1 -1  1 -1 -1   1  -1  -1  -1   -1
##  [9,] -1 -1 -1  1  1  1 -1  1 -1 -1  -1   1   1   1   -1
## [10,]  1 -1 -1  1 -1 -1  1  1 -1 -1   1  -1  -1   1    1
## [11,] -1  1 -1  1 -1  1 -1 -1  1 -1   1  -1   1  -1    1
## [12,]  1  1 -1  1  1 -1  1 -1  1 -1  -1   1  -1  -1   -1
## [13,] -1 -1  1  1  1 -1 -1 -1 -1  1   1   1  -1  -1    1
## [14,]  1 -1  1  1 -1  1  1 -1 -1  1  -1  -1   1  -1   -1
## [15,] -1  1  1  1 -1 -1 -1  1  1  1  -1  -1  -1   1   -1
## [16,]  1  1  1  1  1  1  1  1  1  1   1   1   1   1    1

Fr.vec <-P.new.df$Fr

P.ef <-(t(P.mat)%*%Fr.vec)/(length(Fr.vec)/2)
P.ef

##        [,1]
## A     20.25
## B      1.75
## C     11.25
## D     16.00
## AB    -1.25
## AC   -16.75
## AD    18.00
## BC     3.75
## BD     1.00
## CD    -2.50
## ABC    3.25
## ABD    5.50
## ACD   -3.00
## BCD   -4.00
## ABCD   0.00

# Note that our significant interactions are only slightly larger than those 
# originally estimated:
# A: 20.25 vs 21.63
# C: 11.25 vs 9.88
# D: 16.00 vs 14.63
# AC: -16.75 vs -18.13
# AD: 18.00 vs 16.63

Assignment5.R

Skynet

Mon Nov 24 10:27:32 2014