PPA Day1 workshop

#install.packages("DoE.base")
library(DoE.base)

## Loading required package: grid

## Loading required package: conf.design

## Registered S3 method overwritten by 'DoE.base':
##   method           from       
##   factorize.factor conf.design

## 
## Attaching package: 'DoE.base'

## The following objects are masked from 'package:stats':
## 
##     aov, lm

## The following object is masked from 'package:graphics':
## 
##     plot.design

## The following object is masked from 'package:base':
## 
##     lengths

#install.packages("FrF2")
library(FrF2)

#### Randomized block designl; Two-way layout model ####

# 1) Randomized block design (handphone)
cellphone = read.table("cellphone.txt", header = T)
brand = cellphone$Brand
promotion = cellphone$Promotion
rating = cellphone$Rating
# Boxplot 
boxplot(rating ~ promotion)

boxplot(rating~brand)

### ANOVA table (brand and promotion)
fit_cellphone = lm(rating ~ brand + promotion)
anova(fit_cellphone)

## Analysis of Variance Table
## 
## Response: rating
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## brand      3 197.00  65.668  9.1198 0.001116 **
## promotion  5 201.32  40.263  5.5917 0.004191 **
## Residuals 15 108.01   7.201                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# residual plot 
plot(fit_cellphone$fitted, fit_cellphone$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

### ANOVA table (promotion only)
fit_cellphone1 = lm(rating ~ promotion)
anova(fit_cellphone1)

## Analysis of Variance Table
## 
## Response: rating
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## promotion  5 201.32  40.263  2.3761 0.08024 .
## Residuals 18 305.01  16.945                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

### multiple comparison using Tukey method
fit1 = aov(rating ~ promotion + brand, data=cellphone)
TukeyHSD(fit1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov.default(formula = rating ~ promotion + brand, data = cellphone)
## 
## $promotion
##        diff         lwr          upr     p adj
## B-A  5.7550  -0.4097239 11.919723853 0.0741633
## C-A  8.4925   2.3277761 14.657223853 0.0048516
## D-A  2.3375  -3.8272239  8.502223853 0.8149970
## E-A  1.2325  -4.9322239  7.397223853 0.9849215
## F-A  4.9475  -1.2172239 11.112223853 0.1552755
## C-B  2.7375  -3.4272239  8.902223853 0.7024907
## D-B -3.4175  -9.5822239  2.747223853 0.4934365
## E-B -4.5225 -10.6872239  1.642223853 0.2225996
## F-B -0.8075  -6.9722239  5.357223853 0.9978438
## D-C -6.1550 -12.3197239  0.009723853 0.0504756
## E-C -7.2600 -13.4247239 -1.095276147 0.0168331
## F-C -3.5450  -9.7097239  2.619723853 0.4558882
## E-D -1.1050  -7.2697239  5.059723853 0.9907491
## F-D  2.6100  -3.5547239  8.774723853 0.7402085
## F-E  3.7150  -2.4497239  9.879723853 0.4078918
## 
## $brand
##            diff        lwr        upr     p adj
## B2-B1  7.886667  3.4214872 12.3518462 0.0006865
## B3-B1  5.113333  0.6481538  9.5785128 0.0224526
## B4-B1  3.286667 -1.1785128  7.7518462 0.1910175
## B3-B2 -2.773333 -7.2385128  1.6918462 0.3157187
## B4-B2 -4.600000 -9.0651795 -0.1348205 0.0424317
## B4-B3 -1.826667 -6.2918462  2.6385128 0.6485419

#2) two way anova with interactions (popcorn)

popcorn.data = read.table("popcorn.txt", sep="\t", header=T)
popcorn.data

##    popcorn baking.time sugar.type
## 1     42.5           1          1
## 2    138.4           2          1
## 3    180.9           3          1
## 4     39.8           1          2
## 5    132.4           2          2
## 6    176.8           3          2
## 7     43.3           1          1
## 8    144.4           2          1
## 9    180.5           3          1
## 10    40.3           1          2
## 11   132.4           2          2
## 12   173.6           3          2
## 13    42.9           1          1
## 14   142.7           2          1
## 15   183.0           3          1
## 16    41.2           1          2
## 17   130.3           2          2
## 18   174.9           3          2

popcorn = popcorn.data$popcorn
baking.time = as.factor(popcorn.data$baking.time)
sugar.type = as.factor(popcorn.data$sugar.type)

boxplot(popcorn ~ baking.time)

boxplot(popcorn ~ sugar.type)

popcorn.fit1 = lm(popcorn ~ baking.time * sugar.type)
anova(popcorn.fit1)

## Analysis of Variance Table
## 
## Response: popcorn
##                        Df Sum Sq Mean Sq    F value    Pr(>F)    
## baking.time             2  58864 29431.8 10975.1736 < 2.2e-16 ***
## sugar.type              1    180   179.9    67.0729 2.955e-06 ***
## baking.time:sugar.type  2     44    22.0     8.2202  0.005642 ** 
## Residuals              12     32     2.7                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(popcorn.fit1$fitted, popcorn.fit1$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

popcorn.fit2 = lm(log(popcorn) ~ baking.time * sugar.type)
anova(popcorn.fit2)

## Analysis of Variance Table
## 
## Response: log(popcorn)
##                        Df Sum Sq Mean Sq    F value    Pr(>F)    
## baking.time             2 7.1935  3.5967 19670.4837 < 2.2e-16 ***
## sugar.type              1 0.0143  0.0143    78.1091 1.337e-06 ***
## baking.time:sugar.type  2 0.0011  0.0006     3.0574    0.0845 .  
## Residuals              12 0.0022  0.0002                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(popcorn.fit2$fitted, popcorn.fit2$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

# multiple comparison using Tukey
#fit1 = aov(popcorn ~ baking.time + sugar.type, data=cellphone)
fit2 = aov(log(popcorn) ~ baking.time + sugar.type, data=cellphone)
TukeyHSD(fit2)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov.default(formula = log(popcorn) ~ baking.time + sugar.type, data = cellphone)
## 
## $baking.time
##          diff       lwr       upr p adj
## 2-1 1.1882982 1.1650553 1.2115411     0
## 3-1 1.4539939 1.4307510 1.4772368     0
## 3-2 0.2656957 0.2424528 0.2889386     0
## 
## $sugar.type
##            diff         lwr         upr   p adj
## 2-1 -0.05633677 -0.07188848 -0.04078505 1.9e-06

#3) 2^k Full Factorial (Bicycle) 
library(DoE.base)
library(FrF2)

A = B = C = c(-1, +1)

design <- expand.grid(Seat.Height=A, Generator=B, Tire.pressure=C)

bicycle.plan<-data.frame(repl=rep(1:2, each=8), rbind(design))

bicycle.plan$time<-c(51, 41, 54, 44, 50, 39, 53, 41, 54, 43, 60, 43, 48, 39, 51, 44)
bicycle.plan

##    repl Seat.Height Generator Tire.pressure time
## 1     1          -1        -1            -1   51
## 2     1           1        -1            -1   41
## 3     1          -1         1            -1   54
## 4     1           1         1            -1   44
## 5     1          -1        -1             1   50
## 6     1           1        -1             1   39
## 7     1          -1         1             1   53
## 8     1           1         1             1   41
## 9     2          -1        -1            -1   54
## 10    2           1        -1            -1   43
## 11    2          -1         1            -1   60
## 12    2           1         1            -1   43
## 13    2          -1        -1             1   48
## 14    2           1        -1             1   39
## 15    2          -1         1             1   51
## 16    2           1         1             1   44

# model fitting and estimation
model1 = lm(time ~ Seat.Height*Generator*Tire.pressure, data=bicycle.plan)
summary(model1)

## 
## Call:
## lm.default(formula = time ~ Seat.Height * Generator * Tire.pressure, 
##     data = bicycle.plan)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##     -3     -1      0      1      3 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          47.1875     0.5116  92.238 2.13e-13 ***
## Seat.Height                          -5.4375     0.5116 -10.629 5.37e-06 ***
## Generator                             1.5625     0.5116   3.054   0.0157 *  
## Tire.pressure                        -1.5625     0.5116  -3.054   0.0157 *  
## Seat.Height:Generator                -0.3125     0.5116  -0.611   0.5583    
## Seat.Height:Tire.pressure             0.5625     0.5116   1.100   0.3035    
## Generator:Tire.pressure               0.0625     0.5116   0.122   0.9058    
## Seat.Height:Generator:Tire.pressure   0.4375     0.5116   0.855   0.4173    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.046 on 8 degrees of freedom
## Multiple R-squared:  0.9436, Adjusted R-squared:  0.8943 
## F-statistic: 19.14 on 7 and 8 DF,  p-value: 0.0002109

anova(model1)

## Analysis of Variance Table
## 
## Response: time
##                                     Df Sum Sq Mean Sq  F value    Pr(>F)    
## Seat.Height                          1 473.06  473.06 112.9701 5.374e-06 ***
## Generator                            1  39.06   39.06   9.3284   0.01572 *  
## Tire.pressure                        1  39.06   39.06   9.3284   0.01572 *  
## Seat.Height:Generator                1   1.56    1.56   0.3731   0.55825    
## Seat.Height:Tire.pressure            1   5.06    5.06   1.2090   0.30352    
## Generator:Tire.pressure              1   0.06    0.06   0.0149   0.90578    
## Seat.Height:Generator:Tire.pressure  1   3.06    3.06   0.7313   0.41732    
## Residuals                            8  33.50    4.19                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# residual plot
plot(model1$fitted, model1$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

# plot the main effects
MEPlot(model1)

# plot the interaction effects
IAPlot(model1)

#4a)2^k Full Factorial (Handphone) - 2^3 x 2 replicates
A = B = C = c(-1, +1)

design <- expand.grid(Price=A, Message=B, Promotion=C)

design<-data.frame(repl=rep(1:2, each=8), rbind(design))
design

##    repl Price Message Promotion
## 1     1    -1      -1        -1
## 2     1     1      -1        -1
## 3     1    -1       1        -1
## 4     1     1       1        -1
## 5     1    -1      -1         1
## 6     1     1      -1         1
## 7     1    -1       1         1
## 8     1     1       1         1
## 9     2    -1      -1        -1
## 10    2     1      -1        -1
## 11    2    -1       1        -1
## 12    2     1       1        -1
## 13    2    -1      -1         1
## 14    2     1      -1         1
## 15    2    -1       1         1
## 16    2     1       1         1

design$sales<-c(550, 669, 633, 642, 1037, 749, 1075, 729, 604, 650, 601, 635, 1052, 868, 1063, 860)

# model fitting and estimation
model1 = lm(sales ~ Price*Message*Promotion, data=design)
summary(model1)

## 
## Call:
## lm.default(formula = sales ~ Price * Message * Promotion, data = design)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -65.50 -11.12   0.00  11.12  65.50 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              776.062     11.865  65.406 3.32e-12 ***
## Price                    -50.812     11.865  -4.282 0.002679 ** 
## Message                    3.688     11.865   0.311 0.763911    
## Promotion                153.062     11.865  12.900 1.23e-06 ***
## Price:Message            -12.437     11.865  -1.048 0.325168    
## Price:Promotion          -76.812     11.865  -6.474 0.000193 ***
## Message:Promotion         -1.062     11.865  -0.090 0.930849    
## Price:Message:Promotion    2.813     11.865   0.237 0.818586    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.46 on 8 degrees of freedom
## Multiple R-squared:  0.9661, Adjusted R-squared:  0.9364 
## F-statistic: 32.56 on 7 and 8 DF,  p-value: 2.896e-05

anova(model1)

## Analysis of Variance Table
## 
## Response: sales
##                         Df Sum Sq Mean Sq  F value    Pr(>F)    
## Price                    1  41311   41311  18.3394 0.0026786 ** 
## Message                  1    218     218   0.0966 0.7639107    
## Promotion                1 374850  374850 166.4105 1.233e-06 ***
## Price:Message            1   2475    2475   1.0988 0.3251679    
## Price:Promotion          1  94403   94403  41.9090 0.0001934 ***
## Message:Promotion        1     18      18   0.0080 0.9308486    
## Price:Message:Promotion  1    127     127   0.0562 0.8185861    
## Residuals                8  18020    2253                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# residual plot
plot(model1$fitted, model1$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

# plot the main effects
MEPlot(model1)

# plot the interaction effects
IAPlot(model1)

#4b) 2^k Full Factorial with Block (Handphone brand) - 2^3 x 2 replicates
A = B = C = c(-1, +1)

design <- expand.grid(Price=A, Message=B, Promotion=C)

design<-data.frame(repl=rep(1:2, each=8), rbind(design))
design$sales<-c(550, 669, 633, 642, 1037, 749, 1075, 729, 604, 650, 601, 635, 1052, 868, 1063, 860)
design

##    repl Price Message Promotion sales
## 1     1    -1      -1        -1   550
## 2     1     1      -1        -1   669
## 3     1    -1       1        -1   633
## 4     1     1       1        -1   642
## 5     1    -1      -1         1  1037
## 6     1     1      -1         1   749
## 7     1    -1       1         1  1075
## 8     1     1       1         1   729
## 9     2    -1      -1        -1   604
## 10    2     1      -1        -1   650
## 11    2    -1       1        -1   601
## 12    2     1       1        -1   635
## 13    2    -1      -1         1  1052
## 14    2     1      -1         1   868
## 15    2    -1       1         1  1063
## 16    2     1       1         1   860

#add brand as block
design$brand <-c(1,1,1,1,1,1,1,1,-1,-1,-1,-1,-1,-1,-1,-1)
design

##    repl Price Message Promotion sales brand
## 1     1    -1      -1        -1   550     1
## 2     1     1      -1        -1   669     1
## 3     1    -1       1        -1   633     1
## 4     1     1       1        -1   642     1
## 5     1    -1      -1         1  1037     1
## 6     1     1      -1         1   749     1
## 7     1    -1       1         1  1075     1
## 8     1     1       1         1   729     1
## 9     2    -1      -1        -1   604    -1
## 10    2     1      -1        -1   650    -1
## 11    2    -1       1        -1   601    -1
## 12    2     1       1        -1   635    -1
## 13    2    -1      -1         1  1052    -1
## 14    2     1      -1         1   868    -1
## 15    2    -1       1         1  1063    -1
## 16    2     1       1         1   860    -1

# model fitting and estimation
model1 = lm(sales ~ Price*Message*Promotion+brand, data=design)
summary(model1)

## 
## Call:
## lm.default(formula = sales ~ Price * Message * Promotion + brand, 
##     data = design)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -49.94 -22.44   0.00  22.44  49.94 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              776.062     11.238  69.055 3.51e-11 ***
## Price                    -50.812     11.238  -4.521 0.002728 ** 
## Message                    3.688     11.238   0.328 0.752417    
## Promotion                153.062     11.238  13.620 2.71e-06 ***
## brand                    -15.562     11.238  -1.385 0.208649    
## Price:Message            -12.438     11.238  -1.107 0.304996    
## Price:Promotion          -76.813     11.238  -6.835 0.000245 ***
## Message:Promotion         -1.062     11.238  -0.095 0.927327    
## Price:Message:Promotion    2.812     11.238   0.250 0.809572    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.95 on 7 degrees of freedom
## Multiple R-squared:  0.9734, Adjusted R-squared:  0.943 
## F-statistic:    32 on 8 and 7 DF,  p-value: 7.749e-05

anova(model1)

## Analysis of Variance Table
## 
## Response: sales
##                         Df Sum Sq Mean Sq  F value    Pr(>F)    
## Price                    1  41311   41311  20.4429 0.0027276 ** 
## Message                  1    218     218   0.1077 0.7524172    
## Promotion                1 374850  374850 185.4980 2.707e-06 ***
## brand                    1   3875    3875   1.9176 0.2086490    
## Price:Message            1   2475    2475   1.2248 0.3049956    
## Price:Promotion          1  94403   94403  46.7160 0.0002453 ***
## Message:Promotion        1     18      18   0.0089 0.9273271    
## Price:Message:Promotion  1    127     127   0.0626 0.8095716    
## Residuals                7  14145    2021                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#5) 2^k Full Factorial (Stain Removal) 
A = B = C = D = c(-1, +1)
design <- expand.grid(A=A, B=B, C=C, D=D)
design

##     A  B  C  D
## 1  -1 -1 -1 -1
## 2   1 -1 -1 -1
## 3  -1  1 -1 -1
## 4   1  1 -1 -1
## 5  -1 -1  1 -1
## 6   1 -1  1 -1
## 7  -1  1  1 -1
## 8   1  1  1 -1
## 9  -1 -1 -1  1
## 10  1 -1 -1  1
## 11 -1  1 -1  1
## 12  1  1 -1  1
## 13 -1 -1  1  1
## 14  1 -1  1  1
## 15 -1  1  1  1
## 16  1  1  1  1

A = design$A
B = design$B
C = design$C
D = design$D
design

##     A  B  C  D
## 1  -1 -1 -1 -1
## 2   1 -1 -1 -1
## 3  -1  1 -1 -1
## 4   1  1 -1 -1
## 5  -1 -1  1 -1
## 6   1 -1  1 -1
## 7  -1  1  1 -1
## 8   1  1  1 -1
## 9  -1 -1 -1  1
## 10  1 -1 -1  1
## 11 -1  1 -1  1
## 12  1  1 -1  1
## 13 -1 -1  1  1
## 14  1 -1  1  1
## 15 -1  1  1  1
## 16  1  1  1  1

design<-data.frame(repl=rep(1:2, each=16), rbind(design))
design

##    repl  A  B  C  D
## 1     1 -1 -1 -1 -1
## 2     1  1 -1 -1 -1
## 3     1 -1  1 -1 -1
## 4     1  1  1 -1 -1
## 5     1 -1 -1  1 -1
## 6     1  1 -1  1 -1
## 7     1 -1  1  1 -1
## 8     1  1  1  1 -1
## 9     1 -1 -1 -1  1
## 10    1  1 -1 -1  1
## 11    1 -1  1 -1  1
## 12    1  1  1 -1  1
## 13    1 -1 -1  1  1
## 14    1  1 -1  1  1
## 15    1 -1  1  1  1
## 16    1  1  1  1  1
## 17    2 -1 -1 -1 -1
## 18    2  1 -1 -1 -1
## 19    2 -1  1 -1 -1
## 20    2  1  1 -1 -1
## 21    2 -1 -1  1 -1
## 22    2  1 -1  1 -1
## 23    2 -1  1  1 -1
## 24    2  1  1  1 -1
## 25    2 -1 -1 -1  1
## 26    2  1 -1 -1  1
## 27    2 -1  1 -1  1
## 28    2  1  1 -1  1
## 29    2 -1 -1  1  1
## 30    2  1 -1  1  1
## 31    2 -1  1  1  1
## 32    2  1  1  1  1

design$stain.removal = c(15.1, 20.6, 68.7, 101, 32.9, 46.1, 87.5, 119, 11.3, 19.6, 62.1, 103.2, 27.1, 40.3, 87.7, 128.3, 14, 25, 61, 100, 39, 40, 90, 150, 15, 26, 70, 95, 30, 47, 80, 110)
design

##    repl  A  B  C  D stain.removal
## 1     1 -1 -1 -1 -1          15.1
## 2     1  1 -1 -1 -1          20.6
## 3     1 -1  1 -1 -1          68.7
## 4     1  1  1 -1 -1         101.0
## 5     1 -1 -1  1 -1          32.9
## 6     1  1 -1  1 -1          46.1
## 7     1 -1  1  1 -1          87.5
## 8     1  1  1  1 -1         119.0
## 9     1 -1 -1 -1  1          11.3
## 10    1  1 -1 -1  1          19.6
## 11    1 -1  1 -1  1          62.1
## 12    1  1  1 -1  1         103.2
## 13    1 -1 -1  1  1          27.1
## 14    1  1 -1  1  1          40.3
## 15    1 -1  1  1  1          87.7
## 16    1  1  1  1  1         128.3
## 17    2 -1 -1 -1 -1          14.0
## 18    2  1 -1 -1 -1          25.0
## 19    2 -1  1 -1 -1          61.0
## 20    2  1  1 -1 -1         100.0
## 21    2 -1 -1  1 -1          39.0
## 22    2  1 -1  1 -1          40.0
## 23    2 -1  1  1 -1          90.0
## 24    2  1  1  1 -1         150.0
## 25    2 -1 -1 -1  1          15.0
## 26    2  1 -1 -1  1          26.0
## 27    2 -1  1 -1  1          70.0
## 28    2  1  1 -1  1          95.0
## 29    2 -1 -1  1  1          30.0
## 30    2  1 -1  1  1          47.0
## 31    2 -1  1  1  1          80.0
## 32    2  1  1  1  1         110.0

# Full model
model1 = lm(stain.removal ~ A*B*C*D, data=design)
summary(model1)

## 
## Call:
## lm.default(formula = stain.removal ~ A * B * C * D, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.500  -3.087   0.000   3.087  15.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 61.32812    1.30889  46.855  < 2e-16 ***
## A           11.86563    1.30889   9.065 1.06e-07 ***
## B           33.26562    1.30889  25.415 2.31e-14 ***
## C           10.85313    1.30889   8.292 3.47e-07 ***
## D           -1.79062    1.30889  -1.368    0.190    
## A:B          6.85313    1.30889   5.236 8.16e-05 ***
## A:C          1.04062    1.30889   0.795    0.438    
## B:C          1.11562    1.30889   0.852    0.407    
## A:D         -0.22813    1.30889  -0.174    0.864    
## B:D         -0.76563    1.30889  -0.585    0.567    
## C:D         -1.59063    1.30889  -1.215    0.242    
## A:B:C        0.50313    1.30889   0.384    0.706    
## A:B:D       -1.40312    1.30889  -1.072    0.300    
## A:C:D       -0.07812    1.30889  -0.060    0.953    
## B:C:D       -0.91562    1.30889  -0.700    0.494    
## A:B:C:D     -0.90313    1.30889  -0.690    0.500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.404 on 16 degrees of freedom
## Multiple R-squared:  0.9811, Adjusted R-squared:  0.9634 
## F-statistic: 55.44 on 15 and 16 DF,  p-value: 6.409e-11

anova(model1)

## Analysis of Variance Table
## 
## Response: stain.removal
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## A          1   4505    4505  82.1816 1.056e-07 ***
## B          1  35411   35411 645.9293 2.314e-14 ***
## C          1   3769    3769  68.7548 3.473e-07 ***
## D          1    103     103   1.8716    0.1902    
## A:B        1   1503    1503  27.4139 8.160e-05 ***
## A:C        1     35      35   0.6321    0.4382    
## B:C        1     40      40   0.7265    0.4066    
## A:D        1      2       2   0.0304    0.8638    
## B:D        1     19      19   0.3422    0.5667    
## C:D        1     81      81   1.4768    0.2419    
## A:B:C      1      8       8   0.1478    0.7057    
## A:B:D      1     63      63   1.1492    0.2996    
## A:C:D      1      0       0   0.0036    0.9531    
## B:C:D      1     27      27   0.4894    0.4943    
## A:B:C:D    1     26      26   0.4761    0.5001    
## Residuals 16    877      55                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Reduced model
model2 = lm(stain.removal ~ A*B*C, data= design)
summary(model2)

## 
## Call:
## lm.default(formula = stain.removal ~ A * B * C, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.825  -3.237   0.900   2.862  23.175 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  61.3281     1.2486  49.118  < 2e-16 ***
## A            11.8656     1.2486   9.503 1.32e-09 ***
## B            33.2656     1.2486  26.643  < 2e-16 ***
## C            10.8531     1.2486   8.692 7.04e-09 ***
## A:B           6.8531     1.2486   5.489 1.21e-05 ***
## A:C           1.0406     1.2486   0.833    0.413    
## B:C           1.1156     1.2486   0.894    0.380    
## A:B:C         0.5031     1.2486   0.403    0.691    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.063 on 24 degrees of freedom
## Multiple R-squared:  0.9742, Adjusted R-squared:  0.9667 
## F-statistic: 129.6 on 7 and 24 DF,  p-value: < 2.2e-16

anova(model2)

## Analysis of Variance Table
## 
## Response: stain.removal
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## A          1   4505    4505  90.3132 1.317e-09 ***
## B          1  35411   35411 709.8415 < 2.2e-16 ***
## C          1   3769    3769  75.5579 7.042e-09 ***
## A:B        1   1503    1503  30.1264 1.213e-05 ***
## A:C        1     35      35   0.6946    0.4128    
## B:C        1     40      40   0.7984    0.3805    
## A:B:C      1      8       8   0.1624    0.6905    
## Residuals 24   1197      50                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# residual plot 
plot(model1$fitted, model1$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

plot(model2$fitted, model2$res, xlab="fitted value", ylab="residual", main="Residual vs Fitted")

# plot the main effects
MEPlot(model1)

# plot the interaction effects
IAPlot(model1)

#6) Fractional Factorial (Stain Removal) - optional
A = B = C = c(-1, +1)
design <- expand.grid(A=A, B=B, C=C)
design

##    A  B  C
## 1 -1 -1 -1
## 2  1 -1 -1
## 3 -1  1 -1
## 4  1  1 -1
## 5 -1 -1  1
## 6  1 -1  1
## 7 -1  1  1
## 8  1  1  1

A = design$A
B = design$B
C = design$C

design$D <- A*B*C

design<-data.frame(repl=rep(1:2, each=8), rbind(design))
design

##    repl  A  B  C  D
## 1     1 -1 -1 -1 -1
## 2     1  1 -1 -1  1
## 3     1 -1  1 -1  1
## 4     1  1  1 -1 -1
## 5     1 -1 -1  1  1
## 6     1  1 -1  1 -1
## 7     1 -1  1  1 -1
## 8     1  1  1  1  1
## 9     2 -1 -1 -1 -1
## 10    2  1 -1 -1  1
## 11    2 -1  1 -1  1
## 12    2  1  1 -1 -1
## 13    2 -1 -1  1  1
## 14    2  1 -1  1 -1
## 15    2 -1  1  1 -1
## 16    2  1  1  1  1

design$stain.removal = c(15.1, 20.6, 68.7, 101, 32.9, 46.1, 87.5, 119, 14, 25, 61, 100, 39, 40, 90, 150)
design

##    repl  A  B  C  D stain.removal
## 1     1 -1 -1 -1 -1          15.1
## 2     1  1 -1 -1  1          20.6
## 3     1 -1  1 -1  1          68.7
## 4     1  1  1 -1 -1         101.0
## 5     1 -1 -1  1  1          32.9
## 6     1  1 -1  1 -1          46.1
## 7     1 -1  1  1 -1          87.5
## 8     1  1  1  1  1         119.0
## 9     2 -1 -1 -1 -1          14.0
## 10    2  1 -1 -1  1          25.0
## 11    2 -1  1 -1  1          61.0
## 12    2  1  1 -1 -1         100.0
## 13    2 -1 -1  1  1          39.0
## 14    2  1 -1  1 -1          40.0
## 15    2 -1  1  1 -1          90.0
## 16    2  1  1  1  1         150.0

# Full model
model1 = lm(stain.removal ~ A*B*C*D, data=design)
summary(model1)

## 
## Call:
## lm.default(formula = stain.removal ~ A * B * C * D, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.500  -2.413   0.000   2.413  15.500 
## 
## Coefficients: (8 not defined because of singularities)
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   63.119      2.094  30.143 1.59e-09 ***
## A             12.094      2.094   5.775 0.000417 ***
## B             34.031      2.094  16.252 2.07e-07 ***
## C             12.444      2.094   5.943 0.000345 ***
## D              1.406      2.094   0.672 0.520782    
## A:B            8.256      2.094   3.943 0.004279 ** 
## A:C            1.119      2.094   0.534 0.607683    
## B:C            2.031      2.094   0.970 0.360442    
## A:D               NA         NA      NA       NA    
## B:D               NA         NA      NA       NA    
## C:D               NA         NA      NA       NA    
## A:B:C             NA         NA      NA       NA    
## A:B:D             NA         NA      NA       NA    
## A:C:D             NA         NA      NA       NA    
## B:C:D             NA         NA      NA       NA    
## A:B:C:D           NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.376 on 8 degrees of freedom
## Multiple R-squared:  0.9777, Adjusted R-squared:  0.9581 
## F-statistic:    50 on 7 and 8 DF,  p-value: 5.592e-06

anova(model1)

## Analysis of Variance Table
## 
## Response: stain.removal
##           Df  Sum Sq Mean Sq  F value    Pr(>F)    
## A          1  2340.1  2340.1  33.3552 0.0004167 ***
## B          1 18530.0 18530.0 264.1179 2.067e-07 ***
## C          1  2477.6  2477.6  35.3138 0.0003448 ***
## D          1    31.6    31.6   0.4510 0.5207816    
## A:B        1  1090.7  1090.7  15.5456 0.0042791 ** 
## A:C        1    20.0    20.0   0.2854 0.6076834    
## B:C        1    66.0    66.0   0.9410 0.3604420    
## Residuals  8   561.3    70.2                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Reduced model
model2 = lm(stain.removal ~ A*B*C, data=design)
summary(model2)

## 
## Call:
## lm.default(formula = stain.removal ~ A * B * C, data = design)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.500  -2.413   0.000   2.413  15.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   63.119      2.094  30.143 1.59e-09 ***
## A             12.094      2.094   5.775 0.000417 ***
## B             34.031      2.094  16.252 2.07e-07 ***
## C             12.444      2.094   5.943 0.000345 ***
## A:B            8.256      2.094   3.943 0.004279 ** 
## A:C            1.119      2.094   0.534 0.607683    
## B:C            2.031      2.094   0.970 0.360442    
## A:B:C          1.406      2.094   0.672 0.520782    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.376 on 8 degrees of freedom
## Multiple R-squared:  0.9777, Adjusted R-squared:  0.9581 
## F-statistic:    50 on 7 and 8 DF,  p-value: 5.592e-06

anova(model2)

## Analysis of Variance Table
## 
## Response: stain.removal
##           Df  Sum Sq Mean Sq  F value    Pr(>F)    
## A          1  2340.1  2340.1  33.3552 0.0004167 ***
## B          1 18530.0 18530.0 264.1179 2.067e-07 ***
## C          1  2477.6  2477.6  35.3138 0.0003448 ***
## A:B        1  1090.7  1090.7  15.5456 0.0042791 ** 
## A:C        1    20.0    20.0   0.2854 0.6076834    
## B:C        1    66.0    66.0   0.9410 0.3604420    
## A:B:C      1    31.6    31.6   0.4510 0.5207816    
## Residuals  8   561.3    70.2                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PPA Day1 workshop

Li Ruiqian

1/20/2022