The Procter & Gamble Company developed a new formula for one of its laundry soaps. Before introducing the new formula in the marketplace, the company wanted to know wheather consumers would prefer the new formula, called X, to the original formula called, M

Analysis for a Field Test of Laundry Soaps (R)

library(ggplot2)  # graphics for interaction plotting

# set contrast for tests of significance with factors
options(contrasts=c(factor = "contr.treatment", ordered="contr.poly"))

First import data from the comma-delimited file gsoaps.csv,the response variable relates to brand choice, choice is factor/binary response variable (M or X) the explanatory variables are factors (categorical variables): wtemp = water temperature with levels LOW OR HIGH wtype = water type with levels SOFT, MEDIUM, OR HARD muser = is user of brand M with labels NO OR YES

# convert to individual observations as required for logistic regression
indices <- rep(1:nrow(gsoaps),gsoaps$freq)
soaps <- gsoaps[indices,-1]
soaps <- data.frame(soaps, row.names=NULL)

Check the data frame

print(str(soaps))
## 'data.frame':    1008 obs. of  4 variables:
##  $ choice: Factor w/ 2 levels "M","X": 2 2 2 2 2 2 2 2 2 2 ...
##  $ muser : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
##  $ wtemp : Factor w/ 2 levels "HIGH","LOW": 2 2 2 2 2 2 2 2 2 2 ...
##  $ wtype : Factor w/ 3 levels "HARD","MEDIUM",..: 1 1 1 1 1 1 1 1 1 1 ...
## NULL
print(head(soaps))
##   choice muser wtemp wtype
## 1      X    NO   LOW  HARD
## 2      X    NO   LOW  HARD
## 3      X    NO   LOW  HARD
## 4      X    NO   LOW  HARD
## 5      X    NO   LOW  HARD
## 6      X    NO   LOW  HARD
print(tail(soaps))
##      choice muser wtemp wtype
## 1003      M   YES  HIGH  SOFT
## 1004      M   YES  HIGH  SOFT
## 1005      M   YES  HIGH  SOFT
## 1006      M   YES  HIGH  SOFT
## 1007      M   YES  HIGH  SOFT
## 1008      M   YES  HIGH  SOFT
with(soaps, print(table(wtemp, wtype, muser, choice)))
## , , muser = NO, choice = M
## 
##       wtype
## wtemp  HARD MEDIUM SOFT
##   HIGH   30     23   27
##   LOW    42     50   53
## 
## , , muser = YES, choice = M
## 
##       wtype
## wtemp  HARD MEDIUM SOFT
##   HIGH   43     47   29
##   LOW    52     55   49
## 
## , , muser = NO, choice = X
## 
##       wtype
## wtemp  HARD MEDIUM SOFT
##   HIGH   42     33   29
##   LOW    68     66   63
## 
## , , muser = YES, choice = X
## 
##       wtype
## wtemp  HARD MEDIUM SOFT
##   HIGH   24     23   19
##   LOW    37     47   57
# specify complete experimental design with interactions
soaps_model <- choice ~ muser * wtemp * wtype 

Fit the model

# fit the complete model     
soaps_fit <- glm(soaps_model, family = binomial, data=soaps)
     
# summary of fit
print(soaps_fit)
## 
## Call:  glm(formula = soaps_model, family = binomial, data = soaps)
## 
## Coefficients:
##                   (Intercept)                       muserYES  
##                       0.33647                       -0.91962  
##                      wtempLOW                    wtypeMEDIUM  
##                       0.14537                        0.02454  
##                     wtypeSOFT              muserYES:wtempLOW  
##                      -0.26501                        0.09745  
##          muserYES:wtypeMEDIUM             muserYES:wtypeSOFT  
##                      -0.15605                        0.42530  
##          wtempLOW:wtypeMEDIUM             wtempLOW:wtypeSOFT  
##                      -0.22875                       -0.04398  
## muserYES:wtempLOW:wtypeMEDIUM    muserYES:wtempLOW:wtypeSOFT  
##                       0.54339                        0.37525  
## 
## Degrees of Freedom: 1007 Total (i.e. Null);  996 Residual
## Null Deviance:       1397 
## Residual Deviance: 1364  AIC: 1388
# analysis of deviance for experimental factors
# providing likelihood ratio chi-square tests of each effect
print(anova(soaps_fit, test="Chisq"))  
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: choice
## 
## Terms added sequentially (first to last)
## 
## 
##                   Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                               1007     1397.3              
## muser              1  20.5815      1006     1376.7 5.715e-06 ***
## wtemp              1   3.8002      1005     1372.9   0.05125 .  
## wtype              2   0.2160      1003     1372.7   0.89763    
## muser:wtemp        1   2.7328      1002     1370.0   0.09831 .  
## muser:wtype        2   4.5961      1000     1365.4   0.10045    
## wtemp:wtype        2   0.1618       998     1365.2   0.92228    
## muser:wtemp:wtype  2   0.7373       996     1364.5   0.69166    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interaction plotting

# code the choice as a 0/1 binary variable 1 = Brand X  
soaps$response <- ifelse((soaps$choice == "X"), 1, 0)

# compute choice share cell means for use in interaction plot
response_mean <- aggregate(response ~ muser * wtemp,
  data = soaps, mean)
interaction_plot <- ggplot(response_mean, 
  aes(x = muser, y = 100*response, 
  group = wtemp, fill = wtemp)) +  
  geom_line(linetype = "solid", size = 1, colour = "black") +
  geom_point(size = 4, shape = 21) +
  ylab("Preference for Soap X (percentage)") +
  xlab("Currently Uses Soap M") +
  labs(fill = "Water Temperature") +
  theme(plot.title = element_text(lineheight=.8, face="bold"))
print(interaction_plot)