conjoint.R

###################################
# based on Code for: R for Marketing Research and Analytics, Chapter 13
#
# Authors:  Chris Chapman               Elea McDonnell Feit
#           cnchapman+rbook@gmail.com   efeit@drexel.edu
#
# Copyright 2015, Springer 
#
# Last update: January 7, 2015
# Version: 1.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
# You may obtain a copy of the License at
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


answers <- read.csv("Feedback Bogen Alimentaris Locham 2015 (Responses) - conjoint.csv")

#####

## create data

attrib <- list(Hilfestellung = c("keinSupport", "rechtlichePruefung"), 
               Ausgabeformate = c("Etikette", "EtiketteAllergenaushang", "EtiketteAllergenaushangProduktsheet"),
               Jahrespreis = c("99", "600", "1200", "2000")
               )

matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3, byrow = TRUE,
       dimnames = list(c("row1", "row2"),
                       c("C.1", "C.2", "C.3")))

##      C.1 C.2 C.3
## row1   1   2   3
## row2  11  12  13

q=array(dim= c(7,3,3), dimnames = c("ques", "alt", "attr"))
q[1,1,]= c(1,3,2)
q[1,2,]= c(2,1,3)
q[1,3,]= c(1,1,1)

q[2,1,]= c(2,2,2)
q[2,2,]= c(1,2,1)
q[2,3,]= c(2,3,4)

q[3,1,]= c(2,1,2)
q[3,2,]= c(1,3,1)
q[3,3,]= c(2,2,3)

q[4,1,]= c(2,1,1)
q[4,2,]= c(1,3,3)
q[4,3,]= c(2,2,2)

q[5,1,]= c(1,1,3)
q[5,2,]= c(1,2,4)
q[5,3,]= c(2,1,4)

q[6,1,]= c(2,3,2)
q[6,2,]= c(1,1,1)
q[6,3,]= c(2,1,1)

q[7,1,]= c(1,1,2)
q[7,2,]= c(1,3,3)
q[7,3,]= c(2,2,4)

cbc.df= data.frame(resp.id=integer(), ques=integer(), alt=integer(), Hilfestellung=character(), Ausgabeformate=character(), Jahrespreis=character(), choice=integer(), stringsAsFactors = FALSE)
for(userIndex in 1: nrow(answers)){
user = answers[userIndex,]
for(questionIndex in 1:ncol(user)) {
 answerIndex = user[,questionIndex]
 if (answerIndex==4)
   next
 row = NULL
 for(alternativeIndex in  1:3){
     
   attributeIndex = q[questionIndex, alternativeIndex, 1]
   a=attrib$Hilfestellung[attributeIndex]
   
   attributeIndex = q[questionIndex, alternativeIndex, 2]
   b=attrib$Ausgabeformate[attributeIndex]
   
   attributeIndex = q[questionIndex, alternativeIndex, 3]
   c=attrib$Jahrespreis[attributeIndex]
   choice=0
   if (answerIndex == alternativeIndex)
     choice=1;
   row = data.frame(resp.id=userIndex, ques=questionIndex, alt=alternativeIndex, Hilfestellung=a, Ausgabeformate=b, Jahrespreis=c, choice=choice)
   cbc.df = rbind(cbc.df, row);
  }
 }
}

## END long table generation



# Choice data descriptives
head(cbc.df)

##   resp.id ques alt      Hilfestellung                      Ausgabeformate
## 1       1    1   1        keinSupport EtiketteAllergenaushangProduktsheet
## 2       1    1   2 rechtlichePruefung                            Etikette
## 3       1    1   3        keinSupport                            Etikette
## 4       1    2   1 rechtlichePruefung             EtiketteAllergenaushang
## 5       1    2   2        keinSupport             EtiketteAllergenaushang
## 6       1    2   3 rechtlichePruefung EtiketteAllergenaushangProduktsheet
##   Jahrespreis choice
## 1         600      1
## 2        1200      0
## 3          99      0
## 4         600      1
## 5          99      0
## 6        2000      0

summary(cbc.df)

##     resp.id           ques            alt               Hilfestellung
##  Min.   : 1.00   Min.   :1.000   Min.   :1   keinSupport       :161  
##  1st Qu.: 6.00   1st Qu.:2.000   1st Qu.:1   rechtlichePruefung:205  
##  Median :11.00   Median :3.500   Median :2                           
##  Mean   :11.39   Mean   :3.738   Mean   :2                           
##  3rd Qu.:17.00   3rd Qu.:6.000   3rd Qu.:3                           
##  Max.   :22.00   Max.   :7.000   Max.   :3                           
##                              Ausgabeformate Jahrespreis     choice      
##  EtiketteAllergenaushangProduktsheet:114    600 :114    Min.   :0.0000  
##  Etikette                           :146    1200: 80    1st Qu.:0.0000  
##  EtiketteAllergenaushang            :106    99  :121    Median :0.0000  
##                                             2000: 51    Mean   :0.3333  
##                                                         3rd Qu.:1.0000  
##                                                         Max.   :1.0000

# write.csv(cbc.df, "rintro-chapter13conjoint.csv", row.names=FALSE)
preisAuswahl= xtabs(choice ~ Jahrespreis, data=cbc.df)
barplot(preisAuswahl)

formatWahl= xtabs(choice ~ Ausgabeformate, data=cbc.df)
barplot(formatWahl)

Wahl.der.Hilfestellung=xtabs(choice ~ Hilfestellung, data=cbc.df)
barplot(Wahl.der.Hilfestellung)

# Fitting a choice model with mlogit
library(mlogit)

## Loading required package: Formula
## Loading required package: maxLik
## Loading required package: miscTools
## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/

cbc.mlogit <- mlogit.data(data=cbc.df, choice="choice", shape="long", varying=3:6, alt.levels=paste("pos",1:3), 
                          id.var="resp.id")

Einfachstes Modell

m1 <- mlogit(choice ~ 0 + Hilfestellung + Ausgabeformate  + Jahrespreis, data = cbc.mlogit)
summary(m1)

## 
## Call:
## mlogit(formula = choice ~ 0 + Hilfestellung + Ausgabeformate + 
##     Jahrespreis, data = cbc.mlogit, method = "nr", print.level = 0)
## 
## Frequencies of alternatives:
##   pos 1   pos 2   pos 3 
## 0.44262 0.26230 0.29508 
## 
## nr method
## 5 iterations, 0h:0m:0s 
## g'(-H)^-1g = 6.28E-08 
## gradient close to zero 
## 
## Coefficients :
##                                       Estimate Std. Error t-value
## HilfestellungrechtlichePruefung        1.98367    0.48661  4.0765
## AusgabeformateEtikette                -1.89315    0.39876 -4.7476
## AusgabeformateEtiketteAllergenaushang -0.72565    0.33484 -2.1672
## Jahrespreis1200                       -1.35761    0.35771 -3.7953
## Jahrespreis99                          1.13068    0.38415  2.9433
## Jahrespreis2000                       -2.79259    0.62508 -4.4675
##                                        Pr(>|t|)    
## HilfestellungrechtlichePruefung       4.572e-05 ***
## AusgabeformateEtikette                2.059e-06 ***
## AusgabeformateEtiketteAllergenaushang 0.0302207 *  
## Jahrespreis1200                       0.0001475 ***
## Jahrespreis99                         0.0032471 ** 
## Jahrespreis2000                       7.913e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-Likelihood: -109.88

Modell das von keinem Systematischen Bias ausgeht

m2 <- mlogit(choice ~ Hilfestellung + Ausgabeformate  + Jahrespreis, data = cbc.mlogit)
summary(m2)

## 
## Call:
## mlogit(formula = choice ~ Hilfestellung + Ausgabeformate + Jahrespreis, 
##     data = cbc.mlogit, method = "nr", print.level = 0)
## 
## Frequencies of alternatives:
##   pos 1   pos 2   pos 3 
## 0.44262 0.26230 0.29508 
## 
## nr method
## 5 iterations, 0h:0m:0s 
## g'(-H)^-1g = 5.24E-08 
## gradient close to zero 
## 
## Coefficients :
##                                       Estimate Std. Error t-value
## pos 2:(intercept)                      0.37235    0.42123  0.8839
## pos 3:(intercept)                      0.25893    0.34226  0.7565
## HilfestellungrechtlichePruefung        2.16044    0.52626  4.1053
## AusgabeformateEtikette                -1.89972    0.41254 -4.6049
## AusgabeformateEtiketteAllergenaushang -0.88916    0.41350 -2.1503
## Jahrespreis1200                       -1.64349    0.47772 -3.4403
## Jahrespreis99                          0.97076    0.42247  2.2978
## Jahrespreis2000                       -3.17591    0.75731 -4.1937
##                                        Pr(>|t|)    
## pos 2:(intercept)                      0.376726    
## pos 3:(intercept)                      0.449335    
## HilfestellungrechtlichePruefung       4.038e-05 ***
## AusgabeformateEtikette                4.126e-06 ***
## AusgabeformateEtiketteAllergenaushang  0.031530 *  
## Jahrespreis1200                        0.000581 ***
## Jahrespreis99                          0.021572 *  
## Jahrespreis2000                       2.744e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-Likelihood: -109.41
## McFadden R^2:  0.16338 
## Likelihood ratio test : chisq = 42.733 (p.value = 1.3173e-07)

lrtest(m1, m2)

## Likelihood ratio test
## 
## Model 1: choice ~ 0 + Hilfestellung + Ausgabeformate + Jahrespreis
## Model 2: choice ~ Hilfestellung + Ausgabeformate + Jahrespreis
##   #Df  LogLik Df  Chisq Pr(>Chisq)
## 1   6 -109.88                     
## 2   8 -109.41  2 0.9415     0.6245

Aus dem Likely Ratio Test sehen wir dass beide Modelle gleich gut sind. Ein starker Hinweiss, dass die Schüler nicht einfach nur was angekreuzt haben ohne nachzudenken. Wir gehen weiter zu einem model mit numerischen Preisen:

m3 <- mlogit(choice ~ 0 + Hilfestellung + Ausgabeformate 
             + as.numeric(as.character(Jahrespreis)), 
             data = cbc.mlogit)
summary(m3)

## 
## Call:
## mlogit(formula = choice ~ 0 + Hilfestellung + Ausgabeformate + 
##     as.numeric(as.character(Jahrespreis)), data = cbc.mlogit, 
##     method = "nr", print.level = 0)
## 
## Frequencies of alternatives:
##   pos 1   pos 2   pos 3 
## 0.44262 0.26230 0.29508 
## 
## nr method
## 5 iterations, 0h:0m:0s 
## g'(-H)^-1g = 6.88E-08 
## gradient close to zero 
## 
## Coefficients :
##                                          Estimate  Std. Error t-value
## HilfestellungrechtlichePruefung        1.96088179  0.43060066  4.5538
## AusgabeformateEtikette                -1.86166288  0.34580311 -5.3836
## AusgabeformateEtiketteAllergenaushang -0.72233482  0.33655535 -2.1463
## as.numeric(as.character(Jahrespreis)) -0.00209939  0.00042374 -4.9544
##                                        Pr(>|t|)    
## HilfestellungrechtlichePruefung       5.268e-06 ***
## AusgabeformateEtikette                7.301e-08 ***
## AusgabeformateEtiketteAllergenaushang   0.03185 *  
## as.numeric(as.character(Jahrespreis)) 7.255e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-Likelihood: -110.13

Die Preisneigung ist leicht negativ, Preis spielt bei der Auswahl nicht eine besonders grosse rolle

lrtest(m1, m3)

## Likelihood ratio test
## 
## Model 1: choice ~ 0 + Hilfestellung + Ausgabeformate + Jahrespreis
## Model 2: choice ~ 0 + Hilfestellung + Ausgabeformate + as.numeric(as.character(Jahrespreis))
##   #Df  LogLik Df  Chisq Pr(>Chisq)
## 1   6 -109.88                     
## 2   4 -110.13 -2 0.5006     0.7786

das Modell mit Numerischen Preis ist gut, wir verwenden es weiter Zahlungsbereitschaft

Zahlungsbereitschaft.RechtlichePrüfung=coef(m3)["HilfestellungrechtlichePruefung"]/(-coef(m3)["as.numeric(as.character(Jahrespreis))"])
print(c(round(Zahlungsbereitschaft.RechtlichePrüfung), "EUR"))

## HilfestellungrechtlichePruefung                                 
##                           "934"                           "EUR"

Predicting shares

predict.mnl <- function(model, data) {
  # Function for predicting shares from a multinomial logit model 
  # model: mlogit object returned by mlogit()
  # data: a data frame containing the set of designs for which you want to 
  #       predict shares.  Same format at the data used to estimate model. 
  data.model <- model.matrix(update(model$formula, 0 ~ .), data = data)[,-1]
  utility <- data.model%*%model$coef
  share <- exp(utility)/sum(exp(utility))
  cbind(share, data)
}

(new.data <- expand.grid(attrib)[c(1,2,3,12,24), ])

##         Hilfestellung                      Ausgabeformate Jahrespreis
## 1         keinSupport                            Etikette          99
## 2  rechtlichePruefung                            Etikette          99
## 3         keinSupport             EtiketteAllergenaushang          99
## 12 rechtlichePruefung EtiketteAllergenaushangProduktsheet         600
## 24 rechtlichePruefung EtiketteAllergenaushangProduktsheet        2000

predict.mnl(m3, new.data)

##         share      Hilfestellung                      Ausgabeformate
## 1  0.10493057        keinSupport                            Etikette
## 2  0.74559364 rechtlichePruefung                            Etikette
## 3  0.01630768        keinSupport             EtiketteAllergenaushang
## 12 0.12647611 rechtlichePruefung EtiketteAllergenaushangProduktsheet
## 24 0.00669200 rechtlichePruefung EtiketteAllergenaushangProduktsheet
##    Jahrespreis
## 1           99
## 2           99
## 3           99
## 12         600
## 24        2000

predict.mnl(m1, new.data)

##         share      Hilfestellung                      Ausgabeformate
## 1  0.10481403        keinSupport                            Etikette
## 2  0.76193071 rechtlichePruefung                            Etikette
## 3  0.01578465        keinSupport             EtiketteAllergenaushang
## 12 0.09487818 rechtlichePruefung EtiketteAllergenaushangProduktsheet
## 24 0.02259243 rechtlichePruefung EtiketteAllergenaushangProduktsheet
##    Jahrespreis
## 1           99
## 2           99
## 3           99
## 12         600
## 24        2000

Share sensitivity

sensitivity.mnl <- function(model, attrib, base.data, competitor.data) {
  # Function for creating data for a share-sensitivity chart
  # model: mlogit object returned by mlogit() function
  # attrib: list of vectors with attribute levels to be used in sensitivity
  # base.data: data frame containing baseline design of target product
  # competitor.data: data frame contining design of competitive set
  data <- rbind(base.data, competitor.data)
  base.share <- predict.mnl(model, data)[1,1]
  share <- NULL
  for (a in seq_along(attrib)) {
    for (i in attrib[[a]]) {
      data[1,] <- base.data
      data[1,a] <- i
      share <- c(share, predict.mnl(model, data)[1,1])
    }
  }
  data.frame(level=unlist(attrib), share=share, increase=share-base.share)
}
base.data <- expand.grid(attrib)[c(1), ]
competitor.data <- expand.grid(attrib)[c(4,6,12,18), ]
(tradeoff <- sensitivity.mnl(m1, attrib, base.data, competitor.data))

##                                               level       share
## Hilfestellung1                          keinSupport 0.057413230
## Hilfestellung2                   rechtlichePruefung 0.306892994
## Ausgabeformate1                            Etikette 0.057413230
## Ausgabeformate2             EtiketteAllergenaushang 0.009089515
## Ausgabeformate3 EtiketteAllergenaushangProduktsheet 0.028636851
## Jahrespreis1                                     99 0.057413230
## Jahrespreis2                                    600 0.015428922
## Jahrespreis3                                   1200 0.158734010
## Jahrespreis4                                   2000 0.003717641
##                    increase
## Hilfestellung1   0.00000000
## Hilfestellung2   0.24947976
## Ausgabeformate1  0.00000000
## Ausgabeformate2 -0.04832371
## Ausgabeformate3 -0.02877638
## Jahrespreis1     0.00000000
## Jahrespreis2    -0.04198431
## Jahrespreis3     0.10132078
## Jahrespreis4    -0.05369559

barplot(tradeoff$increase, horiz=FALSE, names.arg=tradeoff$level,
        ylab="Change in Share for Baseline Product")

# Share predictions for two identical vehicles
#new.data.2 <- expand.grid(attrib)[c(8, 8, 1, 3, 41, 49, 26), ]
#predict.mnl(m1, new.data.2)  


# Prediction uncertainty & conjoint design
#small.cbc <- mlogit.data(data=cbc.df[1:(25*15*3),], 
#                         choice="choice", shape="long", 
#                         varying=3:6, alt.levels=paste("pos", 1:3), 
#                         id.var="resp.id")
#m4 <- mlogit(choice ~ 0 + Hilfestellung + Ausgabeformate + eng + Jahrespreis, data = small.cbc)
#summary(m4)  # larger standard errors

#cbind(predict.mnl(m4, new.data), predict.mnl(m1, new.data))

conjoint.R

roland

Mon Jan 4 08:38:29 2016