Attribution Model based on Markov Chain

We can represent every customer journey (sequence of channels/touchpoints) as a chain in a directed Markov graph where each vertex is a possible state (channel/touchpoint) and the edges represent the probability of transition between the states (including conversion.) By computing the model and estimating transition probabilities we can attribute every channel/touchpoint. Please review the original from https://analyzecore.com/2016/08/03/attribution-model-r-part-1/. and https://analyzecore.com/2017/05/31/marketing-multi-channel-attribution-model-r-part-2-practical-issues/.

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2)
## Warning: package 'reshape2' was built under R version 3.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.4.3
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.4.3
library(RColorBrewer)
library(ChannelAttribution)
## Warning: package 'ChannelAttribution' was built under R version 3.4.3
library(markovchain)
## Warning: package 'markovchain' was built under R version 3.4.3
## Package:  markovchain
## Version:  0.6.9.8-1
## Date:     2017-08-15
## BugReport: http://github.com/spedygiorgio/markovchain/issues

Simple Example

# creating a data sample
df1 <- data.frame(path = c('c1 > c2 > c3', 'c1', 'c2 > c3'), conv = c(1, 0, 0), conv_null = c(0, 1, 1))
 
# calculating the model
mod1 <- markov_model(df1,
                    var_path = 'path',
                    var_conv = 'conv',
                    var_null = 'conv_null',
                    out_more = TRUE)

Extracting the results of attribution

df_res1 <- mod1$result

df_res1
##   channel_name total_conversions
## 1           c1           0.19984
## 2           c2           0.40008
## 3           c3           0.40008

Extracting the transition matrix

df_trans1 <- mod1$transition_matrix
df_trans1 <- dcast(df_trans1, channel_from ~ channel_to, value.var = 'transition_probability')

df_trans1
##   channel_from (conversion) (null)        c1        c2 c3
## 1      (start)           NA     NA 0.6666667 0.3333333 NA
## 2           c1           NA    0.5        NA 0.5000000 NA
## 3           c2           NA     NA        NA        NA  1
## 4           c3          0.5    0.5        NA        NA NA

Plotting the Markov Graph

df_trans <- mod1$transition_matrix
 
# adding dummies in order to plot the graph
df_dummy <- data.frame(channel_from = c('(start)', '(conversion)', '(null)'),
                       channel_to = c('(start)', '(conversion)', '(null)'),
                       transition_probability = c(0, 1, 1))
df_trans <- rbind(df_trans, df_dummy)
 
# ordering channels
df_trans$channel_from <- factor(df_trans$channel_from,
                                levels = c('(start)', '(conversion)', '(null)', 'c1', 'c2', 'c3'))
df_trans$channel_to <- factor(df_trans$channel_to,
                                levels = c('(start)', '(conversion)', '(null)', 'c1', 'c2', 'c3'))
df_trans <- dcast(df_trans, channel_from ~ channel_to, value.var = 'transition_probability')
 
# creating the markovchain object
trans_matrix <- matrix(data = as.matrix(df_trans[, -1]),
                       nrow = nrow(df_trans[, -1]), ncol = ncol(df_trans[, -1]),
                       dimnames = list(c(as.character(df_trans[, 1])), c(colnames(df_trans[, -1]))))
trans_matrix[is.na(trans_matrix)] <- 0
trans_matrix1 <- new("markovchain", transitionMatrix = trans_matrix)
 


plot(trans_matrix1, edge.arrow.size = 0.35)

Simulating the “Real” Data

set.seed(354)
df2 <- data.frame(client_id = sample(c(1:1000), 5000, replace = TRUE),
                  date = sample(c(1:32), 5000, replace = TRUE),
                  channel = sample(c(0:9), 5000, replace = TRUE,
                                   prob = c(0.1, 0.15, 0.05, 0.07, 0.11, 0.07, 0.13, 0.1, 0.06, 0.16)))
df2$date <- as.Date(df2$date, origin = "2015-01-01")
df2$channel <- paste0('channel_', df2$channel)
 
# aggregating channels to the paths for each customer
df2 <- df2 %>%
        arrange(client_id, date) %>%
        group_by(client_id) %>%
        summarise(path = paste(channel, collapse = ' > '),
                  # assume that all paths were finished with conversion
                  conv = 1,
                  conv_null = 0) %>%
        ungroup()
## Warning: package 'bindrcpp' was built under R version 3.4.3
# calculating the models (Markov and heuristics)
mod2 <- markov_model(df2,
                     var_path = 'path',
                     var_conv = 'conv',
                     var_null = 'conv_null',
                     out_more = TRUE)
 
# heuristic_models() function doesn't work for me, therefore I used the manual calculations
# instead of:
#h_mod2 <- heuristic_models(df2, var_path = 'path', var_conv = 'conv')
 
df_hm <- df2 %>%
        mutate(channel_name_ft = sub('>.*', '', path),
               channel_name_ft = sub(' ', '', channel_name_ft),
               channel_name_lt = sub('.*>', '', path),
               channel_name_lt = sub(' ', '', channel_name_lt))
# first-touch conversions
df_ft <- df_hm %>%
        group_by(channel_name_ft) %>%
        summarise(first_touch_conversions = sum(conv)) %>%
        ungroup()
# last-touch conversions
df_lt <- df_hm %>%
        group_by(channel_name_lt) %>%
        summarise(last_touch_conversions = sum(conv)) %>%
        ungroup()
 
h_mod2 <- merge(df_ft, df_lt, by.x = 'channel_name_ft', by.y = 'channel_name_lt')
 
# merging all models
all_models <- merge(h_mod2, mod2$result, by.x = 'channel_name_ft', by.y = 'channel_name')
colnames(all_models)[c(1, 4)] <- c('channel_name', 'attrib_model_conversions')

all_models
##    channel_name first_touch_conversions last_touch_conversions
## 1     channel_0                      92                     91
## 2     channel_1                     151                    165
## 3     channel_2                      49                     43
## 4     channel_3                      67                     64
## 5     channel_4                     113                    109
## 6     channel_5                      90                     64
## 7     channel_6                     140                    135
## 8     channel_7                      89                     92
## 9     channel_8                      55                     67
## 10    channel_9                     144                    160
##    attrib_model_conversions
## 1                  98.29081
## 2                 137.25817
## 3                  57.99385
## 4                  71.73688
## 5                 111.73998
## 6                  84.52604
## 7                 125.08583
## 8                 100.39449
## 9                  66.97398
## 10                135.99997

Visualizations of Transition Matrix

# transition matrix heatmap for "real" data
df_plot_trans <- mod2$transition_matrix
 
cols <- c("#e7f0fa", "#c9e2f6", "#95cbee", "#0099dc", "#4ab04a", "#ffd73e", "#eec73a",
          "#e29421", "#e29421", "#f05336", "#ce472e")
t <- max(df_plot_trans$transition_probability)
 
ggplot(df_plot_trans, aes(y = channel_from, x = channel_to, fill = transition_probability)) +
        theme_minimal() +
        geom_tile(colour = "white", width = .9, height = .9) +
        scale_fill_gradientn(colours = cols, limits = c(0, t),
                             breaks = seq(0, t, by = t/4),
                             labels = c("0", round(t/4*1, 2), round(t/4*2, 2), round(t/4*3, 2), round(t/4*4, 2)),
                             guide = guide_colourbar(ticks = T, nbin = 50, barheight = .5, label = T, barwidth = 10)) +
        geom_text(aes(label = round(transition_probability, 2)), fontface = "bold", size = 4) +
        theme(legend.position = 'bottom',
              legend.direction = "horizontal",
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
              axis.title.x = element_text(size = 24, face = "bold"),
              axis.title.y = element_text(size = 24, face = "bold"),
              axis.text.y = element_text(size = 8, face = "bold", color = 'black'),
              axis.text.x = element_text(size = 8, angle = 90, hjust = 0.5, vjust = 0.5, face = "plain")) +
        ggtitle("Transition matrix heatmap")

Visualizations of Model Comparison

# models comparison
all_mod_plot <- melt(all_models, id.vars = 'channel_name', variable.name = 'conv_type')
all_mod_plot$value <- round(all_mod_plot$value)
# slope chart
pal <- colorRampPalette(brewer.pal(10, "Set1"))
## Warning in brewer.pal(10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
ggplot(all_mod_plot, aes(x = conv_type, y = value, group = channel_name)) +
        theme_solarized(base_size = 18, base_family = "", light = TRUE) +
        scale_color_manual(values = pal(10)) +
        scale_fill_manual(values = pal(10)) +
        geom_line(aes(color = channel_name), size = 2.5, alpha = 0.8) +
        geom_point(aes(color = channel_name), size = 5) +
        geom_label_repel(aes(label = paste0(channel_name, ': ', value), fill = factor(channel_name)),
                         alpha = 0.7,
                         fontface = 'bold', color = 'white', size = 5,
                         box.padding = unit(0.25, 'lines'), point.padding = unit(0.5, 'lines'),
                         max.iter = 100) +
        theme(legend.position = 'none',
              legend.title = element_text(size = 16, color = 'black'),
              legend.text = element_text(size = 16, vjust = 2, color = 'black'),
              plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
              axis.title.x = element_text(size = 24, face = "bold"),
              axis.title.y = element_text(size = 16, face = "bold"),
              axis.text.x = element_text(size = 16, face = "bold", color = 'black'),
              axis.text.y = element_blank(),
              axis.ticks.x = element_blank(),
              axis.ticks.y = element_blank(),
              panel.border = element_blank(),
              panel.grid.major = element_line(colour = "grey", linetype = "dotted"),
              panel.grid.minor = element_blank(),
              strip.text = element_text(size = 16, hjust = 0.5, vjust = 0.5, face = "bold", color = 'black'),
              strip.background = element_rect(fill = "#f0b35f")) +
        labs(x = 'Model', y = 'Conversions') +
        ggtitle('Models comparison') +
        guides(colour = guide_legend(override.aes = list(size = 4)))