Package Loading

rm(list=ls())
installIfAbsentAndLoad <- function(neededVector) {
for(thispackage in neededVector) {
if( ! require(thispackage, character.only = T) )
{ install.packages(thispackage)}
require(thispackage, character.only = T)}}
needed <- c('tidyverse', 'reshape2', 'ggthemes', 'ggrepel', 'RColorBrewer', 'ChannelAttribution', 'markovchain', 'visNetwork', 'expm', 'stringr', 'purrrlyr', 'GameTheoryAllocation')

installIfAbsentAndLoad(needed)

Data Inspect

campaign_data <- read.csv('campaign_data.csv')
dim(campaign_data)

## [1] 650000      6

str(campaign_data)

## 'data.frame':    650000 obs. of  6 variables:
##  $ cookie          : Factor w/ 240108 levels "00000FkCnDfDDf0iC97iC703B",..: 48777 104167 169977 169977 48083 48083 4671 4671 4671 4671 ...
##  $ time            : Factor w/ 529083 levels "2018-07-01T13:13:16Z",..: 376244 216394 106646 106650 495198 495231 244905 258829 258837 512054 ...
##  $ interaction     : Factor w/ 2 levels "conversion","impression": 2 2 2 2 2 2 2 2 2 2 ...
##  $ conversion      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ conversion_value: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ channel         : Factor w/ 5 levels "Facebook","Instagram",..: 2 1 1 1 5 5 2 2 1 2 ...

summary(campaign_data)

##                        cookie                         time       
##  FBAfi0nkAB7oAiiACfBC7non0:   773   2018-07-14T20:50:09Z:    12  
##  F3h7D7kE7C9Dhk3C9oBiEDC0o:   524   2018-07-01T15:15:17Z:    11  
##  FCDiE9Cii37EFA0D9fFioooiD:   325   2018-07-01T15:18:17Z:    11  
##  9BnAAFfn3Ah3Akof3Ci0DDB7B:   302   2018-07-01T15:14:46Z:     9  
##  k0nk030FAECCDoAkiC7koEk3f:   245   2018-07-01T15:15:15Z:     9  
##  hkFDFB9f3Bhin93E9Cfi70DCh:   243   2018-07-01T15:16:53Z:     9  
##  (Other)                  :647588   (Other)             :649939  
##      interaction       conversion      conversion_value
##  conversion: 19613   Min.   :0.00000   Min.   :0.0000  
##  impression:630387   1st Qu.:0.00000   1st Qu.:0.0000  
##                      Median :0.00000   Median :0.0000  
##                      Mean   :0.03017   Mean   :0.1885  
##                      3rd Qu.:0.00000   3rd Qu.:0.0000  
##                      Max.   :1.00000   Max.   :8.5000  
##                                                        
##            channel      
##  Facebook      :199312  
##  Instagram     : 85281  
##  Online Display: 74677  
##  Online Video  :131768  
##  Paid Search   :158962  
##                         
##

Check missing values

sapply(campaign_data, function(x) sum(is.na(x)))

##           cookie             time      interaction       conversion 
##                0                0                0                0 
## conversion_value          channel 
##                0                0

Check duplicates

sum(duplicated(campaign_data))

## [1] 5006

Data Preprocessing

Remove duplicates

campaign_data <- unique(campaign_data)
dim(campaign_data)

## [1] 644994      6

Transform data

# transform time
campaign_data$time <- lapply(strsplit(as.character(campaign_data$time),'Z|T'), paste, collapse=' ')
campaign_data$time <- strptime(campaign_data$time, "%Y-%m-%d %H:%M:%S")
campaign_data$time <- as.POSIXct(campaign_data$time, "%Y-%m-%d %H:%M:%S", tz=Sys.timezone())

Split paths

df_paths <- campaign_data %>%
  group_by(cookie) %>%
  arrange(time) %>%
  mutate(path_no = ifelse(is.na(lag(cumsum(conversion))), 0, lag(cumsum(conversion))) + 1) %>%
  ungroup()

Modeling

First Touch

# Get all the converted path
conversion_path = df_paths %>%
  group_by(cookie, path_no) %>%
  mutate(new_conversion=sum(conversion)) %>%
  filter(new_conversion==1) %>%
  ungroup()

first_touch = conversion_path %>%
  group_by(cookie, path_no) %>%
  top_n(-1, time) %>%
  mutate(
    order_participation = 1/n()
  ) %>%
  ungroup() %>%
  group_by(channel) %>%
  summarize(total_conversions = sum(order_participation)) %>% collect()

# check
# sum(first_touch$total_conversions)

Last touch

last_touch = conversion_path %>%
  group_by(cookie, path_no) %>%
  top_n(1, time) %>%
  mutate(
    order_participation = 1/n()
  ) %>%
  ungroup() %>%
  group_by(channel) %>%
  summarize(total_conversions = sum(order_participation)) %>% collect()

Even touch

even_touch = conversion_path %>%
  group_by(cookie, path_no) %>%
  mutate(
    order_participation = 1/n()
  ) %>%
  ungroup() %>%
  group_by(channel) %>%
  summarize(total_conversions = sum(order_participation)) %>% collect()

Time Decay

half_life <- 7*24*60*60 # 7 days times 24 hours times 60 minutes time 60 seconds

timeDecay_touch = conversion_path %>%
  group_by(cookie, path_no) %>%
  mutate(
    # channel credit decays by half every seven days
    order_participation = 0.5^(as.double(time - min(time), unit='secs')*1/half_life),
    order_participation = order_participation/sum(order_participation)
  ) %>%
  ungroup() %>%
  group_by(channel) %>%
  summarize(total_conversions = sum(order_participation)) %>% collect()

model_compare <- cbind(first_touch, last_touch$total_conversions, even_touch$total_conversions, timeDecay_touch$total_conversions)
colnames(model_compare) = c('channel', 'first_touch', 'last_touch', 'even_touch', 'time_decay')
model_compare

Sharpley Model

df_seq <- df_paths %>%
  group_by(cookie, path_no) %>%
  summarize(
    fb_touches = max(ifelse(channel == 'Facebook', 1, 0)),
    ins_touches = max(ifelse(channel == 'Instagram', 1, 0)),
    vid_touches = max(ifelse(channel == 'Online Video', 1, 0)),
    dis_touches = max(ifelse(channel == 'Online Display', 1, 0)),
    sear_touches = max(ifelse(channel == 'Paid Search', 1, 0)),
    conversions = sum(conversion)
  ) %>% ungroup()

conv_rates = df_seq %>%
 group_by(fb_touches,
   ins_touches,
   vid_touches,
   dis_touches,
   sear_touches) %>%
 summarize(
   conversions = sum(conversions),
   total_sequences = n()
 ) %>% collect()

number_of_channels = 5

# The coalitions function is a handy function from the GameTheoryALlocation
# library that creates a binary matrix to which can fit characteristic function 
touch_combos = as.data.frame(coalitions(number_of_channels)$Binary)
names(touch_combos) = c("Facebook","Instagram","Online Display", "Online Video", "Paid Search")

# join my previous summary results with the binary matrix the GameTheoryAllocation library built for me.
touch_combo_conv_rate = left_join(touch_combos, conv_rates, 
 by = c(
 "Facebook"="fb_touches",
 "Instagram" = "ins_touches",
 "Online Display" = "dis_touches",
 "Online Video" = "vid_touches",
 "Paid Search" = "sear_touches"
 )
)

# fill in any NAs with 0
touch_combo_conv_rate = touch_combo_conv_rate %>%
 mutate_all(funs(ifelse(is.na(.),0,.))) %>%
 mutate(
 conv_rate = ifelse(total_sequences > 0, conversions/total_sequences, 0)
 )

# Building Shapley Values for each channel combination

shap_vals = as.data.frame(coalitions(number_of_channels)$Binary)
names(shap_vals) = c("Facebook","Instagram","Online Display", "Online Video",
  "Paid Search")
coalition_mat = shap_vals
shap_vals[2^number_of_channels,] = Shapley_value(touch_combo_conv_rate$conv_rate, game="profit")

## [1] "Shapley Value"

for(i in 2:(2^number_of_channels-1)){
  if(sum(coalition_mat[i,]) == 1){
    shap_vals[i,which(shap_vals[i,]==1)] = touch_combo_conv_rate[i,"conv_rate"]
  }else if(sum(coalition_mat[i,]) > 1){
    if(sum(coalition_mat[i,]) < number_of_channels){
      channels_of_interest = which(coalition_mat[i,] == 1)
      char_func = data.frame(rates = touch_combo_conv_rate[1,"conv_rate"])
      for(j in 2:i){
        if(sum(coalition_mat[j,channels_of_interest])>0 & 
            sum(coalition_mat[j,-channels_of_interest])==0)
          char_func = rbind(char_func,touch_combo_conv_rate[j,"conv_rate"])
      }
      shap_vals[i,channels_of_interest] = 
        Shapley_value(char_func$rates, game="profit")
    }
  }
}

## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"
## [1] "Shapley Value"

# Apply Shapley Values as attribution weighting
order_distribution = shap_vals * touch_combo_conv_rate$total_sequences
shapley_value = t(t(round(colSums(order_distribution))))
shapley_value = data.frame(mid_campaign = row.names(shapley_value), 
  total_conversions = as.numeric(shapley_value))

model_compare$shapley_value = shapley_value$total_conversions
model_compare

Markov Chain

# attribution model for splitted multi and unique channel paths
df_all_paths_compl <- df_paths %>%
  group_by(cookie, path_no) %>%
  summarise(path = paste(channel, collapse = ' > '),
            conversion = sum(conversion)) %>%
  ungroup() %>%
  mutate(null_conversion = ifelse(conversion == 1, 0, 1))

mod_attrib_complete <- markov_model(
  df_all_paths_compl,
  var_path = 'path',
  var_conv = 'conversion',
  var_null = 'null_conversion',
  out_more = TRUE
)

markov_chain <- mod_attrib_complete$result
colnames(markov_chain) <- c('channel', 'markov_chain')

model_compare = merge(model_compare, markov_chain, by = 'channel')

mod_attrib_complete$removal_effects

Markov Chain Result Visualization

g_channel_performance <- ggplot(model_compare, aes(x = channel, y = markov_chain, fill = channel)) + 
  geom_bar(stat = "identity", width = 0.6) +
  ylim(0, 7000) +
  scale_fill_manual(values = c("#CE2D4F",
                               "#A14DA0",
                               "#9D79BC",
                               "#7F96FF",
                               "#A9CEF4")) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 9, angle = 30, hjust = 0.6, face = "bold")) +
  theme(panel.grid.major.x = element_blank()) +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_text(aes(label = round(markov_chain, 0)), fontface = "bold", size = 4, vjust = -1) + 
  labs(x = "", y = "Conversions") +
  ggtitle("Channel Performance") +
  guides(fill=FALSE)

g_channel_performance

Model Comparison

df_compare = melt(model_compare, id='channel')

g_model_comparison <- ggplot(df_compare, aes(x = channel, y = value, fill = variable)) + 
  geom_bar(stat = "identity", width = 0.6, position = position_dodge(width = 0.7)) +
  scale_fill_manual(labels = c("First Touch", "Last Touch", "Linear", 'Time Decay', 'Sharpley', 'Markov'), 
                    values = c("#e65368",
                               "#4e74ff",
                               "#87BFFF",
                               "#3BCEAC",
                               "#CE2D4F",
                               "#A14DA0")) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 9, angle = 30, hjust = 0.6, face = "bold")) +
  theme(panel.grid.major.x = element_blank()) +
  labs(x = "", y = "Budget $") +
  ggtitle("Markov vs Heuristics") +
  theme(plot.title = element_text(hjust = 0.5))

g_model_comparison

Economic Analysis

campaign_budget_daily = read.csv("budget_sample_daily.csv")

campaign_budget_total = campaign_budget_daily %>%
  group_by(channel) %>%
  summarise(total_cost = round(sum(cost), 1))

campaign_attribution = merge(model_compare, campaign_budget_total, 
                             by.x = "channel")

Calculate ROAS and CPA

campaign_attribution = 
  campaign_attribution %>%
  mutate(chanel_weight = (shapley_value / sum(shapley_value)),
         cost_weight = (total_cost / sum(total_cost)),
         roas = chanel_weight / cost_weight,
         optimal_budget = total_cost * roas,
         CPA = total_cost / shapley_value)

Budget Allocation

df_g2 = campaign_attribution[, c("channel", "total_cost", "optimal_budget")]
df_g2 = melt(df_g2, id = "channel")

# Create double bar chart
g_budget_allocation <- ggplot(df_g2, aes(x = channel, y = value, fill = variable)) + 
  geom_bar(stat = "identity", width = 0.6, position = position_dodge(width = 0.7)) +
  scale_fill_manual(labels = c("Current Budget", "Optimal Budget"), values = c("#FFD166", "#04A777")) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 10, angle = 30, hjust = 0.6, face = "bold")) +
  theme(panel.grid.major.x = element_blank()) +
  geom_text(aes(label = round(value, 0)), 
            fontface = "bold", size = 3.5, 
            vjust = -0.5, position = position_dodge(width = 0.75)) +
  labs(x = "", y = "Budget $") +
  ggtitle("Budget Allocation") +
  theme(plot.title = element_text(hjust = 0.5))

g_budget_allocation

Customer Journey

trans_matrix_prob <- mod_attrib_complete$transition_matrix %>%
  dmap_at(c(1, 2), as.character)

edges <-
  data.frame(
    from = trans_matrix_prob$channel_from,
    to = trans_matrix_prob$channel_to,
    label = round(trans_matrix_prob$transition_probability, 2),
    font.size = trans_matrix_prob$transition_probability * 100,
    width = trans_matrix_prob$transition_probability * 15,
    shadow = TRUE,
    arrows = "to",
    color = list(color = "#95cbee", highlight = "red")
  )

nodes <- data_frame(id = c( c(trans_matrix_prob$channel_from), c(trans_matrix_prob$channel_to) )) %>%
  distinct(id) %>%
  arrange(id) %>%
  mutate(
    label = id,
    color = ifelse(
      label %in% c('(start)', '(conversion)'),
      '#4ab04a',
      ifelse(label == '(null)', '#ce472e', '#ffd73e')
    ),
    shadow = TRUE,
    shape = "box"
  )

## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.

visNetwork(nodes,
           edges,
           height = "2000px",
           width = "100%",
           main = "Generic Probabilistic model's Transition Matrix") %>%
  visIgraphLayout(randomSeed = 123) %>%
  visNodes(size = 5) %>%
  visOptions(highlightNearest = TRUE)

##### modeling states and conversions #####
# transition matrix preprocessing
df_dummy <- data.frame(channel_from = c('(start)', '(conversion)', '(null)'),
                       channel_to = c('(start)', '(conversion)', '(null)'),
                       n = c(0, 0, 0),
                       tot_n = c(0, 0, 0),
                       perc = c(0, 1, 1))

trans_matrix_complete <- mod_attrib_complete$transition_matrix
trans_matrix_complete <- rbind(trans_matrix_complete, df_dummy %>%
                                 mutate(transition_probability = perc) %>%
                                 select(channel_from, channel_to, transition_probability))
trans_matrix_complete$channel_to <- factor(trans_matrix_complete$channel_to, levels = c(levels(trans_matrix_complete$channel_from)))
trans_matrix_complete <- dcast(trans_matrix_complete, channel_from ~ channel_to, value.var = 'transition_probability')
trans_matrix_complete[is.na(trans_matrix_complete)] <- 0
rownames(trans_matrix_complete) <- trans_matrix_complete$channel_from
trans_matrix_complete <- as.matrix(trans_matrix_complete[, -1])

# transition matrix heatmap for "real" data
df_plot_trans <- mod_attrib_complete$transition_matrix

cols <- c("#e7f0fa", "#c9e2f6", "#95cbee", "#0099dc", "#4ab04a", "#ffd73e", "#eec73a",
          "#e29421", "#e29421", "#f05336", "#ce472e")
t <- max(df_plot_trans$transition_probability)

ggplot(df_plot_trans, aes(y = channel_from, x = channel_to, fill = transition_probability)) +
  theme_minimal() +
  geom_tile(colour = "white", width = .9, height = .9) +
  scale_fill_gradientn(colours = cols, limits = c(0, t),
                       breaks = seq(0, t, by = t/4),
                       labels = c("0", round(t/4*1, 2), round(t/4*2, 2), round(t/4*3, 2), round(t/4*4, 2)),
                       guide = guide_colourbar(ticks = T, nbin = 50, barheight = .5, label = T, barwidth = 10)) +
  geom_text(aes(label = round(transition_probability, 2)), fontface = "bold", size = 4) +
  theme(legend.position = 'bottom',
        legend.direction = "horizontal",
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
        axis.title.x = element_text(size = 10, face = "bold"),
        axis.title.y = element_text(size = 10, face = "bold"),
        axis.text.y = element_text(size = 8, face = "bold", color = 'black'),
        axis.text.x = element_text(size = 8, angle = 90, hjust = 0.5, vjust = 0.5, face = "plain")) +
  ggtitle("Transition matrix heatmap")

Customer Journey Duration

# computing time lapses from the first contact to conversion/last contact
df_journey <- df_paths %>%
        group_by(cookie, path_no) %>%
        summarise(path = paste(channel, collapse = ' > '),
                  first_touch_date = min(time),
                  last_touch_date = max(time),
                  tot_time_lapse = round(as.numeric(last_touch_date - first_touch_date)),
                  conversion = sum(conversion)) %>%
        ungroup()

# distribution plot
ggplot(df_journey %>% filter(conversion == 1), aes(x = tot_time_lapse)) + theme_minimal() + geom_histogram(fill = '#4e79a6', binwidth = 1)

# cumulative distribution plot
ggplot(df_journey %>% filter(conversion == 1), aes(x = tot_time_lapse)) +
        theme_minimal() +
        stat_ecdf(geom = 'step', color = '#4e79a7', size = 2, alpha = 0.7) +
        geom_hline(yintercept = 0.95, color = '#e15759', size = 1.5) +
        geom_vline(xintercept = 23, color = '#e15759', size = 1.5, linetype = 2)

### for generic probabilistic model ###
df_journey_2 <- melt(df_journey[c(50:100), ] %>% select(cookie, first_touch_date, last_touch_date, conversion),
                    id.vars = c('cookie', 'conversion'),
                    value.name = 'touch_date') %>%
        arrange(cookie)
 
ggplot(df_journey_2, aes(x = cookie, y = touch_date, color = factor(conversion), group = cookie)) +
        theme_minimal() +
        coord_flip() +
        geom_point(size = 2) +
        geom_line(size = 0.5, color = 'darkgrey') +
        theme(legend.position = 'bottom',
              panel.border = element_blank(),
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              axis.ticks.x = element_blank(),
              axis.ticks.y = element_blank()) +
        guides(colour = guide_legend(override.aes = list(size = 5)))

Multi-Touch Attribution Model

Yuwei Yao

Package Loading

Data Inspect

Data Preprocessing

Modeling

First Touch

Last touch

Even touch

Time Decay

Sharpley Model

Markov Chain

Markov Chain Result Visualization

Model Comparison

Economic Analysis

Calculate ROAS and CPA

Budget Allocation

Customer Journey

Customer Journey Duration