Dataset
The dataset comes from a Medium article. It tracks customer journeys from July 1, 2018, to July 31, 2018, recording in detail when each ad was shown, which customer it reached, and whether that customer converted.
For example, for the first cookie, its journey includes four states: Instagram > Online Display > Online Display > Online Display. Unfortunately, the user did not convert during the observed period, and thus both conversion and conversion value are 0.
# Load the raw touchpoint log (one row per ad interaction).
dt <- fread("attribution data.csv")

# Preview the first rows as a striped, centered table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
head(dt) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
cookie
|
time
|
interaction
|
conversion
|
conversion_value
|
channel
|
|
00000FkCnDfDDf0iC97iC703B
|
2018-07-03T13:02:11Z
|
impression
|
0
|
0
|
Instagram
|
|
00000FkCnDfDDf0iC97iC703B
|
2018-07-17T19:15:07Z
|
impression
|
0
|
0
|
Online Display
|
|
00000FkCnDfDDf0iC97iC703B
|
2018-07-24T15:51:46Z
|
impression
|
0
|
0
|
Online Display
|
|
00000FkCnDfDDf0iC97iC703B
|
2018-07-29T07:44:51Z
|
impression
|
0
|
0
|
Online Display
|
|
0000nACkD9nFkBBDECD3ki00E
|
2018-07-03T09:44:57Z
|
impression
|
0
|
0
|
Paid Search
|
|
0000nACkD9nFkBBDECD3ki00E
|
2018-07-03T23:36:49Z
|
impression
|
0
|
0
|
Paid Search
|
- cookie: used for identifying unique customers
- time: timestamp of the interaction (ISO 8601, UTC), e.g. 2018-07-03T13:02:11Z
- interaction: “impression” or “conversion”.
- conversion: 0 or 1
- conversion_value: the value a customer brings in upon conversion (0 if there is no conversion)
- channel: including “Facebook”, “Instagram”, “Online Display”, “Online Video”, and “Paid Search”
Preprocessing
In order to apply the Markov Chain model, we have to transform our data and create a path variable like our previous example. Here I used data.table to deal with our data because it can process data much more quickly than data.frame.
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
# Parse the timestamps as a whole-column replacement so `time` becomes a
# date-time column.
dt[, time := ymd_hms(time)]

# Physically sort the rows by customer and time. Putting `order()` in `i`
# together with `:=` only sub-assigns to the selected rows; it does not reorder
# `dt`, so the touchpoint index below would otherwise follow file order rather
# than chronological order.
setorder(dt, cookie, time)

# Position of each touchpoint within its customer's journey (1, 2, 3, ...).
dt[, id := seq_len(.N), by = cookie]

# One row per cookie, one column per touchpoint position; journeys shorter than
# the longest one are padded with NA.
dt_wide <- dcast(data = dt, formula = cookie ~ id, value.var = "channel")

# Join all touchpoint columns (everything except `cookie`) into one string.
# paste() renders the NA padding as the literal text "NA" ...
dt_wide[, path := do.call(paste, c(.SD, sep = " > ")), .SDcols = -1]
# ... so keep only the part of the path before the first " > NA".
dt_wide[, path := word(path, 1, sep = " > NA")]

# Per-customer totals: number of conversions and total conversion value.
conversion <- dt[
  ,
  .(
    conversion = sum(conversion),
    conversion_value = sum(conversion_value)
  ),
  by = cookie
]

# Key both tables on `cookie` and attach the totals to the paths.
setkey(conversion, cookie)
setkey(dt_wide, cookie)
dt_wide <- merge(dt_wide, conversion)
# Preview the model inputs: journey path plus conversion count and value.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
head(dt_wide[, .(path, conversion, conversion_value)]) %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
path
|
conversion
|
conversion_value
|
|
Instagram > Online Display > Online Display > Online Display
|
0
|
0
|
|
Paid Search > Paid Search > Paid Search > Paid Search > Paid Search > Paid Search
|
0
|
0
|
|
Paid Search > Paid Search > Paid Search > Paid Search > Paid Search
|
0
|
0
|
|
Instagram
|
0
|
0
|
|
Paid Search
|
0
|
0
|
|
Instagram > Facebook > Facebook > Instagram
|
0
|
0
|
Markov Chain Modeling
R has a great package designed for channel attribution, called ChannelAttribution. It can build models based on both heuristic and Markovian approaches. To evaluate the Markov chain results, I also run the heuristic models and use them as baselines.
# Fit the attribution models on the per-customer paths.

# Heuristic models, used as the comparison baseline.
H <- heuristic_models(
  Data = dt_wide,
  var_path = "path",
  var_conv = "conversion",
  var_value = "conversion_value",
  sep = ">"
)

# First-order Markov chain model; out_more = TRUE is requested because the
# transition matrix and removal effects are displayed further down.
M <- markov_model(
  Data = dt_wide,
  var_path = "path",
  var_conv = "conversion",
  var_value = "conversion_value",
  order = 1,
  sep = ">",
  out_more = TRUE
)
## Warning in markov_model(Data = dt_wide, var_path = "path", var_conv =
## "conversion", : This function is deprecated and it could be removed from
## future versions. Use markov_model_mp instead. Disable this warning setting
## vebose=FALSE.
# Show the Markov model's attributed conversions and conversion value per
# channel. `FALSE` is spelled out because `F` can be reassigned.
M$result %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
channel_name
|
total_conversions
|
total_conversion_value
|
|
Instagram
|
3490.969
|
21831.79
|
|
Online Display
|
2028.840
|
12634.91
|
|
Paid Search
|
3997.732
|
24937.06
|
|
Facebook
|
5270.212
|
32948.91
|
|
Online Video
|
2851.247
|
17878.33
|
# Show the heuristic (first-touch, last-touch, linear-touch) results per
# channel. `FALSE` is spelled out because `F` can be reassigned.
H %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
channel_name
|
first_touch_conversions
|
first_touch_value
|
last_touch_conversions
|
last_touch_value
|
linear_touch_conversions
|
linear_touch_value
|
|
Instagram
|
2329
|
14579.5
|
2244
|
14039.5
|
2265.179
|
14171.72
|
|
Online Display
|
2160
|
13419.0
|
2139
|
13298.5
|
2124.315
|
13205.19
|
|
Paid Search
|
4757
|
29724.0
|
4547
|
28331.5
|
4681.199
|
29194.46
|
|
Facebook
|
5177
|
32283.0
|
5301
|
33143.5
|
5218.903
|
32614.30
|
|
Online Video
|
3216
|
20225.5
|
3408
|
21418.0
|
3349.403
|
21045.33
|
# Show the estimated transition probabilities between states.
# `FALSE` is spelled out because `F` can be reassigned.
M$transition_matrix %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
channel_from
|
channel_to
|
transition_probability
|
|
(start)
|
Paid Search
|
0.2696865
|
|
(start)
|
Facebook
|
0.2934974
|
|
(start)
|
Online Video
|
0.1823233
|
|
(start)
|
Instagram
|
0.1320370
|
|
(start)
|
Online Display
|
0.1224559
|
|
Instagram
|
Online Video
|
0.0342033
|
|
Instagram
|
Facebook
|
0.5980769
|
|
Instagram
|
(conversion)
|
0.3082418
|
|
Instagram
|
Paid Search
|
0.0379121
|
|
Instagram
|
Online Display
|
0.0215659
|
|
Online Display
|
(conversion)
|
0.6551302
|
|
Online Display
|
Instagram
|
0.0462481
|
|
Online Display
|
Paid Search
|
0.1601838
|
|
Online Display
|
Online Video
|
0.0398162
|
|
Online Display
|
Facebook
|
0.0986217
|
|
Paid Search
|
(conversion)
|
0.6954726
|
|
Paid Search
|
Online Video
|
0.0620985
|
|
Paid Search
|
Online Display
|
0.0819823
|
|
Paid Search
|
Instagram
|
0.0455797
|
|
Paid Search
|
Facebook
|
0.1148669
|
|
Facebook
|
(conversion)
|
0.4766229
|
|
Facebook
|
Instagram
|
0.3845531
|
|
Facebook
|
Online Video
|
0.0524186
|
|
Facebook
|
Online Display
|
0.0272433
|
|
Facebook
|
Paid Search
|
0.0591620
|
|
Online Video
|
Instagram
|
0.0490838
|
|
Online Video
|
(conversion)
|
0.7434555
|
|
Online Video
|
Paid Search
|
0.0706806
|
|
Online Video
|
Online Display
|
0.0237784
|
|
Online Video
|
Facebook
|
0.1130017
|
# Show the estimated transition probabilities between states.
# NOTE(review): this renders the same `M$transition_matrix` table a second
# time; confirm whether a different view (e.g. a plot) was intended here.
# `FALSE` is spelled out because `F` can be reassigned.
M$transition_matrix %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
channel_from
|
channel_to
|
transition_probability
|
|
(start)
|
Paid Search
|
0.2696865
|
|
(start)
|
Facebook
|
0.2934974
|
|
(start)
|
Online Video
|
0.1823233
|
|
(start)
|
Instagram
|
0.1320370
|
|
(start)
|
Online Display
|
0.1224559
|
|
Instagram
|
Online Video
|
0.0342033
|
|
Instagram
|
Facebook
|
0.5980769
|
|
Instagram
|
(conversion)
|
0.3082418
|
|
Instagram
|
Paid Search
|
0.0379121
|
|
Instagram
|
Online Display
|
0.0215659
|
|
Online Display
|
(conversion)
|
0.6551302
|
|
Online Display
|
Instagram
|
0.0462481
|
|
Online Display
|
Paid Search
|
0.1601838
|
|
Online Display
|
Online Video
|
0.0398162
|
|
Online Display
|
Facebook
|
0.0986217
|
|
Paid Search
|
(conversion)
|
0.6954726
|
|
Paid Search
|
Online Video
|
0.0620985
|
|
Paid Search
|
Online Display
|
0.0819823
|
|
Paid Search
|
Instagram
|
0.0455797
|
|
Paid Search
|
Facebook
|
0.1148669
|
|
Facebook
|
(conversion)
|
0.4766229
|
|
Facebook
|
Instagram
|
0.3845531
|
|
Facebook
|
Online Video
|
0.0524186
|
|
Facebook
|
Online Display
|
0.0272433
|
|
Facebook
|
Paid Search
|
0.0591620
|
|
Online Video
|
Instagram
|
0.0490838
|
|
Online Video
|
(conversion)
|
0.7434555
|
|
Online Video
|
Paid Search
|
0.0706806
|
|
Online Video
|
Online Display
|
0.0237784
|
|
Online Video
|
Facebook
|
0.1130017
|
# Show each channel's removal effect for conversions and conversion value.
# `FALSE` is spelled out because `F` can be reassigned.
M$removal_effects %>%
  kable() %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "center"
  )
|
channel_name
|
removal_effects_conversion
|
removal_effects_conversion_value
|
|
Instagram
|
0.310793
|
0.3110501
|
|
Online Display
|
0.180623
|
0.1800167
|
|
Paid Search
|
0.355909
|
0.3552926
|
|
Facebook
|
0.469195
|
0.4694419
|
|
Online Video
|
0.253840
|
0.2547228
|