library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.1 ✔ dplyr 0.7.4
## ✔ tidyr 0.7.2 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.4.4
library(ggrepel)
library(RColorBrewer)
library(ChannelAttribution)
library(markovchain)
## Package: markovchain
## Version: 0.6.9.8-1
## Date: 2017-08-15
## BugReport: http://github.com/spedygiorgio/markovchain/issues
library(visNetwork)
library(expm)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'expm'
## The following object is masked from 'package:Matrix':
##
## expm
library(stringr)
library(readr)
library(purrrlyr)
CustInteractEnqFull <- read.table("journey data--Customer Transactions and Interactions/d2_cust_intrctn_enq.csv",header = TRUE,sep="\t",stringsAsFactors = TRUE,colClasses=c("rec_creat_dttm"="POSIXct","intrctn_end_dttm"="POSIXct","intrctn_begin_dttm"="POSIXct","prev_intrctn_dttm"="POSIXct","intrctn_dttm"="POSIXct"))
head(CustInteractEnqFull)
## intrctn_enq_id intrctn_id intrctn_dttm intrctn_ln_item_seq_num
## 1 1.98802e+17 1.98802e+17 1988-02-04 21:39:00 1
## 2 1.98802e+17 1.98802e+17 1988-02-04 21:42:42 1
## 3 1.98802e+17 1.98802e+17 1988-02-04 21:43:40 1
## 4 1.98802e+17 1.98802e+17 1988-02-04 21:47:54 1
## 5 1.98802e+17 1.98802e+17 1988-02-04 21:50:06 1
## 6 1.98802e+17 1.98802e+17 1988-02-04 21:57:08 1
## prev_intrctn_id prev_intrctn_dttm promo_ofr_id promo_ofr_ref_int_id
## 1 1.98802e+17 1988-02-04 21:23:00 ID1618150004 1618150004
## 2 1.98802e+17 1988-02-04 21:19:00 ID1618150003 1618150003
## 3 1.98802e+17 1988-02-04 21:11:00 ID1618150002 1618150002
## 4 1.98802e+17 1988-02-04 21:31:00 ID1618150005 1618150005
## 5 1.98802e+17 1988-02-04 21:41:00 ID1618150002 1618150002
## 6 1.98802e+17 1988-02-04 22:01:00 ID1618150002 1618150002
## promo_initv_id promo_initv_ref_int_id intrctn_type intrctn_dirctn
## 1 ID914920004 914920004 near_to_store INBOUND
## 2 ID914920003 914920003 near_to_store INBOUND
## 3 ID914920002 914920002 checkin INBOUND
## 4 ID914920005 914920005 checkin INBOUND
## 5 ID914920002 914920002 near_to_store INBOUND
## 6 ID914920002 914920002 checkin INBOUND
## intrctn_ctgry intrctn_sub_ctgry intrctn_ctgry_id intrctn_reason
## 1 ENQUIRY ACCESS INTCATID_001 AWARENESS
## 2 ENQUIRY ACCESS INTCATID_001 AWARENESS
## 3 ENQUIRY ACCESS INTCATID_001 RESEARCH
## 4 ENQUIRY ACCESS INTCATID_001 RESEARCH
## 5 ENQUIRY ACCESS INTCATID_001 AWARENESS
## 6 ENQUIRY ACCESS INTCATID_001 RESEARCH
## intrctn_begin_dttm intrctn_end_dttm terminal_nm act_type_id
## 1 1988-02-04 21:39:00 1988-02-04 21:39:00 COUNTER_NO_11 12090001
## 2 1988-02-04 21:42:42 1988-02-04 21:42:42 COUNTER_NO_12 12090001
## 3 1988-02-04 21:43:40 1988-02-04 21:43:40 COUNTER_NO_14 12090001
## 4 1988-02-04 21:47:54 1988-02-04 21:47:54 COUNTER_NO_4 12090001
## 5 1988-02-04 21:50:06 1988-02-04 21:50:06 COUNTER_NO_1 12090001
## 6 1988-02-04 21:57:08 1988-02-04 21:57:08 COUNTER_NO_10 12090001
## act_type_cd act_type act_id emp_id emp_nm chnl_id chnl_cd
## 1 CD12090001 INTERACTION 1.98802e+16 50004 CD50004 380013 CD380013
## 2 CD12090001 INTERACTION 1.98802e+16 50003 CD50003 380013 CD380013
## 3 CD12090001 INTERACTION 1.98802e+16 50003 CD50003 380013 CD380013
## 4 CD12090001 INTERACTION 1.98802e+16 50004 CD50004 380013 CD380013
## 5 CD12090001 INTERACTION 1.98802e+16 50004 CD50004 380013 CD380013
## 6 CD12090001 INTERACTION 1.98802e+16 50004 CD50004 380013 CD380013
## chnl_nm tch_pnt_id tch_pnt_cd tch_pnt_nm cust_id cust_ref_int_id
## 1 Geo-Spatial 200004 CD200004 In-Store CUST100003 100003
## 2 Geo-Spatial 200004 CD200004 In-Store CUST100002 100002
## 3 Geo-Spatial 200004 CD200004 In-Store CUST100001 100001
## 4 Geo-Spatial 200004 CD200004 In-Store CUST100004 100004
## 5 Geo-Spatial 200004 CD200004 In-Store CUST100005 100005
## 6 Geo-Spatial 200004 CD200004 In-Store CUST100007 100007
## cust_nm bsn_unit_id bsn_unit_cd bsn_unit_nm locn_id locn_cd
## 1 Rowan Morris 100031 BSN_100031 AREA00031 100003 LCN_100003
## 2 Zane Harrison 100004 BSN_100004 AREA00004 100010 LCN_100010
## 3 Victoria Santana 100043 BSN_100043 AREA00043 100011 LCN_100011
## 4 Jakob Nichols 100023 BSN_100023 AREA00023 100003 LCN_100003
## 5 Selena Whitehead 100039 BSN_100039 AREA00039 100010 LCN_100010
## 6 Maurice Moon 100020 BSN_100020 AREA00020 100011 LCN_100011
## locn_nm item_id item_ref_int_id item_nm mrhrc_hier_grp_id
## 1 N_LCN_A00003 CD5130001 5130001 NAN 300001
## 2 N_LCN_A00010 CD5130001 5130001 NAN 300001
## 3 N_LCN_A00011 CD5130001 5130001 NAN 300001
## 4 N_LCN_A00003 CD5130001 5130001 NAN 300001
## 5 N_LCN_A00010 CD5130001 5130001 NAN 300001
## 6 N_LCN_A00011 CD5130001 5130001 NAN 300001
## mrhrc_hier_grp_nm st_id st_cd ofr_intrctn_type intrctn_event
## 1 MRHRC L3 Group 1 36 S SUSPECT InStore_NearToInStore
## 2 MRHRC L3 Group 1 36 S SUSPECT InStore_NearToInStore
## 3 MRHRC L3 Group 1 36 S SUSPECT InStore_CheckIn
## 4 MRHRC L3 Group 1 36 S SUSPECT InStore_CheckIn
## 5 MRHRC L3 Group 1 36 S SUSPECT InStore_NearToInStore
## 6 MRHRC L3 Group 1 36 S SUSPECT InStore_CheckIn
## item_intrctn_type intrctn_desc batch_run_id job_run_id
## 1 AWARENESS Probable customer activity NA NA
## 2 AWARENESS Probable customer activity NA NA
## 3 RESEARCH Walked in to store NA NA
## 4 RESEARCH Walked in to store NA NA
## 5 AWARENESS Probable customer activity NA NA
## 6 RESEARCH Walked in to store NA NA
## src_sys_id src_sys_nm tenant_id rec_creat_by rec_creat_dttm
## 1 SRC1 NA 100001 NA 1988-02-04 21:39:00
## 2 SRC1 NA 100001 NA 1988-02-04 21:42:42
## 3 SRC1 NA 100001 NA 1988-02-04 21:43:40
## 4 SRC1 NA 100001 NA 1988-02-04 21:47:54
## 5 SRC1 NA 100001 NA 1988-02-04 21:50:06
## 6 SRC1 NA 100001 NA 1988-02-04 21:57:08
str(CustInteractEnqFull)
## 'data.frame': 5343 obs. of 58 variables:
## $ intrctn_enq_id : num 1.99e+17 1.99e+17 1.99e+17 1.99e+17 1.99e+17 ...
## $ intrctn_id : num 1.99e+17 1.99e+17 1.99e+17 1.99e+17 1.99e+17 ...
## $ intrctn_dttm : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
## $ intrctn_ln_item_seq_num: int 1 1 1 1 1 1 1 1 1 2 ...
## $ prev_intrctn_id : num 1.99e+17 1.99e+17 1.99e+17 1.99e+17 1.99e+17 ...
## $ prev_intrctn_dttm : POSIXct, format: "1988-02-04 21:23:00" "1988-02-04 21:19:00" ...
## $ promo_ofr_id : Factor w/ 6 levels "ID1618150001",..: 4 3 2 5 2 2 5 5 4 5 ...
## $ promo_ofr_ref_int_id : int 1618150004 1618150003 1618150002 1618150005 1618150002 1618150002 1618150005 1618150005 1618150004 1618150005 ...
## $ promo_initv_id : Factor w/ 6 levels "ID914920001",..: 4 3 2 5 2 2 5 5 4 5 ...
## $ promo_initv_ref_int_id : int 914920004 914920003 914920002 914920005 914920002 914920002 914920005 914920005 914920004 914920005 ...
## $ intrctn_type : Factor w/ 34 levels "add_items_into_cart",..: 21 21 5 5 21 5 5 5 5 15 ...
## $ intrctn_dirctn : Factor w/ 1 level "INBOUND": 1 1 1 1 1 1 1 1 1 1 ...
## $ intrctn_ctgry : Factor w/ 1 level "ENQUIRY": 1 1 1 1 1 1 1 1 1 1 ...
## $ intrctn_sub_ctgry : Factor w/ 10 levels "ACCESS","ACTION",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ intrctn_ctgry_id : Factor w/ 10 levels "INTCATID_001",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ intrctn_reason : Factor w/ 4 levels "AWARENESS","ENGAGE",..: 1 1 4 4 1 4 4 4 4 4 ...
## $ intrctn_begin_dttm : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
## $ intrctn_end_dttm : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
## $ terminal_nm : Factor w/ 14 levels "COUNTER_NO_1",..: 3 4 6 9 1 2 13 7 3 4 ...
## $ act_type_id : int 12090001 12090001 12090001 12090001 12090001 12090001 12090001 12090001 12090001 12090001 ...
## $ act_type_cd : Factor w/ 1 level "CD12090001": 1 1 1 1 1 1 1 1 1 1 ...
## $ act_type : Factor w/ 1 level "INTERACTION": 1 1 1 1 1 1 1 1 1 1 ...
## $ act_id : num 1.99e+16 1.99e+16 1.99e+16 1.99e+16 1.99e+16 ...
## $ emp_id : int 50004 50003 50003 50004 50004 50004 50004 50002 50002 50003 ...
## $ emp_nm : Factor w/ 4 levels "CD50001","CD50002",..: 4 3 3 4 4 4 4 2 2 3 ...
## $ chnl_id : int 380013 380013 380013 380013 380013 380013 380013 380013 380013 380013 ...
## $ chnl_cd : Factor w/ 7 levels "CD380002","CD380003",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ chnl_nm : Factor w/ 7 levels "Geo-Spatial",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ tch_pnt_id : int 200004 200004 200004 200004 200004 200004 200004 200004 200004 200004 ...
## $ tch_pnt_cd : Factor w/ 4 levels "CD200001","CD200002",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ tch_pnt_nm : Factor w/ 4 levels "App","In-Store",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ cust_id : Factor w/ 947 levels "CUST100001","CUST100002",..: 3 2 1 4 5 7 6 8 11 2 ...
## $ cust_ref_int_id : int 100003 100002 100001 100004 100005 100007 100006 100008 100011 100002 ...
## $ cust_nm : Factor w/ 919 levels "Aaliyah Copeland",..: 796 913 900 429 827 680 227 858 616 913 ...
## $ bsn_unit_id : int 100031 100004 100043 100023 100039 100020 100007 100004 100034 100026 ...
## $ bsn_unit_cd : Factor w/ 98 levels "BSN_100001","BSN_100002",..: 31 4 43 23 39 20 7 4 34 26 ...
## $ bsn_unit_nm : Factor w/ 98 levels "AREA00001","AREA00002",..: 31 4 43 23 39 20 7 4 34 26 ...
## $ locn_id : int 100003 100010 100011 100003 100010 100011 100001 100009 100005 100005 ...
## $ locn_cd : Factor w/ 28 levels "LCN_100001","LCN_100002",..: 3 10 11 3 10 11 1 9 5 5 ...
## $ locn_nm : Factor w/ 28 levels "N_LCN_A00001",..: 3 10 11 3 10 11 1 9 5 5 ...
## $ item_id : Factor w/ 444 levels "CD5130001","CD5130002",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ item_ref_int_id : int 5130001 5130001 5130001 5130001 5130001 5130001 5130001 5130001 5130001 5130001 ...
## $ item_nm : Factor w/ 212 levels "1 Ton A/C","1.2 Ton A/C",..: 142 142 142 142 142 142 142 142 142 142 ...
## $ mrhrc_hier_grp_id : int 300001 300001 300001 300001 300001 300001 300001 300001 300001 300001 ...
## $ mrhrc_hier_grp_nm : Factor w/ 3 levels "MRHRC L2 Group 3",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ st_id : int 36 36 36 36 36 36 36 36 36 36 ...
## $ st_cd : Factor w/ 1 level "S": 1 1 1 1 1 1 1 1 1 1 ...
## $ ofr_intrctn_type : Factor w/ 6 levels "CUSTOMER","EVANGELIST",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ intrctn_event : Factor w/ 59 levels "InStore_AddItemsInCart",..: 11 11 4 4 11 4 4 4 4 7 ...
## $ item_intrctn_type : Factor w/ 4 levels "AWARENESS","ENGAGE",..: 1 1 4 4 1 4 4 4 4 4 ...
## $ intrctn_desc : Factor w/ 26 levels "Benefits of loyalty program",..: 22 22 26 26 22 26 26 26 26 22 ...
## $ batch_run_id : logi NA NA NA NA NA NA ...
## $ job_run_id : logi NA NA NA NA NA NA ...
## $ src_sys_id : Factor w/ 1 level "SRC1": 1 1 1 1 1 1 1 1 1 1 ...
## $ src_sys_nm : logi NA NA NA NA NA NA ...
## $ tenant_id : int 100001 100001 100001 100001 100001 100001 100001 100001 100001 100001 ...
## $ rec_creat_by : logi NA NA NA NA NA NA ...
## $ rec_creat_dttm : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
summary(CustInteractEnqFull)
## intrctn_enq_id intrctn_id intrctn_dttm
## Min. :1.988e+17 Min. :1.988e+17 Min. :1988-02-04 00:05:54
## 1st Qu.:1.991e+17 1st Qu.:1.991e+17 1st Qu.:1991-03-07 05:50:00
## Median :1.994e+17 Median :1.994e+17 Median :1994-04-10 16:16:50
## Mean :1.993e+17 Mean :1.993e+17 Mean :1993-08-09 14:35:39
## 3rd Qu.:1.994e+17 3rd Qu.:1.994e+17 3rd Qu.:1994-05-16 05:09:11
## Max. :2.000e+17 Max. :2.000e+17 Max. :2000-06-19 06:12:30
##
## intrctn_ln_item_seq_num prev_intrctn_id prev_intrctn_dttm
## Min. : 1.000 Min. :1.988e+17 Min. :1988-02-04 00:05:54
## 1st Qu.: 1.000 1st Qu.:1.991e+17 1st Qu.:1991-03-07 05:56:50
## Median : 3.000 Median :1.994e+17 Median :1994-04-10 16:27:00
## Mean : 2.929 Mean :1.993e+17 Mean :1993-08-09 09:02:26
## 3rd Qu.: 4.000 3rd Qu.:1.994e+17 3rd Qu.:1994-05-15 04:08:20
## Max. :10.000 Max. :2.000e+17 Max. :2000-06-19 04:49:26
##
## promo_ofr_id promo_ofr_ref_int_id promo_initv_id
## ID1618150001:850 Min. :1.618e+09 ID914920001:850
## ID1618150002:878 1st Qu.:1.618e+09 ID914920002:878
## ID1618150003:936 Median :1.618e+09 ID914920003:936
## ID1618150004:881 Mean :1.618e+09 ID914920004:881
## ID1618150005:905 3rd Qu.:1.618e+09 ID914920005:905
## ID1618150006:893 Max. :1.618e+09 ID914920006:893
##
## promo_initv_ref_int_id intrctn_type intrctn_dirctn
## Min. :914920001 cart_abondoned : 425 INBOUND:5343
## 1st Qu.:914920002 cart_checkout : 356
## Median :914920004 add_items_into_cart: 349
## Mean :914920004 login : 326
## 3rd Qu.:914920005 product_enquiry : 317
## Max. :914920006 checkin : 284
## (Other) :3286
## intrctn_ctgry intrctn_sub_ctgry intrctn_ctgry_id intrctn_reason
## ENQUIRY:5343 ACCESS :1110 INTCATID_001:1110 AWARENESS:1264
## SELECT :1005 INTCATID_008:1005 ENGAGE : 987
## ENQUIRY : 835 INTCATID_003: 835 PURCHASE :1858
## OPINION : 748 INTCATID_002: 748 RESEARCH :1234
## TRANSACT: 518 INTCATID_005: 518
## EXIT : 425 INTCATID_010: 425
## (Other) : 702 (Other) : 702
## intrctn_begin_dttm intrctn_end_dttm
## Min. :1988-02-04 00:05:54 Min. :1988-02-04 00:05:54
## 1st Qu.:1991-03-07 05:50:00 1st Qu.:1991-03-07 05:51:02
## Median :1994-04-10 16:16:50 Median :1994-04-10 16:16:50
## Mean :1993-08-09 14:35:39 Mean :1993-08-09 14:39:51
## 3rd Qu.:1994-05-16 05:09:11 3rd Qu.:1994-05-16 05:09:11
## Max. :2000-06-19 06:12:30 Max. :2000-06-19 06:12:30
##
## terminal_nm act_type_id act_type_cd
## COUNTER_NO_10: 517 Min. :12090001 CD12090001:5343
## COUNTER_NO_6 : 441 1st Qu.:12090001
## COUNTER_NO_14: 429 Median :12090001
## COUNTER_NO_1 : 414 Mean :12090001
## COUNTER_NO_8 : 412 3rd Qu.:12090001
## COUNTER_NO_4 : 392 Max. :12090001
## (Other) :2738
## act_type act_id emp_id emp_nm
## INTERACTION:5343 Min. :1.988e+16 Min. :50001 CD50001:4003
## 1st Qu.:1.991e+16 1st Qu.:50001 CD50002: 429
## Median :1.994e+16 Median :50001 CD50003: 441
## Mean :1.993e+16 Mean :50002 CD50004: 470
## 3rd Qu.:1.994e+16 3rd Qu.:50002
## Max. :2.000e+16 Max. :50004
##
## chnl_id chnl_cd chnl_nm
## Min. :380002 CD380002: 83 Geo-Spatial :1327
## 1st Qu.:380007 CD380003: 108 In-Store Shopping App:1365
## Median :380009 CD380004: 72 Instagram : 519
## Mean :380009 CD380006: 519 Pinterest : 72
## 3rd Qu.:380009 CD380007:1869 Retailer Website :1869
## Max. :380013 CD380009:1365 Twitter : 83
## CD380013:1327 You Tube : 108
## tch_pnt_id tch_pnt_cd tch_pnt_nm cust_id
## Min. :2e+05 CD200001: 782 App :1365 CUST300043: 21
## 1st Qu.:2e+05 CD200002:1869 In-Store :1327 CUST300138: 19
## Median :2e+05 CD200003:1365 Social Media: 782 CUST500131: 19
## Mean :2e+05 CD200004:1327 Website :1869 CUST300023: 18
## 3rd Qu.:2e+05 CUST300048: 17
## Max. :2e+05 CUST500005: 17
## (Other) :5232
## cust_ref_int_id cust_nm bsn_unit_id
## Min. :100001 Kayla Cain : 26 Min. :100001
## 1st Qu.:200052 Tia Ramos : 21 1st Qu.:200001
## Median :300098 Lilyana Grimes : 19 Median :200017
## Mean :298959 Martha Murray : 19 Mean :175189
## 3rd Qu.:400145 Micah Carver : 18 3rd Qu.:200033
## Max. :500199 Braylen Mcfadden: 17 Max. :200049
## (Other) :5223
## bsn_unit_cd bsn_unit_nm locn_id locn_cd
## BSN_200044: 106 SITE00044: 106 Min. :100001 WEB_200012: 317
## BSN_200042: 104 SITE00042: 104 1st Qu.:200001 WEB_200003: 313
## BSN_200002: 97 SITE00002: 97 Median :200005 WEB_200001: 300
## BSN_200012: 95 SITE00012: 95 Mean :175171 WEB_200002: 300
## BSN_200003: 93 SITE00003: 93 3rd Qu.:200010 WEB_200010: 299
## BSN_200008: 92 SITE00008: 92 Max. :200014 WEB_200008: 293
## (Other) :4756 (Other) :4756 (Other) :3521
## locn_nm item_id item_ref_int_id
## N_WEB_A00012: 317 CD5130001:3394 Min. :5130001
## N_WEB_A00003: 313 CD5130051: 11 1st Qu.:5130001
## N_WEB_A00001: 300 CD5130314: 11 Median :5130001
## N_WEB_A00002: 300 CD5130438: 11 Mean :5130083
## N_WEB_A00010: 299 CD5130031: 10 3rd Qu.:5130145
## N_WEB_A00008: 293 CD5130034: 10 Max. :5130446
## (Other) :3521 (Other) :1896
## item_nm mrhrc_hier_grp_id mrhrc_hier_grp_nm
## NAN :3394 Min. :200003 MRHRC L2 Group 3: 782
## Godrej Interio: 87 1st Qu.:300001 MRHRC L3 Group 1:1327
## Home Town : 82 Median :300002 MRHRC L3 Group 2:3234
## Royal Oak : 74 Mean :285366
## Evok : 68 3rd Qu.:300002
## Black Pottery : 56 Max. :300002
## (Other) :1582
## st_id st_cd ofr_intrctn_type intrctn_event
## Min. :36 S:5343 CUSTOMER : 705 InStore_CheckIn : 284
## 1st Qu.:36 EVANGELIST : 987 Mobile_CartAbondoned : 245
## Median :36 LEAD :1337 Soc_Like : 196
## Mean :36 OPPORTUNITY: 467 ONL_Login : 181
## 3rd Qu.:36 PROSPECT : 791 ONL_CartCheckOut : 177
## Max. :36 SUSPECT :1056 InStore_NearToInStore: 176
## (Other) :4084
## item_intrctn_type intrctn_desc batch_run_id
## AWARENESS:1264 Looking for purchase of item : 539 Mode:logical
## ENGAGE : 987 Displays interest in product : 477 NA's:5343
## PURCHASE :1671 Displays dis-satisfaction : 425
## RESEARCH :1421 Looking for offers for purchase : 417
## Customer engagement towards brand: 374
## Displays willingness to buy : 349
## (Other) :2762
## job_run_id src_sys_id src_sys_nm tenant_id rec_creat_by
## Mode:logical SRC1:5343 Mode:logical Min. :1e+05 Mode:logical
## NA's:5343 NA's:5343 1st Qu.:1e+05 NA's:5343
## Median :1e+05
## Mean :1e+05
## 3rd Qu.:1e+05
## Max. :1e+05
##
## rec_creat_dttm
## Min. :1988-02-04 00:05:54
## 1st Qu.:1991-03-07 05:51:02
## Median :1994-04-10 16:16:50
## Mean :1993-08-09 14:39:51
## 3rd Qu.:1994-05-16 05:09:11
## Max. :2000-06-19 06:12:30
##
library(dplyr)
CustInteractEnq <- CustInteractEnqFull %>% mutate(conversion = (CustInteractEnqFull$intrctn_type == "cart_checkout"))%>% select(c("cust_id","intrctn_dttm","chnl_nm","tch_pnt_nm","intrctn_type","intrctn_reason","intrctn_sub_ctgry","conversion")) %>% arrange(cust_id,intrctn_dttm)
summary(CustInteractEnq$intrctn_type)
## add_items_into_cart add_items_into_wishlist
## 349 121
## cart_abondoned cart_checkout
## 425 356
## checkin click_on_offer
## 284 259
## click_on_product comment
## 222 46
## Connect delete_items_from_cart
## 104 137
## delete_items_from_wishlist download
## 121 54
## feedback follow
## 161 47
## in_parking like
## 94 196
## login loyalty_enquiry
## 326 67
## loyalty_enrollment loyalty_events_points_transfer
## 145 179
## near_to_store offer_enquiry
## 176 158
## participation_instore_events password_change
## 23 72
## pin product_enquiry
## 72 317
## product_review read_review
## 132 249
## referrals response_to_survey
## 81 107
## share subscribe
## 57 54
## tweet view
## 83 69
head(CustInteractEnq,30)
## cust_id intrctn_dttm chnl_nm tch_pnt_nm
## 1 CUST100001 1988-02-04 21:43:40 Geo-Spatial In-Store
## 2 CUST100001 1988-02-04 22:16:36 Geo-Spatial In-Store
## 3 CUST100001 1988-02-04 22:50:36 Geo-Spatial In-Store
## 4 CUST100001 1988-02-04 23:05:16 Geo-Spatial In-Store
## 5 CUST100002 1988-02-04 21:42:42 Geo-Spatial In-Store
## 6 CUST100002 1988-02-04 22:16:48 Geo-Spatial In-Store
## 7 CUST100002 1988-02-04 22:43:26 Geo-Spatial In-Store
## 8 CUST100003 1988-02-04 21:39:00 Geo-Spatial In-Store
## 9 CUST100004 1988-02-04 21:47:54 Geo-Spatial In-Store
## 10 CUST100004 1988-02-04 22:27:36 Geo-Spatial In-Store
## 11 CUST100004 1988-02-04 22:55:30 Geo-Spatial In-Store
## 12 CUST100004 1988-02-04 23:31:00 Geo-Spatial In-Store
## 13 CUST100004 1988-02-06 08:27:58 Geo-Spatial In-Store
## 14 CUST100004 1988-02-06 09:35:08 Geo-Spatial In-Store
## 15 CUST100004 1988-02-06 10:49:36 Geo-Spatial In-Store
## 16 CUST100004 1988-02-06 12:04:46 Geo-Spatial In-Store
## 17 CUST100004 1988-02-06 12:57:38 Geo-Spatial In-Store
## 18 CUST100005 1988-02-04 00:31:54 Geo-Spatial In-Store
## 19 CUST100005 1988-02-04 01:10:40 Geo-Spatial In-Store
## 20 CUST100005 1988-02-04 01:39:26 Geo-Spatial In-Store
## 21 CUST100005 1988-02-04 02:15:26 Instagram Social Media
## 22 CUST100005 1988-02-04 21:50:06 Geo-Spatial In-Store
## 23 CUST100005 1988-02-04 22:26:38 Geo-Spatial In-Store
## 24 CUST100005 1988-02-04 23:03:56 Geo-Spatial In-Store
## 25 CUST100005 1988-02-04 23:41:38 Geo-Spatial In-Store
## 26 CUST100005 1988-02-05 00:01:08 Geo-Spatial In-Store
## 27 CUST100005 1988-02-06 05:05:04 Geo-Spatial In-Store
## 28 CUST100005 1988-02-06 06:02:38 Geo-Spatial In-Store
## 29 CUST100005 1988-02-06 07:13:02 Geo-Spatial In-Store
## 30 CUST100005 1988-02-06 08:25:56 Geo-Spatial In-Store
## intrctn_type intrctn_reason intrctn_sub_ctgry conversion
## 1 checkin RESEARCH ACCESS FALSE
## 2 add_items_into_cart PURCHASE SELECT FALSE
## 3 delete_items_from_cart PURCHASE DESELECT FALSE
## 4 cart_checkout PURCHASE TRANSACT TRUE
## 5 near_to_store AWARENESS ACCESS FALSE
## 6 in_parking RESEARCH ACCESS FALSE
## 7 checkin RESEARCH ACCESS FALSE
## 8 near_to_store AWARENESS ACCESS FALSE
## 9 checkin RESEARCH ACCESS FALSE
## 10 add_items_into_cart PURCHASE SELECT FALSE
## 11 delete_items_from_cart PURCHASE DESELECT FALSE
## 12 cart_abondoned PURCHASE EXIT FALSE
## 13 checkin RESEARCH ACCESS FALSE
## 14 product_enquiry AWARENESS ENQUIRY FALSE
## 15 offer_enquiry AWARENESS ENQUIRY FALSE
## 16 add_items_into_cart PURCHASE SELECT FALSE
## 17 cart_checkout PURCHASE TRANSACT TRUE
## 18 loyalty_enquiry AWARENESS ENQUIRY FALSE
## 19 loyalty_enquiry PURCHASE ENROLL FALSE
## 20 cart_checkout PURCHASE TRANSACT TRUE
## 21 like ENGAGE OPINION FALSE
## 22 near_to_store AWARENESS ACCESS FALSE
## 23 in_parking RESEARCH ACCESS FALSE
## 24 checkin RESEARCH ACCESS FALSE
## 25 product_enquiry AWARENESS ENQUIRY FALSE
## 26 offer_enquiry AWARENESS ENQUIRY FALSE
## 27 checkin RESEARCH ACCESS FALSE
## 28 product_enquiry AWARENESS ENQUIRY FALSE
## 29 offer_enquiry AWARENESS ENQUIRY FALSE
## 30 loyalty_enquiry AWARENESS ENQUIRY FALSE
CustInteractEnq <- CustInteractEnq %>%
arrange(cust_id,intrctn_dttm) %>%
group_by(cust_id) %>%
mutate(path_no = ifelse(is.na(lag(cumsum(conversion))), 0, lag(cumsum(conversion))) + 1) %>%
ungroup()
head(CustInteractEnq,50)
## # A tibble: 50 x 9
## cust_… intrctn_dttm chnl_… tch_p… intrc… intr… intr… conv… path…
## <fctr> <dttm> <fctr> <fctr> <fctr> <fct> <fct> <lgl> <dbl>
## 1 CUST1… 1988-02-04 21:43:40 Geo-S… In-St… check… RESE… ACCE… F 1.00
## 2 CUST1… 1988-02-04 22:16:36 Geo-S… In-St… add_i… PURC… SELE… F 1.00
## 3 CUST1… 1988-02-04 22:50:36 Geo-S… In-St… delet… PURC… DESE… F 1.00
## 4 CUST1… 1988-02-04 23:05:16 Geo-S… In-St… cart_… PURC… TRAN… T 1.00
## 5 CUST1… 1988-02-04 21:42:42 Geo-S… In-St… near_… AWAR… ACCE… F 1.00
## 6 CUST1… 1988-02-04 22:16:48 Geo-S… In-St… in_pa… RESE… ACCE… F 1.00
## 7 CUST1… 1988-02-04 22:43:26 Geo-S… In-St… check… RESE… ACCE… F 1.00
## 8 CUST1… 1988-02-04 21:39:00 Geo-S… In-St… near_… AWAR… ACCE… F 1.00
## 9 CUST1… 1988-02-04 21:47:54 Geo-S… In-St… check… RESE… ACCE… F 1.00
## 10 CUST1… 1988-02-04 22:27:36 Geo-S… In-St… add_i… PURC… SELE… F 1.00
## # ... with 40 more rows
df_paths_1 <- CustInteractEnq %>%
filter(path_no == 1) %>%
select(-path_no)
##### replace some channels #####
df_path_1_clean <- df_paths_1 %>%
# removing NAs
filter(!is.na(chnl_nm))
##### one- and multi-channel paths #####
df_path_1_clean <- df_path_1_clean %>%
group_by(cust_id) %>%
mutate(uniq_channel_tag = ifelse(length(unique(chnl_nm)) == 1, TRUE, FALSE)) %>%
ungroup()
df_path_1_clean_uniq <- df_path_1_clean %>%
filter(uniq_channel_tag == TRUE) %>%
select(-uniq_channel_tag)
df_path_1_clean_multi <- df_path_1_clean %>%
filter(uniq_channel_tag == FALSE) %>%
select(-uniq_channel_tag)
head(df_path_1_clean_uniq,50)
## # A tibble: 50 x 8
## cust_id intrctn_dttm chnl_… tch_p… intrct… intr… intrc… conv…
## <fctr> <dttm> <fctr> <fctr> <fctr> <fct> <fctr> <lgl>
## 1 CUST100001 1988-02-04 21:43:40 Geo-S… In-St… checkin RESE… ACCESS F
## 2 CUST100001 1988-02-04 22:16:36 Geo-S… In-St… add_it… PURC… SELECT F
## 3 CUST100001 1988-02-04 22:50:36 Geo-S… In-St… delete… PURC… DESEL… F
## 4 CUST100001 1988-02-04 23:05:16 Geo-S… In-St… cart_c… PURC… TRANS… T
## 5 CUST100002 1988-02-04 21:42:42 Geo-S… In-St… near_t… AWAR… ACCESS F
## 6 CUST100002 1988-02-04 22:16:48 Geo-S… In-St… in_par… RESE… ACCESS F
## 7 CUST100002 1988-02-04 22:43:26 Geo-S… In-St… checkin RESE… ACCESS F
## 8 CUST100003 1988-02-04 21:39:00 Geo-S… In-St… near_t… AWAR… ACCESS F
## 9 CUST100004 1988-02-04 21:47:54 Geo-S… In-St… checkin RESE… ACCESS F
## 10 CUST100004 1988-02-04 22:27:36 Geo-S… In-St… add_it… PURC… SELECT F
## # ... with 40 more rows
df_all_paths <- df_path_1_clean %>%
group_by(cust_id) %>%
summarise(path = paste(chnl_nm, collapse = ' > '),
conversion = sum(conversion)) %>%
ungroup() %>%
filter(conversion == 1)
tail(df_all_paths,50)
## # A tibble: 50 x 3
## cust_id path conve…
## <fctr> <chr> <int>
## 1 CUST500122 Pinterest > Instagram > In-Store Shopping App > In-S… 1
## 2 CUST500126 In-Store Shopping App > In-Store Shopping App > In-S… 1
## 3 CUST500127 In-Store Shopping App > In-Store Shopping App > In-S… 1
## 4 CUST500128 In-Store Shopping App > In-Store Shopping App > In-S… 1
## 5 CUST500129 Geo-Spatial > Geo-Spatial > Geo-Spatial > Geo-Spatia… 1
## 6 CUST500130 In-Store Shopping App > In-Store Shopping App > In-S… 1
## 7 CUST500131 Geo-Spatial > Geo-Spatial > Geo-Spatial > Geo-Spatia… 1
## 8 CUST500132 Retailer Website > Retailer Website > Retailer Websi… 1
## 9 CUST500133 In-Store Shopping App > In-Store Shopping App > In-S… 1
## 10 CUST500134 Geo-Spatial > Geo-Spatial > Geo-Spatial > Geo-Spatia… 1
## # ... with 40 more rows
mod_attrib <- markov_model(df_all_paths,
var_path = 'path',
var_conv = 'conversion',
out_more = TRUE)
mod_attrib$removal_effects
## channel_name removal_effects
## 1 Geo-Spatial 0.405084
## 2 Retailer Website 0.485169
## 3 In-Store Shopping App 0.172690
## 4 Instagram 0.039689
## 5 You Tube 0.010020
## 6 Pinterest 0.006647
## 7 Twitter 0.003280
mod_attrib$result
## channel_name total_conversions
## 1 Geo-Spatial 108.6162168
## 2 Retailer Website 130.0896142
## 3 In-Store Shopping App 46.3038147
## 4 Instagram 10.6419138
## 5 You Tube 2.6866884
## 6 Pinterest 1.7822772
## 7 Twitter 0.8794749
d_all <- data.frame(mod_attrib$result)
df_multi_paths <- df_path_1_clean_multi %>%
group_by(cust_id) %>%
summarise(path = paste(chnl_nm, collapse = ' > '),
conversion = sum(conversion)) %>%
ungroup() %>%
filter(conversion == 1)
mod_attrib_alt <- markov_model(df_multi_paths,
var_path = 'path',
var_conv = 'conversion',
out_more = TRUE)
mod_attrib_alt$removal_effects
## channel_name removal_effects
## 1 Instagram 0.386570
## 2 In-Store Shopping App 0.193497
## 3 Geo-Spatial 0.646075
## 4 Retailer Website 0.774334
## 5 You Tube 0.097134
## 6 Pinterest 0.064652
## 7 Twitter 0.031910
mod_attrib_alt$result
## channel_name total_conversions
## 1 Instagram 5.4615910
## 2 In-Store Shopping App 2.7337907
## 3 Geo-Spatial 9.1279649
## 4 Retailer Website 10.9400512
## 5 You Tube 1.3723418
## 6 Pinterest 0.9134252
## 7 Twitter 0.4508352
# adding unique paths
df_uniq_paths <- df_path_1_clean_uniq %>%
filter(conversion == 1) %>%
group_by(chnl_nm) %>%
summarise(conversions = sum(conversion)) %>%
ungroup()
d_multi <- data.frame(mod_attrib_alt$result)
d_split <- full_join(d_multi, df_uniq_paths, by = c('channel_name' = 'chnl_nm')) %>%
mutate(result = total_conversions + conversions)
sum(d_all$total_conversions)
## [1] 301
sum(d_split$result,na.rm = TRUE)
## [1] 292.8018
df_hm <- df_all_paths %>%
mutate(channel_name_ft = trimws(sub('>.*', '', path)),
#channel_name_ft = sub(' ', '', channel_name_ft),
channel_name_lt = trimws(sub('.*>', '', path)))
#channel_name_lt = sub(' ', '', channel_name_lt))
tail(df_hm,50)
## # A tibble: 50 x 5
## cust_id path conve… channel… channel…
## <fctr> <chr> <int> <chr> <chr>
## 1 CUST500122 Pinterest > Instagram > In-Store S… 1 Pintere… In-Stor…
## 2 CUST500126 In-Store Shopping App > In-Store S… 1 In-Stor… In-Stor…
## 3 CUST500127 In-Store Shopping App > In-Store S… 1 In-Stor… In-Stor…
## 4 CUST500128 In-Store Shopping App > In-Store S… 1 In-Stor… In-Stor…
## 5 CUST500129 Geo-Spatial > Geo-Spatial > Geo-Sp… 1 Geo-Spa… Geo-Spa…
## 6 CUST500130 In-Store Shopping App > In-Store S… 1 In-Stor… In-Stor…
## 7 CUST500131 Geo-Spatial > Geo-Spatial > Geo-Sp… 1 Geo-Spa… Retaile…
## 8 CUST500132 Retailer Website > Retailer Websit… 1 Retaile… Retaile…
## 9 CUST500133 In-Store Shopping App > In-Store S… 1 In-Stor… In-Stor…
## 10 CUST500134 Geo-Spatial > Geo-Spatial > Geo-Sp… 1 Geo-Spa… Geo-Spa…
## # ... with 40 more rows
# first-touch conversions
df_ft <- df_hm %>%
group_by(channel_name_ft) %>%
summarise(FtouchConv = sum(conversion)) %>%
ungroup()
df_ft
## # A tibble: 7 x 2
## channel_name_ft FtouchConv
## <chr> <int>
## 1 Geo-Spatial 120
## 2 In-Store Shopping App 46
## 3 Instagram 7
## 4 Pinterest 2
## 5 Retailer Website 122
## 6 Twitter 1
## 7 You Tube 3
# last-touch conversions
df_lt <- df_hm %>%
group_by(channel_name_lt) %>%
summarise(LtouchConv = sum(conversion)) %>%
ungroup()
df_lt
## # A tibble: 3 x 2
## channel_name_lt LtouchConv
## <chr> <int>
## 1 Geo-Spatial 103
## 2 In-Store Shopping App 52
## 3 Retailer Website 146
#h_mod2 <- heuristic_models(CustInteractEnq, var_path = 'path', var_conv = 'conv')
h_mod2 <- merge(df_lt, df_ft, by.x = 'channel_name_lt', by.y = 'channel_name_ft')
h_mod2
## channel_name_lt LtouchConv FtouchConv
## 1 Geo-Spatial 103 120
## 2 In-Store Shopping App 52 46
## 3 Retailer Website 146 122
# merging all models
all_models <- merge(h_mod2, mod_attrib$result, by.x = 'channel_name_lt', by.y = 'channel_name')
colnames(all_models)[c(1, 4)] <- c('channel_name', 'attribModConv')
all_models
## channel_name LtouchConv FtouchConv attribModConv
## 1 Geo-Spatial 103 120 108.61622
## 2 In-Store Shopping App 52 46 46.30381
## 3 Retailer Website 146 122 130.08961
mod_attrib
## $result
## channel_name total_conversions
## 1 Geo-Spatial 108.6162168
## 2 Retailer Website 130.0896142
## 3 In-Store Shopping App 46.3038147
## 4 Instagram 10.6419138
## 5 You Tube 2.6866884
## 6 Pinterest 1.7822772
## 7 Twitter 0.8794749
##
## $transition_matrix
## channel_from channel_to transition_probability
## 1 (start) Geo-Spatial 0.398671096
## 2 (start) Retailer Website 0.405315615
## 3 (start) In-Store Shopping App 0.152823920
## 4 (start) Instagram 0.023255814
## 5 (start) You Tube 0.009966777
## 6 (start) Pinterest 0.006644518
## 7 (start) Twitter 0.003322259
## 8 Geo-Spatial (conversion) 0.844262295
## 9 Geo-Spatial Retailer Website 0.155737705
## 10 Retailer Website (conversion) 1.000000000
## 11 In-Store Shopping App (conversion) 1.000000000
## 12 Instagram In-Store Shopping App 0.416666667
## 13 Instagram Retailer Website 0.416666667
## 14 Instagram Geo-Spatial 0.166666667
## 15 You Tube Instagram 0.666666667
## 16 You Tube In-Store Shopping App 0.333333333
## 17 Pinterest Instagram 1.000000000
## 18 Twitter Instagram 1.000000000
##
## $removal_effects
## channel_name removal_effects
## 1 Geo-Spatial 0.405084
## 2 Retailer Website 0.485169
## 3 In-Store Shopping App 0.172690
## 4 Instagram 0.039689
## 5 You Tube 0.010020
## 6 Pinterest 0.006647
## 7 Twitter 0.003280
df_plot_trans <- mod_attrib$transition_matrix
cols <- c("#e7f0fa", "#c9e2f6", "#95cbee", "#0099dc", "#4ab04a", "#ffd73e", "#eec73a",
"#e29421")
t <- max(df_plot_trans$transition_probability)
ggplot(df_plot_trans, aes(y = channel_from, x = channel_to, fill = transition_probability)) +
theme_minimal() +
geom_tile(colour = "white", width = .9, height = .9) +
scale_fill_gradientn(colours = cols, limits = c(0, t),
breaks = seq(0, t, by = t/4),
labels = c("0", round(t/4*1, 2), round(t/4*2, 2), round(t/4*3, 2), round(t/4*4, 2)),
guide = guide_colourbar(ticks = T, nbin = 50, barheight = .5, label = T, barwidth = 10)) +
geom_text(aes(label = round(transition_probability, 2)), fontface = "bold", size = 4) +
theme(legend.position = 'bottom',
legend.direction = "horizontal",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
axis.title.x = element_text(size = 24, face = "bold"),
axis.title.y = element_text(size = 24, face = "bold"),
axis.text.y = element_text(size = 8, face = "bold", color = 'black'),
axis.text.x = element_text(size = 8, angle = 90, hjust = 0.5, vjust = 0.5, face = "plain")) +
ggtitle("Transition matrix heatmap")
# models comparison
all_mod_plot <- melt(all_models, id.vars = 'channel_name', variable.name = 'conv_type')
all_mod_plot$value <- round(all_mod_plot$value)
# slope chart
pal <- colorRampPalette(brewer.pal(10, "Set1"))
## Warning in brewer.pal(10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
ggplot(all_mod_plot, aes(x = conv_type, y = value, group = channel_name)) +
theme_solarized(base_size = 18, base_family = "", light = TRUE) +
scale_color_manual(values = pal(10)) +
scale_fill_manual(values = pal(10)) +
geom_line(aes(color = channel_name), size = 2.5, alpha = 0.8) +
geom_point(aes(color = channel_name), size = 5) +
geom_label_repel(aes(label = paste0(channel_name, ': ', value), fill = factor(channel_name)),
alpha = 0.7,
fontface = 'bold', color = 'white', size = 5,
box.padding = unit(0.25, 'lines'), point.padding = unit(0.5, 'lines'),
max.iter = 100) +
theme(legend.position = 'none',
legend.title = element_text(size = 16, color = 'black'),
legend.text = element_text(size = 16, vjust = 2, color = 'black'),
plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
axis.title.x = element_text(size = 24, face = "bold"),
axis.title.y = element_text(size = 16, face = "bold"),
axis.text.x = element_text(size = 16, face = "bold", color = 'black'),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_line(colour = "grey", linetype = "dotted"),
panel.grid.minor = element_blank(),
strip.text = element_text(size = 16, hjust = 0.5, vjust = 0.5, face = "bold", color = 'black'),
strip.background = element_rect(fill = "#f0b35f")) +
labs(x = 'Model', y = 'Conversions') +
ggtitle('Models comparison') +
guides(colour = guide_legend(override.aes = list(size = 4)))
mod_attrib$removal_effects
## channel_name removal_effects
## 1 Geo-Spatial 0.405084
## 2 Retailer Website 0.485169
## 3 In-Store Shopping App 0.172690
## 4 Instagram 0.039689
## 5 You Tube 0.010020
## 6 Pinterest 0.006647
## 7 Twitter 0.003280
mod_attrib$result
## channel_name total_conversions
## 1 Geo-Spatial 108.6162168
## 2 Retailer Website 130.0896142
## 3 In-Store Shopping App 46.3038147
## 4 Instagram 10.6419138
## 5 You Tube 2.6866884
## 6 Pinterest 1.7822772
## 7 Twitter 0.8794749
d_all <- data.frame(mod_attrib$result)
# attribution model for splitted multi and unique channel paths
df_multi_paths <- df_path_1_clean_multi %>%
group_by(cust_id) %>%
summarise(path = paste(chnl_nm, collapse = ' > '),
conversion = sum(conversion)) %>%
ungroup() %>%
filter(conversion == 1)
mod_attrib_alt <- markov_model(df_multi_paths,
var_path = 'path',
var_conv = 'conversion',
out_more = TRUE)
mod_attrib_alt$removal_effects
## channel_name removal_effects
## 1 Instagram 0.386561
## 2 In-Store Shopping App 0.193437
## 3 Geo-Spatial 0.645352
## 4 Retailer Website 0.774268
## 5 You Tube 0.096711
## 6 Pinterest 0.064215
## 7 Twitter 0.032487
mod_attrib_alt$result
## channel_name total_conversions
## 1 Instagram 5.4643053
## 2 In-Store Shopping App 2.7343649
## 3 Geo-Spatial 9.1224939
## 4 Retailer Website 10.9448102
## 5 You Tube 1.3670764
## 6 Pinterest 0.9077231
## 7 Twitter 0.4592261
# adding unique paths
df_uniq_paths <- df_path_1_clean_uniq %>%
filter(conversion == 1) %>%
group_by(chnl_nm) %>%
summarise(conversions = sum(conversion)) %>%
ungroup()
d_multi <- data.frame(mod_attrib_alt$result)
d_split <- full_join(d_multi, df_uniq_paths, by = c('channel_name' = 'chnl_nm')) %>%
mutate(result = total_conversions + conversions)
sum(d_all$total_conversions)
## [1] 301
sum(d_split$result,na.rm = TRUE)
## [1] 292.8017
##### Generic Probabilistic Model #####
df_all_paths_compl <- df_path_1_clean %>%
group_by(cust_id) %>%
summarise(path = paste(chnl_nm, collapse = ' > '),
conversion = sum(conversion)) %>%
ungroup() %>%
mutate(null_conversion = ifelse(conversion == 1, 0, 1))
mod_attrib_complete <- markov_model(
df_all_paths_compl,
var_path = 'path',
var_conv = 'conversion',
var_null = 'null_conversion',
out_more = TRUE
)
trans_matrix_prob <- mod_attrib_complete$transition_matrix %>%
dmap_at(c(1, 2), as.character)
##### viz #####
edges <-
data.frame(
from = trans_matrix_prob$channel_from,
to = trans_matrix_prob$channel_to,
label = round(trans_matrix_prob$transition_probability, 2),
font.size = trans_matrix_prob$transition_probability * 100,
width = trans_matrix_prob$transition_probability * 15,
shadow = TRUE,
arrows = "to",
color = list(color = "#95cbee", highlight = "red")
)
nodes <- data_frame(id = c( c(trans_matrix_prob$channel_from), c(trans_matrix_prob$channel_to) )) %>%
distinct(id) %>%
arrange(id) %>%
mutate(
label = id,
color = ifelse(
label %in% c('(start)', '(conversion)'),
'#4ab04a',
ifelse(label == '(null)', '#ce472e', '#ffd73e')
),
shadow = TRUE,
shape = "box"
)
visNetwork(nodes,
edges,
height = "2000px",
width = "100%",
main = "Generic Probabilistic model's Transition Matrix") %>%
visIgraphLayout(randomSeed = 123) %>%
visNodes(size = 5) %>%
visOptions(highlightNearest = TRUE)
##### Generic Probabilistic Model #####
df_all_paths_compl <- df_path_1_clean %>%
group_by(cust_id) %>%
summarise(path = paste(chnl_nm, collapse = ' > '),
conversion = sum(conversion)) %>%
ungroup() %>%
mutate(null_conversion = ifelse(conversion == 1, 0, 1))
mod_attrib_complete <- markov_model(
df_all_paths_compl,
var_path = 'path',
var_conv = 'conversion',
var_null = 'null_conversion',
out_more = TRUE
)
trans_matrix_prob <- mod_attrib_complete$transition_matrix %>%
dmap_at(c(1, 2), as.character)
##### viz #####
edges <-
data.frame(
from = trans_matrix_prob$channel_from,
to = trans_matrix_prob$channel_to,
label = round(trans_matrix_prob$transition_probability, 2),
font.size = trans_matrix_prob$transition_probability * 100,
width = trans_matrix_prob$transition_probability * 15,
shadow = TRUE,
arrows = "to",
color = list(color = "#95cbee", highlight = "red")
)
nodes <- data_frame(id = c( c(trans_matrix_prob$channel_from), c(trans_matrix_prob$channel_to) )) %>%
distinct(id) %>%
arrange(id) %>%
mutate(
label = id,
color = ifelse(
label %in% c('(start)', '(conversion)'),
'#4ab04a',
ifelse(label == '(null)', '#ce472e', '#ffd73e')
),
shadow = TRUE,
shape = "box"
)
visNetwork(nodes,
edges,
height = "2000px",
width = "100%",
main = "Generic Probabilistic model's Transition Matrix") %>%
visIgraphLayout(randomSeed = 123) %>%
visNodes(size = 5) %>%
visOptions(highlightNearest = TRUE)
##### Customer journey duration #####
# computing time lapses from the first contact to conversion/last contact
df_multi_paths_tl <- df_path_1_clean_multi %>%
group_by(cust_id) %>%
summarise(path = paste(chnl_nm, collapse = ' > '),
first_touch_date = min(intrctn_dttm),
last_touch_date = max(intrctn_dttm),
tot_time_lapse = round(as.numeric(last_touch_date - first_touch_date)),
conversion = sum(conversion)) %>%
ungroup()
# distribution plot
ggplot(df_multi_paths_tl %>% filter(conversion == 1), aes(x = tot_time_lapse)) +
theme_minimal() +
geom_histogram(fill = '#4e79a7', binwidth = 1)
# cumulative distribution plot
ggplot(df_multi_paths_tl %>% filter(conversion == 1), aes(x = tot_time_lapse)) +
theme_minimal() +
stat_ecdf(geom = 'step', color = '#4e79a7', size = 2, alpha = 0.7) +
geom_hline(yintercept = 0.90, color = '#e15759', size = 1.5) +
geom_vline(xintercept = 4, color = '#e15759', size = 1.5, linetype = 2)
### removing not first events ###
df_customers <- CustInteractEnq %>%
group_by(cust_id, intrctn_type) %>%
filter(intrctn_dttm == min(intrctn_dttm)) %>%
ungroup()
df_customersInStore <- df_customers %>% filter(tch_pnt_nm %in% c("In-Store"))
df_customersInStore[] <- lapply(df_customersInStore, function(x) if(is.factor(x)) factor(x) else x)
summary(df_customersInStore)
## cust_id intrctn_dttm chnl_nm
## CUST100035: 9 Min. :1988-02-04 00:05:54 Geo-Spatial:1190
## CUST100194: 9 1st Qu.:1988-02-04 07:16:56
## CUST100031: 8 Median :1988-02-04 14:50:19
## CUST100055: 8 Mean :1990-08-27 23:24:35
## CUST100061: 8 3rd Qu.:1994-04-10 05:51:25
## CUST100064: 8 Max. :2000-06-19 06:12:30
## (Other) :1140
## tch_pnt_nm intrctn_type intrctn_reason
## In-Store:1190 checkin :252 AWARENESS:390
## near_to_store :161 ENGAGE : 69
## add_items_into_cart:132 PURCHASE :387
## product_enquiry :107 RESEARCH :344
## cart_checkout :103
## in_parking : 92
## (Other) :343
## intrctn_sub_ctgry conversion path_no
## ACCESS :505 Mode :logical Min. :1.000
## ENQUIRY :229 FALSE:1087 1st Qu.:1.000
## SELECT :132 TRUE :103 Median :1.000
## TRANSACT:120 Mean :1.057
## DESELECT: 83 3rd Qu.:1.000
## OPINION : 69 Max. :2.000
## (Other) : 52
summary(df_customersInStore$intrctn_sub_ctgry)
## ACCESS DESELECT ENQUIRY ENROLL EXIT OPINION SELECT TRANSACT
## 505 83 229 1 51 69 132 120
df.content <- df_customersInStore %>% select(intrctn_type,intrctn_sub_ctgry)%>%
group_by(intrctn_type,intrctn_sub_ctgry) %>% summarise(number=n())
df.content
## # A tibble: 15 x 3
## # Groups: intrctn_type [?]
## intrctn_type intrctn_sub_ctgry number
## <fctr> <fctr> <int>
## 1 add_items_into_cart SELECT 132
## 2 cart_abondoned EXIT 51
## 3 cart_checkout TRANSACT 103
## 4 checkin ACCESS 252
## 5 delete_items_from_cart DESELECT 83
## 6 feedback OPINION 23
## 7 in_parking ACCESS 92
## 8 loyalty_enquiry ENQUIRY 39
## 9 loyalty_enquiry ENROLL 1
## 10 loyalty_events_points_transfer TRANSACT 17
## 11 near_to_store ACCESS 161
## 12 offer_enquiry ENQUIRY 83
## 13 participation_instore_events OPINION 23
## 14 product_enquiry ENQUIRY 107
## 15 response_to_survey OPINION 23
df.all <- df.content %>%
group_by(intrctn_sub_ctgry) %>%
mutate(totnum = sum(number)) %>%
ungroup() %>%
mutate(dum = (max(totnum) - totnum)/2,
maxx = totnum + dum,
minx = dum)
head(df.all)
## # A tibble: 6 x 7
## intrctn_type intrctn_sub_ctgry number totnum dum maxx minx
## <fctr> <fctr> <int> <int> <dbl> <dbl> <dbl>
## 1 add_items_into_cart SELECT 132 132 186 318 186
## 2 cart_abondoned EXIT 51 51 227 278 227
## 3 cart_checkout TRANSACT 103 120 192 312 192
## 4 checkin ACCESS 252 505 0 505 0
## 5 delete_items_from_cart DESELECT 83 83 211 294 211
## 6 feedback OPINION 23 69 218 287 218
df.lines <- df.all %>%
distinct(intrctn_sub_ctgry, maxx, minx)
df.lines
## # A tibble: 8 x 3
## intrctn_sub_ctgry maxx minx
## <fctr> <dbl> <dbl>
## 1 SELECT 318 186
## 2 EXIT 278 227
## 3 TRANSACT 312 192
## 4 ACCESS 505 0
## 5 DESELECT 294 211
## 6 OPINION 287 218
## 7 ENQUIRY 367 138
## 8 ENROLL 253 252
df.dum <- df.all %>%
distinct(intrctn_sub_ctgry,dum) %>%
mutate(intrctn_type = 'dummy',
number = dum) %>%
select(intrctn_type, intrctn_sub_ctgry,number)
head(df.dum)
## # A tibble: 6 x 3
## intrctn_type intrctn_sub_ctgry number
## <chr> <fctr> <dbl>
## 1 dummy SELECT 186
## 2 dummy EXIT 227
## 3 dummy TRANSACT 192
## 4 dummy ACCESS 0
## 5 dummy DESELECT 211
## 6 dummy OPINION 218
conv <- df.all$totnum[df.all$intrctn_sub_ctgry == 'ACCESS']
df.rates <- df.all %>%
distinct(intrctn_sub_ctgry, totnum) %>%
mutate(prevnum = lag(totnum),
rate = round(totnum / conv, 3)) %>%
select(intrctn_sub_ctgry, totnum, prevnum, rate)
## Warning in totnum/conv: longer object length is not a multiple of shorter
## object length
df.rates <- na.omit(df.rates)
df.rates
## # A tibble: 7 x 4
## intrctn_sub_ctgry totnum prevnum rate
## <fctr> <int> <int> <dbl>
## 1 EXIT 51 132 0.101
## 2 TRANSACT 120 51 0.238
## 3 ACCESS 505 120 1.00
## 4 DESELECT 83 505 0.164
## 5 OPINION 69 83 0.137
## 6 ENQUIRY 229 69 0.453
## 7 ENROLL 1 229 0.00200
df.all <- df.all %>%
select(intrctn_type,intrctn_sub_ctgry,number)
df.all
## # A tibble: 15 x 3
## intrctn_type intrctn_sub_ctgry number
## <fctr> <fctr> <int>
## 1 add_items_into_cart SELECT 132
## 2 cart_abondoned EXIT 51
## 3 cart_checkout TRANSACT 103
## 4 checkin ACCESS 252
## 5 delete_items_from_cart DESELECT 83
## 6 feedback OPINION 23
## 7 in_parking ACCESS 92
## 8 loyalty_enquiry ENQUIRY 39
## 9 loyalty_enquiry ENROLL 1
## 10 loyalty_events_points_transfer TRANSACT 17
## 11 near_to_store ACCESS 161
## 12 offer_enquiry ENQUIRY 83
## 13 participation_instore_events OPINION 23
## 14 product_enquiry ENQUIRY 107
## 15 response_to_survey OPINION 23
df.all <- rbind(df.all, df.dum)
df.all$intrctn_sub_ctgry <- factor(df.all$intrctn_sub_ctgry, levels = c('ENROLL','OPINION','TRANSACT','EXIT','DESELECT','SELECT','ENQUIRY','ACCESS'))
df.all <- df.all %>%
arrange(desc(intrctn_sub_ctgry))
list1 <- df.all %>% distinct(intrctn_type) %>%
filter(intrctn_type != 'dummy')
df.all$intrctn_type <- factor(df.all$intrctn_type, levels = c(as.character(list1$intrctn_type), 'dummy'))
#calculating position of labels
df.all <- df.all %>%
arrange(intrctn_sub_ctgry,desc(intrctn_type)) %>%
group_by(intrctn_sub_ctgry) %>%
mutate(pos = cumsum(number) - 0.5*number) %>%
ungroup()
# creating custom palette with 'white' color for dummies
cols <- c("#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
"#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
"#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
"#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
"#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2")
# plotting chart
ggplot() +
theme_minimal() +
coord_flip() +
scale_fill_manual(values=cols) +
geom_bar(data=df.all, aes(x=intrctn_sub_ctgry, y=number, fill=intrctn_type), stat="identity", width=1) +
geom_text(data=df.all[df.all$intrctn_type!='dummy', ],
aes(x=intrctn_sub_ctgry, y=pos, label=paste0(intrctn_type, '-', number)),
size=2, color='white', fontface="bold") +
geom_ribbon(data=df.lines, aes(x=intrctn_sub_ctgry, ymax=max(maxx), ymin=maxx, group=1), fill='white') +
geom_line(data=df.lines, aes(x=intrctn_sub_ctgry, y=maxx, group=1), color='darkred', size=2) +
geom_ribbon(data=df.lines, aes(x=intrctn_sub_ctgry, ymax=minx, ymin=min(minx), group=1), fill='white') +
geom_line(data=df.lines, aes(x=intrctn_sub_ctgry, y=minx, group=1), color='darkred', size=2) +
geom_text(data=df.rates, aes(x=intrctn_sub_ctgry, y=(df.lines$minx[-1]), label=paste0(rate*100, '%')), hjust=1.2,
color='darkblue', fontface="bold") +
theme(legend.position='none', axis.ticks=element_blank(), axis.text.x=element_blank(),
axis.title.x=element_blank())
df_customersTotal <- df_customers %>% filter(tch_pnt_nm %in% c("Website"))
df_customersTotal[] <- lapply(df_customersTotal, function(x) if(is.factor(x)) factor(x) else x)
summary(df_customersTotal)
## cust_id intrctn_dttm chnl_nm
## CUST200043: 13 Min. :1991-03-07 00:04:00 Retailer Website:1733
## CUST200017: 12 1st Qu.:1991-03-07 07:53:02
## CUST200031: 10 Median :1991-03-08 11:06:38
## CUST200070: 10 Mean :1993-07-18 09:47:50
## CUST200127: 10 3rd Qu.:1994-04-12 17:33:32
## CUST200144: 10 Max. :2000-06-19 04:49:06
## (Other) :1668
## tch_pnt_nm intrctn_type intrctn_reason
## Website:1733 login :165 AWARENESS:281
## cart_checkout :146 ENGAGE :181
## add_items_into_cart:122 PURCHASE :877
## cart_abondoned :120 RESEARCH :394
## click_on_offer :113
## click_on_product :110
## (Other) :957
## intrctn_sub_ctgry conversion path_no
## SELECT :454 Mode :logical Min. :1.000
## TRANSACT:255 FALSE:1587 1st Qu.:1.000
## ENQUIRY :216 TRUE :146 Median :1.000
## ACCESS :165 Mean :1.153
## DESELECT:162 3rd Qu.:1.000
## OPINION :130 Max. :3.000
## (Other) :351
summary(df_customersTotal$intrctn_sub_ctgry)
## ACCESS ACTION DESELECT ENQUIRY ENROLL EXIT OPINION PROMOTE
## 165 71 162 216 109 120 130 51
## SELECT TRANSACT
## 454 255
dfTotal <- df_customersTotal %>% select(intrctn_type,intrctn_sub_ctgry)%>%
group_by(intrctn_type,intrctn_sub_ctgry) %>% summarise(number=n())
dfTotal
## # A tibble: 19 x 3
## # Groups: intrctn_type [?]
## intrctn_type intrctn_sub_ctgry number
## <fctr> <fctr> <int>
## 1 add_items_into_cart SELECT 122
## 2 add_items_into_wishlist SELECT 109
## 3 cart_abondoned EXIT 120
## 4 cart_checkout TRANSACT 146
## 5 click_on_offer SELECT 113
## 6 click_on_product SELECT 110
## 7 delete_items_from_cart DESELECT 53
## 8 delete_items_from_wishlist DESELECT 109
## 9 feedback OPINION 56
## 10 login ACCESS 165
## 11 loyalty_enrollment ENROLL 109
## 12 loyalty_events_points_transfer TRANSACT 109
## 13 offer_enquiry ENQUIRY 39
## 14 password_change ACTION 71
## 15 product_enquiry ENQUIRY 77
## 16 product_review OPINION 40
## 17 read_review ENQUIRY 100
## 18 referrals PROMOTE 51
## 19 response_to_survey OPINION 34
dfallTotal <- dfTotal %>%
group_by(intrctn_sub_ctgry) %>%
mutate(totnum = sum(number)) %>%
ungroup() %>%
mutate(dum = (max(totnum) - totnum)/2,
maxx = totnum + dum,
minx = dum)
dfallTotal
## # A tibble: 19 x 7
## intrctn_type intrctn_s… numb… totn… dum maxx minx
## <fctr> <fctr> <int> <int> <dbl> <dbl> <dbl>
## 1 add_items_into_cart SELECT 122 454 0 454 0
## 2 add_items_into_wishlist SELECT 109 454 0 454 0
## 3 cart_abondoned EXIT 120 120 167 287 167
## 4 cart_checkout TRANSACT 146 255 99.5 354 99.5
## 5 click_on_offer SELECT 113 454 0 454 0
## 6 click_on_product SELECT 110 454 0 454 0
## 7 delete_items_from_cart DESELECT 53 162 146 308 146
## 8 delete_items_from_wishlist DESELECT 109 162 146 308 146
## 9 feedback OPINION 56 130 162 292 162
## 10 login ACCESS 165 165 144 310 144
## 11 loyalty_enrollment ENROLL 109 109 172 282 172
## 12 loyalty_events_points_transfer TRANSACT 109 255 99.5 354 99.5
## 13 offer_enquiry ENQUIRY 39 216 119 335 119
## 14 password_change ACTION 71 71 192 262 192
## 15 product_enquiry ENQUIRY 77 216 119 335 119
## 16 product_review OPINION 40 130 162 292 162
## 17 read_review ENQUIRY 100 216 119 335 119
## 18 referrals PROMOTE 51 51 202 252 202
## 19 response_to_survey OPINION 34 130 162 292 162
dflinesTotal <- dfallTotal %>%
distinct(intrctn_sub_ctgry, maxx, minx)
dflinesTotal
## # A tibble: 10 x 3
## intrctn_sub_ctgry maxx minx
## <fctr> <dbl> <dbl>
## 1 SELECT 454 0
## 2 EXIT 287 167
## 3 TRANSACT 354 99.5
## 4 DESELECT 308 146
## 5 OPINION 292 162
## 6 ACCESS 310 144
## 7 ENROLL 282 172
## 8 ENQUIRY 335 119
## 9 ACTION 262 192
## 10 PROMOTE 252 202
dfdumTotal <- dfallTotal %>%
distinct(intrctn_sub_ctgry,dum) %>%
mutate(intrctn_type = 'dummy',
number = dum) %>%
select(intrctn_type, intrctn_sub_ctgry,number)
head(dfdumTotal)
## # A tibble: 6 x 3
## intrctn_type intrctn_sub_ctgry number
## <chr> <fctr> <dbl>
## 1 dummy SELECT 0
## 2 dummy EXIT 167
## 3 dummy TRANSACT 99.5
## 4 dummy DESELECT 146
## 5 dummy OPINION 162
## 6 dummy ACCESS 144
conv <- dfallTotal$totnum[dfallTotal$intrctn_sub_ctgry == 'SELECT']
dfratesTotal <- dfallTotal %>%
distinct(intrctn_sub_ctgry, totnum) %>%
mutate(rate = round(totnum / conv, 3)) %>%
select(intrctn_sub_ctgry, totnum, rate)
## Warning in totnum/conv: longer object length is not a multiple of shorter
## object length
dfratesTotal <- na.omit(dfratesTotal)
dfratesTotal
## # A tibble: 10 x 3
## intrctn_sub_ctgry totnum rate
## <fctr> <int> <dbl>
## 1 SELECT 454 1.00
## 2 EXIT 120 0.264
## 3 TRANSACT 255 0.562
## 4 DESELECT 162 0.357
## 5 OPINION 130 0.286
## 6 ACCESS 165 0.363
## 7 ENROLL 109 0.240
## 8 ENQUIRY 216 0.476
## 9 ACTION 71 0.156
## 10 PROMOTE 51 0.112
dfallTotal <- dfallTotal %>%
select(intrctn_type,intrctn_sub_ctgry,number)
dfallTotal
## # A tibble: 19 x 3
## intrctn_type intrctn_sub_ctgry number
## <fctr> <fctr> <int>
## 1 add_items_into_cart SELECT 122
## 2 add_items_into_wishlist SELECT 109
## 3 cart_abondoned EXIT 120
## 4 cart_checkout TRANSACT 146
## 5 click_on_offer SELECT 113
## 6 click_on_product SELECT 110
## 7 delete_items_from_cart DESELECT 53
## 8 delete_items_from_wishlist DESELECT 109
## 9 feedback OPINION 56
## 10 login ACCESS 165
## 11 loyalty_enrollment ENROLL 109
## 12 loyalty_events_points_transfer TRANSACT 109
## 13 offer_enquiry ENQUIRY 39
## 14 password_change ACTION 71
## 15 product_enquiry ENQUIRY 77
## 16 product_review OPINION 40
## 17 read_review ENQUIRY 100
## 18 referrals PROMOTE 51
## 19 response_to_survey OPINION 34
dfallTotalFinal <- rbind(dfallTotal, dfdumTotal)
dfallTotalFinal
## # A tibble: 29 x 3
## intrctn_type intrctn_sub_ctgry number
## <fctr> <fctr> <dbl>
## 1 add_items_into_cart SELECT 122
## 2 add_items_into_wishlist SELECT 109
## 3 cart_abondoned EXIT 120
## 4 cart_checkout TRANSACT 146
## 5 click_on_offer SELECT 113
## 6 click_on_product SELECT 110
## 7 delete_items_from_cart DESELECT 53.0
## 8 delete_items_from_wishlist DESELECT 109
## 9 feedback OPINION 56.0
## 10 login ACCESS 165
## # ... with 19 more rows
dfallTotalFinal$intrctn_sub_ctgry <- factor(dfallTotalFinal$intrctn_sub_ctgry, levels = c('PROMOTE','ENROLL','ACTION','OPINION','TRANSACT','EXIT','DESELECT','ENQUIRY','ACCESS','SELECT'))
dfallTotalFinal <- dfallTotalFinal %>%
arrange(desc(intrctn_sub_ctgry))
list1 <- dfallTotalFinal %>% distinct(intrctn_type) %>%
filter(intrctn_type != 'dummy')
dfallTotalFinal$intrctn_type <- factor(dfallTotalFinal$intrctn_type, levels = c(as.character(list1$intrctn_type), 'dummy'))
#calculating position of labels
dfallTotalFinal <- dfallTotalFinal %>%
arrange(intrctn_sub_ctgry,desc(intrctn_type)) %>%
group_by(intrctn_sub_ctgry) %>%
mutate(pos = cumsum(number) - 0.5*number) %>%
ungroup()
# creating custom palette with 'white' color for dummies
cols <- c("#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
"#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
"#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
"#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
"#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2")
# plotting chart
ggplot() +
theme_minimal() +
coord_flip() +
scale_fill_manual(values=cols) +
geom_bar(data=dfallTotalFinal, aes(x=intrctn_sub_ctgry, y=number, fill=intrctn_type), stat="identity", width=1) +
geom_text(data=dfallTotalFinal[dfallTotalFinal$intrctn_type!='dummy', ],
aes(x=intrctn_sub_ctgry, y=pos, label=paste0(intrctn_type, '-', number)),
size=2, color='white', fontface="bold") +
geom_ribbon(data=dflinesTotal, aes(x=intrctn_sub_ctgry, ymax=max(maxx), ymin=maxx, group=1), fill='white') +
geom_line(data=dflinesTotal, aes(x=intrctn_sub_ctgry, y=maxx, group=1), color='darkred', size=2) +
geom_ribbon(data=dflinesTotal, aes(x=intrctn_sub_ctgry, ymax=minx, ymin=min(minx), group=1), fill='white') +
geom_line(data=dflinesTotal, aes(x=intrctn_sub_ctgry, y=minx, group=1), color='darkred', size=2) +
geom_text(data=dfratesTotal, aes(x=intrctn_sub_ctgry, y=(dflinesTotal$minx), label=paste0(rate*100, '%')), hjust=1.2,
color='darkblue', fontface="bold") +
theme(legend.position='none', axis.ticks=element_blank(), axis.text.x=element_blank(),
axis.title.x=element_blank())