library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.4.1     ✔ dplyr   0.7.4
## ✔ tidyr   0.7.2     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.4.4
library(ggrepel)
library(RColorBrewer)
library(ChannelAttribution)
library(markovchain)
## Package:  markovchain
## Version:  0.6.9.8-1
## Date:     2017-08-15
## BugReport: http://github.com/spedygiorgio/markovchain/issues
library(visNetwork)
library(expm)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## Attaching package: 'expm'
## The following object is masked from 'package:Matrix':
## 
##     expm
library(stringr)
library(readr)
library(purrrlyr)
CustInteractEnqFull <- read.table("journey data--Customer Transactions and Interactions/d2_cust_intrctn_enq.csv",header = TRUE,sep="\t",stringsAsFactors = TRUE,colClasses=c("rec_creat_dttm"="POSIXct","intrctn_end_dttm"="POSIXct","intrctn_begin_dttm"="POSIXct","prev_intrctn_dttm"="POSIXct","intrctn_dttm"="POSIXct"))
head(CustInteractEnqFull)
##   intrctn_enq_id  intrctn_id        intrctn_dttm intrctn_ln_item_seq_num
## 1    1.98802e+17 1.98802e+17 1988-02-04 21:39:00                       1
## 2    1.98802e+17 1.98802e+17 1988-02-04 21:42:42                       1
## 3    1.98802e+17 1.98802e+17 1988-02-04 21:43:40                       1
## 4    1.98802e+17 1.98802e+17 1988-02-04 21:47:54                       1
## 5    1.98802e+17 1.98802e+17 1988-02-04 21:50:06                       1
## 6    1.98802e+17 1.98802e+17 1988-02-04 21:57:08                       1
##   prev_intrctn_id   prev_intrctn_dttm promo_ofr_id promo_ofr_ref_int_id
## 1     1.98802e+17 1988-02-04 21:23:00 ID1618150004           1618150004
## 2     1.98802e+17 1988-02-04 21:19:00 ID1618150003           1618150003
## 3     1.98802e+17 1988-02-04 21:11:00 ID1618150002           1618150002
## 4     1.98802e+17 1988-02-04 21:31:00 ID1618150005           1618150005
## 5     1.98802e+17 1988-02-04 21:41:00 ID1618150002           1618150002
## 6     1.98802e+17 1988-02-04 22:01:00 ID1618150002           1618150002
##   promo_initv_id promo_initv_ref_int_id  intrctn_type intrctn_dirctn
## 1    ID914920004              914920004 near_to_store        INBOUND
## 2    ID914920003              914920003 near_to_store        INBOUND
## 3    ID914920002              914920002       checkin        INBOUND
## 4    ID914920005              914920005       checkin        INBOUND
## 5    ID914920002              914920002 near_to_store        INBOUND
## 6    ID914920002              914920002       checkin        INBOUND
##   intrctn_ctgry intrctn_sub_ctgry intrctn_ctgry_id intrctn_reason
## 1       ENQUIRY            ACCESS     INTCATID_001      AWARENESS
## 2       ENQUIRY            ACCESS     INTCATID_001      AWARENESS
## 3       ENQUIRY            ACCESS     INTCATID_001       RESEARCH
## 4       ENQUIRY            ACCESS     INTCATID_001       RESEARCH
## 5       ENQUIRY            ACCESS     INTCATID_001      AWARENESS
## 6       ENQUIRY            ACCESS     INTCATID_001       RESEARCH
##    intrctn_begin_dttm    intrctn_end_dttm   terminal_nm act_type_id
## 1 1988-02-04 21:39:00 1988-02-04 21:39:00 COUNTER_NO_11    12090001
## 2 1988-02-04 21:42:42 1988-02-04 21:42:42 COUNTER_NO_12    12090001
## 3 1988-02-04 21:43:40 1988-02-04 21:43:40 COUNTER_NO_14    12090001
## 4 1988-02-04 21:47:54 1988-02-04 21:47:54  COUNTER_NO_4    12090001
## 5 1988-02-04 21:50:06 1988-02-04 21:50:06  COUNTER_NO_1    12090001
## 6 1988-02-04 21:57:08 1988-02-04 21:57:08 COUNTER_NO_10    12090001
##   act_type_cd    act_type      act_id emp_id  emp_nm chnl_id  chnl_cd
## 1  CD12090001 INTERACTION 1.98802e+16  50004 CD50004  380013 CD380013
## 2  CD12090001 INTERACTION 1.98802e+16  50003 CD50003  380013 CD380013
## 3  CD12090001 INTERACTION 1.98802e+16  50003 CD50003  380013 CD380013
## 4  CD12090001 INTERACTION 1.98802e+16  50004 CD50004  380013 CD380013
## 5  CD12090001 INTERACTION 1.98802e+16  50004 CD50004  380013 CD380013
## 6  CD12090001 INTERACTION 1.98802e+16  50004 CD50004  380013 CD380013
##       chnl_nm tch_pnt_id tch_pnt_cd tch_pnt_nm    cust_id cust_ref_int_id
## 1 Geo-Spatial     200004   CD200004   In-Store CUST100003          100003
## 2 Geo-Spatial     200004   CD200004   In-Store CUST100002          100002
## 3 Geo-Spatial     200004   CD200004   In-Store CUST100001          100001
## 4 Geo-Spatial     200004   CD200004   In-Store CUST100004          100004
## 5 Geo-Spatial     200004   CD200004   In-Store CUST100005          100005
## 6 Geo-Spatial     200004   CD200004   In-Store CUST100007          100007
##            cust_nm bsn_unit_id bsn_unit_cd bsn_unit_nm locn_id    locn_cd
## 1     Rowan Morris      100031  BSN_100031   AREA00031  100003 LCN_100003
## 2    Zane Harrison      100004  BSN_100004   AREA00004  100010 LCN_100010
## 3 Victoria Santana      100043  BSN_100043   AREA00043  100011 LCN_100011
## 4    Jakob Nichols      100023  BSN_100023   AREA00023  100003 LCN_100003
## 5 Selena Whitehead      100039  BSN_100039   AREA00039  100010 LCN_100010
## 6     Maurice Moon      100020  BSN_100020   AREA00020  100011 LCN_100011
##        locn_nm   item_id item_ref_int_id item_nm mrhrc_hier_grp_id
## 1 N_LCN_A00003 CD5130001         5130001     NAN            300001
## 2 N_LCN_A00010 CD5130001         5130001     NAN            300001
## 3 N_LCN_A00011 CD5130001         5130001     NAN            300001
## 4 N_LCN_A00003 CD5130001         5130001     NAN            300001
## 5 N_LCN_A00010 CD5130001         5130001     NAN            300001
## 6 N_LCN_A00011 CD5130001         5130001     NAN            300001
##   mrhrc_hier_grp_nm st_id st_cd ofr_intrctn_type         intrctn_event
## 1  MRHRC L3 Group 1    36     S          SUSPECT InStore_NearToInStore
## 2  MRHRC L3 Group 1    36     S          SUSPECT InStore_NearToInStore
## 3  MRHRC L3 Group 1    36     S          SUSPECT       InStore_CheckIn
## 4  MRHRC L3 Group 1    36     S          SUSPECT       InStore_CheckIn
## 5  MRHRC L3 Group 1    36     S          SUSPECT InStore_NearToInStore
## 6  MRHRC L3 Group 1    36     S          SUSPECT       InStore_CheckIn
##   item_intrctn_type               intrctn_desc batch_run_id job_run_id
## 1         AWARENESS Probable customer activity           NA         NA
## 2         AWARENESS Probable customer activity           NA         NA
## 3          RESEARCH         Walked in to store           NA         NA
## 4          RESEARCH         Walked in to store           NA         NA
## 5         AWARENESS Probable customer activity           NA         NA
## 6          RESEARCH         Walked in to store           NA         NA
##   src_sys_id src_sys_nm tenant_id rec_creat_by      rec_creat_dttm
## 1       SRC1         NA    100001           NA 1988-02-04 21:39:00
## 2       SRC1         NA    100001           NA 1988-02-04 21:42:42
## 3       SRC1         NA    100001           NA 1988-02-04 21:43:40
## 4       SRC1         NA    100001           NA 1988-02-04 21:47:54
## 5       SRC1         NA    100001           NA 1988-02-04 21:50:06
## 6       SRC1         NA    100001           NA 1988-02-04 21:57:08
str(CustInteractEnqFull)
## 'data.frame':    5343 obs. of  58 variables:
##  $ intrctn_enq_id         : num  1.99e+17 1.99e+17 1.99e+17 1.99e+17 1.99e+17 ...
##  $ intrctn_id             : num  1.99e+17 1.99e+17 1.99e+17 1.99e+17 1.99e+17 ...
##  $ intrctn_dttm           : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
##  $ intrctn_ln_item_seq_num: int  1 1 1 1 1 1 1 1 1 2 ...
##  $ prev_intrctn_id        : num  1.99e+17 1.99e+17 1.99e+17 1.99e+17 1.99e+17 ...
##  $ prev_intrctn_dttm      : POSIXct, format: "1988-02-04 21:23:00" "1988-02-04 21:19:00" ...
##  $ promo_ofr_id           : Factor w/ 6 levels "ID1618150001",..: 4 3 2 5 2 2 5 5 4 5 ...
##  $ promo_ofr_ref_int_id   : int  1618150004 1618150003 1618150002 1618150005 1618150002 1618150002 1618150005 1618150005 1618150004 1618150005 ...
##  $ promo_initv_id         : Factor w/ 6 levels "ID914920001",..: 4 3 2 5 2 2 5 5 4 5 ...
##  $ promo_initv_ref_int_id : int  914920004 914920003 914920002 914920005 914920002 914920002 914920005 914920005 914920004 914920005 ...
##  $ intrctn_type           : Factor w/ 34 levels "add_items_into_cart",..: 21 21 5 5 21 5 5 5 5 15 ...
##  $ intrctn_dirctn         : Factor w/ 1 level "INBOUND": 1 1 1 1 1 1 1 1 1 1 ...
##  $ intrctn_ctgry          : Factor w/ 1 level "ENQUIRY": 1 1 1 1 1 1 1 1 1 1 ...
##  $ intrctn_sub_ctgry      : Factor w/ 10 levels "ACCESS","ACTION",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ intrctn_ctgry_id       : Factor w/ 10 levels "INTCATID_001",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ intrctn_reason         : Factor w/ 4 levels "AWARENESS","ENGAGE",..: 1 1 4 4 1 4 4 4 4 4 ...
##  $ intrctn_begin_dttm     : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
##  $ intrctn_end_dttm       : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
##  $ terminal_nm            : Factor w/ 14 levels "COUNTER_NO_1",..: 3 4 6 9 1 2 13 7 3 4 ...
##  $ act_type_id            : int  12090001 12090001 12090001 12090001 12090001 12090001 12090001 12090001 12090001 12090001 ...
##  $ act_type_cd            : Factor w/ 1 level "CD12090001": 1 1 1 1 1 1 1 1 1 1 ...
##  $ act_type               : Factor w/ 1 level "INTERACTION": 1 1 1 1 1 1 1 1 1 1 ...
##  $ act_id                 : num  1.99e+16 1.99e+16 1.99e+16 1.99e+16 1.99e+16 ...
##  $ emp_id                 : int  50004 50003 50003 50004 50004 50004 50004 50002 50002 50003 ...
##  $ emp_nm                 : Factor w/ 4 levels "CD50001","CD50002",..: 4 3 3 4 4 4 4 2 2 3 ...
##  $ chnl_id                : int  380013 380013 380013 380013 380013 380013 380013 380013 380013 380013 ...
##  $ chnl_cd                : Factor w/ 7 levels "CD380002","CD380003",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ chnl_nm                : Factor w/ 7 levels "Geo-Spatial",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ tch_pnt_id             : int  200004 200004 200004 200004 200004 200004 200004 200004 200004 200004 ...
##  $ tch_pnt_cd             : Factor w/ 4 levels "CD200001","CD200002",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ tch_pnt_nm             : Factor w/ 4 levels "App","In-Store",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ cust_id                : Factor w/ 947 levels "CUST100001","CUST100002",..: 3 2 1 4 5 7 6 8 11 2 ...
##  $ cust_ref_int_id        : int  100003 100002 100001 100004 100005 100007 100006 100008 100011 100002 ...
##  $ cust_nm                : Factor w/ 919 levels "Aaliyah Copeland",..: 796 913 900 429 827 680 227 858 616 913 ...
##  $ bsn_unit_id            : int  100031 100004 100043 100023 100039 100020 100007 100004 100034 100026 ...
##  $ bsn_unit_cd            : Factor w/ 98 levels "BSN_100001","BSN_100002",..: 31 4 43 23 39 20 7 4 34 26 ...
##  $ bsn_unit_nm            : Factor w/ 98 levels "AREA00001","AREA00002",..: 31 4 43 23 39 20 7 4 34 26 ...
##  $ locn_id                : int  100003 100010 100011 100003 100010 100011 100001 100009 100005 100005 ...
##  $ locn_cd                : Factor w/ 28 levels "LCN_100001","LCN_100002",..: 3 10 11 3 10 11 1 9 5 5 ...
##  $ locn_nm                : Factor w/ 28 levels "N_LCN_A00001",..: 3 10 11 3 10 11 1 9 5 5 ...
##  $ item_id                : Factor w/ 444 levels "CD5130001","CD5130002",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ item_ref_int_id        : int  5130001 5130001 5130001 5130001 5130001 5130001 5130001 5130001 5130001 5130001 ...
##  $ item_nm                : Factor w/ 212 levels "1 Ton A/C","1.2 Ton A/C",..: 142 142 142 142 142 142 142 142 142 142 ...
##  $ mrhrc_hier_grp_id      : int  300001 300001 300001 300001 300001 300001 300001 300001 300001 300001 ...
##  $ mrhrc_hier_grp_nm      : Factor w/ 3 levels "MRHRC L2 Group 3",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ st_id                  : int  36 36 36 36 36 36 36 36 36 36 ...
##  $ st_cd                  : Factor w/ 1 level "S": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ofr_intrctn_type       : Factor w/ 6 levels "CUSTOMER","EVANGELIST",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ intrctn_event          : Factor w/ 59 levels "InStore_AddItemsInCart",..: 11 11 4 4 11 4 4 4 4 7 ...
##  $ item_intrctn_type      : Factor w/ 4 levels "AWARENESS","ENGAGE",..: 1 1 4 4 1 4 4 4 4 4 ...
##  $ intrctn_desc           : Factor w/ 26 levels "Benefits of loyalty program",..: 22 22 26 26 22 26 26 26 26 22 ...
##  $ batch_run_id           : logi  NA NA NA NA NA NA ...
##  $ job_run_id             : logi  NA NA NA NA NA NA ...
##  $ src_sys_id             : Factor w/ 1 level "SRC1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ src_sys_nm             : logi  NA NA NA NA NA NA ...
##  $ tenant_id              : int  100001 100001 100001 100001 100001 100001 100001 100001 100001 100001 ...
##  $ rec_creat_by           : logi  NA NA NA NA NA NA ...
##  $ rec_creat_dttm         : POSIXct, format: "1988-02-04 21:39:00" "1988-02-04 21:42:42" ...
summary(CustInteractEnqFull)
##  intrctn_enq_id        intrctn_id         intrctn_dttm                
##  Min.   :1.988e+17   Min.   :1.988e+17   Min.   :1988-02-04 00:05:54  
##  1st Qu.:1.991e+17   1st Qu.:1.991e+17   1st Qu.:1991-03-07 05:50:00  
##  Median :1.994e+17   Median :1.994e+17   Median :1994-04-10 16:16:50  
##  Mean   :1.993e+17   Mean   :1.993e+17   Mean   :1993-08-09 14:35:39  
##  3rd Qu.:1.994e+17   3rd Qu.:1.994e+17   3rd Qu.:1994-05-16 05:09:11  
##  Max.   :2.000e+17   Max.   :2.000e+17   Max.   :2000-06-19 06:12:30  
##                                                                       
##  intrctn_ln_item_seq_num prev_intrctn_id     prev_intrctn_dttm            
##  Min.   : 1.000          Min.   :1.988e+17   Min.   :1988-02-04 00:05:54  
##  1st Qu.: 1.000          1st Qu.:1.991e+17   1st Qu.:1991-03-07 05:56:50  
##  Median : 3.000          Median :1.994e+17   Median :1994-04-10 16:27:00  
##  Mean   : 2.929          Mean   :1.993e+17   Mean   :1993-08-09 09:02:26  
##  3rd Qu.: 4.000          3rd Qu.:1.994e+17   3rd Qu.:1994-05-15 04:08:20  
##  Max.   :10.000          Max.   :2.000e+17   Max.   :2000-06-19 04:49:26  
##                                                                           
##        promo_ofr_id promo_ofr_ref_int_id     promo_initv_id
##  ID1618150001:850   Min.   :1.618e+09    ID914920001:850   
##  ID1618150002:878   1st Qu.:1.618e+09    ID914920002:878   
##  ID1618150003:936   Median :1.618e+09    ID914920003:936   
##  ID1618150004:881   Mean   :1.618e+09    ID914920004:881   
##  ID1618150005:905   3rd Qu.:1.618e+09    ID914920005:905   
##  ID1618150006:893   Max.   :1.618e+09    ID914920006:893   
##                                                            
##  promo_initv_ref_int_id              intrctn_type  intrctn_dirctn
##  Min.   :914920001      cart_abondoned     : 425   INBOUND:5343  
##  1st Qu.:914920002      cart_checkout      : 356                 
##  Median :914920004      add_items_into_cart: 349                 
##  Mean   :914920004      login              : 326                 
##  3rd Qu.:914920005      product_enquiry    : 317                 
##  Max.   :914920006      checkin            : 284                 
##                         (Other)            :3286                 
##  intrctn_ctgry  intrctn_sub_ctgry     intrctn_ctgry_id   intrctn_reason
##  ENQUIRY:5343   ACCESS  :1110     INTCATID_001:1110    AWARENESS:1264  
##                 SELECT  :1005     INTCATID_008:1005    ENGAGE   : 987  
##                 ENQUIRY : 835     INTCATID_003: 835    PURCHASE :1858  
##                 OPINION : 748     INTCATID_002: 748    RESEARCH :1234  
##                 TRANSACT: 518     INTCATID_005: 518                    
##                 EXIT    : 425     INTCATID_010: 425                    
##                 (Other) : 702     (Other)     : 702                    
##  intrctn_begin_dttm            intrctn_end_dttm             
##  Min.   :1988-02-04 00:05:54   Min.   :1988-02-04 00:05:54  
##  1st Qu.:1991-03-07 05:50:00   1st Qu.:1991-03-07 05:51:02  
##  Median :1994-04-10 16:16:50   Median :1994-04-10 16:16:50  
##  Mean   :1993-08-09 14:35:39   Mean   :1993-08-09 14:39:51  
##  3rd Qu.:1994-05-16 05:09:11   3rd Qu.:1994-05-16 05:09:11  
##  Max.   :2000-06-19 06:12:30   Max.   :2000-06-19 06:12:30  
##                                                             
##         terminal_nm    act_type_id           act_type_cd  
##  COUNTER_NO_10: 517   Min.   :12090001   CD12090001:5343  
##  COUNTER_NO_6 : 441   1st Qu.:12090001                    
##  COUNTER_NO_14: 429   Median :12090001                    
##  COUNTER_NO_1 : 414   Mean   :12090001                    
##  COUNTER_NO_8 : 412   3rd Qu.:12090001                    
##  COUNTER_NO_4 : 392   Max.   :12090001                    
##  (Other)      :2738                                       
##         act_type        act_id              emp_id          emp_nm    
##  INTERACTION:5343   Min.   :1.988e+16   Min.   :50001   CD50001:4003  
##                     1st Qu.:1.991e+16   1st Qu.:50001   CD50002: 429  
##                     Median :1.994e+16   Median :50001   CD50003: 441  
##                     Mean   :1.993e+16   Mean   :50002   CD50004: 470  
##                     3rd Qu.:1.994e+16   3rd Qu.:50002                 
##                     Max.   :2.000e+16   Max.   :50004                 
##                                                                       
##     chnl_id           chnl_cd                      chnl_nm    
##  Min.   :380002   CD380002:  83   Geo-Spatial          :1327  
##  1st Qu.:380007   CD380003: 108   In-Store Shopping App:1365  
##  Median :380009   CD380004:  72   Instagram            : 519  
##  Mean   :380009   CD380006: 519   Pinterest            :  72  
##  3rd Qu.:380009   CD380007:1869   Retailer Website     :1869  
##  Max.   :380013   CD380009:1365   Twitter              :  83  
##                   CD380013:1327   You Tube             : 108  
##    tch_pnt_id       tch_pnt_cd          tch_pnt_nm         cust_id    
##  Min.   :2e+05   CD200001: 782   App         :1365   CUST300043:  21  
##  1st Qu.:2e+05   CD200002:1869   In-Store    :1327   CUST300138:  19  
##  Median :2e+05   CD200003:1365   Social Media: 782   CUST500131:  19  
##  Mean   :2e+05   CD200004:1327   Website     :1869   CUST300023:  18  
##  3rd Qu.:2e+05                                       CUST300048:  17  
##  Max.   :2e+05                                       CUST500005:  17  
##                                                      (Other)   :5232  
##  cust_ref_int_id              cust_nm      bsn_unit_id    
##  Min.   :100001   Kayla Cain      :  26   Min.   :100001  
##  1st Qu.:200052   Tia Ramos       :  21   1st Qu.:200001  
##  Median :300098   Lilyana Grimes  :  19   Median :200017  
##  Mean   :298959   Martha Murray   :  19   Mean   :175189  
##  3rd Qu.:400145   Micah Carver    :  18   3rd Qu.:200033  
##  Max.   :500199   Braylen Mcfadden:  17   Max.   :200049  
##                   (Other)         :5223                   
##      bsn_unit_cd      bsn_unit_nm      locn_id             locn_cd    
##  BSN_200044: 106   SITE00044: 106   Min.   :100001   WEB_200012: 317  
##  BSN_200042: 104   SITE00042: 104   1st Qu.:200001   WEB_200003: 313  
##  BSN_200002:  97   SITE00002:  97   Median :200005   WEB_200001: 300  
##  BSN_200012:  95   SITE00012:  95   Mean   :175171   WEB_200002: 300  
##  BSN_200003:  93   SITE00003:  93   3rd Qu.:200010   WEB_200010: 299  
##  BSN_200008:  92   SITE00008:  92   Max.   :200014   WEB_200008: 293  
##  (Other)   :4756   (Other)  :4756                    (Other)   :3521  
##          locn_nm          item_id     item_ref_int_id  
##  N_WEB_A00012: 317   CD5130001:3394   Min.   :5130001  
##  N_WEB_A00003: 313   CD5130051:  11   1st Qu.:5130001  
##  N_WEB_A00001: 300   CD5130314:  11   Median :5130001  
##  N_WEB_A00002: 300   CD5130438:  11   Mean   :5130083  
##  N_WEB_A00010: 299   CD5130031:  10   3rd Qu.:5130145  
##  N_WEB_A00008: 293   CD5130034:  10   Max.   :5130446  
##  (Other)     :3521   (Other)  :1896                    
##            item_nm     mrhrc_hier_grp_id        mrhrc_hier_grp_nm
##  NAN           :3394   Min.   :200003    MRHRC L2 Group 3: 782   
##  Godrej Interio:  87   1st Qu.:300001    MRHRC L3 Group 1:1327   
##  Home Town     :  82   Median :300002    MRHRC L3 Group 2:3234   
##  Royal Oak     :  74   Mean   :285366                            
##  Evok          :  68   3rd Qu.:300002                            
##  Black Pottery :  56   Max.   :300002                            
##  (Other)       :1582                                             
##      st_id    st_cd       ofr_intrctn_type               intrctn_event 
##  Min.   :36   S:5343   CUSTOMER   : 705    InStore_CheckIn      : 284  
##  1st Qu.:36            EVANGELIST : 987    Mobile_CartAbondoned : 245  
##  Median :36            LEAD       :1337    Soc_Like             : 196  
##  Mean   :36            OPPORTUNITY: 467    ONL_Login            : 181  
##  3rd Qu.:36            PROSPECT   : 791    ONL_CartCheckOut     : 177  
##  Max.   :36            SUSPECT    :1056    InStore_NearToInStore: 176  
##                                            (Other)              :4084  
##  item_intrctn_type                            intrctn_desc  batch_run_id  
##  AWARENESS:1264    Looking for purchase of item     : 539   Mode:logical  
##  ENGAGE   : 987    Displays interest in product     : 477   NA's:5343     
##  PURCHASE :1671    Displays dis-satisfaction        : 425                 
##  RESEARCH :1421    Looking for offers for purchase  : 417                 
##                    Customer engagement towards brand: 374                 
##                    Displays willingness to buy      : 349                 
##                    (Other)                          :2762                 
##  job_run_id     src_sys_id  src_sys_nm       tenant_id     rec_creat_by  
##  Mode:logical   SRC1:5343   Mode:logical   Min.   :1e+05   Mode:logical  
##  NA's:5343                  NA's:5343      1st Qu.:1e+05   NA's:5343     
##                                            Median :1e+05                 
##                                            Mean   :1e+05                 
##                                            3rd Qu.:1e+05                 
##                                            Max.   :1e+05                 
##                                                                          
##  rec_creat_dttm               
##  Min.   :1988-02-04 00:05:54  
##  1st Qu.:1991-03-07 05:51:02  
##  Median :1994-04-10 16:16:50  
##  Mean   :1993-08-09 14:39:51  
##  3rd Qu.:1994-05-16 05:09:11  
##  Max.   :2000-06-19 06:12:30  
## 
library(dplyr)
CustInteractEnq <- CustInteractEnqFull %>% mutate(conversion = (CustInteractEnqFull$intrctn_type == "cart_checkout"))%>% select(c("cust_id","intrctn_dttm","chnl_nm","tch_pnt_nm","intrctn_type","intrctn_reason","intrctn_sub_ctgry","conversion")) %>% arrange(cust_id,intrctn_dttm)
summary(CustInteractEnq$intrctn_type)
##            add_items_into_cart        add_items_into_wishlist 
##                            349                            121 
##                 cart_abondoned                  cart_checkout 
##                            425                            356 
##                        checkin                 click_on_offer 
##                            284                            259 
##               click_on_product                        comment 
##                            222                             46 
##                        Connect         delete_items_from_cart 
##                            104                            137 
##     delete_items_from_wishlist                       download 
##                            121                             54 
##                       feedback                         follow 
##                            161                             47 
##                     in_parking                           like 
##                             94                            196 
##                          login                loyalty_enquiry 
##                            326                             67 
##             loyalty_enrollment loyalty_events_points_transfer 
##                            145                            179 
##                  near_to_store                  offer_enquiry 
##                            176                            158 
##   participation_instore_events                password_change 
##                             23                             72 
##                            pin                product_enquiry 
##                             72                            317 
##                 product_review                    read_review 
##                            132                            249 
##                      referrals             response_to_survey 
##                             81                            107 
##                          share                      subscribe 
##                             57                             54 
##                          tweet                           view 
##                             83                             69
head(CustInteractEnq,30)
##       cust_id        intrctn_dttm     chnl_nm   tch_pnt_nm
## 1  CUST100001 1988-02-04 21:43:40 Geo-Spatial     In-Store
## 2  CUST100001 1988-02-04 22:16:36 Geo-Spatial     In-Store
## 3  CUST100001 1988-02-04 22:50:36 Geo-Spatial     In-Store
## 4  CUST100001 1988-02-04 23:05:16 Geo-Spatial     In-Store
## 5  CUST100002 1988-02-04 21:42:42 Geo-Spatial     In-Store
## 6  CUST100002 1988-02-04 22:16:48 Geo-Spatial     In-Store
## 7  CUST100002 1988-02-04 22:43:26 Geo-Spatial     In-Store
## 8  CUST100003 1988-02-04 21:39:00 Geo-Spatial     In-Store
## 9  CUST100004 1988-02-04 21:47:54 Geo-Spatial     In-Store
## 10 CUST100004 1988-02-04 22:27:36 Geo-Spatial     In-Store
## 11 CUST100004 1988-02-04 22:55:30 Geo-Spatial     In-Store
## 12 CUST100004 1988-02-04 23:31:00 Geo-Spatial     In-Store
## 13 CUST100004 1988-02-06 08:27:58 Geo-Spatial     In-Store
## 14 CUST100004 1988-02-06 09:35:08 Geo-Spatial     In-Store
## 15 CUST100004 1988-02-06 10:49:36 Geo-Spatial     In-Store
## 16 CUST100004 1988-02-06 12:04:46 Geo-Spatial     In-Store
## 17 CUST100004 1988-02-06 12:57:38 Geo-Spatial     In-Store
## 18 CUST100005 1988-02-04 00:31:54 Geo-Spatial     In-Store
## 19 CUST100005 1988-02-04 01:10:40 Geo-Spatial     In-Store
## 20 CUST100005 1988-02-04 01:39:26 Geo-Spatial     In-Store
## 21 CUST100005 1988-02-04 02:15:26   Instagram Social Media
## 22 CUST100005 1988-02-04 21:50:06 Geo-Spatial     In-Store
## 23 CUST100005 1988-02-04 22:26:38 Geo-Spatial     In-Store
## 24 CUST100005 1988-02-04 23:03:56 Geo-Spatial     In-Store
## 25 CUST100005 1988-02-04 23:41:38 Geo-Spatial     In-Store
## 26 CUST100005 1988-02-05 00:01:08 Geo-Spatial     In-Store
## 27 CUST100005 1988-02-06 05:05:04 Geo-Spatial     In-Store
## 28 CUST100005 1988-02-06 06:02:38 Geo-Spatial     In-Store
## 29 CUST100005 1988-02-06 07:13:02 Geo-Spatial     In-Store
## 30 CUST100005 1988-02-06 08:25:56 Geo-Spatial     In-Store
##              intrctn_type intrctn_reason intrctn_sub_ctgry conversion
## 1                 checkin       RESEARCH            ACCESS      FALSE
## 2     add_items_into_cart       PURCHASE            SELECT      FALSE
## 3  delete_items_from_cart       PURCHASE          DESELECT      FALSE
## 4           cart_checkout       PURCHASE          TRANSACT       TRUE
## 5           near_to_store      AWARENESS            ACCESS      FALSE
## 6              in_parking       RESEARCH            ACCESS      FALSE
## 7                 checkin       RESEARCH            ACCESS      FALSE
## 8           near_to_store      AWARENESS            ACCESS      FALSE
## 9                 checkin       RESEARCH            ACCESS      FALSE
## 10    add_items_into_cart       PURCHASE            SELECT      FALSE
## 11 delete_items_from_cart       PURCHASE          DESELECT      FALSE
## 12         cart_abondoned       PURCHASE              EXIT      FALSE
## 13                checkin       RESEARCH            ACCESS      FALSE
## 14        product_enquiry      AWARENESS           ENQUIRY      FALSE
## 15          offer_enquiry      AWARENESS           ENQUIRY      FALSE
## 16    add_items_into_cart       PURCHASE            SELECT      FALSE
## 17          cart_checkout       PURCHASE          TRANSACT       TRUE
## 18        loyalty_enquiry      AWARENESS           ENQUIRY      FALSE
## 19        loyalty_enquiry       PURCHASE            ENROLL      FALSE
## 20          cart_checkout       PURCHASE          TRANSACT       TRUE
## 21                   like         ENGAGE           OPINION      FALSE
## 22          near_to_store      AWARENESS            ACCESS      FALSE
## 23             in_parking       RESEARCH            ACCESS      FALSE
## 24                checkin       RESEARCH            ACCESS      FALSE
## 25        product_enquiry      AWARENESS           ENQUIRY      FALSE
## 26          offer_enquiry      AWARENESS           ENQUIRY      FALSE
## 27                checkin       RESEARCH            ACCESS      FALSE
## 28        product_enquiry      AWARENESS           ENQUIRY      FALSE
## 29          offer_enquiry      AWARENESS           ENQUIRY      FALSE
## 30        loyalty_enquiry      AWARENESS           ENQUIRY      FALSE
CustInteractEnq <- CustInteractEnq %>%
  arrange(cust_id,intrctn_dttm) %>%
  group_by(cust_id) %>%
  mutate(path_no = ifelse(is.na(lag(cumsum(conversion))), 0, lag(cumsum(conversion))) + 1) %>%
  ungroup()
head(CustInteractEnq,50)
## # A tibble: 50 x 9
##    cust_… intrctn_dttm        chnl_… tch_p… intrc… intr… intr… conv… path…
##    <fctr> <dttm>              <fctr> <fctr> <fctr> <fct> <fct> <lgl> <dbl>
##  1 CUST1… 1988-02-04 21:43:40 Geo-S… In-St… check… RESE… ACCE… F      1.00
##  2 CUST1… 1988-02-04 22:16:36 Geo-S… In-St… add_i… PURC… SELE… F      1.00
##  3 CUST1… 1988-02-04 22:50:36 Geo-S… In-St… delet… PURC… DESE… F      1.00
##  4 CUST1… 1988-02-04 23:05:16 Geo-S… In-St… cart_… PURC… TRAN… T      1.00
##  5 CUST1… 1988-02-04 21:42:42 Geo-S… In-St… near_… AWAR… ACCE… F      1.00
##  6 CUST1… 1988-02-04 22:16:48 Geo-S… In-St… in_pa… RESE… ACCE… F      1.00
##  7 CUST1… 1988-02-04 22:43:26 Geo-S… In-St… check… RESE… ACCE… F      1.00
##  8 CUST1… 1988-02-04 21:39:00 Geo-S… In-St… near_… AWAR… ACCE… F      1.00
##  9 CUST1… 1988-02-04 21:47:54 Geo-S… In-St… check… RESE… ACCE… F      1.00
## 10 CUST1… 1988-02-04 22:27:36 Geo-S… In-St… add_i… PURC… SELE… F      1.00
## # ... with 40 more rows
df_paths_1 <- CustInteractEnq %>%
        filter(path_no == 1) %>%
        select(-path_no)
##### replace some channels #####
df_path_1_clean <- df_paths_1 %>%
        # removing NAs
        filter(!is.na(chnl_nm))
##### one- and multi-channel paths #####
df_path_1_clean <- df_path_1_clean %>%
        group_by(cust_id) %>%
        mutate(uniq_channel_tag = ifelse(length(unique(chnl_nm)) == 1, TRUE, FALSE)) %>%
        ungroup()
 
df_path_1_clean_uniq <- df_path_1_clean %>%
        filter(uniq_channel_tag == TRUE) %>%
        select(-uniq_channel_tag)
 
df_path_1_clean_multi <- df_path_1_clean %>%
        filter(uniq_channel_tag == FALSE) %>%
        select(-uniq_channel_tag)
head(df_path_1_clean_uniq,50)
## # A tibble: 50 x 8
##    cust_id    intrctn_dttm        chnl_… tch_p… intrct… intr… intrc… conv…
##    <fctr>     <dttm>              <fctr> <fctr> <fctr>  <fct> <fctr> <lgl>
##  1 CUST100001 1988-02-04 21:43:40 Geo-S… In-St… checkin RESE… ACCESS F    
##  2 CUST100001 1988-02-04 22:16:36 Geo-S… In-St… add_it… PURC… SELECT F    
##  3 CUST100001 1988-02-04 22:50:36 Geo-S… In-St… delete… PURC… DESEL… F    
##  4 CUST100001 1988-02-04 23:05:16 Geo-S… In-St… cart_c… PURC… TRANS… T    
##  5 CUST100002 1988-02-04 21:42:42 Geo-S… In-St… near_t… AWAR… ACCESS F    
##  6 CUST100002 1988-02-04 22:16:48 Geo-S… In-St… in_par… RESE… ACCESS F    
##  7 CUST100002 1988-02-04 22:43:26 Geo-S… In-St… checkin RESE… ACCESS F    
##  8 CUST100003 1988-02-04 21:39:00 Geo-S… In-St… near_t… AWAR… ACCESS F    
##  9 CUST100004 1988-02-04 21:47:54 Geo-S… In-St… checkin RESE… ACCESS F    
## 10 CUST100004 1988-02-04 22:27:36 Geo-S… In-St… add_it… PURC… SELECT F    
## # ... with 40 more rows
df_all_paths <- df_path_1_clean %>%
        group_by(cust_id) %>%
        summarise(path = paste(chnl_nm, collapse = ' > '),
                  conversion = sum(conversion)) %>%
        ungroup() %>%
        filter(conversion == 1)
tail(df_all_paths,50)
## # A tibble: 50 x 3
##    cust_id    path                                                  conve…
##    <fctr>     <chr>                                                  <int>
##  1 CUST500122 Pinterest > Instagram > In-Store Shopping App > In-S…      1
##  2 CUST500126 In-Store Shopping App > In-Store Shopping App > In-S…      1
##  3 CUST500127 In-Store Shopping App > In-Store Shopping App > In-S…      1
##  4 CUST500128 In-Store Shopping App > In-Store Shopping App > In-S…      1
##  5 CUST500129 Geo-Spatial > Geo-Spatial > Geo-Spatial > Geo-Spatia…      1
##  6 CUST500130 In-Store Shopping App > In-Store Shopping App > In-S…      1
##  7 CUST500131 Geo-Spatial > Geo-Spatial > Geo-Spatial > Geo-Spatia…      1
##  8 CUST500132 Retailer Website > Retailer Website > Retailer Websi…      1
##  9 CUST500133 In-Store Shopping App > In-Store Shopping App > In-S…      1
## 10 CUST500134 Geo-Spatial > Geo-Spatial > Geo-Spatial > Geo-Spatia…      1
## # ... with 40 more rows
mod_attrib <- markov_model(df_all_paths,
                           var_path = 'path',
                           var_conv = 'conversion',
                           out_more = TRUE)
mod_attrib$removal_effects
##            channel_name removal_effects
## 1           Geo-Spatial        0.405084
## 2      Retailer Website        0.485169
## 3 In-Store Shopping App        0.172690
## 4             Instagram        0.039689
## 5              You Tube        0.010020
## 6             Pinterest        0.006647
## 7               Twitter        0.003280
mod_attrib$result
##            channel_name total_conversions
## 1           Geo-Spatial       108.6162168
## 2      Retailer Website       130.0896142
## 3 In-Store Shopping App        46.3038147
## 4             Instagram        10.6419138
## 5              You Tube         2.6866884
## 6             Pinterest         1.7822772
## 7               Twitter         0.8794749
d_all <- data.frame(mod_attrib$result)
df_multi_paths <- df_path_1_clean_multi %>%
        group_by(cust_id) %>%
        summarise(path = paste(chnl_nm, collapse = ' > '),
                  conversion = sum(conversion)) %>%
        ungroup() %>%
        filter(conversion == 1)
mod_attrib_alt <- markov_model(df_multi_paths,
                           var_path = 'path',
                           var_conv = 'conversion',
                           out_more = TRUE)
mod_attrib_alt$removal_effects
##            channel_name removal_effects
## 1             Instagram        0.386570
## 2 In-Store Shopping App        0.193497
## 3           Geo-Spatial        0.646075
## 4      Retailer Website        0.774334
## 5              You Tube        0.097134
## 6             Pinterest        0.064652
## 7               Twitter        0.031910
mod_attrib_alt$result
##            channel_name total_conversions
## 1             Instagram         5.4615910
## 2 In-Store Shopping App         2.7337907
## 3           Geo-Spatial         9.1279649
## 4      Retailer Website        10.9400512
## 5              You Tube         1.3723418
## 6             Pinterest         0.9134252
## 7               Twitter         0.4508352
# adding unique paths
df_uniq_paths <- df_path_1_clean_uniq %>%
        filter(conversion == 1) %>%
        group_by(chnl_nm) %>%
        summarise(conversions = sum(conversion)) %>%
        ungroup()
d_multi <- data.frame(mod_attrib_alt$result)
d_split <- full_join(d_multi, df_uniq_paths, by = c('channel_name' = 'chnl_nm')) %>%
        mutate(result = total_conversions + conversions)
sum(d_all$total_conversions)
## [1] 301
sum(d_split$result,na.rm = TRUE)
## [1] 292.8018
df_hm <- df_all_paths %>%
        mutate(channel_name_ft = trimws(sub('>.*', '', path)),
               #channel_name_ft = sub(' ', '', channel_name_ft),
               channel_name_lt = trimws(sub('.*>', '', path)))
               #channel_name_lt = sub(' ', '', channel_name_lt))
tail(df_hm,50)
## # A tibble: 50 x 5
##    cust_id    path                                conve… channel… channel…
##    <fctr>     <chr>                                <int> <chr>    <chr>   
##  1 CUST500122 Pinterest > Instagram > In-Store S…      1 Pintere… In-Stor…
##  2 CUST500126 In-Store Shopping App > In-Store S…      1 In-Stor… In-Stor…
##  3 CUST500127 In-Store Shopping App > In-Store S…      1 In-Stor… In-Stor…
##  4 CUST500128 In-Store Shopping App > In-Store S…      1 In-Stor… In-Stor…
##  5 CUST500129 Geo-Spatial > Geo-Spatial > Geo-Sp…      1 Geo-Spa… Geo-Spa…
##  6 CUST500130 In-Store Shopping App > In-Store S…      1 In-Stor… In-Stor…
##  7 CUST500131 Geo-Spatial > Geo-Spatial > Geo-Sp…      1 Geo-Spa… Retaile…
##  8 CUST500132 Retailer Website > Retailer Websit…      1 Retaile… Retaile…
##  9 CUST500133 In-Store Shopping App > In-Store S…      1 In-Stor… In-Stor…
## 10 CUST500134 Geo-Spatial > Geo-Spatial > Geo-Sp…      1 Geo-Spa… Geo-Spa…
## # ... with 40 more rows
# first-touch conversions
df_ft <- df_hm %>%
        group_by(channel_name_ft) %>%
        summarise(FtouchConv = sum(conversion)) %>%
        ungroup()
df_ft
## # A tibble: 7 x 2
##   channel_name_ft       FtouchConv
##   <chr>                      <int>
## 1 Geo-Spatial                  120
## 2 In-Store Shopping App         46
## 3 Instagram                      7
## 4 Pinterest                      2
## 5 Retailer Website             122
## 6 Twitter                        1
## 7 You Tube                       3
# last-touch conversions
df_lt <- df_hm %>%
        group_by(channel_name_lt) %>%
        summarise(LtouchConv = sum(conversion)) %>%
        ungroup()
df_lt
## # A tibble: 3 x 2
##   channel_name_lt       LtouchConv
##   <chr>                      <int>
## 1 Geo-Spatial                  103
## 2 In-Store Shopping App         52
## 3 Retailer Website             146
#h_mod2 <- heuristic_models(CustInteractEnq, var_path = 'path', var_conv = 'conv')
h_mod2 <- merge(df_lt, df_ft, by.x = 'channel_name_lt', by.y = 'channel_name_ft')
h_mod2
##         channel_name_lt LtouchConv FtouchConv
## 1           Geo-Spatial        103        120
## 2 In-Store Shopping App         52         46
## 3      Retailer Website        146        122
# merging all models
all_models <- merge(h_mod2, mod_attrib$result, by.x = 'channel_name_lt', by.y = 'channel_name')
colnames(all_models)[c(1, 4)] <- c('channel_name', 'attribModConv')
all_models
##            channel_name LtouchConv FtouchConv attribModConv
## 1           Geo-Spatial        103        120     108.61622
## 2 In-Store Shopping App         52         46      46.30381
## 3      Retailer Website        146        122     130.08961
mod_attrib
## $result
##            channel_name total_conversions
## 1           Geo-Spatial       108.6162168
## 2      Retailer Website       130.0896142
## 3 In-Store Shopping App        46.3038147
## 4             Instagram        10.6419138
## 5              You Tube         2.6866884
## 6             Pinterest         1.7822772
## 7               Twitter         0.8794749
## 
## $transition_matrix
##             channel_from            channel_to transition_probability
## 1                (start)           Geo-Spatial            0.398671096
## 2                (start)      Retailer Website            0.405315615
## 3                (start) In-Store Shopping App            0.152823920
## 4                (start)             Instagram            0.023255814
## 5                (start)              You Tube            0.009966777
## 6                (start)             Pinterest            0.006644518
## 7                (start)               Twitter            0.003322259
## 8            Geo-Spatial          (conversion)            0.844262295
## 9            Geo-Spatial      Retailer Website            0.155737705
## 10      Retailer Website          (conversion)            1.000000000
## 11 In-Store Shopping App          (conversion)            1.000000000
## 12             Instagram In-Store Shopping App            0.416666667
## 13             Instagram      Retailer Website            0.416666667
## 14             Instagram           Geo-Spatial            0.166666667
## 15              You Tube             Instagram            0.666666667
## 16              You Tube In-Store Shopping App            0.333333333
## 17             Pinterest             Instagram            1.000000000
## 18               Twitter             Instagram            1.000000000
## 
## $removal_effects
##            channel_name removal_effects
## 1           Geo-Spatial        0.405084
## 2      Retailer Website        0.485169
## 3 In-Store Shopping App        0.172690
## 4             Instagram        0.039689
## 5              You Tube        0.010020
## 6             Pinterest        0.006647
## 7               Twitter        0.003280
df_plot_trans <- mod_attrib$transition_matrix

cols <- c("#e7f0fa", "#c9e2f6", "#95cbee", "#0099dc", "#4ab04a", "#ffd73e", "#eec73a",
          "#e29421")
t <- max(df_plot_trans$transition_probability)

ggplot(df_plot_trans, aes(y = channel_from, x = channel_to, fill = transition_probability)) +
        theme_minimal() +
        geom_tile(colour = "white", width = .9, height = .9) +
        scale_fill_gradientn(colours = cols, limits = c(0, t),
                             breaks = seq(0, t, by = t/4),
                             labels = c("0", round(t/4*1, 2), round(t/4*2, 2), round(t/4*3, 2), round(t/4*4, 2)),
                             guide = guide_colourbar(ticks = T, nbin = 50, barheight = .5, label = T, barwidth = 10)) +
        geom_text(aes(label = round(transition_probability, 2)), fontface = "bold", size = 4) +
        theme(legend.position = 'bottom',
              legend.direction = "horizontal",
              panel.grid.major = element_blank(),
              panel.grid.minor = element_blank(),
              plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
              axis.title.x = element_text(size = 24, face = "bold"),
              axis.title.y = element_text(size = 24, face = "bold"),
              axis.text.y = element_text(size = 8, face = "bold", color = 'black'),
              axis.text.x = element_text(size = 8, angle = 90, hjust = 0.5, vjust = 0.5, face = "plain")) +
        ggtitle("Transition matrix heatmap")

# models comparison
all_mod_plot <- melt(all_models, id.vars = 'channel_name', variable.name = 'conv_type')
all_mod_plot$value <- round(all_mod_plot$value)

# slope chart
pal <- colorRampPalette(brewer.pal(10, "Set1"))
## Warning in brewer.pal(10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
ggplot(all_mod_plot, aes(x = conv_type, y = value, group = channel_name)) +
        theme_solarized(base_size = 18, base_family = "", light = TRUE) +
        scale_color_manual(values = pal(10)) +
        scale_fill_manual(values = pal(10)) +
        geom_line(aes(color = channel_name), size = 2.5, alpha = 0.8) +
        geom_point(aes(color = channel_name), size = 5) +
        geom_label_repel(aes(label = paste0(channel_name, ': ', value), fill = factor(channel_name)),
                         alpha = 0.7,
                         fontface = 'bold', color = 'white', size = 5,
                         box.padding = unit(0.25, 'lines'), point.padding = unit(0.5, 'lines'),
                         max.iter = 100) +
        theme(legend.position = 'none',
              legend.title = element_text(size = 16, color = 'black'),
              legend.text = element_text(size = 16, vjust = 2, color = 'black'),
              plot.title = element_text(size = 20, face = "bold", vjust = 2, color = 'black', lineheight = 0.8),
              axis.title.x = element_text(size = 24, face = "bold"),
              axis.title.y = element_text(size = 16, face = "bold"),
              axis.text.x = element_text(size = 16, face = "bold", color = 'black'),
              axis.text.y = element_blank(),
              axis.ticks.x = element_blank(),
              axis.ticks.y = element_blank(),
              panel.border = element_blank(),
              panel.grid.major = element_line(colour = "grey", linetype = "dotted"),
              panel.grid.minor = element_blank(),
              strip.text = element_text(size = 16, hjust = 0.5, vjust = 0.5, face = "bold", color = 'black'),
              strip.background = element_rect(fill = "#f0b35f")) + 
        labs(x = 'Model', y = 'Conversions') +
        ggtitle('Models comparison') +
        guides(colour = guide_legend(override.aes = list(size = 4)))

mod_attrib$removal_effects
##            channel_name removal_effects
## 1           Geo-Spatial        0.405084
## 2      Retailer Website        0.485169
## 3 In-Store Shopping App        0.172690
## 4             Instagram        0.039689
## 5              You Tube        0.010020
## 6             Pinterest        0.006647
## 7               Twitter        0.003280
mod_attrib$result
##            channel_name total_conversions
## 1           Geo-Spatial       108.6162168
## 2      Retailer Website       130.0896142
## 3 In-Store Shopping App        46.3038147
## 4             Instagram        10.6419138
## 5              You Tube         2.6866884
## 6             Pinterest         1.7822772
## 7               Twitter         0.8794749
d_all <- data.frame(mod_attrib$result)
# attribution model for splitted multi and unique channel paths
df_multi_paths <- df_path_1_clean_multi %>%
        group_by(cust_id) %>%
        summarise(path = paste(chnl_nm, collapse = ' > '),
                  conversion = sum(conversion)) %>%
        ungroup() %>%
        filter(conversion == 1)
mod_attrib_alt <- markov_model(df_multi_paths,
                           var_path = 'path',
                           var_conv = 'conversion',
                           out_more = TRUE)
mod_attrib_alt$removal_effects
##            channel_name removal_effects
## 1             Instagram        0.386561
## 2 In-Store Shopping App        0.193437
## 3           Geo-Spatial        0.645352
## 4      Retailer Website        0.774268
## 5              You Tube        0.096711
## 6             Pinterest        0.064215
## 7               Twitter        0.032487
mod_attrib_alt$result
##            channel_name total_conversions
## 1             Instagram         5.4643053
## 2 In-Store Shopping App         2.7343649
## 3           Geo-Spatial         9.1224939
## 4      Retailer Website        10.9448102
## 5              You Tube         1.3670764
## 6             Pinterest         0.9077231
## 7               Twitter         0.4592261
# adding unique paths
df_uniq_paths <- df_path_1_clean_uniq %>%
        filter(conversion == 1) %>%
        group_by(chnl_nm) %>%
        summarise(conversions = sum(conversion)) %>%
        ungroup()
 
d_multi <- data.frame(mod_attrib_alt$result)
 
d_split <- full_join(d_multi, df_uniq_paths, by = c('channel_name' = 'chnl_nm')) %>%
        mutate(result = total_conversions + conversions)
 
sum(d_all$total_conversions)
## [1] 301
sum(d_split$result,na.rm = TRUE)
## [1] 292.8017
##### Generic Probabilistic Model #####
df_all_paths_compl <- df_path_1_clean %>%
        group_by(cust_id) %>%
        summarise(path = paste(chnl_nm, collapse = ' > '),
                  conversion = sum(conversion)) %>%
        ungroup() %>%
        mutate(null_conversion = ifelse(conversion == 1, 0, 1))
 
mod_attrib_complete <- markov_model(
        df_all_paths_compl,
        var_path = 'path',
        var_conv = 'conversion',
        var_null = 'null_conversion',
        out_more = TRUE
)
 
trans_matrix_prob <- mod_attrib_complete$transition_matrix %>%
        dmap_at(c(1, 2), as.character)

##### viz #####
edges <-
        data.frame(
                from = trans_matrix_prob$channel_from,
                to = trans_matrix_prob$channel_to,
                label = round(trans_matrix_prob$transition_probability, 2),
                font.size = trans_matrix_prob$transition_probability * 100,
                width = trans_matrix_prob$transition_probability * 15,
                shadow = TRUE,
                arrows = "to",
                color = list(color = "#95cbee", highlight = "red")
        )
 
nodes <- data_frame(id = c( c(trans_matrix_prob$channel_from), c(trans_matrix_prob$channel_to) )) %>%
        distinct(id) %>%
        arrange(id) %>%
        mutate(
                label = id,
                color = ifelse(
                        label %in% c('(start)', '(conversion)'),
                        '#4ab04a',
                        ifelse(label == '(null)', '#ce472e', '#ffd73e')
                ),
                shadow = TRUE,
                shape = "box"
        )
 
visNetwork(nodes,
           edges,
           height = "2000px",
           width = "100%",
           main = "Generic Probabilistic model's Transition Matrix") %>%
        visIgraphLayout(randomSeed = 123) %>%
        visNodes(size = 5) %>%
        visOptions(highlightNearest = TRUE) 
##### Generic Probabilistic Model #####
df_all_paths_compl <- df_path_1_clean %>%
        group_by(cust_id) %>%
        summarise(path = paste(chnl_nm, collapse = ' > '),
                  conversion = sum(conversion)) %>%
        ungroup() %>%
        mutate(null_conversion = ifelse(conversion == 1, 0, 1))
 
mod_attrib_complete <- markov_model(
        df_all_paths_compl,
        var_path = 'path',
        var_conv = 'conversion',
        var_null = 'null_conversion',
        out_more = TRUE
)
 
trans_matrix_prob <- mod_attrib_complete$transition_matrix %>%
        dmap_at(c(1, 2), as.character)
 
##### viz #####
edges <-
        data.frame(
                from = trans_matrix_prob$channel_from,
                to = trans_matrix_prob$channel_to,
                label = round(trans_matrix_prob$transition_probability, 2),
                font.size = trans_matrix_prob$transition_probability * 100,
                width = trans_matrix_prob$transition_probability * 15,
                shadow = TRUE,
                arrows = "to",
                color = list(color = "#95cbee", highlight = "red")
        )
 
nodes <- data_frame(id = c( c(trans_matrix_prob$channel_from), c(trans_matrix_prob$channel_to) )) %>%
        distinct(id) %>%
        arrange(id) %>%
        mutate(
                label = id,
                color = ifelse(
                        label %in% c('(start)', '(conversion)'),
                        '#4ab04a',
                        ifelse(label == '(null)', '#ce472e', '#ffd73e')
                ),
                shadow = TRUE,
                shape = "box"
        )
 
visNetwork(nodes,
           edges,
           height = "2000px",
           width = "100%",
           main = "Generic Probabilistic model's Transition Matrix") %>%
        visIgraphLayout(randomSeed = 123) %>%
        visNodes(size = 5) %>%
        visOptions(highlightNearest = TRUE)
##### Customer journey duration #####
# computing time lapses from the first contact to conversion/last contact
df_multi_paths_tl <- df_path_1_clean_multi %>%
        group_by(cust_id) %>%
        summarise(path = paste(chnl_nm, collapse = ' > '),
                  first_touch_date = min(intrctn_dttm),
                  last_touch_date = max(intrctn_dttm),
                  tot_time_lapse = round(as.numeric(last_touch_date - first_touch_date)),
                  conversion = sum(conversion)) %>%
        ungroup()
 
# distribution plot
ggplot(df_multi_paths_tl %>% filter(conversion == 1), aes(x = tot_time_lapse)) +
        theme_minimal() +
        geom_histogram(fill = '#4e79a7', binwidth = 1)

# cumulative distribution plot
ggplot(df_multi_paths_tl %>% filter(conversion == 1), aes(x = tot_time_lapse)) +
        theme_minimal() +
        stat_ecdf(geom = 'step', color = '#4e79a7', size = 2, alpha = 0.7) +
        geom_hline(yintercept = 0.90, color = '#e15759', size = 1.5) +
        geom_vline(xintercept = 4, color = '#e15759', size = 1.5, linetype = 2)

### removing not first events ###
df_customers <- CustInteractEnq %>%
 group_by(cust_id, intrctn_type) %>%
 filter(intrctn_dttm == min(intrctn_dttm)) %>%
 ungroup()
df_customersInStore <- df_customers %>% filter(tch_pnt_nm %in% c("In-Store"))
df_customersInStore[] <- lapply(df_customersInStore, function(x) if(is.factor(x)) factor(x) else x)
summary(df_customersInStore)
##        cust_id      intrctn_dttm                        chnl_nm    
##  CUST100035:   9   Min.   :1988-02-04 00:05:54   Geo-Spatial:1190  
##  CUST100194:   9   1st Qu.:1988-02-04 07:16:56                     
##  CUST100031:   8   Median :1988-02-04 14:50:19                     
##  CUST100055:   8   Mean   :1990-08-27 23:24:35                     
##  CUST100061:   8   3rd Qu.:1994-04-10 05:51:25                     
##  CUST100064:   8   Max.   :2000-06-19 06:12:30                     
##  (Other)   :1140                                                   
##     tch_pnt_nm                intrctn_type   intrctn_reason
##  In-Store:1190   checkin            :252   AWARENESS:390   
##                  near_to_store      :161   ENGAGE   : 69   
##                  add_items_into_cart:132   PURCHASE :387   
##                  product_enquiry    :107   RESEARCH :344   
##                  cart_checkout      :103                   
##                  in_parking         : 92                   
##                  (Other)            :343                   
##  intrctn_sub_ctgry conversion         path_no     
##  ACCESS  :505      Mode :logical   Min.   :1.000  
##  ENQUIRY :229      FALSE:1087      1st Qu.:1.000  
##  SELECT  :132      TRUE :103       Median :1.000  
##  TRANSACT:120                      Mean   :1.057  
##  DESELECT: 83                      3rd Qu.:1.000  
##  OPINION : 69                      Max.   :2.000  
##  (Other) : 52
summary(df_customersInStore$intrctn_sub_ctgry)
##   ACCESS DESELECT  ENQUIRY   ENROLL     EXIT  OPINION   SELECT TRANSACT 
##      505       83      229        1       51       69      132      120
df.content <- df_customersInStore %>% select(intrctn_type,intrctn_sub_ctgry)%>%
  group_by(intrctn_type,intrctn_sub_ctgry) %>% summarise(number=n())
df.content
## # A tibble: 15 x 3
## # Groups: intrctn_type [?]
##    intrctn_type                   intrctn_sub_ctgry number
##    <fctr>                         <fctr>             <int>
##  1 add_items_into_cart            SELECT               132
##  2 cart_abondoned                 EXIT                  51
##  3 cart_checkout                  TRANSACT             103
##  4 checkin                        ACCESS               252
##  5 delete_items_from_cart         DESELECT              83
##  6 feedback                       OPINION               23
##  7 in_parking                     ACCESS                92
##  8 loyalty_enquiry                ENQUIRY               39
##  9 loyalty_enquiry                ENROLL                 1
## 10 loyalty_events_points_transfer TRANSACT              17
## 11 near_to_store                  ACCESS               161
## 12 offer_enquiry                  ENQUIRY               83
## 13 participation_instore_events   OPINION               23
## 14 product_enquiry                ENQUIRY              107
## 15 response_to_survey             OPINION               23
df.all <- df.content %>%
 group_by(intrctn_sub_ctgry) %>%
 mutate(totnum = sum(number)) %>%
 ungroup() %>%
 mutate(dum = (max(totnum) - totnum)/2,
 maxx = totnum + dum,
 minx = dum)
head(df.all)
## # A tibble: 6 x 7
##   intrctn_type           intrctn_sub_ctgry number totnum   dum  maxx  minx
##   <fctr>                 <fctr>             <int>  <int> <dbl> <dbl> <dbl>
## 1 add_items_into_cart    SELECT               132    132   186   318   186
## 2 cart_abondoned         EXIT                  51     51   227   278   227
## 3 cart_checkout          TRANSACT             103    120   192   312   192
## 4 checkin                ACCESS               252    505     0   505     0
## 5 delete_items_from_cart DESELECT              83     83   211   294   211
## 6 feedback               OPINION               23     69   218   287   218
df.lines <- df.all %>%
 distinct(intrctn_sub_ctgry, maxx, minx)
df.lines
## # A tibble: 8 x 3
##   intrctn_sub_ctgry  maxx  minx
##   <fctr>            <dbl> <dbl>
## 1 SELECT              318   186
## 2 EXIT                278   227
## 3 TRANSACT            312   192
## 4 ACCESS              505     0
## 5 DESELECT            294   211
## 6 OPINION             287   218
## 7 ENQUIRY             367   138
## 8 ENROLL              253   252
df.dum <- df.all %>%
 distinct(intrctn_sub_ctgry,dum) %>%
 mutate(intrctn_type = 'dummy',
 number = dum) %>%
 select(intrctn_type, intrctn_sub_ctgry,number)

head(df.dum)
## # A tibble: 6 x 3
##   intrctn_type intrctn_sub_ctgry number
##   <chr>        <fctr>             <dbl>
## 1 dummy        SELECT               186
## 2 dummy        EXIT                 227
## 3 dummy        TRANSACT             192
## 4 dummy        ACCESS                 0
## 5 dummy        DESELECT             211
## 6 dummy        OPINION              218
conv <- df.all$totnum[df.all$intrctn_sub_ctgry == 'ACCESS']
df.rates <- df.all %>%
 distinct(intrctn_sub_ctgry, totnum) %>%
 mutate(prevnum = lag(totnum),
 rate = round(totnum / conv, 3)) %>%
 select(intrctn_sub_ctgry, totnum, prevnum, rate)
## Warning in totnum/conv: longer object length is not a multiple of shorter
## object length
df.rates <- na.omit(df.rates)

df.rates
## # A tibble: 7 x 4
##   intrctn_sub_ctgry totnum prevnum    rate
##   <fctr>             <int>   <int>   <dbl>
## 1 EXIT                  51     132 0.101  
## 2 TRANSACT             120      51 0.238  
## 3 ACCESS               505     120 1.00   
## 4 DESELECT              83     505 0.164  
## 5 OPINION               69      83 0.137  
## 6 ENQUIRY              229      69 0.453  
## 7 ENROLL                 1     229 0.00200
df.all <- df.all %>%
 select(intrctn_type,intrctn_sub_ctgry,number)
df.all
## # A tibble: 15 x 3
##    intrctn_type                   intrctn_sub_ctgry number
##    <fctr>                         <fctr>             <int>
##  1 add_items_into_cart            SELECT               132
##  2 cart_abondoned                 EXIT                  51
##  3 cart_checkout                  TRANSACT             103
##  4 checkin                        ACCESS               252
##  5 delete_items_from_cart         DESELECT              83
##  6 feedback                       OPINION               23
##  7 in_parking                     ACCESS                92
##  8 loyalty_enquiry                ENQUIRY               39
##  9 loyalty_enquiry                ENROLL                 1
## 10 loyalty_events_points_transfer TRANSACT              17
## 11 near_to_store                  ACCESS               161
## 12 offer_enquiry                  ENQUIRY               83
## 13 participation_instore_events   OPINION               23
## 14 product_enquiry                ENQUIRY              107
## 15 response_to_survey             OPINION               23
df.all <- rbind(df.all, df.dum)
df.all$intrctn_sub_ctgry <- factor(df.all$intrctn_sub_ctgry, levels = c('ENROLL','OPINION','TRANSACT','EXIT','DESELECT','SELECT','ENQUIRY','ACCESS'))
df.all <- df.all %>%
 arrange(desc(intrctn_sub_ctgry))
list1 <- df.all %>% distinct(intrctn_type) %>%
 filter(intrctn_type != 'dummy')
df.all$intrctn_type <- factor(df.all$intrctn_type, levels = c(as.character(list1$intrctn_type), 'dummy'))
#calculating position of labels
df.all <- df.all %>%
 arrange(intrctn_sub_ctgry,desc(intrctn_type)) %>%
 group_by(intrctn_sub_ctgry) %>%
 mutate(pos = cumsum(number) - 0.5*number) %>%
 ungroup()
# creating custom palette with 'white' color for dummies
cols <- c("#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
 "#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
 "#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
 "#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
 "#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2")

# plotting chart
ggplot() +
 theme_minimal() +
 coord_flip() +
 scale_fill_manual(values=cols) +
 geom_bar(data=df.all, aes(x=intrctn_sub_ctgry, y=number, fill=intrctn_type), stat="identity", width=1) +
 geom_text(data=df.all[df.all$intrctn_type!='dummy', ],
 aes(x=intrctn_sub_ctgry, y=pos, label=paste0(intrctn_type, '-', number)),
 size=2, color='white', fontface="bold") +
 geom_ribbon(data=df.lines, aes(x=intrctn_sub_ctgry, ymax=max(maxx), ymin=maxx, group=1), fill='white') +
 geom_line(data=df.lines, aes(x=intrctn_sub_ctgry, y=maxx, group=1), color='darkred', size=2) +
 geom_ribbon(data=df.lines, aes(x=intrctn_sub_ctgry, ymax=minx, ymin=min(minx), group=1), fill='white') +
 geom_line(data=df.lines, aes(x=intrctn_sub_ctgry, y=minx, group=1), color='darkred', size=2) +
 geom_text(data=df.rates, aes(x=intrctn_sub_ctgry, y=(df.lines$minx[-1]), label=paste0(rate*100, '%')), hjust=1.2,
 color='darkblue', fontface="bold") +
 theme(legend.position='none', axis.ticks=element_blank(), axis.text.x=element_blank(), 
 axis.title.x=element_blank())

df_customersTotal <- df_customers %>% filter(tch_pnt_nm %in% c("Website"))
df_customersTotal[] <- lapply(df_customersTotal, function(x) if(is.factor(x)) factor(x) else x)
summary(df_customersTotal)
##        cust_id      intrctn_dttm                             chnl_nm    
##  CUST200043:  13   Min.   :1991-03-07 00:04:00   Retailer Website:1733  
##  CUST200017:  12   1st Qu.:1991-03-07 07:53:02                          
##  CUST200031:  10   Median :1991-03-08 11:06:38                          
##  CUST200070:  10   Mean   :1993-07-18 09:47:50                          
##  CUST200127:  10   3rd Qu.:1994-04-12 17:33:32                          
##  CUST200144:  10   Max.   :2000-06-19 04:49:06                          
##  (Other)   :1668                                                        
##    tch_pnt_nm                intrctn_type   intrctn_reason
##  Website:1733   login              :165   AWARENESS:281   
##                 cart_checkout      :146   ENGAGE   :181   
##                 add_items_into_cart:122   PURCHASE :877   
##                 cart_abondoned     :120   RESEARCH :394   
##                 click_on_offer     :113                   
##                 click_on_product   :110                   
##                 (Other)            :957                   
##  intrctn_sub_ctgry conversion         path_no     
##  SELECT  :454      Mode :logical   Min.   :1.000  
##  TRANSACT:255      FALSE:1587      1st Qu.:1.000  
##  ENQUIRY :216      TRUE :146       Median :1.000  
##  ACCESS  :165                      Mean   :1.153  
##  DESELECT:162                      3rd Qu.:1.000  
##  OPINION :130                      Max.   :3.000  
##  (Other) :351
summary(df_customersTotal$intrctn_sub_ctgry)
##   ACCESS   ACTION DESELECT  ENQUIRY   ENROLL     EXIT  OPINION  PROMOTE 
##      165       71      162      216      109      120      130       51 
##   SELECT TRANSACT 
##      454      255
dfTotal <- df_customersTotal %>% select(intrctn_type,intrctn_sub_ctgry)%>%
  group_by(intrctn_type,intrctn_sub_ctgry) %>% summarise(number=n())
dfTotal
## # A tibble: 19 x 3
## # Groups: intrctn_type [?]
##    intrctn_type                   intrctn_sub_ctgry number
##    <fctr>                         <fctr>             <int>
##  1 add_items_into_cart            SELECT               122
##  2 add_items_into_wishlist        SELECT               109
##  3 cart_abondoned                 EXIT                 120
##  4 cart_checkout                  TRANSACT             146
##  5 click_on_offer                 SELECT               113
##  6 click_on_product               SELECT               110
##  7 delete_items_from_cart         DESELECT              53
##  8 delete_items_from_wishlist     DESELECT             109
##  9 feedback                       OPINION               56
## 10 login                          ACCESS               165
## 11 loyalty_enrollment             ENROLL               109
## 12 loyalty_events_points_transfer TRANSACT             109
## 13 offer_enquiry                  ENQUIRY               39
## 14 password_change                ACTION                71
## 15 product_enquiry                ENQUIRY               77
## 16 product_review                 OPINION               40
## 17 read_review                    ENQUIRY              100
## 18 referrals                      PROMOTE               51
## 19 response_to_survey             OPINION               34
dfallTotal <- dfTotal %>%
 group_by(intrctn_sub_ctgry) %>%
 mutate(totnum = sum(number)) %>%
 ungroup() %>%
 mutate(dum = (max(totnum) - totnum)/2,
 maxx = totnum + dum,
 minx = dum)
dfallTotal
## # A tibble: 19 x 7
##    intrctn_type                   intrctn_s… numb… totn…   dum  maxx  minx
##    <fctr>                         <fctr>     <int> <int> <dbl> <dbl> <dbl>
##  1 add_items_into_cart            SELECT       122   454   0     454   0  
##  2 add_items_into_wishlist        SELECT       109   454   0     454   0  
##  3 cart_abondoned                 EXIT         120   120 167     287 167  
##  4 cart_checkout                  TRANSACT     146   255  99.5   354  99.5
##  5 click_on_offer                 SELECT       113   454   0     454   0  
##  6 click_on_product               SELECT       110   454   0     454   0  
##  7 delete_items_from_cart         DESELECT      53   162 146     308 146  
##  8 delete_items_from_wishlist     DESELECT     109   162 146     308 146  
##  9 feedback                       OPINION       56   130 162     292 162  
## 10 login                          ACCESS       165   165 144     310 144  
## 11 loyalty_enrollment             ENROLL       109   109 172     282 172  
## 12 loyalty_events_points_transfer TRANSACT     109   255  99.5   354  99.5
## 13 offer_enquiry                  ENQUIRY       39   216 119     335 119  
## 14 password_change                ACTION        71    71 192     262 192  
## 15 product_enquiry                ENQUIRY       77   216 119     335 119  
## 16 product_review                 OPINION       40   130 162     292 162  
## 17 read_review                    ENQUIRY      100   216 119     335 119  
## 18 referrals                      PROMOTE       51    51 202     252 202  
## 19 response_to_survey             OPINION       34   130 162     292 162
dflinesTotal <- dfallTotal %>%
 distinct(intrctn_sub_ctgry, maxx, minx)
dflinesTotal
## # A tibble: 10 x 3
##    intrctn_sub_ctgry  maxx  minx
##    <fctr>            <dbl> <dbl>
##  1 SELECT              454   0  
##  2 EXIT                287 167  
##  3 TRANSACT            354  99.5
##  4 DESELECT            308 146  
##  5 OPINION             292 162  
##  6 ACCESS              310 144  
##  7 ENROLL              282 172  
##  8 ENQUIRY             335 119  
##  9 ACTION              262 192  
## 10 PROMOTE             252 202
dfdumTotal <- dfallTotal %>%
 distinct(intrctn_sub_ctgry,dum) %>%
 mutate(intrctn_type = 'dummy',
 number = dum) %>%
 select(intrctn_type, intrctn_sub_ctgry,number)

head(dfdumTotal)
## # A tibble: 6 x 3
##   intrctn_type intrctn_sub_ctgry number
##   <chr>        <fctr>             <dbl>
## 1 dummy        SELECT               0  
## 2 dummy        EXIT               167  
## 3 dummy        TRANSACT            99.5
## 4 dummy        DESELECT           146  
## 5 dummy        OPINION            162  
## 6 dummy        ACCESS             144
conv <- dfallTotal$totnum[dfallTotal$intrctn_sub_ctgry == 'SELECT']
dfratesTotal <- dfallTotal %>%
 distinct(intrctn_sub_ctgry, totnum) %>%
 mutate(rate = round(totnum / conv, 3)) %>%
 select(intrctn_sub_ctgry, totnum, rate)
## Warning in totnum/conv: longer object length is not a multiple of shorter
## object length
dfratesTotal <- na.omit(dfratesTotal)

dfratesTotal
## # A tibble: 10 x 3
##    intrctn_sub_ctgry totnum  rate
##    <fctr>             <int> <dbl>
##  1 SELECT               454 1.00 
##  2 EXIT                 120 0.264
##  3 TRANSACT             255 0.562
##  4 DESELECT             162 0.357
##  5 OPINION              130 0.286
##  6 ACCESS               165 0.363
##  7 ENROLL               109 0.240
##  8 ENQUIRY              216 0.476
##  9 ACTION                71 0.156
## 10 PROMOTE               51 0.112
dfallTotal <- dfallTotal %>%
 select(intrctn_type,intrctn_sub_ctgry,number)
dfallTotal
## # A tibble: 19 x 3
##    intrctn_type                   intrctn_sub_ctgry number
##    <fctr>                         <fctr>             <int>
##  1 add_items_into_cart            SELECT               122
##  2 add_items_into_wishlist        SELECT               109
##  3 cart_abondoned                 EXIT                 120
##  4 cart_checkout                  TRANSACT             146
##  5 click_on_offer                 SELECT               113
##  6 click_on_product               SELECT               110
##  7 delete_items_from_cart         DESELECT              53
##  8 delete_items_from_wishlist     DESELECT             109
##  9 feedback                       OPINION               56
## 10 login                          ACCESS               165
## 11 loyalty_enrollment             ENROLL               109
## 12 loyalty_events_points_transfer TRANSACT             109
## 13 offer_enquiry                  ENQUIRY               39
## 14 password_change                ACTION                71
## 15 product_enquiry                ENQUIRY               77
## 16 product_review                 OPINION               40
## 17 read_review                    ENQUIRY              100
## 18 referrals                      PROMOTE               51
## 19 response_to_survey             OPINION               34
dfallTotalFinal <- rbind(dfallTotal, dfdumTotal)
dfallTotalFinal
## # A tibble: 29 x 3
##    intrctn_type               intrctn_sub_ctgry number
##    <fctr>                     <fctr>             <dbl>
##  1 add_items_into_cart        SELECT             122  
##  2 add_items_into_wishlist    SELECT             109  
##  3 cart_abondoned             EXIT               120  
##  4 cart_checkout              TRANSACT           146  
##  5 click_on_offer             SELECT             113  
##  6 click_on_product           SELECT             110  
##  7 delete_items_from_cart     DESELECT            53.0
##  8 delete_items_from_wishlist DESELECT           109  
##  9 feedback                   OPINION             56.0
## 10 login                      ACCESS             165  
## # ... with 19 more rows
dfallTotalFinal$intrctn_sub_ctgry <- factor(dfallTotalFinal$intrctn_sub_ctgry, levels = c('PROMOTE','ENROLL','ACTION','OPINION','TRANSACT','EXIT','DESELECT','ENQUIRY','ACCESS','SELECT'))
dfallTotalFinal <- dfallTotalFinal %>%
 arrange(desc(intrctn_sub_ctgry))
list1 <- dfallTotalFinal %>% distinct(intrctn_type) %>%
 filter(intrctn_type != 'dummy')
dfallTotalFinal$intrctn_type <- factor(dfallTotalFinal$intrctn_type, levels = c(as.character(list1$intrctn_type), 'dummy'))
#calculating position of labels
dfallTotalFinal <- dfallTotalFinal  %>%
 arrange(intrctn_sub_ctgry,desc(intrctn_type)) %>%
 group_by(intrctn_sub_ctgry) %>%
 mutate(pos = cumsum(number) - 0.5*number) %>%
 ungroup()

# creating custom palette with 'white' color for dummies
cols <- c("#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
 "#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
 "#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2",
 "#2ca25f", "#8856a7", "#43a2ca", "#fdbb84",
 "#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#e34a33", "#a6bddb", "#dd1c77", "#ffffff","#fec44f", "#fc9272", "#a1d99b", "#fee0d2")

# plotting chart
ggplot() +
 theme_minimal() +
 coord_flip() +
 scale_fill_manual(values=cols) +
 geom_bar(data=dfallTotalFinal, aes(x=intrctn_sub_ctgry, y=number, fill=intrctn_type), stat="identity", width=1) +
 geom_text(data=dfallTotalFinal[dfallTotalFinal$intrctn_type!='dummy', ],
 aes(x=intrctn_sub_ctgry, y=pos, label=paste0(intrctn_type, '-', number)),
 size=2, color='white', fontface="bold") +
 geom_ribbon(data=dflinesTotal, aes(x=intrctn_sub_ctgry, ymax=max(maxx), ymin=maxx, group=1), fill='white') +
 geom_line(data=dflinesTotal, aes(x=intrctn_sub_ctgry, y=maxx, group=1), color='darkred', size=2) +
 geom_ribbon(data=dflinesTotal, aes(x=intrctn_sub_ctgry, ymax=minx, ymin=min(minx), group=1), fill='white') +
 geom_line(data=dflinesTotal, aes(x=intrctn_sub_ctgry, y=minx, group=1), color='darkred', size=2) +
 geom_text(data=dfratesTotal, aes(x=intrctn_sub_ctgry, y=(dflinesTotal$minx), label=paste0(rate*100, '%')), hjust=1.2,
 color='darkblue', fontface="bold") +
 theme(legend.position='none', axis.ticks=element_blank(), axis.text.x=element_blank(), 
 axis.title.x=element_blank())