# Event Logs of Clients with Login Failures on Jan 12, 2021

library(bupaR, quietly = TRUE)
## Warning: package 'bupaR' was built under R version 3.6.2
## 
## Attaching package: 'bupaR'
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:utils':
## 
##     timestamp
library(processmapR, quietly = TRUE)
library(heuristicsmineR, quietly = TRUE)
## Warning: package 'heuristicsmineR' was built under R version 3.6.2
## 
## Attaching package: 'heuristicsmineR'
## The following objects are masked from 'package:processmapR':
## 
##     precedence_matrix, precedence_matrix_absolute
library(petrinetR, quietly = TRUE)
library(dplyr, quietly = TRUE)
## Warning: package 'dplyr' was built under R version 3.6.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Ingest Data

# Load the raw event records (one row per event).
# `stringsAsFactors = TRUE` is spelled out because the read.csv() default
# changed to FALSE in R 4.0; the factor columns shown by str() and summary()
# depend on it, so pinning it keeps the script reproducible across R versions.
df <- read.csv("heuristic_miner_example.csv", stringsAsFactors = TRUE)
str(df)
## 'data.frame':    22229 obs. of  4 variables:
##  $ case_id : Factor w/ 300 levels "Case_1","Case_10",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ activity: Factor w/ 31 levels "Add to Cart",..: 15 15 15 17 28 14 19 19 19 19 ...
##  $ channel : Factor w/ 15 levels "App","AutoR",..: 13 13 13 13 13 9 13 13 13 13 ...
##  $ tstamp  : Factor w/ 9717 levels "2021-01-12 05:00:37",..: 1274 1275 1276 1279 1280 1280 1281 1281 1281 1282 ...
# Convert the timestamp column to POSIXct, interpreting it in the "EST" zone.
# NOTE(review): "EST" is normally a fixed UTC-5 zone without daylight saving;
# confirm that this is intended rather than e.g. "America/New_York".
df[["tstamp"]] <- as.POSIXct(
  df[["tstamp"]],
  format = "%Y-%m-%d %H:%M:%OS",
  tz = "EST"
)

# Descriptive Stats

# Per-column overview of the raw events (level counts and timestamp range).
df %>% summary()
##      case_id                           activity          channel     
##  Case_149:  619   Proposition Feedback     :11974   App      :12288  
##  Case_215:  499   Page Visit               : 5379   Web      : 8132  
##  Case_273:  490   Failed Login             :  722   Email    :  495  
##  Case_293:  470   Successful Login         :  565   AutoResp :  462  
##  Case_178:  394   Chat Initiated in ChatBot:  519   CallAgent:  448  
##  Case_258:  394   Email Communication Sent :  495   X_X      :  212  
##  (Other) :19363   (Other)                  : 2575   (Other)  :  192  
##      tstamp                   
##  Min.   :2021-01-12 05:00:37  
##  1st Qu.:2021-01-12 11:12:50  
##  Median :2021-01-12 18:41:33  
##  Mean   :2021-01-12 17:54:20  
##  3rd Qu.:2021-01-13 00:20:26  
##  Max.   :2021-01-13 04:59:59  
## 
# Number of events per activity, most frequent first.
df %>%
  group_by(activity) %>%
  tally() %>%
  arrange(desc(n))
## # A tibble: 31 x 2
##    activity                        n
##    <fct>                       <int>
##  1 Proposition Feedback        11974
##  2 Page Visit                   5379
##  3 Failed Login                  722
##  4 Successful Login              565
##  5 Chat Initiated in ChatBot     519
##  6 Email Communication Sent      495
##  7 Password Change               368
##  8 Push Notification Status      367
##  9 Chat Initiated in LiveAgent   298
## 10 Case Status Updated           251
## # … with 21 more rows
# Latest events of Case_149 (the case with the most events), in time order.
# `wt = tstamp` names the ranking column explicitly instead of relying on
# top_n()'s implicit "last column" default, which would silently change if the
# columns were reordered. top_n() keeps ties, so more than 30 rows come back
# when several events share the 30th-latest timestamp.
df %>%
  filter(case_id == "Case_149") %>%
  arrange(tstamp) %>%
  top_n(30, wt = tstamp)
##     case_id             activity channel              tstamp
## 1  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 2  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 3  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 4  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 5  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 6  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 7  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 8  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 9  Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 10 Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 11 Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 12 Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 13 Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 14 Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 15 Case_149 Proposition Feedback     App 2021-01-13 00:35:48
## 16 Case_149           Page Visit     App 2021-01-13 01:06:33
## 17 Case_149           Page Visit     App 2021-01-13 01:06:35
## 18 Case_149           Page Visit     App 2021-01-13 01:06:54
## 19 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 20 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 21 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 22 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 23 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 24 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 25 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 26 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 27 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 28 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 29 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 30 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 31 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 32 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 33 Case_149 Proposition Feedback     App 2021-01-13 01:08:14
## 34 Case_149     Successful Login     Web 2021-01-13 02:54:35
## 35 Case_149     Successful Login     Web 2021-01-13 02:59:38
# Distribution of the number of events per case: case count, min, quartiles,
# median and max of the per-case event totals.
# `probs` is written out in full; the earlier `prob` only worked through
# partial argument matching.
df %>%
  group_by(case_id) %>%
  tally() %>%
  summarize(
    count = n(),
    min = min(n),
    Q1 = quantile(n, probs = 0.25),
    median = median(n),
    Q3 = quantile(n, probs = 0.75),
    max = max(n)
  )
## # A tibble: 1 x 6
##   count   min    Q1 median    Q3   max
##   <int> <int> <dbl>  <dbl> <dbl> <int>
## 1   300     2    30     50  89.5   619
# Create Event Log
# Using Last Time Only When an Activity Repeats Within a Case

# For every (case_id, activity) pair, the rows are sorted by time and only the
# last one is kept, i.e. the latest timestamp together with its channel.
# Each remaining row becomes one activity instance with a single "complete"
# lifecycle event; `aid` is built from case, activity and channel.
elog <- df %>%
  select(case_id, activity, channel, tstamp) %>%
  group_by(case_id, activity) %>%
  arrange(tstamp) %>%
  summarise_all(last) %>%
  # summarise_all() leaves the data grouped by case_id; drop the grouping so
  # it does not leak into the following steps.
  ungroup() %>%
  mutate(aid = paste0(case_id, activity, channel), lid = "complete") %>%
  eventlog(
    case_id = "case_id",
    activity_id = "activity",
    resource_id = "channel",
    activity_instance_id = "aid",
    lifecycle_id = "lid",
    timestamp = "tstamp"
  )
dim(elog)
## [1] 2327    7
# Distinct activity sequences (traces) in the event log and their frequencies.
traces(elog)
## # A tibble: 259 x 3
##    trace                                     absolute_frequen… relative_frequen…
##    <chr>                                                 <int>             <dbl>
##  1 Failed Login,Password Change,Email Commu…                11            0.0367
##  2 Failed Login,Password Change,Email Commu…                 6            0.02  
##  3 Failed Login,Transaction Initiated,Passw…                 4            0.0133
##  4 Failed Login,Password Change,Email Commu…                 4            0.0133
##  5 Proposition Feedback,Failed Login,Passwo…                 3            0.01  
##  6 Password Change,Email Communication Sent                  3            0.01  
##  7 Password Change,Email Communication Sent…                 3            0.01  
##  8 Password Change,Secret Question/Answer C…                 3            0.01  
##  9 Failed Login,Password Change,Secret Ques…                 3            0.01  
## 10 Password Change,Email Communication Sent…                 3            0.01  
## # … with 249 more rows
# Process Map

# Draw the process map for the traces kept by the c(4, NA) frequency filter
# (presumably traces that occur at least 4 times -- confirm against edeaR docs).
process_map(filter_trace_frequency(elog, c(4, NA)))
# Dependency Using Heuristic Miner
# (run on the cleaned-up event log built above)

# Dependency matrix for the traces kept by the c(2, NA) frequency filter.
render_dependency_matrix(
  dependency_matrix(
    filter_trace_frequency(elog, c(2, NA))
  )
)
# Inspect the events retained in the event log for a single case.
filter(elog, case_id == "Case_149")
## Log of 19 events consisting of:
## 1 trace 
## 1 case 
## 19 instances of 19 activities 
## 5 resources 
## Events occurred from 2021-01-12 05:06:52 until 2021-01-13 02:59:38 
##  
## Variables were mapped as follows:
## Case identifier:     case_id 
## Activity identifier:     activity 
## Resource identifier:     channel 
## Activity instance identifier:    aid 
## Timestamp:           tstamp 
## Lifecycle transition:        lid 
## 
## # A tibble: 19 x 7
##    case_id  activity      channel tstamp              aid           lid   .order
##    <chr>    <fct>         <fct>   <dttm>              <chr>         <fct>  <int>
##  1 Case_149 Agent Call E… CallAg… 2021-01-12 17:34:10 Case_149Agen… comp…      1
##  2 Case_149 Agent Call S… CallAg… 2021-01-12 10:08:15 Case_149Agen… comp…      2
##  3 Case_149 Call Disconn… CallAg… 2021-01-12 17:33:51 Case_149Call… comp…      3
##  4 Case_149 Call Initiat… AutoRe… 2021-01-12 17:11:20 Case_149Call… comp…      4
##  5 Case_149 Case Closed   CallAg… 2021-01-12 12:45:40 Case_149Case… comp…      5
##  6 Case_149 Case Status … CallAg… 2021-01-12 12:44:40 Case_149Case… comp…      6
##  7 Case_149 Chat Initiat… App     2021-01-12 10:00:35 Case_149Chat… comp…      7
##  8 Case_149 Chat Initiat… App     2021-01-12 10:55:26 Case_149Chat… comp…      8
##  9 Case_149 Cust-Agent C… CallAg… 2021-01-12 17:21:14 Case_149Cust… comp…      9
## 10 Case_149 Email Commun… Email   2021-01-12 11:10:23 Case_149Emai… comp…     10
## 11 Case_149 Page Visit    App     2021-01-13 01:06:54 Case_149Page… comp…     11
## 12 Case_149 Password Cha… Web     2021-01-12 11:10:18 Case_149Pass… comp…     12
## 13 Case_149 Proposition … App     2021-01-13 01:08:14 Case_149Prop… comp…     13
## 14 Case_149 Push Notific… App     2021-01-12 06:39:45 Case_149Push… comp…     14
## 15 Case_149 Push Notific… App     2021-01-12 06:39:45 Case_149Push… comp…     15
## 16 Case_149 Push Notific… App     2021-01-12 05:06:52 Case_149Push… comp…     16
## 17 Case_149 Search        App     2021-01-12 10:00:19 Case_149Sear… comp…     17
## 18 Case_149 Secret Quest… Web     2021-01-12 11:10:18 Case_149Secr… comp…     18
## 19 Case_149 Successful L… Web     2021-01-13 02:59:38 Case_149Succ… comp…     19
# Causal Net

# Causal net mined from the traces kept by the c(2, NA) frequency filter.
render_causal_net(
  causal_net(
    filter_trace_frequency(elog, c(2, NA))
  )
)
## Warning in check_dependencies(dependencies): Activities [Case Status
## Updated,Secret Question/Answer Change,Transaction Initiated] have neither
## an antecedent or consequent in the supplied dependency matrix.Consider using
## the `all_connected` or `endpoints_connected` parameter when generating the
## dependency matrix.
# Causal net mined from the complete event log (no trace-frequency filter).
render_causal_net(causal_net(elog))
## Warning in check_dependencies(dependencies): Activities [Add to Cart,Agent Call
## End,Agent Call Start,Call Disconnect with Agent,Cart Checkout,Cart Viewed,Case
## Closed,Case Opened,Chat Initiated in ChatBot,Chat Initiated in LiveAgent,Cust-
## Agent Call Classification,Product Viewed,Purchase Confirmation,Push Notification
## Action,Push Notification Failed,Push Notification Sent,Push Notification
## Status,Removed from Cart,Search,Transaction Initiated - IVR,UserID Change] have
## neither an antecedent or consequent in the supplied dependency matrix.Consider
## using the `all_connected` or `endpoints_connected` parameter when generating the
## dependency matrix.