Business Process Data Analysis

bupaR is an open-source suite for handling and analysing business process data in R, developed by the Business Informatics research group at Hasselt University, Belgium. It builds upon the concept of an event log: a logbook of events that happened and were recorded during the execution of a business process. See http://www.bupar.net/index.html.

Create Event Log

library(bupaR)
## 
## Attaching package: 'bupaR'
## The following object is masked from 'package:utils':
## 
##     timestamp
library(eventdataR)
library(edeaR)


# Build a toy event table for a single case "A": five activities (A-E),
# each logged once with lifecycle "complete" by the same resource.
# Timestamps start at the current time and are spaced 1000 seconds apart.
# The start time is kept in `start_time` rather than `t`, so the name of
# base R's transpose function is not overwritten.
start_time <- Sys.time()
data <- data.frame(
  case = rep("A", 5),
  activity_id = c("A", "B", "C", "D", "E"),
  activity_instance_id = 1:5,
  lifecycle_id = rep("complete", 5),
  timestamp = start_time + 1000 * (0:4),
  resource = rep("resource 1", 5)
)
# Print the table so it can be inspected.
data
##   case activity_id activity_instance_id lifecycle_id           timestamp
## 1    A           A                    1     complete 2017-09-13 02:29:41
## 2    A           B                    2     complete 2017-09-13 02:46:21
## 3    A           C                    3     complete 2017-09-13 03:03:01
## 4    A           D                    4     complete 2017-09-13 03:19:41
## 5    A           E                    5     complete 2017-09-13 03:36:21
##     resource
## 1 resource 1
## 2 resource 1
## 3 resource 1
## 4 resource 1
## 5 resource 1
# Check the class of the raw table before converting it.
class(data)
## [1] "data.frame"
# Convert the table into an event log by naming the column that fills each
# role: case, activity, activity instance, lifecycle, timestamp, resource.
first_log <- bupaR::eventlog(
  data,
  case_id = "case",
  activity_id = "activity_id",
  activity_instance_id = "activity_instance_id",
  lifecycle_id = "lifecycle_id",
  timestamp = "timestamp",
  resource_id = "resource"
)

# Tabulate the presence of each activity (absolute and relative columns).
activity_presence(first_log)
## # A tibble: 5 x 3
##   activity_id absolute relative
##        <fctr>    <int>    <dbl>
## 1           A        1        1
## 2           B        1        1
## 3           C        1        1
## 4           D        1        1
## 5           E        1        1
# Check the class of the converted object.
class(first_log)
## [1] "eventlog"   "tbl_df"     "tbl"        "data.frame"

Exploratory and Descriptive Event-Based Data Analysis

Exploratory and Descriptive Event-Based Data Analysis

# Load the `patients` example data, then inspect its class and first rows.
data("patients")
class(patients)
## [1] "eventlog"   "tbl_df"     "tbl"        "data.frame"
head(patients)
## # A tibble: 6 x 6
##       handling patient employee handling_id registration_type
##          <chr>   <int>    <chr>       <int>             <chr>
## 1 Registration       1       r1           1             start
## 2 Registration       2       r1           2             start
## 3 Registration       3       r1           3             start
## 4 Registration       4       r1           4             start
## 5 Registration       5       r1           5             start
## 6 Registration       6       r1           6             start
## # ... with 1 more variables: time <dttm>
# Analyse the log under the working name `mylog`.
mylog <- patients
activity_presence(mylog)
## # A tibble: 7 x 3
##                handling absolute relative
##                   <chr>    <int>    <dbl>
## 1          Registration      500    1.000
## 2 Triage and Assessment      500    1.000
## 3       Discuss Results      495    0.990
## 4             Check-out      492    0.984
## 5                 X-Ray      261    0.522
## 6            Blood test      237    0.474
## 7              MRI SCAN      236    0.472
# Compute end activities at the "resource" level of analysis and plot them.
x <- end_activities(mylog, level_of_analysis = "resource")
plot(x)

# Load the `sepsis` example data and analyse it under the name `mylog`.
data("sepsis")
mylog <- sepsis
activity_presence(mylog)
## # A tibble: 16 x 3
##            Activity absolute    relative
##               <chr>    <int>       <dbl>
##  1  ER Registration     1050 1.000000000
##  2        ER Triage     1050 1.000000000
##  3 ER Sepsis Triage     1049 0.999047619
##  4       Leucocytes     1012 0.963809524
##  5              CRP     1007 0.959047619
##  6       LacticAcid      860 0.819047619
##  7   IV Antibiotics      823 0.783809524
##  8     Admission NC      800 0.761904762
##  9        IV Liquid      753 0.717142857
## 10        Release A      671 0.639047619
## 11        Return ER      294 0.280000000
## 12     Admission IC      110 0.104761905
## 13        Release B       56 0.053333333
## 14        Release C       25 0.023809524
## 15        Release D       24 0.022857143
## 16        Release E        6 0.005714286
# Compute end activities at the "resource" level of analysis and plot them.
x <- end_activities(mylog, level_of_analysis = "resource")
plot(x)

Construct Process Maps

Process Map

# Draw the process map of the patients log.
# NOTE(review): process_map() appears to come from processmapR in current
# bupaR releases; no library(processmapR) call is visible here - confirm it
# is attached before running.
process_map(patients)
# Sepsis variant, currently commented out.
# process_map(sepsis)

Resource Map

# Draw the resource map of the patients log.
# NOTE(review): resource_map() appears to come from processmapR in current
# bupaR releases; no library(processmapR) call is visible here - confirm it
# is attached before running.
resource_map(patients)
# Sepsis variant, currently commented out.
# resource_map(sepsis)

Relative Duration in Hours

# Dotted chart of the patients log: x = "relative", y = "duration",
# no colour mapping, time unit "hours".
dotted_chart(
  patients,
  x = "relative",
  y = "duration",
  color = NULL,
  units = "hours"
)

# Dotted chart of the sepsis log with the same settings, except that
# x = "absolute".
dotted_chart(
  sepsis,
  x = "absolute",
  y = "duration",
  color = NULL,
  units = "hours"
)
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Spectral is 11
## Returning the palette you asked for with that many colors
## Warning: Removed 405 rows containing missing values (geom_point).

Relative Start in Hours

# Dotted chart of the patients log: x = "relative", y = "start",
# no colour mapping, time unit "hours".
dotted_chart(
  patients,
  x = "relative",
  y = "start",
  color = NULL,
  units = "hours"
)

# Dotted chart of the sepsis log with the same settings, except that
# x = "absolute".
dotted_chart(
  sepsis,
  x = "absolute",
  y = "start",
  color = NULL,
  units = "hours"
)
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Spectral is 11
## Returning the palette you asked for with that many colors
## Warning: Removed 405 rows containing missing values (geom_point).

The most useful tool may be the interactive plot: run idotted_chart(patients) in the console.

```