Discover Process Models With the Heuristics Miner

library(heuristicsmineR)
library(eventdataR)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.3     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## Warning: package 'readr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(petrinetR)

patients %>% head()
## Log of 6 events consisting of:
## 1 trace 
## 6 cases 
## 6 instances of 1 activity 
## 1 resource 
## Events occurred from 2017-01-02 11:41:53 until 2017-01-04 16:07:47 
##  
## Variables were mapped as follows:
## Case identifier:     patient 
## Activity identifier:     handling 
## Resource identifier:     employee 
## Activity instance identifier:    handling_id 
## Timestamp:           time 
## Lifecycle transition:        registration_type 
## 
## # A tibble: 6 × 7
##   handling     patient employee handling_id registration_type time               
##   <fct>        <chr>   <fct>    <chr>       <fct>             <dttm>             
## 1 Registration 1       r1       1           start             2017-01-02 11:41:53
## 2 Registration 2       r1       2           start             2017-01-02 11:41:53
## 3 Registration 3       r1       3           start             2017-01-04 01:34:05
## 4 Registration 4       r1       4           start             2017-01-04 01:34:04
## 5 Registration 5       r1       5           start             2017-01-04 16:07:47
## 6 Registration 6       r1       6           start             2017-01-04 16:07:47
## # … with 1 more variable: .order <int>

Dependency graph and matrix

dependency_matrix(patients)
##                        consequent
## antecedent              Blood test Check-out Discuss Results       End
##   Blood test             0.0000000 0.0000000       0.0000000 0.0000000
##   Check-out              0.0000000 0.0000000       0.0000000 0.9979716
##   Discuss Results        0.0000000 0.9979716       0.0000000 0.0000000
##   End                    0.0000000 0.0000000       0.0000000 0.0000000
##   MRI SCAN               0.0000000 0.0000000       0.9957806 0.0000000
##   Registration           0.0000000 0.0000000       0.0000000 0.0000000
##   Start                  0.0000000 0.0000000       0.0000000 0.0000000
##   Triage and Assessment  0.9957983 0.0000000       0.0000000 0.0000000
##   X-Ray                  0.0000000 0.0000000       0.9961538 0.0000000
##                        consequent
## antecedent               MRI SCAN Registration Start Triage and Assessment
##   Blood test            0.9957806     0.000000     0              0.000000
##   Check-out             0.0000000     0.000000     0              0.000000
##   Discuss Results       0.0000000     0.000000     0              0.000000
##   End                   0.0000000     0.000000     0              0.000000
##   MRI SCAN              0.0000000     0.000000     0              0.000000
##   Registration          0.0000000     0.000000     0              0.998004
##   Start                 0.0000000     0.998004     0              0.000000
##   Triage and Assessment 0.0000000     0.000000     0              0.000000
##   X-Ray                 0.0000000     0.000000     0              0.000000
##                        consequent
## antecedent                  X-Ray
##   Blood test            0.0000000
##   Check-out             0.0000000
##   Discuss Results       0.0000000
##   End                   0.0000000
##   MRI SCAN              0.0000000
##   Registration          0.0000000
##   Start                 0.0000000
##   Triage and Assessment 0.9961832
##   X-Ray                 0.0000000
## attr(,"class")
## [1] "dependency_matrix" "matrix"            "array"

Causal graph

causal_net(patients)
## Nodes
## # A tibble: 9 × 12
##   act      from_id     n n_distinct_cases bindings_input bindings_output label  
##   <chr>      <int> <dbl>            <dbl> <list>         <list>          <chr>  
## 1 Blood t…       1   237              237 <list [1]>     <list [1]>      "Blood…
## 2 Check-o…       2   492              492 <list [1]>     <list [1]>      "Check…
## 3 Discuss…       3   495              495 <list [2]>     <list [1]>      "Discu…
## 4 End            4   500              500 <list [1]>     <list [0]>      "End"  
## 5 MRI SCAN       5   236              236 <list [1]>     <list [1]>      "MRI S…
## 6 Registr…       6   500              500 <list [1]>     <list [1]>      "Regis…
## 7 Start          7   500              500 <list [0]>     <list [1]>      "Start"
## 8 Triage …       8   500              500 <list [1]>     <list [2]>      "Triag…
## 9 X-Ray          9   261              261 <list [1]>     <list [1]>      "X-Ray…
## # … with 5 more variables: color_level <dbl>, shape <chr>, fontcolor <chr>,
## #   color <chr>, tooltip <chr>
## Edges
## # A tibble: 9 × 8
##   antecedent            consequent        dep from_id to_id     n label penwidth
##   <chr>                 <chr>           <dbl>   <int> <int> <dbl> <chr>    <dbl>
## 1 Triage and Assessment Blood test      0.996       8     1   237 237       2.90
## 2 Discuss Results       Check-out       0.998       3     2   495 495       4.96
## 3 MRI SCAN              Discuss Results 0.996       5     3   236 236       2.89
## 4 X-Ray                 Discuss Results 0.996       9     3   261 261       3.09
## 5 Check-out             End             0.998       2     4   492 492       4.94
## 6 Blood test            MRI SCAN        0.996       1     5   237 237       2.90
## 7 Start                 Registration    0.998       7     6   500 500       5   
## 8 Registration          Triage and Ass… 0.998       6     8   500 500       5   
## 9 Triage and Assessment X-Ray           0.996       8     9   261 261       3.09

Petri net

causal_net(patients) %>% as.petrinet() %>% render_PN()

processcheckR: rule-based conformance checking

  1. contains: activity occurs n times or more
  2. contains_exactly: activity occurs exactly n times
  3. absent: activity does not occur more than n - 1 times
  4. starts: case starts with activity
  5. ends: case ends with activity
  6. and: two activities always exist together
  7. succession: if activity A happens, B should happen after. If B happens, A should have happened before.
  8. response: if activity A happens, B should happen after
  9. precedence: if activity B happens, A should have happened before
  10. responded_existence: if activity A happens, B should also (have) happen(ed) (i.e. before or after A)

For example: each patient should be registered at least once.

library(bupaR)
## 
## Attaching package: 'bupaR'
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:utils':
## 
##     timestamp
library(processcheckR)
## 
## Attaching package: 'processcheckR'
## The following object is masked from 'package:dplyr':
## 
##     contains
## The following object is masked from 'package:tidyr':
## 
##     contains
## The following object is masked from 'package:base':
## 
##     xor
patients %>%
check_rule(contains("Registration"))
## Log of 5442 events consisting of:
## 7 traces 
## 500 cases 
## 2721 instances of 7 activities 
## 7 resources 
## Events occurred from 2017-01-02 11:41:53 until 2018-05-05 07:16:02 
##  
## Variables were mapped as follows:
## Case identifier:     patient 
## Activity identifier:     handling 
## Resource identifier:     employee 
## Activity instance identifier:    handling_id 
## Timestamp:           time 
## Lifecycle transition:        registration_type 
## 
## # A tibble: 5,442 × 8
##    handling     patient employee handling_id registration_type time               
##    <fct>        <chr>   <fct>    <chr>       <fct>             <dttm>             
##  1 Registration 1       r1       1           start             2017-01-02 11:41:53
##  2 Registration 2       r1       2           start             2017-01-02 11:41:53
##  3 Registration 3       r1       3           start             2017-01-04 01:34:05
##  4 Registration 4       r1       4           start             2017-01-04 01:34:04
##  5 Registration 5       r1       5           start             2017-01-04 16:07:47
##  6 Registration 6       r1       6           start             2017-01-04 16:07:47
##  7 Registration 7       r1       7           start             2017-01-05 04:56:11
##  8 Registration 8       r1       8           start             2017-01-05 04:56:11
##  9 Registration 9       r1       9           start             2017-01-06 05:58:54
## 10 Registration 10      r1       10          start             2017-01-06 05:58:54
## # … with 5,432 more rows, and 2 more variables: .order <int>,
## #   contains_Registration_1 <lgl>