library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Surface the current working directory in the knitted output by raising it
# as a warning (the warning text is the path returned by getwd()).
# NOTE(review): this is diagnostic information rather than a real problem;
# message(getwd()) would likely be the more conventional call -- confirm that
# nothing relies on this being a warning before changing it.
warning(getwd())
## Warning: C:/Users/Benoit/Insync/UHasselt/Workspaces/RStudio/
## processdiscoverybenchmark/analysis
# Knitr setup for the rest of the report:
#   * chunk option `echo = FALSE` keeps chunk source code out of the output;
#   * knit option `root.dir = ".."` uses ".." as the root directory when
#     evaluating code chunks.
knitr::opts_chunk$set(echo = FALSE)
knitr::opts_knit$set(root.dir = "..")
Here are some plots to get a first impression of the data.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
##
## Call:
## lm(formula = avg_recall ~ ReocurringTasks + InfrequentPaths +
## Miner, data = summarized_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82221 -0.17471 0.04338 0.28616 0.52807
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.713838 0.019090 37.393 < 2e-16 ***
## ReocurringTasks0.05 -0.059788 0.021537 -2.776 0.00553 **
## ReocurringTasks0.1 -0.109908 0.021537 -5.103 3.52e-07 ***
## ReocurringTasks0.15 -0.184704 0.021537 -8.576 < 2e-16 ***
## ReocurringTasks0.2 -0.171076 0.021537 -7.943 2.64e-15 ***
## ReocurringTasks0.25 -0.173407 0.021537 -8.052 1.11e-15 ***
## ReocurringTasks0.3 -0.241906 0.021537 -11.232 < 2e-16 ***
## InfrequentPathsTRUE 0.005599 0.011512 0.486 0.62671
## MinerHeuristics 0.102774 0.016280 6.313 3.09e-10 ***
## MinerILP 0.416193 0.016280 25.564 < 2e-16 ***
## MinerInductive 0.041874 0.016280 2.572 0.01015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3392 on 3461 degrees of freedom
## Multiple R-squared: 0.2215, Adjusted R-squared: 0.2192
## F-statistic: 98.44 on 10 and 3461 DF, p-value: < 2.2e-16
Now we are going to treat ReocurringTasks as a continuous variable instead of a categorical factor.
##
## Call:
## lm(formula = avg_recall ~ as.numeric(as.character(ReocurringTasks)) +
## InfrequentPaths + Miner, data = summarized_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79647 -0.17916 0.04542 0.28157 0.52922
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.688096 0.015530 44.307
## as.numeric(as.character(ReocurringTasks)) -0.724374 0.057677 -12.559
## InfrequentPathsTRUE 0.005599 0.011535 0.485
## MinerHeuristics 0.102774 0.016314 6.300
## MinerILP 0.416193 0.016314 25.512
## MinerInductive 0.041874 0.016314 2.567
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.numeric(as.character(ReocurringTasks)) < 2e-16 ***
## InfrequentPathsTRUE 0.6274
## MinerHeuristics 3.35e-10 ***
## MinerILP < 2e-16 ***
## MinerInductive 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3399 on 3466 degrees of freedom
## Multiple R-squared: 0.2171, Adjusted R-squared: 0.216
## F-statistic: 192.3 on 5 and 3466 DF, p-value: < 2.2e-16
## Df Sum Sq Mean Sq F value Pr(>F)
## ReocurringTasks 6 20.4 3.405 30.497 <2e-16
## InfrequentPaths 1 0.0 0.027 0.244 0.6215
## Miner 3 92.8 30.929 277.047 <2e-16
## ReocurringTasks:InfrequentPaths 6 0.7 0.109 0.979 0.4377
## ReocurringTasks:Miner 18 14.1 0.782 7.006 <2e-16
## InfrequentPaths:Miner 3 1.0 0.325 2.914 0.0331
## ReocurringTasks:InfrequentPaths:Miner 18 1.1 0.059 0.526 0.9477
## Residuals 3416 381.4 0.112
##
## ReocurringTasks ***
## InfrequentPaths
## Miner ***
## ReocurringTasks:InfrequentPaths
## ReocurringTasks:Miner ***
## InfrequentPaths:Miner *
## ReocurringTasks:InfrequentPaths:Miner
## Residuals
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Warning in replications(paste("~", xx), data = mf): non-factors ignored:
## InfrequentPaths
## Warning in replications(paste("~", xx), data = mf): non-factors ignored:
## ReocurringTasks, InfrequentPaths
## Warning in replications(paste("~", xx), data = mf): non-factors ignored:
## InfrequentPaths, Miner
## Warning in replications(paste("~", xx), data = mf): non-factors ignored:
## ReocurringTasks, InfrequentPaths, Miner
## Warning in TukeyHSD.aov(model3, conf.level = 0.95): 'which' specified some
## non-factors which will be dropped
Next, we extend the model with the interaction terms that the ANOVA found significant (InfrequentPaths:Miner and ReocurringTasks:Miner).
##
## Call:
## lm(formula = avg_recall ~ as.numeric(as.character(ReocurringTasks)) +
## InfrequentPaths + Miner + Miner * InfrequentPaths + Miner *
## as.numeric(as.character(ReocurringTasks)), data = summarized_results)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79387 -0.19095 0.00221 0.27727 0.61505
##
## Coefficients:
## Estimate
## (Intercept) 0.72961
## as.numeric(as.character(ReocurringTasks)) -1.08145
## InfrequentPathsTRUE 0.02970
## MinerHeuristics -0.01222
## MinerILP 0.26934
## MinerInductive 0.13767
## InfrequentPathsTRUE:MinerHeuristics 0.01068
## InfrequentPathsTRUE:MinerILP -0.03149
## InfrequentPathsTRUE:MinerInductive -0.07560
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics 0.73099
## as.numeric(as.character(ReocurringTasks)):MinerILP 1.08399
## as.numeric(as.character(ReocurringTasks)):MinerInductive -0.38666
## Std. Error
## (Intercept) 0.02342
## as.numeric(as.character(ReocurringTasks)) 0.11361
## InfrequentPathsTRUE 0.02272
## MinerHeuristics 0.03312
## MinerILP 0.03312
## MinerInductive 0.03312
## InfrequentPathsTRUE:MinerHeuristics 0.03213
## InfrequentPathsTRUE:MinerILP 0.03213
## InfrequentPathsTRUE:MinerInductive 0.03213
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics 0.16067
## as.numeric(as.character(ReocurringTasks)):MinerILP 0.16067
## as.numeric(as.character(ReocurringTasks)):MinerInductive 0.16067
## t value Pr(>|t|)
## (Intercept) 31.151 < 2e-16
## as.numeric(as.character(ReocurringTasks)) -9.519 < 2e-16
## InfrequentPathsTRUE 1.307 0.1913
## MinerHeuristics -0.369 0.7123
## MinerILP 8.132 5.85e-16
## MinerInductive 4.156 3.31e-05
## InfrequentPathsTRUE:MinerHeuristics 0.332 0.7396
## InfrequentPathsTRUE:MinerILP -0.980 0.3271
## InfrequentPathsTRUE:MinerInductive -2.353 0.0187
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics 4.550 5.56e-06
## as.numeric(as.character(ReocurringTasks)):MinerILP 6.747 1.77e-11
## as.numeric(as.character(ReocurringTasks)):MinerInductive -2.407 0.0162
##
## (Intercept) ***
## as.numeric(as.character(ReocurringTasks)) ***
## InfrequentPathsTRUE
## MinerHeuristics
## MinerILP ***
## MinerInductive ***
## InfrequentPathsTRUE:MinerHeuristics
## InfrequentPathsTRUE:MinerILP
## InfrequentPathsTRUE:MinerInductive *
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics ***
## as.numeric(as.character(ReocurringTasks)):MinerILP ***
## as.numeric(as.character(ReocurringTasks)):MinerInductive *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3347 on 3460 degrees of freedom
## Multiple R-squared: 0.2419, Adjusted R-squared: 0.2395
## F-statistic: 100.4 on 11 and 3460 DF, p-value: < 2.2e-16
Now, let’s have a look at some diagnostic plots, followed by a robust regression fit (MASS::rlm) of the same model.
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Call: rlm(formula = avg_recall ~ as.numeric(as.character(ReocurringTasks)) +
## InfrequentPaths + Miner + Miner * InfrequentPaths + Miner *
## as.numeric(as.character(ReocurringTasks)), data = summarized_results)
## Residuals:
## Min 1Q Median 3Q Max
## -0.878210 -0.215771 0.002209 0.217716 0.660602
##
## Coefficients:
## Value
## (Intercept) 0.8037
## as.numeric(as.character(ReocurringTasks)) -1.5476
## InfrequentPathsTRUE 0.0745
## MinerHeuristics -0.0453
## MinerILP 0.1953
## MinerInductive 0.0874
## InfrequentPathsTRUE:MinerHeuristics -0.0171
## InfrequentPathsTRUE:MinerILP -0.0763
## InfrequentPathsTRUE:MinerInductive -0.1099
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics 1.2177
## as.numeric(as.character(ReocurringTasks)):MinerILP 1.5501
## as.numeric(as.character(ReocurringTasks)):MinerInductive 0.0058
## Std. Error
## (Intercept) 0.0242
## as.numeric(as.character(ReocurringTasks)) 0.1176
## InfrequentPathsTRUE 0.0235
## MinerHeuristics 0.0343
## MinerILP 0.0343
## MinerInductive 0.0343
## InfrequentPathsTRUE:MinerHeuristics 0.0333
## InfrequentPathsTRUE:MinerILP 0.0333
## InfrequentPathsTRUE:MinerInductive 0.0333
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics 0.1663
## as.numeric(as.character(ReocurringTasks)):MinerILP 0.1663
## as.numeric(as.character(ReocurringTasks)):MinerInductive 0.1663
## t value
## (Intercept) 33.1605
## as.numeric(as.character(ReocurringTasks)) -13.1642
## InfrequentPathsTRUE 3.1698
## MinerHeuristics -1.3216
## MinerILP 5.6971
## MinerInductive 2.5504
## InfrequentPathsTRUE:MinerHeuristics -0.5155
## InfrequentPathsTRUE:MinerILP -2.2953
## InfrequentPathsTRUE:MinerInductive -3.3054
## as.numeric(as.character(ReocurringTasks)):MinerHeuristics 7.3241
## as.numeric(as.character(ReocurringTasks)):MinerILP 9.3238
## as.numeric(as.character(ReocurringTasks)):MinerInductive 0.0350
##
## Residual standard error: 0.3222 on 3460 degrees of freedom