library(arules)
## Warning: package 'arules' was built under R version 4.2.1
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library("tidygraph")
## Warning: package 'tidygraph' was built under R version 4.2.1
##
## Attaching package: 'tidygraph'
## The following object is masked from 'package:stats':
##
## filter
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 4.2.1
# Load the salary table as basket-format transactions: every comma-separated
# value in a row becomes an item of that row's transaction.
# header = TRUE skips the CSV's column-name row; without it the column names
# (company_location, company_size, ...) are mined as a bogus first transaction.
# NOTE(review): basket format drops column identity, so equal values coming
# from different columns appear to collapse into a single item (see the
# "removing duplicated items" warning) -- confirm this is intended.
# NOTE: the `##` output echoed in this file was recorded before this fix.
DSJobs <- read.transactions(
  "D:/archive (2)/Data Science Jobs Salaries.csv",
  sep = ",",
  header = TRUE
)
## Warning in asMethod(object): removing duplicated items in transactions
# Overview of the transaction set: size, density and most frequent items.
summary(DSJobs)
## transactions as itemMatrix in sparse format with
## 246 rows (elements/itemsets/transactions) and
## 383 columns (items) and a density of 0.02516504
##
## most frequent items:
## FT 2021e 100 L USD (Other)
## 231 179 134 132 126 1569
##
## element (itemset/transaction) length distribution:
## sizes
## 9 10 11
## 109 117 20
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.000 9.000 10.000 9.638 10.000 11.000
##
## includes extended item information - examples:
## labels
## 1 0
## 2 100
## 3 10000
# Print the first ten transactions to eyeball how the rows were itemized.
inspect(DSJobs[seq_len(10)])
## items
## [1] {company_location,
## company_size,
## employee_residence,
## employment_type,
## experience_level,
## job_title,
## remote_ratio,
## salary,
## salary_currency,
## salary_in_usd,
## work_year}
## [2] {2021e,
## 50,
## 54000,
## 64369,
## Data Science Consultant,
## DE,
## EN,
## EUR,
## FT,
## L}
## [3] {100,
## 2020,
## 60000,
## 68428,
## Data Scientist,
## EUR,
## FT,
## GR,
## L,
## SE,
## US}
## [4] {0,
## 2021e,
## 85000,
## EX,
## FT,
## Head of Data Science,
## M,
## RU,
## USD}
## [5] {2021e,
## 230000,
## 50,
## EX,
## FT,
## Head of Data,
## L,
## RU,
## USD}
## [6] {100,
## 125000,
## 2021e,
## EN,
## FT,
## Machine Learning Engineer,
## S,
## US,
## USD}
## [7] {100,
## 120000,
## 2021e,
## Data Analytics Manager,
## FT,
## M,
## SE,
## US,
## USD}
## [8] {0,
## 2020,
## 450000,
## FT,
## M,
## MI,
## Research Scientist,
## US,
## USD}
## [9] {2020,
## 41000,
## 46759,
## 50,
## Data Analyst,
## EUR,
## FR,
## FT,
## L,
## MI}
## [10] {2020,
## 50,
## 65000,
## 74130,
## AT,
## Data Engineer,
## EUR,
## FT,
## L,
## MI}
# itemFrequency() reports the proportion of transactions that contain a
# specific item; shown here for the first five items only.
itemFrequency(DSJobs[, seq_len(5)])
## 0 100 10000 100000 102000
## 0.162601626 0.544715447 0.004065041 0.020325203 0.004065041
# Visualize item support: first the items reaching 0.1 support, then the
# 20 most frequent items by absolute count.
itemFrequencyPlot(
  DSJobs,
  support = 0.1,
  main = "Items with 0.1 support"
)
itemFrequencyPlot(
  DSJobs,
  topN = 20,
  type = "absolute",
  main = "top 20 most frequent Items"
)
# Mine association rules with Apriori (support 0.1, confidence 0.8).
# minlen = 1 also admits a rule of total length 1, i.e. with an empty
# left-hand side.
DSRules <- apriori(
  data = DSJobs,
  parameter = list(support = 0.1, confidence = 0.8, minlen = 1)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 24
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[383 item(s), 246 transaction(s)] done [0.00s].
## sorting and recoding items ... [17 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.00s].
## writing ... [190 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Summarize the mined rule set: rule-length distribution, quality measures
# and the mining call that produced it.
summary(DSRules)
## set of 190 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4 5 6
## 1 18 62 67 34 8
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 3.732 4.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.1016 Min. :0.8000 Min. :0.1016 Min. :0.918
## 1st Qu.:0.1138 1st Qu.:0.9058 1st Qu.:0.1260 1st Qu.:1.006
## Median :0.1504 Median :0.9368 Median :0.1606 Median :1.056
## Mean :0.1878 Mean :0.9274 Mean :0.2028 Mean :1.345
## 3rd Qu.:0.2175 3rd Qu.:0.9630 3rd Qu.:0.2388 3rd Qu.:1.845
## Max. :0.9390 Max. :1.0000 Max. :1.0000 Max. :2.102
## count
## Min. : 25.00
## 1st Qu.: 28.00
## Median : 37.00
## Mean : 46.21
## 3rd Qu.: 53.50
## Max. :231.00
##
## mining info:
## data ntransactions support confidence
## DSJobs 246 0.1 0.8
## call
## apriori(data = DSJobs, parameter = list(support = 0.1, confidence = 0.8, minlen = 1))
# Order the rules from highest to lowest confidence and show the top 20.
DSRules <- sort(
  DSRules,
  by = "confidence",
  decreasing = TRUE
)
inspect(DSRules[seq_len(20)])
## lhs rhs support confidence coverage lift
## [1] {Data Scientist, MI} => {FT} 0.1422764 1.0000000 0.1422764 1.064935
## [2] {Data Scientist, L} => {FT} 0.1219512 1.0000000 0.1219512 1.064935
## [3] {100, Data Scientist} => {FT} 0.1097561 1.0000000 0.1097561 1.064935
## [4] {2021e, Data Scientist} => {FT} 0.1585366 1.0000000 0.1585366 1.064935
## [5] {2020, MI} => {FT} 0.1219512 1.0000000 0.1219512 1.064935
## [6] {50, L} => {FT} 0.1869919 1.0000000 0.1869919 1.064935
## [7] {L, SE} => {FT} 0.1747967 1.0000000 0.1747967 1.064935
## [8] {2021e, 50, L} => {FT} 0.1382114 1.0000000 0.1382114 1.064935
## [9] {L, SE, US} => {FT} 0.1056911 1.0000000 0.1056911 1.064935
## [10] {L, SE, USD} => {FT} 0.1016260 1.0000000 0.1016260 1.064935
## [11] {2021e, L, SE} => {FT} 0.1504065 1.0000000 0.1504065 1.064935
## [12] {100, 2021e, SE, US} => {USD} 0.1056911 1.0000000 0.1056911 1.952381
## [13] {100, 2021e, L, US} => {USD} 0.1382114 1.0000000 0.1382114 1.952381
## [14] {100, 2021e, FT, SE, US} => {USD} 0.1016260 1.0000000 0.1016260 1.952381
## [15] {100, 2021e, FT, L, US} => {USD} 0.1341463 1.0000000 0.1341463 1.952381
## [16] {2021e, SE} => {FT} 0.2398374 0.9833333 0.2439024 1.047186
## [17] {Data Scientist} => {FT} 0.2357724 0.9830508 0.2398374 1.046885
## [18] {100, L, US} => {USD} 0.1910569 0.9791667 0.1951220 1.911706
## [19] {100, FT, L, US} => {USD} 0.1829268 0.9782609 0.1869919 1.909938
## [20] {0} => {FT} 0.1585366 0.9750000 0.1626016 1.038312
## count
## [1] 35
## [2] 30
## [3] 27
## [4] 39
## [5] 30
## [6] 46
## [7] 43
## [8] 34
## [9] 26
## [10] 25
## [11] 37
## [12] 26
## [13] 34
## [14] 25
## [15] 33
## [16] 59
## [17] 58
## [18] 47
## [19] 45
## [20] 39
# Re-order the rules from highest to lowest lift and show the top 20.
DSRules <- sort(
  DSRules,
  by = "lift",
  decreasing = TRUE
)
inspect(DSRules[seq_len(20)])
## lhs rhs support confidence coverage lift
## [1] {100, L, USD} => {US} 0.1910569 0.9400000 0.2032520 2.102182
## [2] {100, SE, USD} => {US} 0.1260163 0.9393939 0.1341463 2.100826
## [3] {100, FT, L, USD} => {US} 0.1829268 0.9375000 0.1951220 2.096591
## [4] {100, FT, SE, USD} => {US} 0.1178862 0.9354839 0.1260163 2.092082
## [5] {100, MI, USD} => {US} 0.1097561 0.9310345 0.1178862 2.082132
## [6] {100, 2021e, SE, USD} => {US} 0.1056911 0.9285714 0.1138211 2.076623
## [7] {100, FT, MI, USD} => {US} 0.1056911 0.9285714 0.1138211 2.076623
## [8] {100, 2021e, FT, SE, USD} => {US} 0.1016260 0.9259259 0.1097561 2.070707
## [9] {100, 2021e, L, USD} => {US} 0.1382114 0.9189189 0.1504065 2.055037
## [10] {100, 2021e, FT, L, USD} => {US} 0.1341463 0.9166667 0.1463415 2.050000
## [11] {2021e, SE, USD} => {US} 0.1300813 0.9142857 0.1422764 2.044675
## [12] {2021e, FT, SE, USD} => {US} 0.1260163 0.9117647 0.1382114 2.039037
## [13] {100, USD} => {US} 0.3252033 0.9090909 0.3577236 2.033058
## [14] {2021e, L, USD} => {US} 0.1910569 0.9038462 0.2113821 2.021329
## [15] {L, MI, USD} => {US} 0.1138211 0.9032258 0.1260163 2.019941
## [16] {100, FT, USD} => {US} 0.2967480 0.9012346 0.3292683 2.015488
## [17] {2021e, FT, L, USD} => {US} 0.1829268 0.9000000 0.2032520 2.012727
## [18] {FT, L, MI, USD} => {US} 0.1056911 0.8965517 0.1178862 2.005016
## [19] {100, 2021e, USD} => {US} 0.2317073 0.8906250 0.2601626 1.991761
## [20] {L, USD} => {US} 0.2560976 0.8873239 0.2886179 1.984379
## count
## [1] 47
## [2] 31
## [3] 45
## [4] 29
## [5] 27
## [6] 26
## [7] 26
## [8] 25
## [9] 34
## [10] 33
## [11] 32
## [12] 31
## [13] 80
## [14] 47
## [15] 28
## [16] 73
## [17] 45
## [18] 26
## [19] 57
## [20] 63
# Check every rule for redundancy, judged on confidence using confidence
# intervals at level 0.8; prints one logical flag per rule.
is.redundant(
  DSRules,
  measure = "confidence",
  confint = TRUE,
  level = 0.8
)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
## [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE
## [85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [97] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [133] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [145] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [169] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Many rules are redundant; keep only the non-redundant ones.
# is.redundant() yields a logical vector (TRUE = redundant), so the filter has
# to negate it with `!`. Unary minus would coerce TRUE/FALSE to -1/0, which
# drops only rule 1 and keeps every other rule regardless of its flag.
RIndexes <- is.redundant(
  DSRules,
  measure = "confidence",
  confint = TRUE,
  level = 0.8
)
QualRules <- DSRules[!RIndexes]
# head() shows at most 10 rules and does not fail when fewer than 10 survive.
# NOTE: the `##` output echoed after this point predates this fix.
inspect(head(QualRules, n = 10))
## lhs rhs support confidence coverage lift
## [1] {100, SE, USD} => {US} 0.1260163 0.9393939 0.1341463 2.100826
## [2] {100, FT, L, USD} => {US} 0.1829268 0.9375000 0.1951220 2.096591
## [3] {100, FT, SE, USD} => {US} 0.1178862 0.9354839 0.1260163 2.092082
## [4] {100, MI, USD} => {US} 0.1097561 0.9310345 0.1178862 2.082132
## [5] {100, 2021e, SE, USD} => {US} 0.1056911 0.9285714 0.1138211 2.076623
## [6] {100, FT, MI, USD} => {US} 0.1056911 0.9285714 0.1138211 2.076623
## [7] {100, 2021e, FT, SE, USD} => {US} 0.1016260 0.9259259 0.1097561 2.070707
## [8] {100, 2021e, L, USD} => {US} 0.1382114 0.9189189 0.1504065 2.055037
## [9] {100, 2021e, FT, L, USD} => {US} 0.1341463 0.9166667 0.1463415 2.050000
## [10] {2021e, SE, USD} => {US} 0.1300813 0.9142857 0.1422764 2.044675
## count
## [1] 31
## [2] 45
## [3] 29
## [4] 27
## [5] 26
## [6] 26
## [7] 25
## [8] 34
## [9] 33
## [10] 32
# Scatter plot of the retained rules: support vs. lift, shaded by confidence.
plot(
  QualRules,
  measure = c("support", "lift"),
  shading = "confidence"
)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Same rule set, shaded by rule order and titled "Two-key plot".
plot(
  QualRules,
  shading = "order",
  control = list(main = "Two-key plot")
)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Test each retained rule for statistical significance against the
# transactions; prints one logical flag per rule.
is.significant(QualRules, DSJobs)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [49] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Keep only the rules flagged as significant, then list all of them.
SignifRules <- QualRules[
  is.significant(QualRules, DSJobs)
]
inspect(SignifRules)
## lhs rhs support confidence coverage lift
## [1] {100, SE, USD} => {US} 0.1260163 0.9393939 0.1341463 2.100826
## [2] {100, FT, L, USD} => {US} 0.1829268 0.9375000 0.1951220 2.096591
## [3] {100, FT, SE, USD} => {US} 0.1178862 0.9354839 0.1260163 2.092082
## [4] {100, MI, USD} => {US} 0.1097561 0.9310345 0.1178862 2.082132
## [5] {100, 2021e, SE, USD} => {US} 0.1056911 0.9285714 0.1138211 2.076623
## [6] {100, FT, MI, USD} => {US} 0.1056911 0.9285714 0.1138211 2.076623
## [7] {100, 2021e, FT, SE, USD} => {US} 0.1016260 0.9259259 0.1097561 2.070707
## [8] {100, 2021e, L, USD} => {US} 0.1382114 0.9189189 0.1504065 2.055037
## [9] {100, 2021e, FT, L, USD} => {US} 0.1341463 0.9166667 0.1463415 2.050000
## [10] {2021e, SE, USD} => {US} 0.1300813 0.9142857 0.1422764 2.044675
## [11] {2021e, FT, SE, USD} => {US} 0.1260163 0.9117647 0.1382114 2.039037
## [12] {100, USD} => {US} 0.3252033 0.9090909 0.3577236 2.033058
## [13] {2021e, L, USD} => {US} 0.1910569 0.9038462 0.2113821 2.021329
## [14] {L, MI, USD} => {US} 0.1138211 0.9032258 0.1260163 2.019941
## [15] {100, FT, USD} => {US} 0.2967480 0.9012346 0.3292683 2.015488
## [16] {2021e, FT, L, USD} => {US} 0.1829268 0.9000000 0.2032520 2.012727
## [17] {FT, L, MI, USD} => {US} 0.1056911 0.8965517 0.1178862 2.005016
## [18] {100, 2021e, USD} => {US} 0.2317073 0.8906250 0.2601626 1.991761
## [19] {L, USD} => {US} 0.2560976 0.8873239 0.2886179 1.984379
## [20] {SE, USD} => {US} 0.1585366 0.8863636 0.1788618 1.982231
## [21] {FT, L, USD} => {US} 0.2439024 0.8823529 0.2764228 1.973262
## [22] {100, 2021e, FT, USD} => {US} 0.2113821 0.8813559 0.2398374 1.971032
## [23] {FT, SE, USD} => {US} 0.1504065 0.8809524 0.1707317 1.970130
## [24] {100, 2021e, SE, US} => {USD} 0.1056911 1.0000000 0.1056911 1.952381
## [25] {100, 2021e, L, US} => {USD} 0.1382114 1.0000000 0.1382114 1.952381
## [26] {100, 2021e, FT, SE, US} => {USD} 0.1016260 1.0000000 0.1016260 1.952381
## [27] {100, 2021e, FT, L, US} => {USD} 0.1341463 1.0000000 0.1341463 1.952381
## [28] {FT, MI, USD} => {US} 0.1544715 0.8636364 0.1788618 1.931405
## [29] {100, L, US} => {USD} 0.1910569 0.9791667 0.1951220 1.911706
## [30] {100, FT, L, US} => {USD} 0.1829268 0.9782609 0.1869919 1.909938
## [31] {MI, USD} => {US} 0.1626016 0.8510638 0.1910569 1.903288
## [32] {2021e, SE, US} => {USD} 0.1300813 0.9696970 0.1341463 1.893218
## [33] {100, SE, US} => {USD} 0.1260163 0.9687500 0.1300813 1.891369
## [34] {2021e, FT, SE, US} => {USD} 0.1260163 0.9687500 0.1300813 1.891369
## [35] {100, FT, SE, US} => {USD} 0.1178862 0.9666667 0.1219512 1.887302
## [36] {100, 2021e, US} => {USD} 0.2317073 0.9661017 0.2398374 1.886199
## [37] {2020, US} => {USD} 0.1138211 0.9655172 0.1178862 1.885057
## [38] {100, US} => {USD} 0.3252033 0.9638554 0.3373984 1.881813
## [39] {2020, FT, US} => {USD} 0.1056911 0.9629630 0.1097561 1.880071
## [40] {100, 2021e, FT, US} => {USD} 0.2113821 0.9629630 0.2195122 1.880071
## [41] {2021e, MI, USD} => {US} 0.1056911 0.8387097 0.1260163 1.875660
## [42] {100, FT, US} => {USD} 0.2967480 0.9605263 0.3089431 1.875313
## [43] {SE, US} => {USD} 0.1585366 0.9512195 0.1666667 1.857143
## [44] {FT, SE, US} => {USD} 0.1504065 0.9487179 0.1585366 1.852259
## [45] {2021e, USD} => {US} 0.3089431 0.8260870 0.3739837 1.847431
## [46] {US} => {USD} 0.4227642 0.9454545 0.4471545 1.845887
## [47] {USD} => {US} 0.4227642 0.8253968 0.5121951 1.845887
## [48] {2020, USD} => {US} 0.1138211 0.8235294 0.1382114 1.841711
## [49] {2021e, FT, USD} => {US} 0.2845528 0.8235294 0.3455285 1.841711
## [50] {FT, US} => {USD} 0.3902439 0.9411765 0.4146341 1.837535
## [51] {L, US} => {USD} 0.2560976 0.9402985 0.2723577 1.835821
## [52] {2021e, L, US} => {USD} 0.1910569 0.9400000 0.2032520 1.835238
## [53] {FT, USD} => {US} 0.3902439 0.8205128 0.4756098 1.834965
## [54] {2021e, US} => {USD} 0.3089431 0.9382716 0.3292683 1.831864
## [55] {FT, L, US} => {USD} 0.2439024 0.9375000 0.2601626 1.830357
## [56] {2021e, FT, L, US} => {USD} 0.1829268 0.9375000 0.1951220 1.830357
## [57] {L, MI, US} => {USD} 0.1138211 0.9333333 0.1219512 1.822222
## [58] {2021e, FT, US} => {USD} 0.2845528 0.9333333 0.3048780 1.822222
## [59] {100, MI, US} => {USD} 0.1097561 0.9310345 0.1178862 1.817734
## [60] {2020, FT, USD} => {US} 0.1056911 0.8125000 0.1300813 1.817045
## [61] {FT, L, MI, US} => {USD} 0.1056911 0.9285714 0.1138211 1.812925
## [62] {100, FT, MI, US} => {USD} 0.1056911 0.9285714 0.1138211 1.812925
## [63] {MI, US} => {USD} 0.1626016 0.9090909 0.1788618 1.774892
## [64] {FT, MI, US} => {USD} 0.1544715 0.9047619 0.1707317 1.766440
## [65] {2021e, MI, US} => {USD} 0.1056911 0.8666667 0.1219512 1.692063
## count
## [1] 31
## [2] 45
## [3] 29
## [4] 27
## [5] 26
## [6] 26
## [7] 25
## [8] 34
## [9] 33
## [10] 32
## [11] 31
## [12] 80
## [13] 47
## [14] 28
## [15] 73
## [16] 45
## [17] 26
## [18] 57
## [19] 63
## [20] 39
## [21] 60
## [22] 52
## [23] 37
## [24] 26
## [25] 34
## [26] 25
## [27] 33
## [28] 38
## [29] 47
## [30] 45
## [31] 40
## [32] 32
## [33] 31
## [34] 31
## [35] 29
## [36] 57
## [37] 28
## [38] 80
## [39] 26
## [40] 52
## [41] 26
## [42] 73
## [43] 39
## [44] 37
## [45] 76
## [46] 104
## [47] 104
## [48] 28
## [49] 70
## [50] 96
## [51] 63
## [52] 47
## [53] 96
## [54] 76
## [55] 60
## [56] 45
## [57] 28
## [58] 70
## [59] 27
## [60] 26
## [61] 26
## [62] 26
## [63] 40
## [64] 38
## [65] 26
# Summarize the significant rules: rule-length distribution and quality
# measures.
summary(SignifRules)
## set of 65 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6
## 2 14 27 18 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 4.000 4.123 5.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.1016 Min. :0.8125 Min. :0.1016 Min. :1.692
## 1st Qu.:0.1138 1st Qu.:0.8966 1st Qu.:0.1260 1st Qu.:1.842
## Median :0.1545 Median :0.9286 Median :0.1707 Median :1.891
## Mean :0.1843 Mean :0.9204 Mean :0.2019 Mean :1.921
## 3rd Qu.:0.2317 3rd Qu.:0.9512 3rd Qu.:0.2602 3rd Qu.:2.005
## Max. :0.4228 Max. :1.0000 Max. :0.5122 Max. :2.101
## count
## Min. : 25.00
## 1st Qu.: 28.00
## Median : 38.00
## Mean : 45.34
## 3rd Qu.: 57.00
## Max. :104.00
##
## mining info:
## data ntransactions support confidence
## DSJobs 246 0.1 0.8
## call
## apriori(data = DSJobs, parameter = list(support = 0.1, confidence = 0.8, minlen = 1))
library("igraph")
## Warning: package 'igraph' was built under R version 4.2.1
##
## Attaching package: 'igraph'
## The following object is masked from 'package:tidygraph':
##
## groups
## The following object is masked from 'package:arules':
##
## union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Convert the significant rules into an igraph graph and draw it.
g <- associations2igraph(SignifRules)
plot(g)
# Print the same graph as a tidygraph tbl_graph (node and edge tables).
as_tbl_graph(g)
## # A tbl_graph: 74 nodes and 268 edges
## #
## # A bipartite simple graph with 1 component
## #
## # Node Data: 74 × 9 (active)
## name label index type support confidence coverage lift count
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 2 100 2 1 NA NA NA NA NA
## 2 79 2020 79 1 NA NA NA NA NA
## 3 80 2021e 80 1 NA NA NA NA NA
## 4 313 FT 313 1 NA NA NA NA NA
## 5 333 L 333 1 NA NA NA NA NA
## 6 345 MI 345 1 NA NA NA NA NA
## # … with 68 more rows
## #
## # Edge Data: 268 × 2
## from to
## <int> <int>
## 1 1 10
## 2 7 10
## 3 9 10
## # … with 265 more rows
# Itemsets that generated the significant rules; printing shows the set size.
DSitemsets <- generatingItemsets(SignifRules)
DSitemsets
## set of 65 itemsets
# Build a graph of the itemsets with associationsAsNodes = FALSE (the
# itemsets themselves are not added as nodes) and draw it on a circle.
DSg <- associations2igraph(
  DSitemsets,
  associationsAsNodes = FALSE
)
plot(
  DSg,
  layout = layout_in_circle
)
# Note that the `echo = FALSE` parameter was added to the
# code chunk to prevent printing of the R code that generated the
# plot.