library(arules)
## Warning: package 'arules' was built under R version 4.0.5
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 4.0.5
# Load the grocery baskets as an arules `transactions` object.
# The CSV location can be overridden with the GROCERIES_CSV environment
# variable; when it is unset, the original author's local path is used, so
# existing behavior is unchanged.
groceries_csv <- Sys.getenv(
  "GROCERIES_CSV",
  unset = "C:\\Users\\tariqm\\Documents\\R\\Datasets\\groceries.csv"
)
# Fail early with a readable message instead of a low-level connection error.
if (!file.exists(groceries_csv)) {
  stop("Groceries CSV not found: ", groceries_csv, call. = FALSE)
}
Groceries <- read.transactions(groceries_csv, sep = ",")
summary(Groceries)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55 46
## 17 18 19 20 21 22 23 24 26 27 28 29 32
## 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3 baby cosmetics
# Mine association rules over all items: minimum support 0.001,
# minimum confidence 0.8, at most 21 items per rule.
rules <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.8, maxlen = 21)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 21 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.01s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.02s].
## writing ... [410 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Overview of the mined rule set: number of rules, rule-length distribution
# and the spread of the quality measures (support, confidence, coverage, lift, count).
summary(rules)
## set of 410 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5 6
## 29 229 140 12
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 4.000 4.329 5.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.8000 Min. :0.001017 Min. : 3.131
## 1st Qu.:0.001017 1st Qu.:0.8333 1st Qu.:0.001220 1st Qu.: 3.312
## Median :0.001220 Median :0.8462 Median :0.001322 Median : 3.588
## Mean :0.001247 Mean :0.8663 Mean :0.001449 Mean : 3.951
## 3rd Qu.:0.001322 3rd Qu.:0.9091 3rd Qu.:0.001627 3rd Qu.: 4.341
## Max. :0.003152 Max. :1.0000 Max. :0.003559 Max. :11.235
## count
## Min. :10.00
## 1st Qu.:10.00
## Median :12.00
## Mean :12.27
## 3rd Qu.:13.00
## Max. :31.00
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.8
# Visual overview of the complete rule set, one plot per arulesViz method.
plot(rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(rules, method = "two-key plot", jitter = 100)

plot(rules, method = "grouped")

# Skipped for the full rule set because it takes a long time to render:
# plot(rules, method = "paracoord")
# Seed set right before the graph plot, presumably to keep its layout reproducible.
set.seed(1234)
plot(rules, method = "graph")
## Warning: Too many rules supplied. Only plotting the best 100 rules using lift
## (change control parameter max if needed)

# `interactive = TRUE` is deprecated in arulesViz; the interactive viewer is
# selected through the `engine` argument instead.
plot(rules, method = "graph", engine = "interactive", shading = NA)
## Warning: plot: Too many rules supplied. Only plotting the best 100 rules using
## 'support' (change control parameter max if needed)
plot(rules, method = "graph", engine = "htmlwidget")
## Warning: Too many rules supplied. Only plotting the best 100 rules using lift
## (change control parameter max if needed)
# Drop redundant rules, then rank the survivors by lift.
# The redundancy test and the sort must see the full-precision quality
# measures: rounding first collapses nearby confidence/lift values into ties
# and can change which rules are flagged or how they are ordered.
redundant <- is.redundant(rules, measure = "confidence")
which(redundant)
# NOTE(review): the indices recorded on the next line (and the captured output
# further down) come from a run that rounded before this step; re-run to refresh.
## [1] 54 119 263 268 282 288 300 302 310 346 351 355 399 400 402 403 404 405
rules <- rules[!redundant]
rules <- sort(rules, by = "lift", decreasing = TRUE)
# Round only now, purely to keep the printed tables compact.
quality(rules) <- round(quality(rules), digits = 3)
summary(rules)
## set of 392 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5 6
## 29 227 130 6
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 4.000 4.288 5.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001000 Min. :0.8000 Min. :0.001000 Min. : 3.131
## 1st Qu.:0.001000 1st Qu.:0.8330 1st Qu.:0.001000 1st Qu.: 3.312
## Median :0.001000 Median :0.8460 Median :0.001000 Median : 3.588
## Mean :0.001158 Mean :0.8666 Mean :0.001408 Mean : 3.959
## 3rd Qu.:0.001000 3rd Qu.:0.9090 3rd Qu.:0.002000 3rd Qu.: 4.357
## Max. :0.003000 Max. :1.0000 Max. :0.004000 Max. :11.235
## count
## Min. :10.00
## 1st Qu.:10.00
## Median :12.00
## Mean :12.33
## 3rd Qu.:13.00
## Max. :31.00
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.8
# Show the first ten rules in their current order (sorted by lift above).
# head() returns fewer rules instead of failing when less than ten remain.
inspect(head(rules, n = 10))
## lhs rhs support confidence coverage lift count
## [1] {liquor,
## red/blush wine} => {bottled beer} 0.002 0.905 0.002 11.235 19
## [2] {citrus fruit,
## fruit/vegetable juice,
## other vegetables,
## soda} => {root vegetables} 0.001 0.909 0.001 8.340 10
## [3] {oil,
## other vegetables,
## tropical fruit,
## whole milk,
## yogurt} => {root vegetables} 0.001 0.909 0.001 8.340 10
## [4] {citrus fruit,
## fruit/vegetable juice,
## grapes} => {tropical fruit} 0.001 0.846 0.001 8.064 11
## [5] {other vegetables,
## rice,
## whole milk,
## yogurt} => {root vegetables} 0.001 0.867 0.002 7.951 13
## [6] {oil,
## other vegetables,
## tropical fruit,
## whole milk} => {root vegetables} 0.001 0.867 0.002 7.951 13
## [7] {ham,
## other vegetables,
## pip fruit,
## yogurt} => {tropical fruit} 0.001 0.833 0.001 7.942 10
## [8] {beef,
## citrus fruit,
## other vegetables,
## tropical fruit} => {root vegetables} 0.001 0.833 0.001 7.645 10
## [9] {butter,
## cream cheese,
## root vegetables} => {yogurt} 0.001 0.909 0.001 6.517 10
## [10] {butter,
## sliced cheese,
## tropical fruit,
## whole milk} => {yogurt} 0.001 0.909 0.001 6.517 10
# Keep the ten leading rules (the set is sorted by lift) and repeat the plots
# on this small subset. head() tolerates rule sets with fewer than ten rules.
rules.top10 <- head(rules, n = 10)
plot(rules.top10)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(rules.top10, method = "two-key plot", jitter = 100)

plot(rules.top10, method = "grouped")

plot(rules.top10, method = "paracoord")

# Seed set right before the graph plot, presumably to keep its layout reproducible.
set.seed(1234)
plot(rules.top10, method = "graph")

# `interactive = TRUE` is deprecated in arulesViz; the interactive viewer is
# selected through the `engine` argument instead.
plot(rules.top10, method = "graph", engine = "interactive", shading = NA)
plot(rules.top10, method = "graph", engine = "htmlwidget")
# Rules that lead to whole milk: the RHS is restricted to "whole milk" while
# every other item may appear on the LHS. Same thresholds as the global run;
# progress output is silenced.
rules.wholemilk <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.8, maxlen = 21),
  appearance = list(default = "lhs", rhs = "whole milk"),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  control = list(verbose = FALSE)
)
summary(rules.wholemilk)
## set of 252 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5 6
## 18 146 81 7
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 4.000 4.306 5.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.8000 Min. :0.001017 Min. :3.131
## 1st Qu.:0.001017 1st Qu.:0.8333 1st Qu.:0.001220 1st Qu.:3.261
## Median :0.001220 Median :0.8481 Median :0.001322 Median :3.319
## Mean :0.001256 Mean :0.8689 Mean :0.001457 Mean :3.401
## 3rd Qu.:0.001322 3rd Qu.:0.9091 3rd Qu.:0.001627 3rd Qu.:3.558
## Max. :0.002847 Max. :1.0000 Max. :0.003457 Max. :3.914
## count
## Min. :10.00
## 1st Qu.:10.00
## Median :12.00
## Mean :12.36
## 3rd Qu.:13.00
## Max. :28.00
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.8
# Drop redundant whole-milk rules, then rank the survivors by lift.
# Redundancy and ordering are computed on the full-precision quality measures;
# rounding beforehand would create artificial ties between rules.
redundant <- is.redundant(rules.wholemilk, measure = "confidence")
which(redundant)
# NOTE(review): the indices recorded on the next line (and the captured output
# further down) come from a run that rounded before this step; re-run to refresh.
## [1] 33 66 167 170 179 186 216 246 247 248 249
rules.wholemilk <- rules.wholemilk[!redundant]
rules.wholemilk <- sort(rules.wholemilk, by = "lift", decreasing = TRUE)
# Round only now, purely to keep the printed tables compact.
quality(rules.wholemilk) <- round(quality(rules.wholemilk), digits = 3)
summary(rules.wholemilk)
## set of 241 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4 5 6
## 18 144 76 3
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 4.000 4.000 4.266 5.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001000 Min. :0.8000 Min. :0.001000 Min. :3.131
## 1st Qu.:0.001000 1st Qu.:0.8330 1st Qu.:0.001000 1st Qu.:3.261
## Median :0.001000 Median :0.8570 Median :0.001000 Median :3.355
## Mean :0.001166 Mean :0.8694 Mean :0.001402 Mean :3.403
## 3rd Qu.:0.001000 3rd Qu.:0.9090 3rd Qu.:0.002000 3rd Qu.:3.558
## Max. :0.003000 Max. :1.0000 Max. :0.003000 Max. :3.914
## count
## Min. :10.0
## 1st Qu.:10.0
## Median :12.0
## Mean :12.4
## 3rd Qu.:13.0
## Max. :28.0
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.8
# Show the first ten whole-milk rules in their current order (sorted by lift above).
# head() returns fewer rules instead of failing when less than ten remain.
inspect(head(rules.wholemilk, n = 10))
## lhs rhs support confidence coverage lift count
## [1] {rice,
## sugar} => {whole milk} 0.001 1 0.001 3.914 12
## [2] {canned fish,
## hygiene articles} => {whole milk} 0.001 1 0.001 3.914 11
## [3] {butter,
## rice,
## root vegetables} => {whole milk} 0.001 1 0.001 3.914 10
## [4] {flour,
## root vegetables,
## whipped/sour cream} => {whole milk} 0.002 1 0.002 3.914 17
## [5] {butter,
## domestic eggs,
## soft cheese} => {whole milk} 0.001 1 0.001 3.914 10
## [6] {butter,
## hygiene articles,
## pip fruit} => {whole milk} 0.001 1 0.001 3.914 10
## [7] {hygiene articles,
## root vegetables,
## whipped/sour cream} => {whole milk} 0.001 1 0.001 3.914 10
## [8] {hygiene articles,
## pip fruit,
## root vegetables} => {whole milk} 0.001 1 0.001 3.914 10
## [9] {cream cheese,
## domestic eggs,
## sugar} => {whole milk} 0.001 1 0.001 3.914 11
## [10] {curd,
## domestic eggs,
## sugar} => {whole milk} 0.001 1 0.001 3.914 10
# Graph view of the ten leading whole-milk rules as an HTML widget.
# head() tolerates rule sets with fewer than ten rules, unlike `[1:10]`.
rules.wholemilk.top10 <- head(rules.wholemilk, n = 10)
plot(rules.wholemilk.top10, method = "graph", engine = "htmlwidget")
# Rules that start from whole milk: "whole milk" is the only item allowed on
# the LHS, every other item may appear on the RHS. The confidence threshold is
# lowered to 0.1 for this direction; progress output is silenced.
# The summary below reports rules of length 1, i.e. rules with an empty LHS
# are part of this result as well.
rules.wholemilk2 <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.1, maxlen = 21),
  appearance = list(default = "rhs", lhs = "whole milk"),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  control = list(verbose = FALSE)
)
summary(rules.wholemilk2)
## set of 24 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2
## 7 17
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.708 2.000 2.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.02613 Min. :0.1023 Min. :0.2555 Min. :0.8991
## 1st Qu.:0.03007 1st Qu.:0.1101 1st Qu.:0.2555 1st Qu.:1.0000
## Median :0.04118 Median :0.1281 Median :0.2555 Median :1.3916
## Mean :0.06927 Mean :0.1478 Mean :0.4727 Mean :1.3628
## 3rd Qu.:0.10595 3rd Qu.:0.1768 3rd Qu.:1.0000 3rd Qu.:1.5732
## Max. :0.19349 Max. :0.2929 Max. :1.0000 Max. :1.9461
## count
## Min. : 257.0
## 1st Qu.: 295.8
## Median : 405.0
## Mean : 681.3
## 3rd Qu.:1042.0
## Max. :1903.0
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.1
# Drop redundant rules from the whole-milk-antecedent set, then rank by lift.
# Redundancy and ordering are computed on the full-precision quality measures;
# rounding beforehand would create artificial ties between rules.
redundant <- is.redundant(rules.wholemilk2, measure = "confidence")
which(redundant)
# NOTE(review): the index recorded on the next line (and the captured output
# further down) comes from a run that rounded before this step; re-run to refresh.
## [1] 21
rules.wholemilk2 <- rules.wholemilk2[!redundant]
rules.wholemilk2 <- sort(rules.wholemilk2, by = "lift", decreasing = TRUE)
# Round only now, purely to keep the printed tables compact.
quality(rules.wholemilk2) <- round(quality(rules.wholemilk2), digits = 3)
summary(rules.wholemilk2)
## set of 23 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2
## 7 16
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.696 2.000 2.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.02600 Min. :0.1020 Min. :0.2560 Min. :1.000
## 1st Qu.:0.03000 1st Qu.:0.1100 1st Qu.:0.2560 1st Qu.:1.000
## Median :0.04200 Median :0.1260 Median :0.2560 Median :1.442
## Mean :0.07057 Mean :0.1474 Mean :0.4824 Mean :1.383
## 3rd Qu.:0.10700 3rd Qu.:0.1790 3rd Qu.:1.0000 3rd Qu.:1.575
## Max. :0.19300 Max. :0.2930 Max. :1.0000 Max. :1.946
## count
## Min. : 257.0
## 1st Qu.: 295.5
## Median : 416.0
## Mean : 693.8
## 3rd Qu.:1052.0
## Max. :1903.0
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.1
# Show the first ten rules in their current order (sorted by lift above).
# head() returns fewer rules instead of failing when less than ten remain.
inspect(head(rules.wholemilk2, n = 10))
## lhs rhs support confidence coverage lift
## [1] {whole milk} => {butter} 0.028 0.108 0.256 1.946
## [2] {whole milk} => {curd} 0.026 0.102 0.256 1.919
## [3] {whole milk} => {domestic eggs} 0.030 0.117 0.256 1.850
## [4] {whole milk} => {whipped/sour cream} 0.032 0.126 0.256 1.760
## [5] {whole milk} => {root vegetables} 0.049 0.191 0.256 1.756
## [6] {whole milk} => {tropical fruit} 0.042 0.166 0.256 1.578
## [7] {whole milk} => {yogurt} 0.056 0.219 0.256 1.572
## [8] {whole milk} => {pip fruit} 0.030 0.118 0.256 1.557
## [9] {whole milk} => {other vegetables} 0.075 0.293 0.256 1.514
## [10] {whole milk} => {pastry} 0.033 0.130 0.256 1.463
## count
## [1] 271
## [2] 257
## [3] 295
## [4] 317
## [5] 481
## [6] 416
## [7] 551
## [8] 296
## [9] 736
## [10] 327
# Graph view of the ten leading whole-milk-antecedent rules as an HTML widget.
# head() tolerates rule sets with fewer than ten rules, unlike `[1:10]`.
rules.wholemilk2.top10 <- head(rules.wholemilk2, n = 10)
plot(rules.wholemilk2.top10, method = "graph", engine = "htmlwidget")