# Load libraries
library(arules)
library(arulesViz)
library(RColorBrewer)
library(dplyr)
library(knitr)
# Load dataset
data(Groceries)
# Confirm dataset loaded
dim(Groceries)
## [1] 9835 169
itemFrequencyPlot(
Groceries,
topN = 15,
type = "relative",
horiz = TRUE,
col = "darkred",
las = 1,
cex.names = 0.9,
xlab = "Top 15 Items by Support"
)

groceries <- aggregate(Groceries, itemInfo(Groceries)[["level2"]])
dim(groceries)
## [1] 9835 55
itemFrequencyPlot(
groceries,
topN = 15,
type = "relative",
horiz = TRUE,
col = "steelblue",
las = 1,
cex.names = 0.9,
xlab = "Top 15 Categories by Support"
)

rules <- apriori(
groceries,
parameter = list(support = 0.025, confidence = 0.05)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.05 0.1 1 none FALSE TRUE 5 0.025 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 245
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[55 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [32 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [344 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
summary(rules)
## set of 344 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4
## 21 162 129 32
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 2.0 2.0 2.5 3.0 4.0
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.02542 Min. :0.05043 Min. :0.03427 Min. :0.6669
## 1st Qu.:0.03030 1st Qu.:0.18202 1st Qu.:0.07626 1st Qu.:1.2498
## Median :0.03854 Median :0.39522 Median :0.11657 Median :1.4770
## Mean :0.05276 Mean :0.37658 Mean :0.21184 Mean :1.4831
## 3rd Qu.:0.05236 3rd Qu.:0.51271 3rd Qu.:0.27300 3rd Qu.:1.7094
## Max. :0.44301 Max. :0.79841 Max. :1.00000 Max. :2.4073
## count
## Min. : 250.0
## 1st Qu.: 298.0
## Median : 379.0
## Mean : 518.9
## 3rd Qu.: 515.0
## Max. :4357.0
##
## mining info:
## data ntransactions support confidence
## groceries 9835 0.025 0.05
## call
## apriori(data = groceries, parameter = list(support = 0.025, confidence = 0.05))
top.rules <- head(sort(rules, by = "lift", decreasing = TRUE), 15)
summary(top.rules)
## set of 15 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 4 6 5
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.500 3.000 3.067 4.000 4.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.02715 Min. :0.1061 Min. :0.04728 Min. :2.050
## 1st Qu.:0.02898 1st Qu.:0.2774 1st Qu.:0.05816 1st Qu.:2.082
## Median :0.02989 Median :0.4007 Median :0.08195 Median :2.105
## Mean :0.03193 Mean :0.3927 Mean :0.10692 Mean :2.146
## 3rd Qu.:0.03198 3rd Qu.:0.5375 3rd Qu.:0.10747 3rd Qu.:2.189
## Max. :0.04586 Max. :0.6074 Max. :0.27300 Max. :2.407
## count
## Min. :267.0
## 1st Qu.:285.0
## Median :294.0
## Mean :314.0
## 3rd Qu.:314.5
## Max. :451.0
##
## mining info:
## data ntransactions support confidence
## groceries 9835 0.025 0.05
## call
## apriori(data = groceries, parameter = list(support = 0.025, confidence = 0.05))
plot(top.rules, control = list(jitter = 1), shading = "lift")

vegie.rules <- subset(rules, subset = rhs %pin% "vegetables")
top5.vegie <- head(sort(vegie.rules, by = "lift", decreasing = TRUE), 5)
inspect(top5.vegie)
## lhs rhs support confidence
## [1] {beef, dairy produce} => {vegetables} 0.02989324 0.6074380
## [2] {poultry} => {vegetables} 0.02897814 0.5745968
## [3] {dairy produce, fruit, sausage} => {vegetables} 0.02714794 0.5741935
## [4] {beef} => {vegetables} 0.04585663 0.5595533
## [5] {dairy produce, vinegar/oils} => {vegetables} 0.03141840 0.5355286
## coverage lift count
## [1] 0.04921200 2.225010 294
## [2] 0.05043213 2.104715 285
## [3] 0.04728012 2.103238 267
## [4] 0.08195221 2.049612 451
## [5] 0.05866802 1.961610 309
top_table <- as(top5.vegie, "data.frame") %>%
select(rules, support, confidence, lift)
kable(top_table, digits = 3)
| 191 |
{beef,dairy produce} => {vegetables} |
0.030 |
0.607 |
2.225 |
| 24 |
{poultry} => {vegetables} |
0.029 |
0.575 |
2.105 |
| 314 |
{dairy produce,fruit,sausage} => {vegetables} |
0.027 |
0.574 |
2.103 |
| 82 |
{beef} => {vegetables} |
0.046 |
0.560 |
2.050 |
| 203 |
{dairy produce,vinegar/oils} => {vegetables} |
0.031 |
0.536 |
1.962 |
plot(top5.vegie, method = "graph", shading = "lift")
