# Load libraries
library(arules)
library(arulesViz)
library(RColorBrewer)
library(dplyr)
library(knitr)

# Load dataset
data(Groceries)

# Confirm dataset loaded
dim(Groceries)
## [1] 9835  169
itemFrequencyPlot(
  Groceries,
  topN = 15,
  type = "relative",
  horiz = TRUE,
  col = "darkred",
  las = 1,
  cex.names = 0.9,
  xlab = "Top 15 Items by Support"
)

groceries <- aggregate(Groceries, itemInfo(Groceries)[["level2"]])
dim(groceries)
## [1] 9835   55
itemFrequencyPlot(
  groceries,
  topN = 15,
  type = "relative",
  horiz = TRUE,
  col = "steelblue",
  las = 1,
  cex.names = 0.9,
  xlab = "Top 15 Categories by Support"
)

rules <- apriori(
  groceries,
  parameter = list(support = 0.025, confidence = 0.05)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.05    0.1    1 none FALSE            TRUE       5   0.025      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 245 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[55 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [32 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [344 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
summary(rules)
## set of 344 rules
## 
## rule length distribution (lhs + rhs):sizes
##   1   2   3   4 
##  21 162 129  32 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     2.0     2.0     2.5     3.0     4.0 
## 
## summary of quality measures:
##     support          confidence         coverage            lift       
##  Min.   :0.02542   Min.   :0.05043   Min.   :0.03427   Min.   :0.6669  
##  1st Qu.:0.03030   1st Qu.:0.18202   1st Qu.:0.07626   1st Qu.:1.2498  
##  Median :0.03854   Median :0.39522   Median :0.11657   Median :1.4770  
##  Mean   :0.05276   Mean   :0.37658   Mean   :0.21184   Mean   :1.4831  
##  3rd Qu.:0.05236   3rd Qu.:0.51271   3rd Qu.:0.27300   3rd Qu.:1.7094  
##  Max.   :0.44301   Max.   :0.79841   Max.   :1.00000   Max.   :2.4073  
##      count       
##  Min.   : 250.0  
##  1st Qu.: 298.0  
##  Median : 379.0  
##  Mean   : 518.9  
##  3rd Qu.: 515.0  
##  Max.   :4357.0  
## 
## mining info:
##       data ntransactions support confidence
##  groceries          9835   0.025       0.05
##                                                                             call
##  apriori(data = groceries, parameter = list(support = 0.025, confidence = 0.05))
top.rules <- head(sort(rules, by = "lift", decreasing = TRUE), 15)
summary(top.rules)
## set of 15 rules
## 
## rule length distribution (lhs + rhs):sizes
## 2 3 4 
## 4 6 5 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.500   3.000   3.067   4.000   4.000 
## 
## summary of quality measures:
##     support          confidence        coverage            lift      
##  Min.   :0.02715   Min.   :0.1061   Min.   :0.04728   Min.   :2.050  
##  1st Qu.:0.02898   1st Qu.:0.2774   1st Qu.:0.05816   1st Qu.:2.082  
##  Median :0.02989   Median :0.4007   Median :0.08195   Median :2.105  
##  Mean   :0.03193   Mean   :0.3927   Mean   :0.10692   Mean   :2.146  
##  3rd Qu.:0.03198   3rd Qu.:0.5375   3rd Qu.:0.10747   3rd Qu.:2.189  
##  Max.   :0.04586   Max.   :0.6074   Max.   :0.27300   Max.   :2.407  
##      count      
##  Min.   :267.0  
##  1st Qu.:285.0  
##  Median :294.0  
##  Mean   :314.0  
##  3rd Qu.:314.5  
##  Max.   :451.0  
## 
## mining info:
##       data ntransactions support confidence
##  groceries          9835   0.025       0.05
##                                                                             call
##  apriori(data = groceries, parameter = list(support = 0.025, confidence = 0.05))
plot(top.rules, control = list(jitter = 1), shading = "lift")

vegie.rules <- subset(rules, subset = rhs %pin% "vegetables")
top5.vegie <- head(sort(vegie.rules, by = "lift", decreasing = TRUE), 5)
inspect(top5.vegie)
##     lhs                                rhs          support    confidence
## [1] {beef, dairy produce}           => {vegetables} 0.02989324 0.6074380 
## [2] {poultry}                       => {vegetables} 0.02897814 0.5745968 
## [3] {dairy produce, fruit, sausage} => {vegetables} 0.02714794 0.5741935 
## [4] {beef}                          => {vegetables} 0.04585663 0.5595533 
## [5] {dairy produce, vinegar/oils}   => {vegetables} 0.03141840 0.5355286 
##     coverage   lift     count
## [1] 0.04921200 2.225010 294  
## [2] 0.05043213 2.104715 285  
## [3] 0.04728012 2.103238 267  
## [4] 0.08195221 2.049612 451  
## [5] 0.05866802 1.961610 309
top_table <- as(top5.vegie, "data.frame") %>%
  select(rules, support, confidence, lift)

kable(top_table, digits = 3)
rules support confidence lift
191 {beef,dairy produce} => {vegetables} 0.030 0.607 2.225
24 {poultry} => {vegetables} 0.029 0.575 2.105
314 {dairy produce,fruit,sausage} => {vegetables} 0.027 0.574 2.103
82 {beef} => {vegetables} 0.046 0.560 2.050
203 {dairy produce,vinegar/oils} => {vegetables} 0.031 0.536 1.962
plot(top5.vegie, method = "graph", shading = "lift")