Association Rules

Groceries Dataset

library(arules)
## Warning: package 'arules' was built under R version 3.4.4
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 3.4.4
## Loading required package: grid
library(grid)
# Load the Groceries data set into the workspace.
data(list = "Groceries")
# data()  # uncomment to list the data sets that can be loaded
# Check what kind of object was loaded before mining it.
class(Groceries)
## [1] "transactions"
## attr(,"package")
## [1] "arules"
# Item frequency plot ----
# Open a fresh graphics device for the chart. dev.new() opens the default
# device of the current platform, whereas windows() is only available in
# Windows builds of R and makes the script fail elsewhere.
dev.new()

# topN = 15 limits the chart to the 15 most frequent items.
itemFrequencyPlot(Groceries, topN = 15)

class(Groceries) # transactions type
## [1] "transactions"
## attr(,"package")
## [1] "arules"
# Mine association rules with Apriori. Thresholds: minimum support 0.002,
# minimum confidence 0.60, and rules made of 2 to 4 items in total.
rules <- apriori(
  Groceries,
  parameter = list(supp = 0.002, confidence = 0.60, minlen = 2, maxlen = 4)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.6    0.1    1 none FALSE            TRUE       5   0.002      2
##  maxlen target   ext
##       4  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 19 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [147 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4
## Warning in apriori(Groceries, parameter = list(supp = 0.002, confidence =
## 0.6, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
##  done [0.01s].
## writing ... [349 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Show the ten rules with the largest support. sort() on rules orders by
# support unless `by` says otherwise; the key is spelled out for clarity.
inspect(head(sort(rules, by = "support"), n = 10))
##      lhs                     rhs              support confidence     lift count
## [1]  {butter,                                                                  
##       yogurt}             => {whole milk} 0.009354347  0.6388889 2.500387    92
## [2]  {root vegetables,                                                         
##       butter}             => {whole milk} 0.008235892  0.6377953 2.496107    81
## [3]  {root vegetables,                                                         
##       other vegetables,                                                        
##       yogurt}             => {whole milk} 0.007829181  0.6062992 2.372842    77
## [4]  {tropical fruit,                                                          
##       other vegetables,                                                        
##       yogurt}             => {whole milk} 0.007625826  0.6198347 2.425816    75
## [5]  {tropical fruit,                                                          
##       domestic eggs}      => {whole milk} 0.006914082  0.6071429 2.376144    68
## [6]  {butter,                                                                  
##       whipped/sour cream} => {whole milk} 0.006710727  0.6600000 2.583008    66
## [7]  {tropical fruit,                                                          
##       curd}               => {whole milk} 0.006507372  0.6336634 2.479936    64
## [8]  {tropical fruit,                                                          
##       butter}             => {whole milk} 0.006202339  0.6224490 2.436047    61
## [9]  {butter,                                                                  
##       domestic eggs}      => {whole milk} 0.005998983  0.6210526 2.430582    59
## [10] {pip fruit,                                                               
##       whipped/sour cream} => {whole milk} 0.005998983  0.6483516 2.537421    59
# Rule visualisations ----
# The ten rules with the highest lift feed both the graph plot and the
# grouped plot, so they are selected once and reused.
top_lift_rules <- head(sort(rules, by = "lift"), n = 10)

# Plotting rules as graphs
# NOTE(review): the original call carried a commented-out
# `control = list(cex = 1.9)` argument, presumably to enlarge labels --
# confirm against the installed arulesViz version before re-enabling it.
plot(top_lift_rules, method = "graph")

# Default plot of Rules
plot(rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Dark red spots are interesting as they represent high lift values
plot(top_lift_rules, method = "grouped")