library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Load the Groceries data set into the workspace and preview its first
# transactions.
data("Groceries")
# NOTE(review): no step visible in this script draws random numbers, so the
# seed looks unnecessary here -- confirm before removing.
set.seed(1)
head(Groceries)
## transactions in sparse format with
## 6 transactions (rows) and
## 169 items (columns)
# Groceries is already a `transactions` object (see the head() output above),
# so coercing it with as(Groceries, "transactions") is a no-op; bind it to the
# shorter working name directly.
groc <- Groceries
groc
## transactions in sparse format with
## 9835 transactions (rows) and
## 169 items (columns)
# Summarise the transactions: dimensions and density, most frequent items,
# and the distribution of transaction sizes.
summary(groc)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55
## 16 17 18 19 20 21 22 23 24 26 27 28 29 32
## 46 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels level2 level1
## 1 frankfurter sausage meat and sausage
## 2 sausage sausage meat and sausage
## 3 liver loaf sausage meat and sausage
# Plot item frequencies, keeping only items with support of at least 0.06;
# cex.names enlarges the item labels.
itemFrequencyPlot(
  groc,
  support = 0.06,
  cex.names = 1.5
)
The item frequency plot shows the most frequently bought items; here, only items that appear in at least 6% of transactions (support = 0.06) are displayed.
# Mine association rules with Apriori, requiring a minimum support of 0.01
# and a minimum confidence of 0.5.
grocerules <- apriori(
  groc,
  parameter = list(
    support = 0.01,
    confidence = 0.5
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.01 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 98
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [88 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [15 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# List every mined rule with its support, confidence, lift and count.
inspect(grocerules)
## lhs rhs support confidence lift count
## [1] {curd,
## yogurt} => {whole milk} 0.01006609 0.5823529 2.279125 99
## [2] {other vegetables,
## butter} => {whole milk} 0.01148958 0.5736041 2.244885 113
## [3] {other vegetables,
## domestic eggs} => {whole milk} 0.01230300 0.5525114 2.162336 121
## [4] {yogurt,
## whipped/sour cream} => {whole milk} 0.01087951 0.5245098 2.052747 107
## [5] {other vegetables,
## whipped/sour cream} => {whole milk} 0.01464159 0.5070423 1.984385 144
## [6] {pip fruit,
## other vegetables} => {whole milk} 0.01352313 0.5175097 2.025351 133
## [7] {citrus fruit,
## root vegetables} => {other vegetables} 0.01037112 0.5862069 3.029608 102
## [8] {tropical fruit,
## root vegetables} => {other vegetables} 0.01230300 0.5845411 3.020999 121
## [9] {tropical fruit,
## root vegetables} => {whole milk} 0.01199797 0.5700483 2.230969 118
## [10] {tropical fruit,
## yogurt} => {whole milk} 0.01514997 0.5173611 2.024770 149
## [11] {root vegetables,
## yogurt} => {other vegetables} 0.01291307 0.5000000 2.584078 127
## [12] {root vegetables,
## yogurt} => {whole milk} 0.01453991 0.5629921 2.203354 143
## [13] {root vegetables,
## rolls/buns} => {other vegetables} 0.01220132 0.5020921 2.594890 120
## [14] {root vegetables,
## rolls/buns} => {whole milk} 0.01270971 0.5230126 2.046888 125
## [15] {other vegetables,
## yogurt} => {whole milk} 0.02226741 0.5128806 2.007235 219
# Keep only the rules whose lift exceeds 2.25, order them by support, and
# print the result.
inspect(
  sort(
    subset(grocerules, subset = lift > 2.25),
    by = "support"
  )
)
## lhs rhs support confidence lift count
## [1] {root vegetables,
## yogurt} => {other vegetables} 0.01291307 0.5000000 2.584078 127
## [2] {tropical fruit,
## root vegetables} => {other vegetables} 0.01230300 0.5845411 3.020999 121
## [3] {root vegetables,
## rolls/buns} => {other vegetables} 0.01220132 0.5020921 2.594890 120
## [4] {citrus fruit,
## root vegetables} => {other vegetables} 0.01037112 0.5862069 3.029608 102
## [5] {curd,
## yogurt} => {whole milk} 0.01006609 0.5823529 2.279125 99
All the rules have fairly small support, confidence, and lift. If one has to be picked, rule 2 in the filtered table above ({tropical fruit, root vegetables} => {other vegetables}) is probably the strongest overall: among the rules with lift above 2.25, it has the second-best support, the second-best confidence, and the second-best lift. This is strong considering that the rules that rank first for support, confidence, or lift rank low on the other measures. Looking at all three measures gives a more balanced picture than focusing on just one, for example only confidence. Rule 2 also has the second-highest count: 121 transactions contain all of the items in the rule.