library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Loading required package: grid
# Load the comma-separated grocery baskets as an arules `transactions` object.
groceries <- read.transactions(file = "groceries.csv", sep = ",")

# Overview of the data: dimensions, density, most frequent items and the
# distribution of basket sizes.
summary(groceries)
## transactions as itemMatrix in sparse format with
##  9835 rows (elements/itemsets/transactions) and
##  169 columns (items) and a density of 0.02609146 
## 
## most frequent items:
##       whole milk other vegetables       rolls/buns             soda 
##             2513             1903             1809             1715 
##           yogurt          (Other) 
##             1372            34055 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55 
##   16   17   18   19   20   21   22   23   24   26   27   28   29   32 
##   46   29   14   14    9   11    4    6    1    1    1    1    3    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   4.409   6.000  32.000 
## 
## includes extended item information - examples:
##             labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3   baby cosmetics
# Internal structure of the S4 `transactions` object: the sparse incidence
# matrix (`data`), the item labels (`itemInfo`) and the per-transaction
# metadata (`itemsetInfo`, which has no rows or columns here).
str(groceries)
## Formal class 'transactions' [package "arules"] with 3 slots
##   ..@ data       :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
##   .. .. ..@ i       : int [1:43367] 29 88 118 132 33 157 167 166 38 91 ...
##   .. .. ..@ p       : int [1:9836] 0 4 7 8 12 16 21 22 27 28 ...
##   .. .. ..@ Dim     : int [1:2] 169 9835
##   .. .. ..@ Dimnames:List of 2
##   .. .. .. ..$ : NULL
##   .. .. .. ..$ : NULL
##   .. .. ..@ factors : list()
##   ..@ itemInfo   :'data.frame':  169 obs. of  1 variable:
##   .. ..$ labels: chr [1:169] "abrasive cleaner" "artif. sweetener" "baby cosmetics" "baby food" ...
##   ..@ itemsetInfo:'data.frame':  0 obs. of  0 variables
# Bar chart of the ten most frequent items, shown as absolute counts.
itemFrequencyPlot(groceries, topN = 10, type = "absolute")

# Same chart for the five most frequent items, using the default frequency type.
itemFrequencyPlot(groceries, topN = 5)

# List the contents of the first ten transactions.
inspect(groceries[seq_len(10)])
##      items                     
## [1]  {citrus fruit,            
##       margarine,               
##       ready soups,             
##       semi-finished bread}     
## [2]  {coffee,                  
##       tropical fruit,          
##       yogurt}                  
## [3]  {whole milk}              
## [4]  {cream cheese,            
##       meat spreads,            
##       pip fruit,               
##       yogurt}                  
## [5]  {condensed milk,          
##       long life bakery product,
##       other vegetables,        
##       whole milk}              
## [6]  {abrasive cleaner,        
##       butter,                  
##       rice,                    
##       whole milk,              
##       yogurt}                  
## [7]  {rolls/buns}              
## [8]  {bottled beer,            
##       liquor (appetizer),      
##       other vegetables,        
##       rolls/buns,              
##       UHT-milk}                
## [9]  {potted plants}           
## [10] {cereals,                 
##       whole milk}
# Share of transactions containing each of the first ten item columns.
itemFrequency(groceries[, seq_len(10)])
## abrasive cleaner artif. sweetener   baby cosmetics        baby food 
##     0.0035587189     0.0032536858     0.0006100661     0.0001016777 
##             bags    baking powder bathroom cleaner             beef 
##     0.0004067107     0.0176919166     0.0027452974     0.0524656838 
##          berries        beverages 
##     0.0332486019     0.0260294865
# Plot only the items that reach a support of at least 0.1.
itemFrequencyPlot(groceries, support = 0.1)

# Plot the 30 most frequent items.
itemFrequencyPlot(groceries, topN = 30)

# Visualise the sparse item matrix for the first ten transactions.
image(groceries[seq_len(10)])

# Same visualisation for a random sample of 250 transactions. The seed is
# fixed so that the sampled baskets, and therefore the figure, are the same
# on every run.
set.seed(123)
image(sample(groceries, 250))

# Baseline run with apriori()'s default thresholds (support 0.1 and confidence
# 0.8, as echoed in the parameter specification below). The recorded run finds
# no rules at these thresholds, and the result is not stored.
apriori(data = groceries)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5     0.1      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 983 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [0 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
## set of 0 rules
# Mine association rules with thresholds low enough for this sparse data:
# support of at least 0.005, confidence of at least 0.1, and at least two
# items per rule (lhs + rhs).
groceryrules <- apriori(
  groceries,
  parameter = list(
    support = 0.005,
    confidence = 0.1,
    minlen = 2
  )
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.1    0.1    1 none FALSE            TRUE       5   0.005      2
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 49 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.02s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [1574 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Printing the rule set reports only how many rules it holds.
groceryrules
## set of 1574 rules
# Rule-length distribution and summary statistics of support, confidence, lift.
summary(groceryrules)
## set of 1574 rules
## 
## rule length distribution (lhs + rhs):sizes
##   2   3   4 
## 755 771  48 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.000   3.000   2.551   3.000   4.000 
## 
## summary of quality measures:
##     support           confidence          lift       
##  Min.   :0.005084   Min.   :0.1000   Min.   :0.4457  
##  1st Qu.:0.005897   1st Qu.:0.1459   1st Qu.:1.4783  
##  Median :0.007321   Median :0.2190   Median :1.8197  
##  Mean   :0.009783   Mean   :0.2562   Mean   :1.9074  
##  3rd Qu.:0.010346   3rd Qu.:0.3341   3rd Qu.:2.2459  
##  Max.   :0.074835   Max.   :0.7000   Max.   :4.6399  
## 
## mining info:
##       data ntransactions support confidence
##  groceries          9835   0.005        0.1
# Show the first five rules in the order apriori() returned them.
inspect(groceryrules[seq_len(5)])
##     lhs                rhs                support     confidence lift    
## [1] {cake bar}      => {whole milk}       0.005592272 0.4230769  1.655775
## [2] {dishes}        => {other vegetables} 0.005998983 0.3410405  1.762550
## [3] {dishes}        => {whole milk}       0.005287239 0.3005780  1.176357
## [4] {mustard}       => {whole milk}       0.005185562 0.4322034  1.691492
## [5] {potted plants} => {whole milk}       0.006914082 0.4000000  1.565460
# Rank every rule by lift, highest first, and show the ten strongest.
inspect(sort(groceryrules, by = "lift", decreasing = TRUE)[seq_len(10)])
##      lhs                     rhs                      support confidence     lift
## [1]  {ham}                => {white bread}        0.005083884  0.1953125 4.639851
## [2]  {white bread}        => {ham}                0.005083884  0.1207729 4.639851
## [3]  {citrus fruit,                                                              
##       other vegetables,                                                          
##       whole milk}         => {root vegetables}    0.005795628  0.4453125 4.085493
## [4]  {butter,                                                                    
##       other vegetables}   => {whipped/sour cream} 0.005795628  0.2893401 4.036397
## [5]  {herbs}              => {root vegetables}    0.007015760  0.4312500 3.956477
## [6]  {other vegetables,                                                          
##       root vegetables}    => {onions}             0.005693950  0.1201717 3.875044
## [7]  {citrus fruit,                                                              
##       pip fruit}          => {tropical fruit}     0.005592272  0.4044118 3.854060
## [8]  {berries}            => {whipped/sour cream} 0.009049314  0.2721713 3.796886
## [9]  {whipped/sour cream} => {berries}            0.009049314  0.1262411 3.796886
## [10] {other vegetables,                                                          
##       tropical fruit,                                                            
##       whole milk}         => {root vegetables}    0.007015760  0.4107143 3.768074
# Keep only the rules that mention "ham" on either side, then list them.
hamrules <- subset(groceryrules, subset = items %in% "ham")
inspect(hamrules)
##     lhs              rhs                support     confidence lift    
## [1] {ham}         => {white bread}      0.005083884 0.1953125  4.639851
## [2] {white bread} => {ham}              0.005083884 0.1207729  4.639851
## [3] {ham}         => {tropical fruit}   0.005388917 0.2070312  1.973016
## [4] {ham}         => {yogurt}           0.006710727 0.2578125  1.848095
## [5] {ham}         => {rolls/buns}       0.006914082 0.2656250  1.444125
## [6] {ham}         => {other vegetables} 0.009150991 0.3515625  1.816930
## [7] {ham}         => {whole milk}       0.011489578 0.4414062  1.727509
# Mine a much larger rule set: support of at least 0.001, confidence of at
# least 0.1, and at most five items per rule (maxlen = 5; the recorded run
# below warns that mining stopped at that length).
# The parameter names are written out in full (`support`, `confidence`) to
# match the other apriori() calls in this script. The recorded warning text
# below echoes the call as it was typed when the output was captured.
rules <- apriori(
  groceries,
  parameter = list(support = 0.001, confidence = 0.1, maxlen = 5)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.1    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target   ext
##       5  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5
## Warning in apriori(groceries, parameter = list(supp = 0.001, conf =
## 0.1, : Mining stopped (maxlen reached). Only patterns up to a length of 5
## returned!
##  done [0.02s].
## writing ... [32731 rule(s)] done [0.00s].
## creating S4 object  ... done [0.02s].
# Size of the mined rule set.
rules
## set of 32731 rules
# Print numbers with two significant digits from here on (session-wide option).
options(digits = 2)

# Order the rules by lift, strongest first, and list the top ten.
rules <- sort(rules, by = "lift", decreasing = TRUE)
inspect(rules[seq_len(10)])
##      lhs                        rhs                     support confidence lift
## [1]  {bottled beer,                                                            
##       red/blush wine}        => {liquor}                 0.0019       0.40   36
## [2]  {hamburger meat,                                                          
##       soda}                  => {Instant food products}  0.0012       0.21   26
## [3]  {ham,                                                                     
##       white bread}           => {processed cheese}       0.0019       0.38   23
## [4]  {other vegetables,                                                        
##       root vegetables,                                                         
##       whole milk,                                                              
##       yogurt}                => {rice}                   0.0013       0.17   22
## [5]  {bottled beer,                                                            
##       liquor}                => {red/blush wine}         0.0019       0.41   21
## [6]  {Instant food products,                                                   
##       soda}                  => {hamburger meat}         0.0012       0.63   19
## [7]  {curd,                                                                    
##       sugar}                 => {flour}                  0.0011       0.32   19
## [8]  {salty snack,                                                             
##       soda}                  => {popcorn}                0.0012       0.13   18
## [9]  {baking powder,                                                           
##       sugar}                 => {flour}                  0.0010       0.31   18
## [10] {processed cheese,                                                        
##       white bread}           => {ham}                    0.0019       0.46   18
# Re-order the same rule set by confidence, highest first, and list the top
# ten; in the recorded run all of them are shown with confidence 1.
rules <- sort(rules, by = "confidence", decreasing = TRUE)
inspect(rules[seq_len(10)])
##      lhs                        rhs                support confidence lift
## [1]  {citrus fruit,                                                       
##       root vegetables,                                                    
##       soft cheese}           => {other vegetables}  0.0010          1  5.2
## [2]  {brown bread,                                                        
##       pip fruit,                                                          
##       whipped/sour cream}    => {other vegetables}  0.0011          1  5.2
## [3]  {grapes,                                                             
##       tropical fruit,                                                     
##       whole milk,                                                         
##       yogurt}                => {other vegetables}  0.0010          1  5.2
## [4]  {ham,                                                                
##       pip fruit,                                                          
##       tropical fruit,                                                     
##       yogurt}                => {other vegetables}  0.0010          1  5.2
## [5]  {ham,                                                                
##       pip fruit,                                                          
##       tropical fruit,                                                     
##       whole milk}            => {other vegetables}  0.0011          1  5.2
## [6]  {butter,                                                             
##       fruit/vegetable juice,                                              
##       tropical fruit,                                                     
##       whipped/sour cream}    => {other vegetables}  0.0010          1  5.2
## [7]  {newspapers,                                                         
##       rolls/buns,                                                         
##       soda,                                                               
##       whole milk}            => {other vegetables}  0.0010          1  5.2
## [8]  {citrus fruit,                                                       
##       root vegetables,                                                    
##       tropical fruit,                                                     
##       whipped/sour cream}    => {other vegetables}  0.0012          1  5.2
## [9]  {rice,                                                               
##       sugar}                 => {whole milk}        0.0012          1  3.9
## [10] {canned fish,                                                        
##       hygiene articles}      => {whole milk}        0.0011          1  3.9
# Item-by-item cross table of the transactions: the diagonal holds each item's
# own count (2513 for whole milk, matching the summary above) and the
# off-diagonal cells hold how often two items occur together.
# NOTE(review): this variable shares its name with base::table(); the name is
# kept because later lines index the object by it.
table <- crossTable(groceries)

# Top-left corner of the table.
table[seq_len(5), seq_len(4)]
##                  abrasive cleaner artif. sweetener baby cosmetics
## abrasive cleaner               35                1              0
## artif. sweetener                1               32              0
## baby cosmetics                  0                0              6
## baby food                       0                0              0
## bags                            0                0              0
##                  baby food
## abrasive cleaner         0
## artif. sweetener         0
## baby cosmetics           0
## baby food                1
## bags                     0
# Number of transactions containing whole milk.
table["whole milk", "whole milk"]
## [1] 2513
# Number of transactions containing both ham and yogurt.
table["ham", "yogurt"]
## [1] 66
# Mine rules that predict ham: "ham" is the only item allowed on the
# right-hand side, and every other item may appear on the left-hand side.
ham.rules <- apriori(
  data = groceries,
  parameter = list(support = 0.001, confidence = 0.3),
  appearance = list(default = "lhs", rhs = "ham")
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.3    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [2 rule(s)] done [0.00s].
## creating S4 object  ... done [0.02s].
# Size of the ham rule set.
ham.rules
## set of 2 rules
# Order the ham rules by lift, strongest first, and print them.
ham.rules <- sort(ham.rules, by = "lift", decreasing = TRUE)
inspect(ham.rules)
##     lhs                                         rhs   support confidence
## [1] {processed cheese,white bread}           => {ham} 0.0019  0.46      
## [2] {fruit/vegetable juice,processed cheese} => {ham} 0.0011  0.38      
##     lift
## [1] 18  
## [2] 15
# Manual confidence checks from the cross table. Each ratio is computed from
# the table cells themselves, so it cannot drift from the data.

# Share of white-bread transactions that also contain processed cheese.
table["white bread", "processed cheese"]
## [1] 41
table["white bread", "white bread"]
## [1] 414
table["white bread", "processed cheese"] / table["white bread", "white bread"]
## [1] 0.099
# Share of whole-milk transactions that also contain yogurt.
table["whole milk", "whole milk"]
## [1] 2513
table["yogurt", "whole milk"]
## [1] 551
table["yogurt", "whole milk"] / table["whole milk", "whole milk"]
## [1] 0.22
# Draw the ham rules as a graph, using lift as the plotted measure and
# confidence for the shading.
plot(
  x = ham.rules,
  method = "graph",
  measure = "lift",
  shading = "confidence"
)