library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Loading required package: grid
# Read the comma-separated basket file into an arules transactions object.
groceries <- read.transactions(
  file = "groceries.csv",
  format = "basket",
  sep = ","
)
# Overview: dimensions, density, most frequent items and basket sizes.
summary(groceries)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55
## 16 17 18 19 20 21 22 23 24 26 27 28 29 32
## 46 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3 baby cosmetics
# Show the internal slot structure of the transactions object.
str(groceries)
## Formal class 'transactions' [package "arules"] with 3 slots
## ..@ data :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
## .. .. ..@ i : int [1:43367] 29 88 118 132 33 157 167 166 38 91 ...
## .. .. ..@ p : int [1:9836] 0 4 7 8 12 16 21 22 27 28 ...
## .. .. ..@ Dim : int [1:2] 169 9835
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. ..@ factors : list()
## ..@ itemInfo :'data.frame': 169 obs. of 1 variable:
## .. ..$ labels: chr [1:169] "abrasive cleaner" "artif. sweetener" "baby cosmetics" "baby food" ...
## ..@ itemsetInfo:'data.frame': 0 obs. of 0 variables
# NOTE(review): repeats the summary(groceries) call made right after loading;
# the printed output is identical.
summary(groceries)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55
## 16 17 18 19 20 21 22 23 24 26 27 28 29 32
## 46 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3 baby cosmetics
# Bar chart of absolute counts for the ten most frequent items.
itemFrequencyPlot(groceries, topN = 10, type = "absolute")

# Same chart restricted to the top five items, with the default frequency type.
itemFrequencyPlot(groceries, topN = 5)

# Print the item sets of the first ten transactions.
inspect(groceries[seq_len(10)])
## items
## [1] {citrus fruit,
## margarine,
## ready soups,
## semi-finished bread}
## [2] {coffee,
## tropical fruit,
## yogurt}
## [3] {whole milk}
## [4] {cream cheese,
## meat spreads,
## pip fruit,
## yogurt}
## [5] {condensed milk,
## long life bakery product,
## other vegetables,
## whole milk}
## [6] {abrasive cleaner,
## butter,
## rice,
## whole milk,
## yogurt}
## [7] {rolls/buns}
## [8] {bottled beer,
## liquor (appetizer),
## other vegetables,
## rolls/buns,
## UHT-milk}
## [9] {potted plants}
## [10] {cereals,
## whole milk}
# Relative frequency (share of transactions) of the first ten items.
itemFrequency(groceries[, seq_len(10)], type = "relative")
## abrasive cleaner artif. sweetener baby cosmetics baby food
## 0.0035587189 0.0032536858 0.0006100661 0.0001016777
## bags baking powder bathroom cleaner beef
## 0.0004067107 0.0176919166 0.0027452974 0.0524656838
## berries beverages
## 0.0332486019 0.0260294865
# Plot only the items that reach the 0.1 support threshold.
itemFrequencyPlot(groceries, support = 0.1)

# Plot the 30 most frequent items.
itemFrequencyPlot(groceries, topN = 30)

# Visualise the sparse item matrix for the first ten transactions.
image(groceries[1:10])

# Fix the RNG seed so the 250-transaction sample, and therefore the plot,
# is the same on every run.
set.seed(123)
image(sample(groceries, 250))

# Baseline run with apriori()'s default parameters; the log below reports
# support 0.1 and confidence 0.8, and no rules are found at those thresholds.
apriori(groceries)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 983
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [0 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## set of 0 rules
# Mine rules with looser thresholds; minlen = 2 requires at least two items
# per rule (lhs + rhs).
groceryrules <- apriori(
  groceries,
  parameter = list(support = 0.005, confidence = 0.1, minlen = 2)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.005 2
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 49
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.02s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [1574 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Printing the rule set shows only how many rules were mined.
groceryrules
## set of 1574 rules
# Rule-length distribution plus summary statistics of the quality measures.
summary(groceryrules)
## set of 1574 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 755 771 48
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 2.000 3.000 2.551 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.005084 Min. :0.1000 Min. :0.4457
## 1st Qu.:0.005897 1st Qu.:0.1459 1st Qu.:1.4783
## Median :0.007321 Median :0.2190 Median :1.8197
## Mean :0.009783 Mean :0.2562 Mean :1.9074
## 3rd Qu.:0.010346 3rd Qu.:0.3341 3rd Qu.:2.2459
## Max. :0.074835 Max. :0.7000 Max. :4.6399
##
## mining info:
## data ntransactions support confidence
## groceries 9835 0.005 0.1
# Show the first five rules in their stored order.
inspect(groceryrules[seq_len(5)])
## lhs rhs support confidence lift
## [1] {cake bar} => {whole milk} 0.005592272 0.4230769 1.655775
## [2] {dishes} => {other vegetables} 0.005998983 0.3410405 1.762550
## [3] {dishes} => {whole milk} 0.005287239 0.3005780 1.176357
## [4] {mustard} => {whole milk} 0.005185562 0.4322034 1.691492
## [5] {potted plants} => {whole milk} 0.006914082 0.4000000 1.565460
# Show the ten rules with the highest lift.
inspect(sort(groceryrules, by = "lift", decreasing = TRUE)[seq_len(10)])
## lhs rhs support confidence lift
## [1] {ham} => {white bread} 0.005083884 0.1953125 4.639851
## [2] {white bread} => {ham} 0.005083884 0.1207729 4.639851
## [3] {citrus fruit,
## other vegetables,
## whole milk} => {root vegetables} 0.005795628 0.4453125 4.085493
## [4] {butter,
## other vegetables} => {whipped/sour cream} 0.005795628 0.2893401 4.036397
## [5] {herbs} => {root vegetables} 0.007015760 0.4312500 3.956477
## [6] {other vegetables,
## root vegetables} => {onions} 0.005693950 0.1201717 3.875044
## [7] {citrus fruit,
## pip fruit} => {tropical fruit} 0.005592272 0.4044118 3.854060
## [8] {berries} => {whipped/sour cream} 0.009049314 0.2721713 3.796886
## [9] {whipped/sour cream} => {berries} 0.009049314 0.1262411 3.796886
## [10] {other vegetables,
## tropical fruit,
## whole milk} => {root vegetables} 0.007015760 0.4107143 3.768074
# Keep every rule that mentions "ham" on either side, then list them.
hamrules <- subset(groceryrules, subset = items %in% "ham")
inspect(hamrules)
## lhs rhs support confidence lift
## [1] {ham} => {white bread} 0.005083884 0.1953125 4.639851
## [2] {white bread} => {ham} 0.005083884 0.1207729 4.639851
## [3] {ham} => {tropical fruit} 0.005388917 0.2070312 1.973016
## [4] {ham} => {yogurt} 0.006710727 0.2578125 1.848095
## [5] {ham} => {rolls/buns} 0.006914082 0.2656250 1.444125
## [6] {ham} => {other vegetables} 0.009150991 0.3515625 1.816930
## [7] {ham} => {whole milk} 0.011489578 0.4414062 1.727509
# Lower the support threshold to 0.001 and cap rule length at five items,
# which yields a much larger rule set.
rules <- apriori(
  groceries,
  parameter = list(supp = 0.001, conf = 0.1, maxlen = 5)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 5 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5
## Warning in apriori(groceries, parameter = list(supp = 0.001, conf =
## 0.1, : Mining stopped (maxlen reached). Only patterns up to a length of 5
## returned!
## done [0.02s].
## writing ... [32731 rule(s)] done [0.00s].
## creating S4 object ... done [0.02s].
rules
## set of 32731 rules
# Limit printed numbers to two significant digits from here on.
options(digits = 2)
# Order the rules from highest to lowest lift and show the top ten.
rules <- sort(rules, by = "lift", decreasing = TRUE)
inspect(rules[seq_len(10)])
## lhs rhs support confidence lift
## [1] {bottled beer,
## red/blush wine} => {liquor} 0.0019 0.40 36
## [2] {hamburger meat,
## soda} => {Instant food products} 0.0012 0.21 26
## [3] {ham,
## white bread} => {processed cheese} 0.0019 0.38 23
## [4] {other vegetables,
## root vegetables,
## whole milk,
## yogurt} => {rice} 0.0013 0.17 22
## [5] {bottled beer,
## liquor} => {red/blush wine} 0.0019 0.41 21
## [6] {Instant food products,
## soda} => {hamburger meat} 0.0012 0.63 19
## [7] {curd,
## sugar} => {flour} 0.0011 0.32 19
## [8] {salty snack,
## soda} => {popcorn} 0.0012 0.13 18
## [9] {baking powder,
## sugar} => {flour} 0.0010 0.31 18
## [10] {processed cheese,
## white bread} => {ham} 0.0019 0.46 18
# Re-order the same rule set by descending confidence and show the top ten.
rules <- sort(rules, by = "confidence", decreasing = TRUE)
inspect(rules[seq_len(10)])
## lhs rhs support confidence lift
## [1] {citrus fruit,
## root vegetables,
## soft cheese} => {other vegetables} 0.0010 1 5.2
## [2] {brown bread,
## pip fruit,
## whipped/sour cream} => {other vegetables} 0.0011 1 5.2
## [3] {grapes,
## tropical fruit,
## whole milk,
## yogurt} => {other vegetables} 0.0010 1 5.2
## [4] {ham,
## pip fruit,
## tropical fruit,
## yogurt} => {other vegetables} 0.0010 1 5.2
## [5] {ham,
## pip fruit,
## tropical fruit,
## whole milk} => {other vegetables} 0.0011 1 5.2
## [6] {butter,
## fruit/vegetable juice,
## tropical fruit,
## whipped/sour cream} => {other vegetables} 0.0010 1 5.2
## [7] {newspapers,
## rolls/buns,
## soda,
## whole milk} => {other vegetables} 0.0010 1 5.2
## [8] {citrus fruit,
## root vegetables,
## tropical fruit,
## whipped/sour cream} => {other vegetables} 0.0012 1 5.2
## [9] {rice,
## sugar} => {whole milk} 0.0012 1 3.9
## [10] {canned fish,
## hygiene articles} => {whole milk} 0.0011 1 3.9
# Pairwise co-occurrence counts between items; the diagonal holds each item's
# own transaction count. Named `item_counts` so that it does not shadow
# base::table().
item_counts <- crossTable(groceries)
item_counts[1:5, 1:4]
## abrasive cleaner artif. sweetener baby cosmetics
## abrasive cleaner 35 1 0
## artif. sweetener 1 32 0
## baby cosmetics 0 0 6
## baby food 0 0 0
## bags 0 0 0
## baby food
## abrasive cleaner 0
## artif. sweetener 0
## baby cosmetics 0
## baby food 1
## bags 0
item_counts["whole milk", "whole milk"]
## [1] 2513
item_counts["ham", "yogurt"]
## [1] 66
# Mine only rules that predict "ham": every other item defaults to the
# left-hand side and "ham" is the only item allowed on the right-hand side.
ham.rules <- apriori(
  data = groceries,
  parameter = list(support = 0.001, confidence = 0.3),
  appearance = list(default = "lhs", rhs = "ham")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [2 rule(s)] done [0.00s].
## creating S4 object ... done [0.02s].
ham.rules
## set of 2 rules
ham.rules <- sort(ham.rules, decreasing = TRUE, by = "lift")
inspect(ham.rules)
## lhs rhs support confidence
## [1] {processed cheese,white bread} => {ham} 0.0019 0.46
## [2] {fruit/vegetable juice,processed cheese} => {ham} 0.0011 0.38
## lift
## [1] 18
## [2] 15
# Confidence of {white bread} => {processed cheese}: joint count divided by
# the antecedent's count, computed from the matrix instead of retyping the
# printed numbers.
item_counts["white bread", "processed cheese"]
## [1] 41
item_counts["white bread", "white bread"]
## [1] 414
item_counts["white bread", "processed cheese"] /
  item_counts["white bread", "white bread"]
## [1] 0.099
# Confidence of {whole milk} => {yogurt}, derived the same way.
item_counts["whole milk", "whole milk"]
## [1] 2513
item_counts["yogurt", "whole milk"]
## [1] 551
item_counts["yogurt", "whole milk"] / item_counts["whole milk", "whole milk"]
## [1] 0.22
# Graph view of the ham rules, with lift passed as the measure and confidence
# as the shading.
plot(ham.rules, method = "graph", measure = "lift", shading = "confidence")
