The Process of Rule Selection
Lift
library(arules)
fp.df <- mlba::Faceplate
# remove first column and convert to matrix
fp.mat <- as.matrix(fp.df[, -1])
# convert the binary incidence matrix into a transactions database
fp.trans <- as(fp.mat, "transactions")
inspect(fp.trans)
#> items
#> [1] {Red, White, Green}
#> [2] {White, Orange}
#> [3] {White, Blue}
#> [4] {Red, White, Orange}
#> [5] {Red, Blue}
#> [6] {White, Blue}
#> [7] {Red, Blue}
#> [8] {Red, White, Blue, Green}
#> [9] {Red, White, Blue}
#> [10] {Yellow}
## get rules
# when running apriori(), include the minimum support, minimum confidence, and target
# as arguments.
rules <- apriori(fp.trans, parameter = list(supp = 0.2, conf = 0.5, target = "rules"))
#> Apriori
#>
#> Parameter specification:
#> confidence minval smax arem aval originalSupport maxtime support minlen
#> 0.5 0.1 1 none FALSE TRUE 5 0.2 1
#> maxlen target ext
#> 10 rules TRUE
#>
#> Algorithmic control:
#> filter tree heap memopt load sort verbose
#> 0.1 TRUE TRUE FALSE TRUE 2 TRUE
#>
#> Absolute minimum support count: 2
#>
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[6 item(s), 10 transaction(s)] done [0.00s].
#> sorting and recoding items ... [5 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 done [0.00s].
#> writing ... [18 rule(s)] done [0.00s].
#> creating S4 object ... done [0.00s].
# inspect the first six rules, sorted by their lift
inspect(head(sort(rules, by = "lift"), n = 6))
#> lhs rhs support confidence coverage lift count
#> [1] {Red, White} => {Green} 0.2 0.5 0.4 2.500000 2
#> [2] {Green} => {Red} 0.2 1.0 0.2 1.666667 2
#> [3] {White, Green} => {Red} 0.2 1.0 0.2 1.666667 2
#> [4] {Orange} => {White} 0.2 1.0 0.2 1.428571 2
#> [5] {Green} => {White} 0.2 1.0 0.2 1.428571 2
#> [6] {Red, Green} => {White} 0.2 1.0 0.2 1.428571 2
Example 2: Rules for Similar Book Purchases
all.books.df <- mlba::CharlesBookClub
# create a binary incidence matrix
count.books.df <- all.books.df[, 8:18]
incid.books.mat <- as.matrix(count.books.df > 0)
# convert the binary incidence matrix into a transactions database
books.trans <- as(incid.books.mat, "transactions")
inspect(books.trans[1:10])
#> items
#> [1] {YouthBks,
#> CookBks}
#> [2] {}
#> [3] {ChildBks,
#> YouthBks,
#> CookBks,
#> RefBks,
#> GeogBks,
#> ItalCook}
#> [4] {}
#> [5] {}
#> [6] {}
#> [7] {GeogBks}
#> [8] {ChildBks}
#> [9] {}
#> [10] {CookBks}
# plot data
itemFrequencyPlot(books.trans)

# run apriori function
rules <- apriori(books.trans,
parameter = list(supp= 200/4000, conf = 0.5, target = "rules"))
#> Apriori
#>
#> Parameter specification:
#> confidence minval smax arem aval originalSupport maxtime support minlen
#> 0.5 0.1 1 none FALSE TRUE 5 0.05 1
#> maxlen target ext
#> 10 rules TRUE
#>
#> Algorithmic control:
#> filter tree heap memopt load sort verbose
#> 0.1 TRUE TRUE FALSE TRUE 2 TRUE
#>
#> Absolute minimum support count: 200
#>
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[11 item(s), 4000 transaction(s)] done [0.00s].
#> sorting and recoding items ... [9 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 4 done [0.00s].
#> writing ... [72 rule(s)] done [0.00s].
#> creating S4 object ... done [0.00s].
# inspect top-30 rules sorted by lift
inspect(head(sort(rules, by = "lift"), n=30))
#> lhs rhs support confidence coverage
#> [1] {ChildBks, CookBks, GeogBks} => {YouthBks} 0.06325 0.5776256 0.10950
#> [2] {ChildBks, CookBks, RefBks} => {DoItYBks} 0.06125 0.5917874 0.10350
#> [3] {DoItYBks, GeogBks} => {YouthBks} 0.05450 0.5396040 0.10100
#> [4] {ChildBks, CookBks, RefBks} => {YouthBks} 0.05525 0.5338164 0.10350
#> [5] {ChildBks, CookBks, DoItYBks} => {YouthBks} 0.06700 0.5244618 0.12775
#> [6] {ChildBks, YouthBks, CookBks} => {DoItYBks} 0.06700 0.5583333 0.12000
#> [7] {ChildBks, RefBks} => {DoItYBks} 0.07100 0.5536062 0.12825
#> [8] {ChildBks, CookBks, GeogBks} => {DoItYBks} 0.06050 0.5525114 0.10950
#> [9] {ChildBks, GeogBks} => {YouthBks} 0.07550 0.5162393 0.14625
#> [10] {CookBks, GeogBks} => {YouthBks} 0.08025 0.5136000 0.15625
#> [11] {ChildBks, YouthBks, RefBks} => {CookBks} 0.05525 0.8911290 0.06200
#> [12] {ChildBks, YouthBks} => {DoItYBks} 0.08025 0.5440678 0.14750
#> [13] {CookBks, RefBks} => {DoItYBks} 0.07450 0.5330948 0.13975
#> [14] {CookBks, DoItYBks, RefBks} => {ChildBks} 0.06125 0.8221477 0.07450
#> [15] {ChildBks, DoItYBks, RefBks} => {CookBks} 0.06125 0.8626761 0.07100
#> [16] {ChildBks, CookBks} => {DoItYBks} 0.12775 0.5278926 0.24200
#> [17] {YouthBks, CookBks, RefBks} => {ChildBks} 0.05525 0.8095238 0.06825
#> [18] {YouthBks, GeogBks} => {DoItYBks} 0.05450 0.5215311 0.10450
#> [19] {YouthBks, CookBks} => {DoItYBks} 0.08375 0.5201863 0.16100
#> [20] {ChildBks, RefBks, GeogBks} => {CookBks} 0.05025 0.8481013 0.05925
#> [21] {YouthBks, CookBks, DoItYBks} => {ChildBks} 0.06700 0.8000000 0.08375
#> [22] {YouthBks, RefBks} => {CookBks} 0.06825 0.8400000 0.08125
#> [23] {ChildBks, YouthBks, GeogBks} => {CookBks} 0.06325 0.8377483 0.07550
#> [24] {ChildBks, YouthBks, DoItYBks} => {CookBks} 0.06700 0.8348910 0.08025
#> [25] {ChildBks, ArtBks} => {DoItYBks} 0.05375 0.5106888 0.10525
#> [26] {YouthBks, CookBks, GeogBks} => {ChildBks} 0.06325 0.7881620 0.08025
#> [27] {CookBks, DoItYBks, GeogBks} => {ChildBks} 0.06050 0.7806452 0.07750
#> [28] {ChildBks, GeogBks} => {DoItYBks} 0.07375 0.5042735 0.14625
#> [29] {YouthBks, DoItYBks} => {GeogBks} 0.05450 0.5278450 0.10325
#> [30] {CookBks, RefBks, GeogBks} => {ChildBks} 0.05025 0.7790698 0.06450
#> lift count
#> [1] 2.424452 253
#> [2] 2.323013 245
#> [3] 2.264864 218
#> [4] 2.240573 221
#> [5] 2.201309 268
#> [6] 2.191691 268
#> [7] 2.173135 284
#> [8] 2.168838 242
#> [9] 2.166797 302
#> [10] 2.155719 321
#> [11] 2.144715 221
#> [12] 2.135693 321
#> [13] 2.092619 298
#> [14] 2.086669 245
#> [15] 2.076236 245
#> [16] 2.072198 511
#> [17] 2.054629 221
#> [18] 2.047227 218
#> [19] 2.041948 335
#> [20] 2.041158 201
#> [21] 2.030457 268
#> [22] 2.021661 273
#> [23] 2.016242 253
#> [24] 2.009365 268
#> [25] 2.004667 215
#> [26] 2.000411 253
#> [27] 1.981333 242
#> [28] 1.979484 295
#> [29] 1.978801 218
#> [30] 1.977334 201