The goal of this tutorial is to exclude specific items from the rules mined in a market basket analysis. Removing a dominant item can reveal underlying relationships that it would otherwise hide.
# First we load the libraries for basket analysis
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Loading required package: grid
# Load the Adult census dataset used throughout this tutorial and print it
data("Adult")
print(Adult)
## transactions in sparse format with
## 48842 transactions (rows) and
## 115 items (columns)
# Mine association rules with apriori(), using a minimum support of 0.1 and
# a minimum confidence of 0.1 (parameter names spelled out in full rather
# than relying on partial name matching)
my_rules <- apriori(Adult, parameter = list(support = 0.1, confidence = 0.1))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 4884
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.03s].
## sorting and recoding items ... [31 item(s)] done [0.01s].
## creating transaction tree ... done [0.04s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.11s].
## writing ... [11248 rule(s)] done [0.00s].
## creating S4 object ... done [0.02s].
# Plot the mined rules
plot(my_rules)
# Pull out the 10 rules with the highest lift and display them
top_rules <- head(my_rules, n = 10, by = "lift")
inspect(top_rules)
## lhs rhs support confidence lift count
## [1] {age=Young,
## marital-status=Never-married,
## capital-gain=None} => {relationship=Own-child} 0.1014291 0.6181682 3.982663 4954
## [2] {age=Young,
## marital-status=Never-married} => {relationship=Own-child} 0.1038860 0.6165998 3.972559 5074
## [3] {age=Young,
## marital-status=Never-married,
## capital-loss=None} => {relationship=Own-child} 0.1015315 0.6164843 3.971815 4959
## [4] {marital-status=Never-married,
## relationship=Own-child,
## capital-gain=None} => {age=Young} 0.1014291 0.7541483 3.826125 4954
## [5] {marital-status=Never-married,
## relationship=Own-child,
## capital-loss=None} => {age=Young} 0.1015315 0.7517053 3.813731 4959
## [6] {marital-status=Never-married,
## relationship=Own-child} => {age=Young} 0.1038860 0.7517037 3.813723 5074
## [7] {relationship=Own-child,
## capital-gain=None,
## capital-loss=None} => {age=Young} 0.1020433 0.6942471 3.522221 4984
## [8] {relationship=Own-child,
## capital-gain=None} => {age=Young} 0.1045412 0.6939386 3.520655 5106
## [9] {age=Young,
## capital-gain=None} => {relationship=Own-child} 0.1045412 0.5462131 3.519079 5106
## [10] {age=Young,
## capital-gain=None,
## capital-loss=None} => {relationship=Own-child} 0.1020433 0.5461319 3.518556 4984
# We see that age=Young appears in every one of the top 10 rules
# We could find more interesting rules if we exclude this specific item
# We will use the appearance argument of apriori() to do so
# Mine the rules again with the same thresholds, this time excluding
# age=Young: items listed under `none` in the appearance argument are not
# allowed to appear in any rule
my_rules <- apriori(
  Adult,
  parameter = list(support = 0.1, confidence = 0.1),
  appearance = list(none = "age=Young")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 4884
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.04s].
## sorting and recoding items ... [30 item(s)] done [0.01s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.11s].
## writing ... [10874 rule(s)] done [0.00s].
## creating S4 object ... done [0.02s].
# Pull out the 10 rules with the highest lift and display them
top_rules <- head(my_rules, n = 10, by = "lift")
inspect(top_rules)
## lhs rhs support confidence lift count
## [1] {marital-status=Never-married,
## race=White,
## capital-gain=None,
## capital-loss=None,
## native-country=United-States} => {relationship=Own-child} 0.1051349 0.4559176 2.937334 5135
## [2] {marital-status=Never-married,
## race=White,
## capital-gain=None,
## native-country=United-States} => {relationship=Own-child} 0.1076123 0.4511201 2.906425 5256
## [3] {marital-status=Never-married,
## race=White,
## capital-loss=None,
## native-country=United-States} => {relationship=Own-child} 0.1081856 0.4480244 2.886481 5284
## [4] {marital-status=Never-married,
## capital-gain=None,
## capital-loss=None,
## native-country=United-States} => {relationship=Own-child} 0.1220671 0.4445273 2.863950 5962
## [5] {marital-status=Never-married,
## race=White,
## native-country=United-States} => {relationship=Own-child} 0.1106630 0.4436874 2.858538 5405
## [6] {marital-status=Never-married,
## race=White,
## capital-gain=None,
## capital-loss=None} => {relationship=Own-child} 0.1111543 0.4431475 2.855060 5429
## [7] {marital-status=Never-married,
## capital-gain=None,
## native-country=United-States} => {relationship=Own-child} 0.1249949 0.4405716 2.838464 6105
## [8] {marital-status=Never-married,
## race=White,
## capital-gain=None} => {relationship=Own-child} 0.1137136 0.4386353 2.825989 5554
## [9] {marital-status=Never-married,
## capital-loss=None,
## native-country=United-States} => {relationship=Own-child} 0.1255272 0.4368054 2.814200 6131
## [10] {marital-status=Never-married,
## race=White,
## capital-loss=None} => {relationship=Own-child} 0.1142664 0.4357773 2.807576 5581
# Now relationship=Own-child appears in every one of the top 10 rules
# We can exclude several items at the same time
# Mine the rules once more with the same thresholds, excluding both
# age=Young and relationship=Own-child via the `none` appearance list
my_rules <- apriori(
  Adult,
  parameter = list(support = 0.1, confidence = 0.1),
  appearance = list(none = c("age=Young", "relationship=Own-child"))
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 4884
##
## set item appearances ...[2 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.03s].
## sorting and recoding items ... [29 item(s)] done [0.01s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.12s].
## writing ... [10718 rule(s)] done [0.00s].
## creating S4 object ... done [0.02s].
# Pull out the 10 rules with the highest lift and display them
top_rules <- head(my_rules, n = 10, by = "lift")
inspect(top_rules)
## lhs rhs support confidence lift count
## [1] {age=Senior,
## marital-status=Married-civ-spouse,
## sex=Male,
## capital-gain=None,
## native-country=United-States} => {relationship=Husband} 0.1164367 0.9989461 2.474666 5687
## [2] {age=Senior,
## marital-status=Married-civ-spouse,
## race=White,
## sex=Male,
## capital-gain=None,
## native-country=United-States} => {relationship=Husband} 0.1083698 0.9988677 2.474472 5293
## [3] {age=Senior,
## marital-status=Married-civ-spouse,
## sex=Male,
## capital-gain=None,
## capital-loss=None,
## native-country=United-States} => {relationship=Husband} 0.1072438 0.9988558 2.474443 5238
## [4] {age=Senior,
## marital-status=Married-civ-spouse,
## sex=Male,
## native-country=United-States} => {relationship=Husband} 0.1345154 0.9987838 2.474265 6570
## [5] {age=Senior,
## marital-status=Married-civ-spouse,
## race=White,
## sex=Male,
## native-country=United-States} => {relationship=Husband} 0.1254248 0.9986958 2.474046 6126
## [6] {age=Senior,
## marital-status=Married-civ-spouse,
## sex=Male,
## capital-loss=None,
## native-country=United-States} => {relationship=Husband} 0.1253225 0.9986947 2.474044 6121
## [7] {age=Senior,
## marital-status=Married-civ-spouse,
## race=White,
## sex=Male,
## capital-loss=None,
## native-country=United-States} => {relationship=Husband} 0.1167233 0.9985987 2.473806 5701
## [8] {age=Senior,
## marital-status=Married-civ-spouse,
## race=White,
## sex=Male,
## capital-gain=None} => {relationship=Husband} 0.1163138 0.9977169 2.471621 5681
## [9] {age=Senior,
## marital-status=Married-civ-spouse,
## race=White,
## sex=Male} => {relationship=Husband} 0.1345154 0.9975706 2.471259 6570
## [10] {age=Senior,
## marital-status=Married-civ-spouse,
## race=White,
## sex=Male,
## capital-gain=None,
## capital-loss=None} => {relationship=Husband} 0.1070800 0.9975205 2.471135 5230
In this tutorial we have learnt how to use the appearance argument of apriori() to remove items from the rules in order to find interesting underlying relationships.