1 Goal


The goal of this tutorial is to show how to exclude specific items from the rules produced by a market basket analysis. Removing a dominant item lets other, less obvious relationships come to the surface.


2 Data import


# First we load the libraries for basket analysis
library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Loading required package: grid
# Load the Adult census transactions used throughout this tutorial
# and print the object to see its size
data(Adult)
Adult
## transactions in sparse format with
##  48842 transactions (rows) and
##  115 items (columns)

3 Creating Rules


# Mine association rules from the Adult transactions with apriori(),
# using a minimum support of 0.1 and a minimum confidence of 0.1
my_rules <- apriori(
  Adult,
  parameter = list(support = 0.1, confidence = 0.1)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.1    0.1    1 none FALSE            TRUE       5     0.1      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 4884 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.03s].
## sorting and recoding items ... [31 item(s)] done [0.01s].
## creating transaction tree ... done [0.04s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.11s].
## writing ... [11248 rule(s)] done [0.00s].
## creating S4 object  ... done [0.02s].
# Plot the mined rules to get a visual overview
plot(my_rules)

# Order the rules by decreasing lift and print the ten strongest
inspect(head(sort(my_rules, by = "lift"), n = 10))
##      lhs                               rhs                        support confidence     lift count
## [1]  {age=Young,                                                                                   
##       marital-status=Never-married,                                                                
##       capital-gain=None}            => {relationship=Own-child} 0.1014291  0.6181682 3.982663  4954
## [2]  {age=Young,                                                                                   
##       marital-status=Never-married} => {relationship=Own-child} 0.1038860  0.6165998 3.972559  5074
## [3]  {age=Young,                                                                                   
##       marital-status=Never-married,                                                                
##       capital-loss=None}            => {relationship=Own-child} 0.1015315  0.6164843 3.971815  4959
## [4]  {marital-status=Never-married,                                                                
##       relationship=Own-child,                                                                      
##       capital-gain=None}            => {age=Young}              0.1014291  0.7541483 3.826125  4954
## [5]  {marital-status=Never-married,                                                                
##       relationship=Own-child,                                                                      
##       capital-loss=None}            => {age=Young}              0.1015315  0.7517053 3.813731  4959
## [6]  {marital-status=Never-married,                                                                
##       relationship=Own-child}       => {age=Young}              0.1038860  0.7517037 3.813723  5074
## [7]  {relationship=Own-child,                                                                      
##       capital-gain=None,                                                                           
##       capital-loss=None}            => {age=Young}              0.1020433  0.6942471 3.522221  4984
## [8]  {relationship=Own-child,                                                                      
##       capital-gain=None}            => {age=Young}              0.1045412  0.6939386 3.520655  5106
## [9]  {age=Young,                                                                                   
##       capital-gain=None}            => {relationship=Own-child} 0.1045412  0.5462131 3.519079  5106
## [10] {age=Young,                                                                                   
##       capital-gain=None,                                                                           
##       capital-loss=None}            => {relationship=Own-child} 0.1020433  0.5461319 3.518556  4984

4 Removing items from the Rules

4.1 Removing one item


# age=Young appears (on the left- or right-hand side) in every one of the
# top 10 rules above. Excluding this item may let more interesting rules
# surface, so we ban it through the `none` entry of the appearance argument.

# Re-mine the rules with the same thresholds, this time without age=Young
my_rules <- apriori(
  Adult,
  parameter = list(support = 0.1, confidence = 0.1),
  appearance = list(none = "age=Young")
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.1    0.1    1 none FALSE            TRUE       5     0.1      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 4884 
## 
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.04s].
## sorting and recoding items ... [30 item(s)] done [0.01s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.11s].
## writing ... [10874 rule(s)] done [0.00s].
## creating S4 object  ... done [0.02s].
# Order the new rules by decreasing lift and print the ten strongest
inspect(head(sort(my_rules, by = "lift"), n = 10))
##      lhs                               rhs                        support confidence     lift count
## [1]  {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       capital-gain=None,                                                                           
##       capital-loss=None,                                                                           
##       native-country=United-States} => {relationship=Own-child} 0.1051349  0.4559176 2.937334  5135
## [2]  {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       capital-gain=None,                                                                           
##       native-country=United-States} => {relationship=Own-child} 0.1076123  0.4511201 2.906425  5256
## [3]  {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       capital-loss=None,                                                                           
##       native-country=United-States} => {relationship=Own-child} 0.1081856  0.4480244 2.886481  5284
## [4]  {marital-status=Never-married,                                                                
##       capital-gain=None,                                                                           
##       capital-loss=None,                                                                           
##       native-country=United-States} => {relationship=Own-child} 0.1220671  0.4445273 2.863950  5962
## [5]  {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       native-country=United-States} => {relationship=Own-child} 0.1106630  0.4436874 2.858538  5405
## [6]  {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       capital-gain=None,                                                                           
##       capital-loss=None}            => {relationship=Own-child} 0.1111543  0.4431475 2.855060  5429
## [7]  {marital-status=Never-married,                                                                
##       capital-gain=None,                                                                           
##       native-country=United-States} => {relationship=Own-child} 0.1249949  0.4405716 2.838464  6105
## [8]  {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       capital-gain=None}            => {relationship=Own-child} 0.1137136  0.4386353 2.825989  5554
## [9]  {marital-status=Never-married,                                                                
##       capital-loss=None,                                                                           
##       native-country=United-States} => {relationship=Own-child} 0.1255272  0.4368054 2.814200  6131
## [10] {marital-status=Never-married,                                                                
##       race=White,                                                                                  
##       capital-loss=None}            => {relationship=Own-child} 0.1142664  0.4357773 2.807576  5581

4.2 Removing several items


# relationship=Own-child is now the right-hand side of every top 10 rule.
# Several items can be excluded at once by listing them all under `none`.

# Re-mine the rules with the same thresholds, excluding both items
my_rules <- apriori(
  Adult,
  parameter = list(support = 0.1, confidence = 0.1),
  appearance = list(none = c("age=Young", "relationship=Own-child"))
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.1    0.1    1 none FALSE            TRUE       5     0.1      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 4884 
## 
## set item appearances ...[2 item(s)] done [0.00s].
## set transactions ...[115 item(s), 48842 transaction(s)] done [0.03s].
## sorting and recoding items ... [29 item(s)] done [0.01s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 done [0.12s].
## writing ... [10718 rule(s)] done [0.00s].
## creating S4 object  ... done [0.02s].
# Order the new rules by decreasing lift and print the ten strongest
inspect(head(sort(my_rules, by = "lift"), n = 10))
##      lhs                                    rhs                      support confidence     lift count
## [1]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       sex=Male,                                                                                       
##       capital-gain=None,                                                                              
##       native-country=United-States}      => {relationship=Husband} 0.1164367  0.9989461 2.474666  5687
## [2]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       race=White,                                                                                     
##       sex=Male,                                                                                       
##       capital-gain=None,                                                                              
##       native-country=United-States}      => {relationship=Husband} 0.1083698  0.9988677 2.474472  5293
## [3]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       sex=Male,                                                                                       
##       capital-gain=None,                                                                              
##       capital-loss=None,                                                                              
##       native-country=United-States}      => {relationship=Husband} 0.1072438  0.9988558 2.474443  5238
## [4]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       sex=Male,                                                                                       
##       native-country=United-States}      => {relationship=Husband} 0.1345154  0.9987838 2.474265  6570
## [5]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       race=White,                                                                                     
##       sex=Male,                                                                                       
##       native-country=United-States}      => {relationship=Husband} 0.1254248  0.9986958 2.474046  6126
## [6]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       sex=Male,                                                                                       
##       capital-loss=None,                                                                              
##       native-country=United-States}      => {relationship=Husband} 0.1253225  0.9986947 2.474044  6121
## [7]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       race=White,                                                                                     
##       sex=Male,                                                                                       
##       capital-loss=None,                                                                              
##       native-country=United-States}      => {relationship=Husband} 0.1167233  0.9985987 2.473806  5701
## [8]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       race=White,                                                                                     
##       sex=Male,                                                                                       
##       capital-gain=None}                 => {relationship=Husband} 0.1163138  0.9977169 2.471621  5681
## [9]  {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       race=White,                                                                                     
##       sex=Male}                          => {relationship=Husband} 0.1345154  0.9975706 2.471259  6570
## [10] {age=Senior,                                                                                     
##       marital-status=Married-civ-spouse,                                                              
##       race=White,                                                                                     
##       sex=Male,                                                                                       
##       capital-gain=None,                                                                              
##       capital-loss=None}                 => {relationship=Husband} 0.1070800  0.9975205 2.471135  5230

5 Conclusion


In this tutorial we have learnt how to exclude one or several items from the mined rules, using the `none` entry of the `appearance` argument of `apriori()`, in order to find interesting underlying relationships.