Apriori Tutorial

1. Install Package

# Install the arules package from the CRAN mirror. The mirror is addressed
# over HTTPS so the package archive is fetched through an encrypted connection.
install.packages("arules", repos = "https://cran.rstudio.com/")
## 
## The downloaded binary packages are in
##  /var/folders/3z/jqczpc_95yq_sbgl2665kg2c0000gq/T//RtmprGVTsp/downloaded_packages
# Attach the packages used in the rest of the tutorial: Matrix first, then
# arules (the attach message below lists the base objects arules masks).
library(Matrix)
library(arules)
## 
## Attaching package: 'arules'
## 
## The following objects are masked from 'package:base':
## 
##     %in%, write

2. Read Data

# Restore the objects saved in titanic.raw.rdata into the workspace. load()
# returns the names of the objects it restored, so confirm that the file
# really supplied `titanic.raw`; otherwise the assignment below could silently
# pick up a stale object of the same name that is already in the workspace.
loaded_objects <- load("titanic.raw.rdata")
if (!any(loaded_objects == "titanic.raw")) {
  stop(
    "titanic.raw.rdata does not contain an object named 'titanic.raw'",
    call. = FALSE
  )
}
data <- titanic.raw
# Show the structure of the data set (observations, variables, factor levels).
str(data)
## 'data.frame':    2201 obs. of  4 variables:
##  $ Class   : Factor w/ 4 levels "1st","2nd","3rd",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Sex     : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Age     : Factor w/ 2 levels "Adult","Child": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

3. Apriori

# Mine association rules with the thresholds written out explicitly. These are
# the values the parameter summary below reports for the default settings:
# support 0.1, confidence 0.8 and a rule length between 1 and 10 items.
rules <- apriori(
  data,
  parameter = list(support = 0.1, confidence = 0.8, minlen = 1, maxlen = 10)
)
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##         0.8    0.1    1 none FALSE            TRUE     0.1      1     10
##  target   ext
##   rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09)        (c) 1996-2004   Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [27 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Print the mined rules; the listing below shows each rule's lhs, rhs,
# support, confidence and lift.
inspect(rules)
##    lhs               rhs             support confidence      lift
## 1  {}             => {Age=Adult}   0.9504771  0.9504771 1.0000000
## 2  {Class=2nd}    => {Age=Adult}   0.1185825  0.9157895 0.9635051
## 3  {Class=1st}    => {Age=Adult}   0.1449341  0.9815385 1.0326798
## 4  {Sex=Female}   => {Age=Adult}   0.1930940  0.9042553 0.9513700
## 5  {Class=3rd}    => {Age=Adult}   0.2848705  0.8881020 0.9343750
## 6  {Survived=Yes} => {Age=Adult}   0.2971377  0.9198312 0.9677574
## 7  {Class=Crew}   => {Sex=Male}    0.3916402  0.9740113 1.2384742
## 8  {Class=Crew}   => {Age=Adult}   0.4020900  1.0000000 1.0521033
## 9  {Survived=No}  => {Sex=Male}    0.6197183  0.9154362 1.1639949
## 10 {Survived=No}  => {Age=Adult}   0.6533394  0.9651007 1.0153856
## 11 {Sex=Male}     => {Age=Adult}   0.7573830  0.9630272 1.0132040
## 12 {Sex=Female,                                                  
##     Survived=Yes} => {Age=Adult}   0.1435711  0.9186047 0.9664669
## 13 {Class=3rd,                                                   
##     Sex=Male}     => {Survived=No} 0.1917310  0.8274510 1.2222950
## 14 {Class=3rd,                                                   
##     Survived=No}  => {Age=Adult}   0.2162653  0.9015152 0.9484870
## 15 {Class=3rd,                                                   
##     Sex=Male}     => {Age=Adult}   0.2099046  0.9058824 0.9530818
## 16 {Sex=Male,                                                    
##     Survived=Yes} => {Age=Adult}   0.1535666  0.9209809 0.9689670
## 17 {Class=Crew,                                                  
##     Survived=No}  => {Sex=Male}    0.3044071  0.9955423 1.2658514
## 18 {Class=Crew,                                                  
##     Survived=No}  => {Age=Adult}   0.3057701  1.0000000 1.0521033
## 19 {Class=Crew,                                                  
##     Sex=Male}     => {Age=Adult}   0.3916402  1.0000000 1.0521033
## 20 {Class=Crew,                                                  
##     Age=Adult}    => {Sex=Male}    0.3916402  0.9740113 1.2384742
## 21 {Sex=Male,                                                    
##     Survived=No}  => {Age=Adult}   0.6038164  0.9743402 1.0251065
## 22 {Age=Adult,                                                   
##     Survived=No}  => {Sex=Male}    0.6038164  0.9242003 1.1751385
## 23 {Class=3rd,                                                   
##     Sex=Male,                                                    
##     Survived=No}  => {Age=Adult}   0.1758292  0.9170616 0.9648435
## 24 {Class=3rd,                                                   
##     Age=Adult,                                                   
##     Survived=No}  => {Sex=Male}    0.1758292  0.8130252 1.0337773
## 25 {Class=3rd,                                                   
##     Sex=Male,                                                    
##     Age=Adult}    => {Survived=No} 0.1758292  0.8376623 1.2373791
## 26 {Class=Crew,                                                  
##     Sex=Male,                                                    
##     Survived=No}  => {Age=Adult}   0.3044071  1.0000000 1.0521033
## 27 {Class=Crew,                                                  
##     Age=Adult,                                                   
##     Survived=No}  => {Sex=Male}    0.3044071  0.9955423 1.2658514

4. Set Target, Sort by Lift & Prune Redundant Rules

# Mine only rules that predict survival: the right-hand side is restricted to
# the two Survived items, and every other item may appear only on the
# left-hand side. minlen = 2 requires at least two items per rule, which drops
# rules with an empty left-hand side, and the support threshold is lowered to
# 0.01 so that rarer combinations are kept.
rules <- apriori(
  data,
  parameter = list(minlen = 2, support = 0.01, confidence = 0.8),
  appearance = list(
    rhs = c("Survived=No", "Survived=Yes"),
    default = "lhs"
  ),
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  control = list(verbose = FALSE)
)
# Order the rules from highest to lowest lift.
rules.sorted <- sort(rules, decreasing = TRUE, na.last = TRUE, by = "lift")
inspect(rules.sorted)
##   lhs             rhs               support confidence     lift
## 1 {Class=2nd,                                                  
##    Age=Child}  => {Survived=Yes} 0.01090413  1.0000000 3.095640
## 2 {Class=1st,                                                  
##    Sex=Female} => {Survived=Yes} 0.06406179  0.9724138 3.010243
## 3 {Class=1st,                                                  
##    Sex=Female,                                                 
##    Age=Adult}  => {Survived=Yes} 0.06360745  0.9722222 3.009650
## 4 {Class=2nd,                                                  
##    Sex=Female} => {Survived=Yes} 0.04225352  0.8773585 2.715986
## 5 {Class=2nd,                                                  
##    Sex=Female,                                                 
##    Age=Adult}  => {Survived=Yes} 0.03634711  0.8602151 2.662916
## 6 {Class=2nd,                                                  
##    Sex=Male,                                                   
##    Age=Adult}  => {Survived=No}  0.06996820  0.9166667 1.354083
## 7 {Class=2nd,                                                  
##    Sex=Male}   => {Survived=No}  0.06996820  0.8603352 1.270871
## 8 {Class=3rd,                                                  
##    Sex=Male,                                                   
##    Age=Adult}  => {Survived=No}  0.17582917  0.8376623 1.237379
## 9 {Class=3rd,                                                  
##    Sex=Male}   => {Survived=No}  0.19173103  0.8274510 1.222295
# Find redundant rules. Entry [i, j] of the subset matrix is TRUE when rule i
# is a subset of rule j. sparse = FALSE requests an ordinary logical matrix:
# current arules releases return a sparse matrix by default, and the
# lower.tri()/NA masking below was written for a dense one.
subset_flags <- is.subset(rules.sorted, rules.sorted, sparse = FALSE)
# Blank out the diagonal and the lower triangle so that each rule is compared
# only with rules that rank above it in the lift ordering.
subset_flags[lower.tri(subset_flags, diag = TRUE)] <- NA
# A rule is redundant when at least one higher-ranked rule is a subset of it.
redundant <- colSums(subset_flags, na.rm = TRUE) >= 1
which(redundant)
## [1] 3 5
# Keep only the non-redundant rules and display them.
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)
##   lhs             rhs               support confidence     lift
## 1 {Class=2nd,                                                  
##    Age=Child}  => {Survived=Yes} 0.01090413  1.0000000 3.095640
## 2 {Class=1st,                                                  
##    Sex=Female} => {Survived=Yes} 0.06406179  0.9724138 3.010243
## 3 {Class=2nd,                                                  
##    Sex=Female} => {Survived=Yes} 0.04225352  0.8773585 2.715986
## 4 {Class=2nd,                                                  
##    Sex=Male,                                                   
##    Age=Adult}  => {Survived=No}  0.06996820  0.9166667 1.354083
## 5 {Class=2nd,                                                  
##    Sex=Male}   => {Survived=No}  0.06996820  0.8603352 1.270871
## 6 {Class=3rd,                                                  
##    Sex=Male,                                                   
##    Age=Adult}  => {Survived=No}  0.17582917  0.8376623 1.237379
## 7 {Class=3rd,                                                  
##    Sex=Male}   => {Survived=No}  0.19173103  0.8274510 1.222295