# Restore the objects stored in the saved .rdata file, then print the
# structure of `titanic.raw` (expected to be one of the restored objects).
load(file = "C:/Users/Sonal/Documents/titanic.raw.rdata")
str(object = titanic.raw)
## 'data.frame': 2201 obs. of 4 variables:
## $ Class : Factor w/ 4 levels "1st","2nd","3rd",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : Factor w/ 2 levels "Adult","Child": 2 2 2 2 2 2 2 2 2 2 ...
## $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
# Mine association rules from the data set without overriding any of
# apriori()'s parameters; the values actually used are echoed in the log below.
rules <- apriori(data = titanic.raw)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 220
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [27 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print every mined rule together with its quality measures.
inspect(x = rules)
## lhs rhs support
## [1] {} => {Age=Adult} 0.9504771
## [2] {Class=2nd} => {Age=Adult} 0.1185825
## [3] {Class=1st} => {Age=Adult} 0.1449341
## [4] {Sex=Female} => {Age=Adult} 0.1930940
## [5] {Class=3rd} => {Age=Adult} 0.2848705
## [6] {Survived=Yes} => {Age=Adult} 0.2971377
## [7] {Class=Crew} => {Sex=Male} 0.3916402
## [8] {Class=Crew} => {Age=Adult} 0.4020900
## [9] {Survived=No} => {Sex=Male} 0.6197183
## [10] {Survived=No} => {Age=Adult} 0.6533394
## [11] {Sex=Male} => {Age=Adult} 0.7573830
## [12] {Sex=Female,Survived=Yes} => {Age=Adult} 0.1435711
## [13] {Class=3rd,Sex=Male} => {Survived=No} 0.1917310
## [14] {Class=3rd,Survived=No} => {Age=Adult} 0.2162653
## [15] {Class=3rd,Sex=Male} => {Age=Adult} 0.2099046
## [16] {Sex=Male,Survived=Yes} => {Age=Adult} 0.1535666
## [17] {Class=Crew,Survived=No} => {Sex=Male} 0.3044071
## [18] {Class=Crew,Survived=No} => {Age=Adult} 0.3057701
## [19] {Class=Crew,Sex=Male} => {Age=Adult} 0.3916402
## [20] {Class=Crew,Age=Adult} => {Sex=Male} 0.3916402
## [21] {Sex=Male,Survived=No} => {Age=Adult} 0.6038164
## [22] {Age=Adult,Survived=No} => {Sex=Male} 0.6038164
## [23] {Class=3rd,Sex=Male,Survived=No} => {Age=Adult} 0.1758292
## [24] {Class=3rd,Age=Adult,Survived=No} => {Sex=Male} 0.1758292
## [25] {Class=3rd,Sex=Male,Age=Adult} => {Survived=No} 0.1758292
## [26] {Class=Crew,Sex=Male,Survived=No} => {Age=Adult} 0.3044071
## [27] {Class=Crew,Age=Adult,Survived=No} => {Sex=Male} 0.3044071
## confidence lift
## [1] 0.9504771 1.0000000
## [2] 0.9157895 0.9635051
## [3] 0.9815385 1.0326798
## [4] 0.9042553 0.9513700
## [5] 0.8881020 0.9343750
## [6] 0.9198312 0.9677574
## [7] 0.9740113 1.2384742
## [8] 1.0000000 1.0521033
## [9] 0.9154362 1.1639949
## [10] 0.9651007 1.0153856
## [11] 0.9630272 1.0132040
## [12] 0.9186047 0.9664669
## [13] 0.8274510 1.2222950
## [14] 0.9015152 0.9484870
## [15] 0.9058824 0.9530818
## [16] 0.9209809 0.9689670
## [17] 0.9955423 1.2658514
## [18] 1.0000000 1.0521033
## [19] 1.0000000 1.0521033
## [20] 0.9740113 1.2384742
## [21] 0.9743402 1.0251065
## [22] 0.9242003 1.1751385
## [23] 0.9170616 0.9648435
## [24] 0.8130252 1.0337773
## [25] 0.8376623 1.2373791
## [26] 1.0000000 1.0521033
## [27] 0.9955423 1.2658514
# Mine rules whose right-hand side is restricted to the survival outcome.
#   - minlen = 2 excludes rules with an empty left-hand side.
#   - support / confidence are the minimum thresholds a rule must reach.
#   - appearance pins "Survived=No" / "Survived=Yes" to the rhs; every other
#     item may appear on the lhs only (default = "lhs").
#   - verbose = FALSE silences the progress log. FALSE is spelled out because
#     `F` is an ordinary variable that can be reassigned.
rules <- apriori(
  titanic.raw,
  parameter = list(minlen = 2, support = 0.005, confidence = 0.8),
  appearance = list(
    rhs = c("Survived=No", "Survived=Yes"),
    default = "lhs"
  ),
  control = list(verbose = FALSE)
)

# Order the rules by lift (highest first, as the listing below shows) and
# print them.
rules.sorted <- sort(rules, by = "lift")
inspect(rules.sorted)
## lhs rhs support
## [1] {Class=2nd,Age=Child} => {Survived=Yes} 0.010904134
## [2] {Class=2nd,Sex=Female,Age=Child} => {Survived=Yes} 0.005906406
## [3] {Class=1st,Sex=Female} => {Survived=Yes} 0.064061790
## [4] {Class=1st,Sex=Female,Age=Adult} => {Survived=Yes} 0.063607451
## [5] {Class=2nd,Sex=Female} => {Survived=Yes} 0.042253521
## [6] {Class=Crew,Sex=Female} => {Survived=Yes} 0.009086779
## [7] {Class=Crew,Sex=Female,Age=Adult} => {Survived=Yes} 0.009086779
## [8] {Class=2nd,Sex=Female,Age=Adult} => {Survived=Yes} 0.036347115
## [9] {Class=2nd,Sex=Male,Age=Adult} => {Survived=No} 0.069968196
## [10] {Class=2nd,Sex=Male} => {Survived=No} 0.069968196
## [11] {Class=3rd,Sex=Male,Age=Adult} => {Survived=No} 0.175829169
## [12] {Class=3rd,Sex=Male} => {Survived=No} 0.191731031
## confidence lift
## [1] 1.0000000 3.095640
## [2] 1.0000000 3.095640
## [3] 0.9724138 3.010243
## [4] 0.9722222 3.009650
## [5] 0.8773585 2.715986
## [6] 0.8695652 2.691861
## [7] 0.8695652 2.691861
## [8] 0.8602151 2.662916
## [9] 0.9166667 1.354083
## [10] 0.8603352 1.270871
## [11] 0.8376623 1.237379
## [12] 0.8274510 1.222295
# find redundant rules
# subset.matrix[i, j] is TRUE when the items of rule i are contained in the
# items of rule j. sparse = FALSE requests an ordinary logical matrix: recent
# arules releases return a sparse matrix by default, which does not support
# the NA assignment below.
subset.matrix <- is.subset(rules.sorted, rules.sorted, sparse = FALSE)

# Blank out the diagonal and the lower triangle so that each rule (column) is
# only compared against rules that come before it in the lift-sorted order.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- NA

# A rule is redundant when at least one earlier rule is a subset of it.
redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
which(redundant)
## {Class=2nd,Sex=Female,Age=Child,Survived=Yes}
## 2
## {Class=1st,Sex=Female,Age=Adult,Survived=Yes}
## 4
## {Class=Crew,Sex=Female,Age=Adult,Survived=Yes}
## 7
## {Class=2nd,Sex=Female,Age=Adult,Survived=Yes}
## 8
# Keep only the rules that were not flagged as redundant, then print them.
rules.pruned <- rules.sorted[!redundant]
inspect(x = rules.pruned)
## lhs rhs support
## [1] {Class=2nd,Age=Child} => {Survived=Yes} 0.010904134
## [2] {Class=1st,Sex=Female} => {Survived=Yes} 0.064061790
## [3] {Class=2nd,Sex=Female} => {Survived=Yes} 0.042253521
## [4] {Class=Crew,Sex=Female} => {Survived=Yes} 0.009086779
## [5] {Class=2nd,Sex=Male,Age=Adult} => {Survived=No} 0.069968196
## [6] {Class=2nd,Sex=Male} => {Survived=No} 0.069968196
## [7] {Class=3rd,Sex=Male,Age=Adult} => {Survived=No} 0.175829169
## [8] {Class=3rd,Sex=Male} => {Survived=No} 0.191731031
## confidence lift
## [1] 1.0000000 3.095640
## [2] 0.9724138 3.010243
## [3] 0.8773585 2.715986
## [4] 0.8695652 2.691861
## [5] 0.9166667 1.354083
## [6] 0.8603352 1.270871
## [7] 0.8376623 1.237379
## [8] 0.8274510 1.222295
library(arulesViz)
## Loading required package: grid
## Warning: failed to assign NativeSymbolInfo for lhs since lhs is already
## defined in the 'lazyeval' namespace
## Warning: failed to assign NativeSymbolInfo for rhs since rhs is already
## defined in the 'lazyeval' namespace
# Default plot of the rule set.
plot(x = rules)

# Graph-based view, with the control option type = "items".
plot(
  x = rules,
  method = "graph",
  control = list(type = "items")
)

# Parallel-coordinates view, with reordering enabled.
plot(
  x = rules,
  method = "paracoord",
  control = list(reorder = TRUE)
)
