1. Install Package
# Install arules only when it is not already available, so re-running the
# script does not download the package again. The mirror is addressed over
# HTTPS rather than plain HTTP.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", repos = "https://cran.rstudio.com/")
}
##
## The downloaded binary packages are in
## /var/folders/3z/jqczpc_95yq_sbgl2665kg2c0000gq/T//RtmprGVTsp/downloaded_packages
library(Matrix)
library(arules)
##
## Attaching package: 'arules'
##
## The following objects are masked from 'package:base':
##
## %in%, write
2. Read Data
# Restore the objects saved in the .rdata file into the workspace; the next
# line relies on it providing `titanic.raw`.
load(file = "titanic.raw.rdata")
# Work on the data under the shorter name `data`.
data <- titanic.raw
# Print a compact overview of the columns and their types.
str(object = data)
## 'data.frame': 2201 obs. of 4 variables:
## $ Class : Factor w/ 4 levels "1st","2nd","3rd",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : Factor w/ 2 levels "Adult","Child": 2 2 2 2 2 2 2 2 2 2 ...
## $ Survived: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
3. Apriori
# Mine association rules with no parameters supplied, so the package defaults
# apply (the run log below reports support 0.1, confidence 0.8, minlen 1,
# maxlen 10).
rules <- apriori(data = data)
##
## Parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## 0.8 0.1 1 none FALSE TRUE 0.1 1 10
## target ext
## rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [27 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print every mined rule together with its support, confidence and lift.
arules::inspect(rules)
## lhs rhs support confidence lift
## 1 {} => {Age=Adult} 0.9504771 0.9504771 1.0000000
## 2 {Class=2nd} => {Age=Adult} 0.1185825 0.9157895 0.9635051
## 3 {Class=1st} => {Age=Adult} 0.1449341 0.9815385 1.0326798
## 4 {Sex=Female} => {Age=Adult} 0.1930940 0.9042553 0.9513700
## 5 {Class=3rd} => {Age=Adult} 0.2848705 0.8881020 0.9343750
## 6 {Survived=Yes} => {Age=Adult} 0.2971377 0.9198312 0.9677574
## 7 {Class=Crew} => {Sex=Male} 0.3916402 0.9740113 1.2384742
## 8 {Class=Crew} => {Age=Adult} 0.4020900 1.0000000 1.0521033
## 9 {Survived=No} => {Sex=Male} 0.6197183 0.9154362 1.1639949
## 10 {Survived=No} => {Age=Adult} 0.6533394 0.9651007 1.0153856
## 11 {Sex=Male} => {Age=Adult} 0.7573830 0.9630272 1.0132040
## 12 {Sex=Female,
## Survived=Yes} => {Age=Adult} 0.1435711 0.9186047 0.9664669
## 13 {Class=3rd,
## Sex=Male} => {Survived=No} 0.1917310 0.8274510 1.2222950
## 14 {Class=3rd,
## Survived=No} => {Age=Adult} 0.2162653 0.9015152 0.9484870
## 15 {Class=3rd,
## Sex=Male} => {Age=Adult} 0.2099046 0.9058824 0.9530818
## 16 {Sex=Male,
## Survived=Yes} => {Age=Adult} 0.1535666 0.9209809 0.9689670
## 17 {Class=Crew,
## Survived=No} => {Sex=Male} 0.3044071 0.9955423 1.2658514
## 18 {Class=Crew,
## Survived=No} => {Age=Adult} 0.3057701 1.0000000 1.0521033
## 19 {Class=Crew,
## Sex=Male} => {Age=Adult} 0.3916402 1.0000000 1.0521033
## 20 {Class=Crew,
## Age=Adult} => {Sex=Male} 0.3916402 0.9740113 1.2384742
## 21 {Sex=Male,
## Survived=No} => {Age=Adult} 0.6038164 0.9743402 1.0251065
## 22 {Age=Adult,
## Survived=No} => {Sex=Male} 0.6038164 0.9242003 1.1751385
## 23 {Class=3rd,
## Sex=Male,
## Survived=No} => {Age=Adult} 0.1758292 0.9170616 0.9648435
## 24 {Class=3rd,
## Age=Adult,
## Survived=No} => {Sex=Male} 0.1758292 0.8130252 1.0337773
## 25 {Class=3rd,
## Sex=Male,
## Age=Adult} => {Survived=No} 0.1758292 0.8376623 1.2373791
## 26 {Class=Crew,
## Sex=Male,
## Survived=No} => {Age=Adult} 0.3044071 1.0000000 1.0521033
## 27 {Class=Crew,
## Age=Adult,
## Survived=No} => {Sex=Male} 0.3044071 0.9955423 1.2658514
4. Set Target, Sort by Lift & Prune Redundant Rules
# Mine only rules that predict the survival outcome: the rhs is restricted to
# the two Survived items, and every other item may appear on the lhs only.
# minlen = 2 requires at least two items per rule, i.e. a non-empty lhs.
# Thresholds: support >= 0.01, confidence >= 0.8. Progress output is silenced.
rules <- apriori(
  data,
  parameter = list(minlen = 2, support = 0.01, confidence = 0.8),
  appearance = list(
    rhs = c("Survived=No", "Survived=Yes"),
    default = "lhs"
  ),
  control = list(verbose = FALSE)
)
# Rank the rules from highest to lowest lift.
rules.sorted <- sort(rules, decreasing = TRUE, na.last = TRUE, by = "lift")
inspect(rules.sorted)
## lhs rhs support confidence lift
## 1 {Class=2nd,
## Age=Child} => {Survived=Yes} 0.01090413 1.0000000 3.095640
## 2 {Class=1st,
## Sex=Female} => {Survived=Yes} 0.06406179 0.9724138 3.010243
## 3 {Class=1st,
## Sex=Female,
## Age=Adult} => {Survived=Yes} 0.06360745 0.9722222 3.009650
## 4 {Class=2nd,
## Sex=Female} => {Survived=Yes} 0.04225352 0.8773585 2.715986
## 5 {Class=2nd,
## Sex=Female,
## Age=Adult} => {Survived=Yes} 0.03634711 0.8602151 2.662916
## 6 {Class=2nd,
## Sex=Male,
## Age=Adult} => {Survived=No} 0.06996820 0.9166667 1.354083
## 7 {Class=2nd,
## Sex=Male} => {Survived=No} 0.06996820 0.8603352 1.270871
## 8 {Class=3rd,
## Sex=Male,
## Age=Adult} => {Survived=No} 0.17582917 0.8376623 1.237379
## 9 {Class=3rd,
## Sex=Male} => {Survived=No} 0.19173103 0.8274510 1.222295
# Flag redundant rules. Entry [i, j] of the matrix is TRUE when rule i is a
# subset of rule j. as.matrix() forces a dense logical matrix: current arules
# versions return a sparse matrix from is.subset() by default, which cannot
# hold the NA values assigned below.
subset.matrix <- as.matrix(is.subset(rules.sorted, rules.sorted))
# Blank out the diagonal and the lower triangle so that each rule is compared
# only against rules ranked above it (rules.sorted is ordered by lift).
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- NA
# A rule is redundant when at least one higher-ranked rule is a subset of it.
redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
which(redundant)
## [1] 3 5
# Keep the rules that were not flagged as redundant, then display them.
rules.pruned <- rules.sorted[which(!redundant)]
arules::inspect(rules.pruned)
## lhs rhs support confidence lift
## 1 {Class=2nd,
## Age=Child} => {Survived=Yes} 0.01090413 1.0000000 3.095640
## 2 {Class=1st,
## Sex=Female} => {Survived=Yes} 0.06406179 0.9724138 3.010243
## 3 {Class=2nd,
## Sex=Female} => {Survived=Yes} 0.04225352 0.8773585 2.715986
## 4 {Class=2nd,
## Sex=Male,
## Age=Adult} => {Survived=No} 0.06996820 0.9166667 1.354083
## 5 {Class=2nd,
## Sex=Male} => {Survived=No} 0.06996820 0.8603352 1.270871
## 6 {Class=3rd,
## Sex=Male,
## Age=Adult} => {Survived=No} 0.17582917 0.8376623 1.237379
## 7 {Class=3rd,
## Sex=Male} => {Survived=No} 0.19173103 0.8274510 1.222295