了解R语言创建购物篮数据格式方法;深入了解Apriori算法的R语言实现
R语言,Rstudio, Rmarkdown
titanic
对titanic数据进行关联规则挖掘分析
输出关联规则,并结合可视化图像对结果进行解释;
Titanic频数统计数据是R语言自带的数据集,该数据集已经是对原始数据进行频数统计后得到的计数数据。首先我们读取数据,并将其转化成表格数据进行存储。
# Load the built-in Titanic contingency table and show its structure.
data("Titanic")
str(Titanic)
## 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
## - attr(*, "dimnames")=List of 4
## ..$ Class : chr [1:4] "1st" "2nd" "3rd" "Crew"
## ..$ Sex : chr [1:2] "Male" "Female"
## ..$ Age : chr [1:2] "Child" "Adult"
## ..$ Survived: chr [1:2] "No" "Yes"
# Print the 4-way contingency table (Class x Sex x Age x Survived).
Titanic
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20
# Flatten the contingency table into one row per Class/Sex/Age/Survived
# combination with its count in `Freq`, and keep copies on disk.
df <- as.data.frame(Titanic)
write.table(df, file = "./Titanic.txt")
write.csv(df, file = "./Titanic.csv")
head(df)
## Class Sex Age Survived Freq
## 1 1st Male Child No 0
## 2 2nd Male Child No 0
## 3 3rd Male Child No 35
## 4 Crew Male Child No 0
## 5 1st Female Child No 0
## 6 2nd Female Child No 0
# Summarise each column of the frequency table.
summary(df)
## Class Sex Age Survived Freq
## 1st :8 Male :16 Child:16 No :16 Min. : 0.00
## 2nd :8 Female:16 Adult:16 Yes:16 1st Qu.: 0.75
## 3rd :8 Median : 13.50
## Crew:8 Mean : 68.78
## 3rd Qu.: 77.00
## Max. :670.00
将titanic数据还原成原始数据,并记录为titanic.raw
# Expand the frequency table back to one row per person: each value in the
# first four columns is repeated `Freq` times. All columns are built first and
# bound in a single cbind() call instead of growing a matrix inside a loop.
# The bound matrix has no column names, so the data frame gets V1..V4.
titanic.raw <- as.data.frame(
  do.call(
    cbind,
    lapply(1:4, function(col_idx) rep(as.character(df[, col_idx]), df$Freq))
  )
)
head(titanic.raw)
## V1 V2 V3 V4
## 1 3rd Male Child No
## 2 3rd Male Child No
## 3 3rd Male Child No
## 4 3rd Male Child No
## 5 3rd Male Child No
## 6 3rd Male Child No
# Carry the variable names over from the first four columns of the frequency
# table, then confirm the size of the expanded data.
names(titanic.raw) <- head(names(df), 4)
dim(titanic.raw)
## [1] 2201 4
# Inspect the column types of the expanded data.
str(titanic.raw)
## 'data.frame': 2201 obs. of 4 variables:
## $ Class : chr "3rd" "3rd" "3rd" "3rd" ...
## $ Sex : chr "Male" "Male" "Male" "Male" ...
## $ Age : chr "Child" "Child" "Child" "Child" ...
## $ Survived: chr "No" "No" "No" "No" ...
# Preview the first rows of the expanded data.
head(titanic.raw)
## Class Sex Age Survived
## 1 3rd Male Child No
## 2 3rd Male Child No
## 3 3rd Male Child No
## 4 3rd Male Child No
## 5 3rd Male Child No
## 6 3rd Male Child No
# Summarise each column of the expanded data.
summary(titanic.raw)
## Class Sex Age Survived
## Length:2201 Length:2201 Length:2201 Length:2201
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
# Round-trip the expanded data through CSV. write.csv() stores the row names
# as an extra first column, so that column is dropped again after reading
# (instead of hard-coding the positions of the remaining columns).
write.csv(titanic.raw, "./titanic_raw.csv")
titanic <- read.csv("./titanic_raw.csv", header = TRUE)[, -1]
head(titanic)
## Class Sex Age Survived
## 1 3rd Male Child No
## 2 3rd Male Child No
## 3 3rd Male Child No
## 4 3rd Male Child No
## 5 3rd Male Child No
## 6 3rd Male Child No
library(arules)
library(arulesViz)
library(tidyverse)
# Convert every column to a factor (all columns at once, no hard-coded
# column count), then coerce the data frame to arules transactions, where
# each variable=level pair becomes one item (e.g. Class=1st).
titanic[] <- lapply(titanic, as.factor)
titanic <- as(titanic, "transactions")
summary(titanic)
## transactions as itemMatrix in sparse format with
## 2201 rows (elements/itemsets/transactions) and
## 10 columns (items) and a density of 0.4
##
## most frequent items:
## Age=Adult Sex=Male Survived=No Class=Crew Survived=Yes (Other)
## 2092 1731 1490 885 711 1895
##
## element (itemset/transaction) length distribution:
## sizes
## 4
## 2201
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4 4 4 4 4 4
##
## includes extended item information - examples:
## labels variables levels
## 1 Class=1st Class 1st
## 2 Class=2nd Class 2nd
## 3 Class=3rd Class 3rd
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
# Share of transactions that contain each item.
itemFrequency(titanic)
## Class=1st Class=2nd Class=3rd Class=Crew Sex=Female Sex=Male
## 0.14766015 0.12948660 0.32076329 0.40208996 0.21353930 0.78646070
## Age=Adult Age=Child Survived=No Survived=Yes
## 0.95047706 0.04952294 0.67696502 0.32303498
# Item frequencies ordered from most to least common.
itemFrequency <- sort(itemFrequency(titanic), decreasing = TRUE)
# Show the ten most common items, rounded to three decimals, as a one-column
# table.
round(itemFrequency[1:10], 3) %>%
  data.frame()
## .
## Age=Adult 0.950
## Sex=Male 0.786
## Survived=No 0.677
## Class=Crew 0.402
## Survived=Yes 0.323
## Class=3rd 0.321
## Sex=Female 0.214
## Class=1st 0.148
## Class=2nd 0.129
## Age=Child 0.050
# Draw four item-frequency charts on a 2 x 2 grid. The previous graphics
# settings are saved and restored afterwards so that later plots are not
# drawn into the same grid.
old_par <- par(mfrow = c(2, 2))
titanic %>% itemFrequencyPlot(topN = 10, type = "absolute")
titanic %>% itemFrequencyPlot(topN = 10, type = "relative")
titanic %>% itemFrequencyPlot(topN = 10, horiz = TRUE)
titanic %>% itemFrequencyPlot(topN = 10, support = 0.005, horiz = TRUE)
par(old_par)
将支持度和置信度分别设置为support=0.005,confidence=0.8,分别生成最大频繁项集和频繁项集,并进行结果解读。
# Print numbers with 3 significant digits.
options(digits = 3)
# Mine frequent itemsets with minimum support 0.005 and at most 4 items.
# NOTE(review): `conf` appears to be unused for this target -- the parameter
# echo from apriori() reports the confidence as NA.
parameter1 <- list(
  supp = 0.005,
  conf = 0.8,
  maxlen = 4,
  target = "frequent itemsets"
)
frequent_itemsets <- titanic %>%
  apriori(parameter = parameter1)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.005 1
## maxlen target ext
## 4 frequent itemsets TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 11
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## sorting transactions ... done [0.00s].
## writing ... [107 set(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# List every frequent itemset with its support and count.
inspect(frequent_itemsets)
## items support count
## [1] {Age=Child} 0.04952 109
## [2] {Class=2nd} 0.12949 285
## [3] {Class=1st} 0.14766 325
## [4] {Sex=Female} 0.21354 470
## [5] {Class=3rd} 0.32076 706
## [6] {Survived=Yes} 0.32303 711
## [7] {Class=Crew} 0.40209 885
## [8] {Survived=No} 0.67697 1490
## [9] {Sex=Male} 0.78646 1731
## [10] {Age=Adult} 0.95048 2092
## [11] {Class=2nd, Age=Child} 0.01090 24
## [12] {Sex=Female, Age=Child} 0.02045 45
## [13] {Class=3rd, Age=Child} 0.03589 79
## [14] {Age=Child, Survived=Yes} 0.02590 57
## [15] {Age=Child, Survived=No} 0.02363 52
## [16] {Sex=Male, Age=Child} 0.02908 64
## [17] {Class=2nd, Sex=Female} 0.04816 106
## [18] {Class=2nd, Survived=Yes} 0.05361 118
## [19] {Class=2nd, Survived=No} 0.07587 167
## [20] {Class=2nd, Sex=Male} 0.08133 179
## [21] {Class=2nd, Age=Adult} 0.11858 261
## [22] {Class=1st, Sex=Female} 0.06588 145
## [23] {Class=1st, Survived=Yes} 0.09223 203
## [24] {Class=1st, Survived=No} 0.05543 122
## [25] {Class=1st, Sex=Male} 0.08178 180
## [26] {Class=1st, Age=Adult} 0.14493 319
## [27] {Class=3rd, Sex=Female} 0.08905 196
## [28] {Sex=Female, Survived=Yes} 0.15629 344
## [29] {Class=Crew, Sex=Female} 0.01045 23
## [30] {Sex=Female, Survived=No} 0.05725 126
## [31] {Sex=Female, Age=Adult} 0.19309 425
## [32] {Class=3rd, Survived=Yes} 0.08087 178
## [33] {Class=3rd, Survived=No} 0.23989 528
## [34] {Class=3rd, Sex=Male} 0.23171 510
## [35] {Class=3rd, Age=Adult} 0.28487 627
## [36] {Class=Crew, Survived=Yes} 0.09632 212
## [37] {Sex=Male, Survived=Yes} 0.16674 367
## [38] {Age=Adult, Survived=Yes} 0.29714 654
## [39] {Class=Crew, Survived=No} 0.30577 673
## [40] {Class=Crew, Sex=Male} 0.39164 862
## [41] {Class=Crew, Age=Adult} 0.40209 885
## [42] {Sex=Male, Survived=No} 0.61972 1364
## [43] {Age=Adult, Survived=No} 0.65334 1438
## [44] {Sex=Male, Age=Adult} 0.75738 1667
## [45] {Class=2nd, Sex=Female, Age=Child} 0.00591 13
## [46] {Class=2nd, Age=Child, Survived=Yes} 0.01090 24
## [47] {Class=3rd, Sex=Female, Age=Child} 0.01408 31
## [48] {Sex=Female, Age=Child, Survived=Yes} 0.01272 28
## [49] {Sex=Female, Age=Child, Survived=No} 0.00772 17
## [50] {Class=3rd, Age=Child, Survived=Yes} 0.01227 27
## [51] {Class=3rd, Age=Child, Survived=No} 0.02363 52
## [52] {Class=3rd, Sex=Male, Age=Child} 0.02181 48
## [53] {Sex=Male, Age=Child, Survived=Yes} 0.01318 29
## [54] {Sex=Male, Age=Child, Survived=No} 0.01590 35
## [55] {Class=2nd, Sex=Female, Survived=Yes} 0.04225 93
## [56] {Class=2nd, Sex=Female, Survived=No} 0.00591 13
## [57] {Class=2nd, Sex=Female, Age=Adult} 0.04225 93
## [58] {Class=2nd, Sex=Male, Survived=Yes} 0.01136 25
## [59] {Class=2nd, Age=Adult, Survived=Yes} 0.04271 94
## [60] {Class=2nd, Sex=Male, Survived=No} 0.06997 154
## [61] {Class=2nd, Age=Adult, Survived=No} 0.07587 167
## [62] {Class=2nd, Sex=Male, Age=Adult} 0.07633 168
## [63] {Class=1st, Sex=Female, Survived=Yes} 0.06406 141
## [64] {Class=1st, Sex=Female, Age=Adult} 0.06542 144
## [65] {Class=1st, Sex=Male, Survived=Yes} 0.02817 62
## [66] {Class=1st, Age=Adult, Survived=Yes} 0.08950 197
## [67] {Class=1st, Sex=Male, Survived=No} 0.05361 118
## [68] {Class=1st, Age=Adult, Survived=No} 0.05543 122
## [69] {Class=1st, Sex=Male, Age=Adult} 0.07951 175
## [70] {Class=3rd, Sex=Female, Survived=Yes} 0.04089 90
## [71] {Class=3rd, Sex=Female, Survived=No} 0.04816 106
## [72] {Class=3rd, Sex=Female, Age=Adult} 0.07497 165
## [73] {Class=Crew, Sex=Female, Survived=Yes} 0.00909 20
## [74] {Sex=Female, Age=Adult, Survived=Yes} 0.14357 316
## [75] {Class=Crew, Sex=Female, Age=Adult} 0.01045 23
## [76] {Sex=Female, Age=Adult, Survived=No} 0.04952 109
## [77] {Class=3rd, Sex=Male, Survived=Yes} 0.03998 88
## [78] {Class=3rd, Age=Adult, Survived=Yes} 0.06861 151
## [79] {Class=3rd, Sex=Male, Survived=No} 0.19173 422
## [80] {Class=3rd, Age=Adult, Survived=No} 0.21627 476
## [81] {Class=3rd, Sex=Male, Age=Adult} 0.20990 462
## [82] {Class=Crew, Sex=Male, Survived=Yes} 0.08723 192
## [83] {Class=Crew, Age=Adult, Survived=Yes} 0.09632 212
## [84] {Sex=Male, Age=Adult, Survived=Yes} 0.15357 338
## [85] {Class=Crew, Sex=Male, Survived=No} 0.30441 670
## [86] {Class=Crew, Age=Adult, Survived=No} 0.30577 673
## [87] {Class=Crew, Sex=Male, Age=Adult} 0.39164 862
## [88] {Sex=Male, Age=Adult, Survived=No} 0.60382 1329
## [89] {Class=2nd, Sex=Female, Age=Child, Survived=Yes} 0.00591 13
## [90] {Class=3rd, Sex=Female, Age=Child, Survived=Yes} 0.00636 14
## [91] {Class=3rd, Sex=Female, Age=Child, Survived=No} 0.00772 17
## [92] {Class=3rd, Sex=Male, Age=Child, Survived=Yes} 0.00591 13
## [93] {Class=3rd, Sex=Male, Age=Child, Survived=No} 0.01590 35
## [94] {Class=2nd, Sex=Female, Age=Adult, Survived=Yes} 0.03635 80
## [95] {Class=2nd, Sex=Female, Age=Adult, Survived=No} 0.00591 13
## [96] {Class=2nd, Sex=Male, Age=Adult, Survived=Yes} 0.00636 14
## [97] {Class=2nd, Sex=Male, Age=Adult, Survived=No} 0.06997 154
## [98] {Class=1st, Sex=Female, Age=Adult, Survived=Yes} 0.06361 140
## [99] {Class=1st, Sex=Male, Age=Adult, Survived=Yes} 0.02590 57
## [100] {Class=1st, Sex=Male, Age=Adult, Survived=No} 0.05361 118
## [101] {Class=3rd, Sex=Female, Age=Adult, Survived=Yes} 0.03453 76
## [102] {Class=3rd, Sex=Female, Age=Adult, Survived=No} 0.04044 89
## [103] {Class=Crew, Sex=Female, Age=Adult, Survived=Yes} 0.00909 20
## [104] {Class=3rd, Sex=Male, Age=Adult, Survived=Yes} 0.03408 75
## [105] {Class=3rd, Sex=Male, Age=Adult, Survived=No} 0.17583 387
## [106] {Class=Crew, Sex=Male, Age=Adult, Survived=Yes} 0.08723 192
## [107] {Class=Crew, Sex=Male, Age=Adult, Survived=No} 0.30441 670
# Print numbers with 3 significant digits.
options(digits = 3)
# Same thresholds as before, but keep only the maximal frequent itemsets.
# NOTE(review): `conf` appears to be unused for this target -- the parameter
# echo from apriori() reports the confidence as NA.
parameter2 <- list(
  supp = 0.005,
  conf = 0.8,
  maxlen = 4,
  target = "maximally frequent itemsets"
)
maximally_frequent_itemsets <- titanic %>%
  apriori(parameter = parameter2)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.005 1
## maxlen target ext
## 4 maximally frequent itemsets TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 11
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## filtering maximal item sets ... done [0.00s].
## sorting transactions ... done [0.00s].
## writing ... [19 set(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# List the maximal frequent itemsets with their support and count.
inspect(maximally_frequent_itemsets)
## items support count
## [1] {Class=2nd, Sex=Female, Age=Child, Survived=Yes} 0.00591 13
## [2] {Class=3rd, Sex=Female, Age=Child, Survived=Yes} 0.00636 14
## [3] {Class=3rd, Sex=Female, Age=Child, Survived=No} 0.00772 17
## [4] {Class=3rd, Sex=Male, Age=Child, Survived=Yes} 0.00591 13
## [5] {Class=3rd, Sex=Male, Age=Child, Survived=No} 0.01590 35
## [6] {Class=2nd, Sex=Female, Age=Adult, Survived=Yes} 0.03635 80
## [7] {Class=2nd, Sex=Female, Age=Adult, Survived=No} 0.00591 13
## [8] {Class=2nd, Sex=Male, Age=Adult, Survived=Yes} 0.00636 14
## [9] {Class=2nd, Sex=Male, Age=Adult, Survived=No} 0.06997 154
## [10] {Class=1st, Sex=Female, Age=Adult, Survived=Yes} 0.06361 140
## [11] {Class=1st, Sex=Male, Age=Adult, Survived=Yes} 0.02590 57
## [12] {Class=1st, Sex=Male, Age=Adult, Survived=No} 0.05361 118
## [13] {Class=3rd, Sex=Female, Age=Adult, Survived=Yes} 0.03453 76
## [14] {Class=3rd, Sex=Female, Age=Adult, Survived=No} 0.04044 89
## [15] {Class=Crew, Sex=Female, Age=Adult, Survived=Yes} 0.00909 20
## [16] {Class=3rd, Sex=Male, Age=Adult, Survived=Yes} 0.03408 75
## [17] {Class=3rd, Sex=Male, Age=Adult, Survived=No} 0.17583 387
## [18] {Class=Crew, Sex=Male, Age=Adult, Survived=Yes} 0.08723 192
## [19] {Class=Crew, Sex=Male, Age=Adult, Survived=No} 0.30441 670
将支持度和置信度分别设置为support=0.005,confidence=0.8,选择lift>1的强关联规则,并进行结果解读。
# Print numbers with 3 significant digits.
options(digits = 3)
# Mine association rules with minimum support 0.005, minimum confidence 0.8
# and at most 4 items per rule.
parameter3 <- list(
  supp = 0.005,
  conf = 0.8,
  maxlen = 4,
  target = "rules"
)
rules <- titanic %>%
  apriori(parameter = parameter3)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.005 1
## maxlen target ext
## 4 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 11
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [72 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Keep the rules with lift > 1, ordered by decreasing lift. The rule set is
# stored first and printed in a separate step, so `rules_sort` holds the rules
# themselves rather than whatever inspect() returns.
rules_sort <- rules %>%
  sort(by = "lift", decreasing = TRUE) %>%
  subset(lift > 1)
inspect(rules_sort)
## lhs rhs support
## [1] {Class=2nd, Age=Adult, Survived=Yes} => {Sex=Female} 0.03635
## [2] {Age=Child, Survived=No} => {Class=3rd} 0.02363
## [3] {Sex=Female, Age=Child, Survived=No} => {Class=3rd} 0.00772
## [4] {Sex=Male, Age=Child, Survived=No} => {Class=3rd} 0.01590
## [5] {Class=2nd, Age=Child} => {Survived=Yes} 0.01090
## [6] {Class=2nd, Sex=Female, Age=Child} => {Survived=Yes} 0.00591
## [7] {Class=1st, Sex=Female} => {Survived=Yes} 0.06406
## [8] {Class=1st, Sex=Female, Age=Adult} => {Survived=Yes} 0.06361
## [9] {Class=2nd, Sex=Female} => {Survived=Yes} 0.04225
## [10] {Class=Crew, Sex=Female} => {Survived=Yes} 0.00909
## [11] {Class=Crew, Sex=Female, Age=Adult} => {Survived=Yes} 0.00909
## [12] {Class=2nd, Sex=Female, Age=Adult} => {Survived=Yes} 0.03635
## [13] {Sex=Female, Survived=No} => {Class=3rd} 0.04816
## [14] {Sex=Female, Age=Adult, Survived=No} => {Class=3rd} 0.04044
## [15] {Class=2nd, Sex=Male, Age=Adult} => {Survived=No} 0.06997
## [16] {Class=2nd, Sex=Male} => {Survived=No} 0.06997
## [17] {Class=Crew, Survived=No} => {Sex=Male} 0.30441
## [18] {Class=Crew, Age=Adult, Survived=No} => {Sex=Male} 0.30441
## [19] {Class=Crew} => {Sex=Male} 0.39164
## [20] {Class=Crew, Age=Adult} => {Sex=Male} 0.39164
## [21] {Class=3rd, Sex=Male, Age=Adult} => {Survived=No} 0.17583
## [22] {Class=1st, Survived=No} => {Sex=Male} 0.05361
## [23] {Class=1st, Age=Adult, Survived=No} => {Sex=Male} 0.05361
## [24] {Class=3rd, Sex=Male} => {Survived=No} 0.19173
## [25] {Age=Adult, Survived=No} => {Sex=Male} 0.60382
## [26] {Class=2nd, Survived=No} => {Sex=Male} 0.06997
## [27] {Class=2nd, Age=Adult, Survived=No} => {Sex=Male} 0.06997
## [28] {Survived=No} => {Sex=Male} 0.61972
## [29] {Class=Crew, Survived=Yes} => {Sex=Male} 0.08723
## [30] {Class=Crew, Age=Adult, Survived=Yes} => {Sex=Male} 0.08723
## [31] {Class=Crew} => {Age=Adult} 0.40209
## [32] {Class=2nd, Survived=No} => {Age=Adult} 0.07587
## [33] {Class=1st, Survived=No} => {Age=Adult} 0.05543
## [34] {Class=Crew, Sex=Female} => {Age=Adult} 0.01045
## [35] {Class=Crew, Survived=Yes} => {Age=Adult} 0.09632
## [36] {Class=Crew, Survived=No} => {Age=Adult} 0.30577
## [37] {Class=Crew, Sex=Male} => {Age=Adult} 0.39164
## [38] {Class=2nd, Sex=Female, Survived=No} => {Age=Adult} 0.00591
## [39] {Class=2nd, Sex=Male, Survived=No} => {Age=Adult} 0.06997
## [40] {Class=1st, Sex=Male, Survived=No} => {Age=Adult} 0.05361
## [41] {Class=Crew, Sex=Female, Survived=Yes} => {Age=Adult} 0.00909
## [42] {Class=Crew, Sex=Male, Survived=Yes} => {Age=Adult} 0.08723
## [43] {Class=Crew, Sex=Male, Survived=No} => {Age=Adult} 0.30441
## [44] {Class=1st, Sex=Female} => {Age=Adult} 0.06542
## [45] {Class=1st, Sex=Female, Survived=Yes} => {Age=Adult} 0.06361
## [46] {Class=3rd, Age=Adult, Survived=No} => {Sex=Male} 0.17583
## [47] {Class=1st} => {Age=Adult} 0.14493
## [48] {Sex=Male, Survived=No} => {Age=Adult} 0.60382
## [49] {Class=1st, Sex=Male} => {Age=Adult} 0.07951
## [50] {Class=1st, Survived=Yes} => {Age=Adult} 0.08950
## [51] {Survived=No} => {Age=Adult} 0.65334
## [52] {Sex=Male} => {Age=Adult} 0.75738
## confidence coverage lift count
## [1] 0.851 0.04271 3.99 80
## [2] 1.000 0.02363 3.12 52
## [3] 1.000 0.00772 3.12 17
## [4] 1.000 0.01590 3.12 35
## [5] 1.000 0.01090 3.10 24
## [6] 1.000 0.00591 3.10 13
## [7] 0.972 0.06588 3.01 141
## [8] 0.972 0.06542 3.01 140
## [9] 0.877 0.04816 2.72 93
## [10] 0.870 0.01045 2.69 20
## [11] 0.870 0.01045 2.69 20
## [12] 0.860 0.04225 2.66 80
## [13] 0.841 0.05725 2.62 106
## [14] 0.817 0.04952 2.55 89
## [15] 0.917 0.07633 1.35 154
## [16] 0.860 0.08133 1.27 154
## [17] 0.996 0.30577 1.27 670
## [18] 0.996 0.30577 1.27 670
## [19] 0.974 0.40209 1.24 862
## [20] 0.974 0.40209 1.24 862
## [21] 0.838 0.20990 1.24 387
## [22] 0.967 0.05543 1.23 118
## [23] 0.967 0.05543 1.23 118
## [24] 0.827 0.23171 1.22 422
## [25] 0.924 0.65334 1.18 1329
## [26] 0.922 0.07587 1.17 154
## [27] 0.922 0.07587 1.17 154
## [28] 0.915 0.67697 1.16 1364
## [29] 0.906 0.09632 1.15 192
## [30] 0.906 0.09632 1.15 192
## [31] 1.000 0.40209 1.05 885
## [32] 1.000 0.07587 1.05 167
## [33] 1.000 0.05543 1.05 122
## [34] 1.000 0.01045 1.05 23
## [35] 1.000 0.09632 1.05 212
## [36] 1.000 0.30577 1.05 673
## [37] 1.000 0.39164 1.05 862
## [38] 1.000 0.00591 1.05 13
## [39] 1.000 0.06997 1.05 154
## [40] 1.000 0.05361 1.05 118
## [41] 1.000 0.00909 1.05 20
## [42] 1.000 0.08723 1.05 192
## [43] 1.000 0.30441 1.05 670
## [44] 0.993 0.06588 1.04 144
## [45] 0.993 0.06406 1.04 140
## [46] 0.813 0.21627 1.03 387
## [47] 0.982 0.14766 1.03 319
## [48] 0.974 0.61972 1.03 1329
## [49] 0.972 0.08178 1.02 175
## [50] 0.970 0.09223 1.02 197
## [51] 0.965 0.67697 1.02 1438
## [52] 0.963 0.78646 1.01 1667
# Print numbers with 3 significant digits.
options(digits = 3)
# Keep the rules with lift > 1, ordered by decreasing support. The rule set is
# stored first and printed in a separate step, so `rules_sort_supp` holds the
# rules themselves rather than whatever inspect() returns.
rules_sort_supp <- rules %>%
  sort(by = "support", decreasing = TRUE) %>%
  subset(lift > 1)
inspect(rules_sort_supp)
## lhs rhs support
## [1] {Sex=Male} => {Age=Adult} 0.75738
## [2] {Survived=No} => {Age=Adult} 0.65334
## [3] {Survived=No} => {Sex=Male} 0.61972
## [4] {Sex=Male, Survived=No} => {Age=Adult} 0.60382
## [5] {Age=Adult, Survived=No} => {Sex=Male} 0.60382
## [6] {Class=Crew} => {Age=Adult} 0.40209
## [7] {Class=Crew} => {Sex=Male} 0.39164
## [8] {Class=Crew, Sex=Male} => {Age=Adult} 0.39164
## [9] {Class=Crew, Age=Adult} => {Sex=Male} 0.39164
## [10] {Class=Crew, Survived=No} => {Age=Adult} 0.30577
## [11] {Class=Crew, Survived=No} => {Sex=Male} 0.30441
## [12] {Class=Crew, Sex=Male, Survived=No} => {Age=Adult} 0.30441
## [13] {Class=Crew, Age=Adult, Survived=No} => {Sex=Male} 0.30441
## [14] {Class=3rd, Sex=Male} => {Survived=No} 0.19173
## [15] {Class=3rd, Age=Adult, Survived=No} => {Sex=Male} 0.17583
## [16] {Class=3rd, Sex=Male, Age=Adult} => {Survived=No} 0.17583
## [17] {Class=1st} => {Age=Adult} 0.14493
## [18] {Class=Crew, Survived=Yes} => {Age=Adult} 0.09632
## [19] {Class=1st, Survived=Yes} => {Age=Adult} 0.08950
## [20] {Class=Crew, Survived=Yes} => {Sex=Male} 0.08723
## [21] {Class=Crew, Sex=Male, Survived=Yes} => {Age=Adult} 0.08723
## [22] {Class=Crew, Age=Adult, Survived=Yes} => {Sex=Male} 0.08723
## [23] {Class=1st, Sex=Male} => {Age=Adult} 0.07951
## [24] {Class=2nd, Survived=No} => {Age=Adult} 0.07587
## [25] {Class=2nd, Survived=No} => {Sex=Male} 0.06997
## [26] {Class=2nd, Sex=Male} => {Survived=No} 0.06997
## [27] {Class=2nd, Sex=Male, Survived=No} => {Age=Adult} 0.06997
## [28] {Class=2nd, Age=Adult, Survived=No} => {Sex=Male} 0.06997
## [29] {Class=2nd, Sex=Male, Age=Adult} => {Survived=No} 0.06997
## [30] {Class=1st, Sex=Female} => {Age=Adult} 0.06542
## [31] {Class=1st, Sex=Female} => {Survived=Yes} 0.06406
## [32] {Class=1st, Sex=Female, Survived=Yes} => {Age=Adult} 0.06361
## [33] {Class=1st, Sex=Female, Age=Adult} => {Survived=Yes} 0.06361
## [34] {Class=1st, Survived=No} => {Age=Adult} 0.05543
## [35] {Class=1st, Survived=No} => {Sex=Male} 0.05361
## [36] {Class=1st, Sex=Male, Survived=No} => {Age=Adult} 0.05361
## [37] {Class=1st, Age=Adult, Survived=No} => {Sex=Male} 0.05361
## [38] {Sex=Female, Survived=No} => {Class=3rd} 0.04816
## [39] {Class=2nd, Sex=Female} => {Survived=Yes} 0.04225
## [40] {Sex=Female, Age=Adult, Survived=No} => {Class=3rd} 0.04044
## [41] {Class=2nd, Sex=Female, Age=Adult} => {Survived=Yes} 0.03635
## [42] {Class=2nd, Age=Adult, Survived=Yes} => {Sex=Female} 0.03635
## [43] {Age=Child, Survived=No} => {Class=3rd} 0.02363
## [44] {Sex=Male, Age=Child, Survived=No} => {Class=3rd} 0.01590
## [45] {Class=2nd, Age=Child} => {Survived=Yes} 0.01090
## [46] {Class=Crew, Sex=Female} => {Age=Adult} 0.01045
## [47] {Class=Crew, Sex=Female} => {Survived=Yes} 0.00909
## [48] {Class=Crew, Sex=Female, Survived=Yes} => {Age=Adult} 0.00909
## [49] {Class=Crew, Sex=Female, Age=Adult} => {Survived=Yes} 0.00909
## [50] {Sex=Female, Age=Child, Survived=No} => {Class=3rd} 0.00772
## [51] {Class=2nd, Sex=Female, Age=Child} => {Survived=Yes} 0.00591
## [52] {Class=2nd, Sex=Female, Survived=No} => {Age=Adult} 0.00591
## confidence coverage lift count
## [1] 0.963 0.78646 1.01 1667
## [2] 0.965 0.67697 1.02 1438
## [3] 0.915 0.67697 1.16 1364
## [4] 0.974 0.61972 1.03 1329
## [5] 0.924 0.65334 1.18 1329
## [6] 1.000 0.40209 1.05 885
## [7] 0.974 0.40209 1.24 862
## [8] 1.000 0.39164 1.05 862
## [9] 0.974 0.40209 1.24 862
## [10] 1.000 0.30577 1.05 673
## [11] 0.996 0.30577 1.27 670
## [12] 1.000 0.30441 1.05 670
## [13] 0.996 0.30577 1.27 670
## [14] 0.827 0.23171 1.22 422
## [15] 0.813 0.21627 1.03 387
## [16] 0.838 0.20990 1.24 387
## [17] 0.982 0.14766 1.03 319
## [18] 1.000 0.09632 1.05 212
## [19] 0.970 0.09223 1.02 197
## [20] 0.906 0.09632 1.15 192
## [21] 1.000 0.08723 1.05 192
## [22] 0.906 0.09632 1.15 192
## [23] 0.972 0.08178 1.02 175
## [24] 1.000 0.07587 1.05 167
## [25] 0.922 0.07587 1.17 154
## [26] 0.860 0.08133 1.27 154
## [27] 1.000 0.06997 1.05 154
## [28] 0.922 0.07587 1.17 154
## [29] 0.917 0.07633 1.35 154
## [30] 0.993 0.06588 1.04 144
## [31] 0.972 0.06588 3.01 141
## [32] 0.993 0.06406 1.04 140
## [33] 0.972 0.06542 3.01 140
## [34] 1.000 0.05543 1.05 122
## [35] 0.967 0.05543 1.23 118
## [36] 1.000 0.05361 1.05 118
## [37] 0.967 0.05543 1.23 118
## [38] 0.841 0.05725 2.62 106
## [39] 0.877 0.04816 2.72 93
## [40] 0.817 0.04952 2.55 89
## [41] 0.860 0.04225 2.66 80
## [42] 0.851 0.04271 3.99 80
## [43] 1.000 0.02363 3.12 52
## [44] 1.000 0.01590 3.12 35
## [45] 1.000 0.01090 3.10 24
## [46] 1.000 0.01045 1.05 23
## [47] 0.870 0.01045 2.69 20
## [48] 1.000 0.00909 1.05 20
## [49] 0.870 0.01045 2.69 20
## [50] 1.000 0.00772 3.12 17
## [51] 1.000 0.00591 3.10 13
## [52] 1.000 0.00591 1.05 13
# Keep the rules with lift > 1, ordered by decreasing confidence. The rule set
# is stored first and printed in a separate step, so `rules_sort_conf` holds
# the rules themselves rather than whatever inspect() returns.
rules_sort_conf <- rules %>%
  sort(by = "confidence", decreasing = TRUE) %>%
  subset(lift > 1)
inspect(rules_sort_conf)
## lhs rhs support
## [1] {Class=Crew} => {Age=Adult} 0.40209
## [2] {Class=2nd, Age=Child} => {Survived=Yes} 0.01090
## [3] {Age=Child, Survived=No} => {Class=3rd} 0.02363
## [4] {Class=2nd, Survived=No} => {Age=Adult} 0.07587
## [5] {Class=1st, Survived=No} => {Age=Adult} 0.05543
## [6] {Class=Crew, Sex=Female} => {Age=Adult} 0.01045
## [7] {Class=Crew, Survived=Yes} => {Age=Adult} 0.09632
## [8] {Class=Crew, Survived=No} => {Age=Adult} 0.30577
## [9] {Class=Crew, Sex=Male} => {Age=Adult} 0.39164
## [10] {Class=2nd, Sex=Female, Age=Child} => {Survived=Yes} 0.00591
## [11] {Sex=Female, Age=Child, Survived=No} => {Class=3rd} 0.00772
## [12] {Sex=Male, Age=Child, Survived=No} => {Class=3rd} 0.01590
## [13] {Class=2nd, Sex=Female, Survived=No} => {Age=Adult} 0.00591
## [14] {Class=2nd, Sex=Male, Survived=No} => {Age=Adult} 0.06997
## [15] {Class=1st, Sex=Male, Survived=No} => {Age=Adult} 0.05361
## [16] {Class=Crew, Sex=Female, Survived=Yes} => {Age=Adult} 0.00909
## [17] {Class=Crew, Sex=Male, Survived=Yes} => {Age=Adult} 0.08723
## [18] {Class=Crew, Sex=Male, Survived=No} => {Age=Adult} 0.30441
## [19] {Class=Crew, Survived=No} => {Sex=Male} 0.30441
## [20] {Class=Crew, Age=Adult, Survived=No} => {Sex=Male} 0.30441
## [21] {Class=1st, Sex=Female} => {Age=Adult} 0.06542
## [22] {Class=1st, Sex=Female, Survived=Yes} => {Age=Adult} 0.06361
## [23] {Class=1st} => {Age=Adult} 0.14493
## [24] {Sex=Male, Survived=No} => {Age=Adult} 0.60382
## [25] {Class=Crew} => {Sex=Male} 0.39164
## [26] {Class=Crew, Age=Adult} => {Sex=Male} 0.39164
## [27] {Class=1st, Sex=Female} => {Survived=Yes} 0.06406
## [28] {Class=1st, Sex=Male} => {Age=Adult} 0.07951
## [29] {Class=1st, Sex=Female, Age=Adult} => {Survived=Yes} 0.06361
## [30] {Class=1st, Survived=Yes} => {Age=Adult} 0.08950
## [31] {Class=1st, Survived=No} => {Sex=Male} 0.05361
## [32] {Class=1st, Age=Adult, Survived=No} => {Sex=Male} 0.05361
## [33] {Survived=No} => {Age=Adult} 0.65334
## [34] {Sex=Male} => {Age=Adult} 0.75738
## [35] {Age=Adult, Survived=No} => {Sex=Male} 0.60382
## [36] {Class=2nd, Survived=No} => {Sex=Male} 0.06997
## [37] {Class=2nd, Age=Adult, Survived=No} => {Sex=Male} 0.06997
## [38] {Class=2nd, Sex=Male, Age=Adult} => {Survived=No} 0.06997
## [39] {Survived=No} => {Sex=Male} 0.61972
## [40] {Class=Crew, Survived=Yes} => {Sex=Male} 0.08723
## [41] {Class=Crew, Age=Adult, Survived=Yes} => {Sex=Male} 0.08723
## [42] {Class=2nd, Sex=Female} => {Survived=Yes} 0.04225
## [43] {Class=Crew, Sex=Female} => {Survived=Yes} 0.00909
## [44] {Class=Crew, Sex=Female, Age=Adult} => {Survived=Yes} 0.00909
## [45] {Class=2nd, Sex=Male} => {Survived=No} 0.06997
## [46] {Class=2nd, Sex=Female, Age=Adult} => {Survived=Yes} 0.03635
## [47] {Class=2nd, Age=Adult, Survived=Yes} => {Sex=Female} 0.03635
## [48] {Sex=Female, Survived=No} => {Class=3rd} 0.04816
## [49] {Class=3rd, Sex=Male, Age=Adult} => {Survived=No} 0.17583
## [50] {Class=3rd, Sex=Male} => {Survived=No} 0.19173
## [51] {Sex=Female, Age=Adult, Survived=No} => {Class=3rd} 0.04044
## [52] {Class=3rd, Age=Adult, Survived=No} => {Sex=Male} 0.17583
## confidence coverage lift count
## [1] 1.000 0.40209 1.05 885
## [2] 1.000 0.01090 3.10 24
## [3] 1.000 0.02363 3.12 52
## [4] 1.000 0.07587 1.05 167
## [5] 1.000 0.05543 1.05 122
## [6] 1.000 0.01045 1.05 23
## [7] 1.000 0.09632 1.05 212
## [8] 1.000 0.30577 1.05 673
## [9] 1.000 0.39164 1.05 862
## [10] 1.000 0.00591 3.10 13
## [11] 1.000 0.00772 3.12 17
## [12] 1.000 0.01590 3.12 35
## [13] 1.000 0.00591 1.05 13
## [14] 1.000 0.06997 1.05 154
## [15] 1.000 0.05361 1.05 118
## [16] 1.000 0.00909 1.05 20
## [17] 1.000 0.08723 1.05 192
## [18] 1.000 0.30441 1.05 670
## [19] 0.996 0.30577 1.27 670
## [20] 0.996 0.30577 1.27 670
## [21] 0.993 0.06588 1.04 144
## [22] 0.993 0.06406 1.04 140
## [23] 0.982 0.14766 1.03 319
## [24] 0.974 0.61972 1.03 1329
## [25] 0.974 0.40209 1.24 862
## [26] 0.974 0.40209 1.24 862
## [27] 0.972 0.06588 3.01 141
## [28] 0.972 0.08178 1.02 175
## [29] 0.972 0.06542 3.01 140
## [30] 0.970 0.09223 1.02 197
## [31] 0.967 0.05543 1.23 118
## [32] 0.967 0.05543 1.23 118
## [33] 0.965 0.67697 1.02 1438
## [34] 0.963 0.78646 1.01 1667
## [35] 0.924 0.65334 1.18 1329
## [36] 0.922 0.07587 1.17 154
## [37] 0.922 0.07587 1.17 154
## [38] 0.917 0.07633 1.35 154
## [39] 0.915 0.67697 1.16 1364
## [40] 0.906 0.09632 1.15 192
## [41] 0.906 0.09632 1.15 192
## [42] 0.877 0.04816 2.72 93
## [43] 0.870 0.01045 2.69 20
## [44] 0.870 0.01045 2.69 20
## [45] 0.860 0.08133 1.27 154
## [46] 0.860 0.04225 2.66 80
## [47] 0.851 0.04271 3.99 80
## [48] 0.841 0.05725 2.62 106
## [49] 0.838 0.20990 1.24 387
## [50] 0.827 0.23171 1.22 422
## [51] 0.817 0.04952 2.55 89
## [52] 0.813 0.21627 1.03 387
第一种情况,输出规则按照lift降序排列:
# Case 1: rules of at least 2 items with minimum support 0.005 and minimum
# confidence 0.8, where only the survival outcome may appear on the
# right-hand side and every other item defaults to the left-hand side.
# Each setting is written once; `parameter2` is kept as an alias so both
# names stay defined.
options(digits = 3)
parameter <- list(minlen = 2, supp = 0.005, conf = 0.8)
parameter2 <- parameter
appearance1 <- list(rhs = c("Survived=No", "Survived=Yes"), default = "lhs")
rules <- titanic %>%
  apriori(parameter = parameter2, appearance = appearance1)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.005 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 11
##
## set item appearances ...[2 item(s)] done [0.00s].
## set transactions ...[10 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [12 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print the survival rules whose lift exceeds 1, highest lift first.
rules %>%
  sort(by = "lift", decreasing = TRUE) %>%
  subset(lift > 1) %>%
  inspect()
## lhs rhs support confidence
## [1] {Class=2nd, Age=Child} => {Survived=Yes} 0.01090 1.000
## [2] {Class=2nd, Sex=Female, Age=Child} => {Survived=Yes} 0.00591 1.000
## [3] {Class=1st, Sex=Female} => {Survived=Yes} 0.06406 0.972
## [4] {Class=1st, Sex=Female, Age=Adult} => {Survived=Yes} 0.06361 0.972
## [5] {Class=2nd, Sex=Female} => {Survived=Yes} 0.04225 0.877
## [6] {Class=Crew, Sex=Female} => {Survived=Yes} 0.00909 0.870
## [7] {Class=Crew, Sex=Female, Age=Adult} => {Survived=Yes} 0.00909 0.870
## [8] {Class=2nd, Sex=Female, Age=Adult} => {Survived=Yes} 0.03635 0.860
## [9] {Class=2nd, Sex=Male, Age=Adult} => {Survived=No} 0.06997 0.917
## [10] {Class=2nd, Sex=Male} => {Survived=No} 0.06997 0.860
## [11] {Class=3rd, Sex=Male, Age=Adult} => {Survived=No} 0.17583 0.838
## [12] {Class=3rd, Sex=Male} => {Survived=No} 0.19173 0.827
## coverage lift count
## [1] 0.01090 3.10 24
## [2] 0.00591 3.10 13
## [3] 0.06588 3.01 141
## [4] 0.06542 3.01 140
## [5] 0.04816 2.72 93
## [6] 0.01045 2.69 20
## [7] 0.01045 2.69 20
## [8] 0.04225 2.66 80
## [9] 0.07633 1.35 154
## [10] 0.08133 1.27 154
## [11] 0.20990 1.24 387
## [12] 0.23171 1.22 422
第二种情况,输出规则按照confidence降序排列
# Case 2: rules of at least 3 items with minimum support 0.002 and minimum
# confidence 0.2. Only Survived=Yes may appear on the right-hand side and
# only the listed Class/Age items on the left-hand side; all other items are
# excluded (default = "none").
# Each setting is written once; `parameter3` and `appearance2` are kept as
# aliases so all four names stay defined.
options(digits = 3)
parameter <- list(minlen = 3, supp = 0.002, conf = 0.2)
parameter3 <- parameter
appearance <- list(
  rhs = c("Survived=Yes"),
  lhs = c("Class=1st", "Class=2nd", "Class=3rd", "Age=Child", "Age=Adult"),
  default = "none"
)
appearance2 <- appearance
rules <- titanic %>%
  apriori(parameter = parameter3, appearance = appearance2)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.002 3
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 4
##
## set item appearances ...[6 item(s)] done [0.00s].
## set transactions ...[6 item(s), 2201 transaction(s)] done [0.00s].
## sorting and recoding items ... [6 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [6 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Keep the rules with lift > 1, ordered by decreasing confidence as the task
# statement for this case requires (the earlier version sorted by lift).
# Every rule here has the same right-hand side (Survived=Yes), so lift is the
# confidence divided by one shared constant and the resulting order is the
# same as the lift ordering.
survived_rules <- rules %>%
  sort(by = "confidence", decreasing = TRUE) %>%
  subset(lift > 1)
inspect(survived_rules)
## lhs rhs support confidence coverage lift
## [1] {Class=2nd, Age=Child} => {Survived=Yes} 0.01090 1.000 0.01090 3.10
## [2] {Class=1st, Age=Child} => {Survived=Yes} 0.00273 1.000 0.00273 3.10
## [3] {Class=1st, Age=Adult} => {Survived=Yes} 0.08950 0.618 0.14493 1.91
## [4] {Class=2nd, Age=Adult} => {Survived=Yes} 0.04271 0.360 0.11858 1.11
## [5] {Class=3rd, Age=Child} => {Survived=Yes} 0.01227 0.342 0.03589 1.06
## count
## [1] 24
## [2] 6
## [3] 197
## [4] 94
## [5] 27
要求将上述第二种情况的关联规则结果进行绘图解读,绘图形式包括method="grouped"、"graph"、"paracoord"。
# Visualise the survival rules with three plot methods. Each call passes lift
# as the measure and confidence as the shading. The calls stay as separate
# top-level statements so that each returned plot is printed.
library(arulesViz)
survived_rules %>%
  plot(method = "graph", measure = "lift", shading = "confidence")
survived_rules %>%
  plot(method = "grouped", measure = "lift", shading = "confidence")
survived_rules %>%
  plot(method = "paracoord", measure = "lift", shading = "confidence")
根据个人情况进行撰写(略)