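## Goal: mine the associations among the 80 factors *within* each of the 9
## groups (not the associations *between* the 9 groups). To do this, split the
## data table into 9 sub-tables and run one association analysis over the
## 80 factors on each sub-table.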
## 加载包
library(readxl)
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(ca)
library(dplyr)
## -------------------------------------------------------------------------
## data.table + dplyr code now lives in dtplyr.
## Please library(dtplyr)!
## -------------------------------------------------------------------------
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Make ggplot2's black-and-white theme, with the "STKaiti" font family, the
## default theme for subsequent ggplot2 plots.
## NOTE(review): presumably STKaiti is chosen so Chinese labels render; confirm
## the font is installed on the machine running this script.
theme_set(theme_bw(base_family = "STKaiti"))
## Read the data
ansdata <- read_excel("Array_data_nobackground_normalised.xlsx")
summary(ansdata)
## Factor Factor_group Array data
## Length:6400 Length:6400 Min. : 0.76
## Class :character Class :character 1st Qu.: 80.23
## Mode :character Mode :character Median : 143.06
## Mean : 1487.96
## 3rd Qu.: 316.36
## Max. :59801.26
##
## Patient Id Collection_Date Illness_Day Outcome
## Length:6400 Length:6400 Min. : 3.00 Length:6400
## Class :character Class :character 1st Qu.:11.00 Class :character
## Mode :character Mode :character Median :15.00 Mode :character
## Mean :17.89
## 3rd Qu.:23.00
## Max. :68.00
## NA's :400
## Age Sex Disease Phase Sampling Categry
## Min. : 6.00 Length:6400 Min. :1.0 Length:6400
## 1st Qu.:41.00 Class :character 1st Qu.:2.0 Class :character
## Median :55.00 Mode :character Median :2.0 Mode :character
## Mean :53.67 Mean :2.2
## 3rd Qu.:67.00 3rd Qu.:3.0
## Max. :82.00 Max. :4.0
## NA's :400 NA's :400
## Handle missing values: count the NA cells across the whole table.
sum(is.na(ansdata))
## Shrink text in base-graphics plots to 80% of the default size.
par(cex = 0.8)
# VIM::aggr(ansdata)
## Replace the spreadsheet headers with short identifiers. The assignment is
## positional, so it relies on the 11 columns arriving in exactly this order.
names(ansdata) <- c(
  "factor", "factorgroup", "arraydata", "paintID", "collectiondate",
  "illnessday", "outcom", "age", "sex", "diseaPH", "SampleCategry"
)
summary(ansdata)
## factor factorgroup arraydata
## Length:6400 Length:6400 Min. : 0.76
## Class :character Class :character 1st Qu.: 80.23
## Mode :character Mode :character Median : 143.06
## Mean : 1487.96
## 3rd Qu.: 316.36
## Max. :59801.26
##
## paintID collectiondate illnessday outcom
## Length:6400 Length:6400 Min. : 3.00 Length:6400
## Class :character Class :character 1st Qu.:11.00 Class :character
## Mode :character Mode :character Median :15.00 Mode :character
## Mean :17.89
## 3rd Qu.:23.00
## Max. :68.00
## NA's :400
## age sex diseaPH SampleCategry
## Min. : 6.00 Length:6400 Min. :1.0 Length:6400
## 1st Qu.:41.00 Class :character 1st Qu.:2.0 Class :character
## Median :55.00 Mode :character Median :2.0 Mode :character
## Mean :53.67 Mean :2.2
## 3rd Qu.:67.00 3rd Qu.:3.0
## Max. :82.00 Max. :4.0
## NA's :400 NA's :400
## Keep only the columns used in the analysis; the raw array value, the
## patient id and the collection date are left out.
usedata <- ansdata[, c(
  "factor", "factorgroup", "illnessday", "outcom",
  "age", "sex", "diseaPH", "SampleCategry"
)]
summary(usedata)
## factor factorgroup illnessday outcom
## Length:6400 Length:6400 Min. : 3.00 Length:6400
## Class :character Class :character 1st Qu.:11.00 Class :character
## Mode :character Mode :character Median :15.00 Mode :character
## Mean :17.89
## 3rd Qu.:23.00
## Max. :68.00
## NA's :400
## age sex diseaPH SampleCategry
## Min. : 6.00 Length:6400 Min. :1.0 Length:6400
## 1st Qu.:41.00 Class :character 1st Qu.:2.0 Class :character
## Median :55.00 Mode :character Median :2.0 Mode :character
## Mean :53.67 Mean :2.2
## 3rd Qu.:67.00 3rd Qu.:3.0
## Max. :82.00 Max. :4.0
## NA's :400 NA's :400
## Plot the missing-value pattern of the selected columns.
VIM::aggr(usedata)

## Analyse the data
# ## Drop the rows that contain missing values
# usedata <- na.omit(usedata)
# VIM::aggr(usedata)
## Tidy the data: mark the table as grouped by factor, factor group, sex and
## outcome, then tabulate the outcome values.
usedata <- group_by(usedata, factor, factorgroup, sex, outcom)
table(usedata$outcom)
##
## Fatal Healthy Mild Severe
## 1600 400 1040 3360
## Count the rows per sampling category.
table(usedata$SampleCategry)
##
## All other First Last
## 2720 2320 1360
## Count the rows per outcome / sampling-category combination.
table(paste(usedata$outcom,usedata$SampleCategry,sep = "-"))
##
## Fatal-All other Fatal-First Fatal-Last Healthy-First
## 640 480 480 400
## Mild-All other Mild-First Severe-All other Severe-First
## 480 560 1600 880
## Severe-Last
## 880
## Create a new column: outcome and sampling category joined as
## "<outcom>-<SampleCategry>". This label defines the sub-groups that are
## analysed separately further down.
usedata$outSC <- paste(usedata$outcom,usedata$SampleCategry,sep = "-")
table(usedata$outSC)
##
## Fatal-All other Fatal-First Fatal-Last Healthy-First
## 640 480 480 400
## Mild-All other Mild-First Severe-All other Severe-First
## 480 560 1600 880
## Severe-Last
## 880
## Distinct outcome / sampling-category labels, printed for reference.
osc_group <- unique(usedata$outSC)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Columns that take part in the association mining; outcom and SampleCategry
## are represented by the combined outSC column.
guanliandata <- usedata[c("factor","factorgroup","illnessday","age","sex",
"diseaPH","outSC")]
## Bin age into intervals of width 10, then check the range of illnessday.
guanliandata$age <- cut_width(guanliandata$age,10)
summary(guanliandata$illnessday)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3.00 11.00 15.00 17.89 23.00 68.00 400
## Bin illness day into intervals of width 10.
guanliandata$illnessday <- cut_width(guanliandata$illnessday, 10)
## Convert every column to a factor so the table can later be coerced to
## arules "transactions".
## lapply() is used instead of apply(): apply() first turns the data frame
## into a character matrix, which pads numeric values (diseaPH levels came out
## as " 1", " 2", ...), discards the natural interval order of the binned
## columns (levels were re-sorted alphabetically), and, since R 4.0 where
## data.frame() no longer converts strings to factors, leaves the columns as
## character instead of factor. as.factor() keeps existing factors unchanged.
guanliandata <- as.data.frame(lapply(guanliandata, as.factor))
summary(guanliandata)
## factor factorgroup illnessday age
## ANG : 80 白介素 :1120 (5,15] :3040 (75,85]:1360
## BDNF : 80 干扰素 : 80 (15,25]:1440 (35,45]:1280
## BLC : 80 集落刺激因子: 240 (25,35]: 720 (55,65]:1120
## CCL11 : 80 趋化因子 :2080 (35,45]: 320 (45,55]:1040
## CCL24 : 80 生长因子 :2080 [-5,5] : 320 (25,35]: 640
## CCL26 : 80 肿瘤坏死因子: 320 (Other): 160 (Other): 560
## (Other):5920 NA's : 480 NA's : 400 NA's : 400
## sex diseaPH outSC
## F :1600 1 :1440 Severe-All other:1600
## M :4400 2 :2800 Severe-First : 880
## NA's: 400 3 : 880 Severe-Last : 880
## 4 : 880 Fatal-All other : 640
## NA's: 400 Mild-First : 560
## Fatal-First : 480
## (Other) :1360
## Association analysis for the "Fatal-All other" subset:
## keep only the rows with that label and inspect the structure of the result.
falor <- guanliandata[guanliandata$outSC == "Fatal-All other",]
str(falor)
## 'data.frame': 640 obs. of 7 variables:
## $ factor : Factor w/ 80 levels "ANG","BDNF","BLC",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ factorgroup: Factor w/ 6 levels "白介素","干扰素",..: 5 5 4 4 4 4 4 3 3 3 ...
## $ illnessday : Factor w/ 7 levels "(15,25]","(25,35]",..: 5 5 5 5 5 5 5 5 5 5 ...
## $ age : Factor w/ 8 levels "(15,25]","(25,35]",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ sex : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ diseaPH : Factor w/ 4 levels " 1"," 2"," 3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ outSC : Factor w/ 9 levels "Fatal-All other",..: 1 1 1 1 1 1 1 1 1 1 ...
## Coerce the subset to an arules "transactions" object.
falorrd <- as(falor, "transactions")

## Frequent items: bar plot of the 30 most frequent ones.
par(cex = 0.8, family = "STKaiti")
itemFrequencyPlot(
  falorrd,
  topN = 30,
  main = "Fatal-All other数据频繁的项"
)

## Mine association rules whose right-hand side is fixed to the subset label;
## every other item may only appear on the left-hand side.
guize <- apriori(
  falorrd,
  parameter = list(
    supp = 0.3,   # minimum support
    conf = 0.3,   # minimum confidence
    minlen = 3    # at least 3 items per rule (lhs + rhs)
  ),
  appearance = list(
    rhs = c("outSC=Fatal-All other"),
    default = "lhs"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 192
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[101 item(s), 640 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [6 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Overview of the mined rules (rule lengths and quality measures).
summary(guize)
## set of 6 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 5 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.167 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3750 Min. :1 Min. :1
## 1st Qu.:0.3750 1st Qu.:1 1st Qu.:1
## Median :0.3750 Median :1 Median :1
## Mean :0.3958 Mean :1 Mean :1
## 3rd Qu.:0.3750 3rd Qu.:1 3rd Qu.:1
## Max. :0.5000 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## falorrd 640 0.3 0.3
## List every mined rule with its support, confidence and lift.
inspect(guize)
## lhs rhs support confidence lift
## [1] {illnessday=(15,25],
## sex=F} => {outSC=Fatal-All other} 0.375 1 1
## [2] {sex=F,
## diseaPH= 3} => {outSC=Fatal-All other} 0.375 1 1
## [3] {illnessday=(5,15],
## sex=M} => {outSC=Fatal-All other} 0.500 1 1
## [4] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1 1
## [5] {sex=M,
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1 1
## [6] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1 1
## Draw the rules as a graph.
plot(guize, method = "graph")

## With the right-hand side restricted to outSC=Fatal-All other the rules can
## be considered meaningless for this data: every transaction in the subset
## carries that item, so confidence and lift are always 1.
## Re-run the mining without restricting where items may appear.
guize <- apriori(
  falorrd,
  parameter = list(
    supp = 0.3,   # minimum support
    conf = 0.3,   # minimum confidence
    minlen = 3    # at least 3 items per rule (lhs + rhs)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 192
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[101 item(s), 640 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [22 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Overview of the mined rules (rule lengths and quality measures).
summary(guize)
## set of 22 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 18 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.182 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.375 Min. :0.7500 Min. :1.000
## 1st Qu.:0.375 1st Qu.:0.7500 1st Qu.:1.125
## Median :0.375 Median :1.0000 Median :1.500
## Mean :0.392 Mean :0.9091 Mean :1.591
## 3rd Qu.:0.375 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :0.500 Max. :1.0000 Max. :2.000
##
## mining info:
## data ntransactions support confidence
## falorrd 640 0.3 0.3
## List every mined rule with its support, confidence and lift.
inspect(guize)
## lhs rhs support confidence lift
## [1] {sex=F,
## diseaPH= 3} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [2] {diseaPH= 3,
## outSC=Fatal-All other} => {sex=F} 0.375 1.00 2.0
## [3] {sex=F,
## outSC=Fatal-All other} => {diseaPH= 3} 0.375 0.75 2.0
## [4] {illnessday=(15,25],
## sex=F} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [5] {illnessday=(15,25],
## outSC=Fatal-All other} => {sex=F} 0.375 1.00 2.0
## [6] {sex=F,
## outSC=Fatal-All other} => {illnessday=(15,25]} 0.375 0.75 2.0
## [7] {illnessday=(5,15],
## sex=M} => {diseaPH= 2} 0.375 0.75 1.5
## [8] {illnessday=(5,15],
## diseaPH= 2} => {sex=M} 0.375 1.00 2.0
## [9] {sex=M,
## diseaPH= 2} => {illnessday=(5,15]} 0.375 1.00 2.0
## [10] {illnessday=(5,15],
## sex=M} => {outSC=Fatal-All other} 0.500 1.00 1.0
## [11] {illnessday=(5,15],
## outSC=Fatal-All other} => {sex=M} 0.500 1.00 2.0
## [12] {sex=M,
## outSC=Fatal-All other} => {illnessday=(5,15]} 0.500 1.00 2.0
## [13] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [14] {illnessday=(5,15],
## outSC=Fatal-All other} => {diseaPH= 2} 0.375 0.75 1.5
## [15] {diseaPH= 2,
## outSC=Fatal-All other} => {illnessday=(5,15]} 0.375 0.75 1.5
## [16] {sex=M,
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [17] {sex=M,
## outSC=Fatal-All other} => {diseaPH= 2} 0.375 0.75 1.5
## [18] {diseaPH= 2,
## outSC=Fatal-All other} => {sex=M} 0.375 0.75 1.5
## [19] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [20] {illnessday=(5,15],
## sex=M,
## outSC=Fatal-All other} => {diseaPH= 2} 0.375 0.75 1.5
## [21] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Fatal-All other} => {sex=M} 0.375 1.00 2.0
## [22] {sex=M,
## diseaPH= 2,
## outSC=Fatal-All other} => {illnessday=(5,15]} 0.375 1.00 2.0
## The same rules, ordered by lift.
inspect(sort(guize,by = "lift"))
## lhs rhs support confidence lift
## [1] {diseaPH= 3,
## outSC=Fatal-All other} => {sex=F} 0.375 1.00 2.0
## [2] {sex=F,
## outSC=Fatal-All other} => {diseaPH= 3} 0.375 0.75 2.0
## [3] {illnessday=(15,25],
## outSC=Fatal-All other} => {sex=F} 0.375 1.00 2.0
## [4] {sex=F,
## outSC=Fatal-All other} => {illnessday=(15,25]} 0.375 0.75 2.0
## [5] {illnessday=(5,15],
## diseaPH= 2} => {sex=M} 0.375 1.00 2.0
## [6] {sex=M,
## diseaPH= 2} => {illnessday=(5,15]} 0.375 1.00 2.0
## [7] {illnessday=(5,15],
## outSC=Fatal-All other} => {sex=M} 0.500 1.00 2.0
## [8] {sex=M,
## outSC=Fatal-All other} => {illnessday=(5,15]} 0.500 1.00 2.0
## [9] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Fatal-All other} => {sex=M} 0.375 1.00 2.0
## [10] {sex=M,
## diseaPH= 2,
## outSC=Fatal-All other} => {illnessday=(5,15]} 0.375 1.00 2.0
## [11] {illnessday=(5,15],
## sex=M} => {diseaPH= 2} 0.375 0.75 1.5
## [12] {illnessday=(5,15],
## outSC=Fatal-All other} => {diseaPH= 2} 0.375 0.75 1.5
## [13] {diseaPH= 2,
## outSC=Fatal-All other} => {illnessday=(5,15]} 0.375 0.75 1.5
## [14] {sex=M,
## outSC=Fatal-All other} => {diseaPH= 2} 0.375 0.75 1.5
## [15] {diseaPH= 2,
## outSC=Fatal-All other} => {sex=M} 0.375 0.75 1.5
## [16] {illnessday=(5,15],
## sex=M,
## outSC=Fatal-All other} => {diseaPH= 2} 0.375 0.75 1.5
## [17] {sex=F,
## diseaPH= 3} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [18] {illnessday=(15,25],
## sex=F} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [19] {illnessday=(5,15],
## sex=M} => {outSC=Fatal-All other} 0.500 1.00 1.0
## [20] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [21] {sex=M,
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1.00 1.0
## [22] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Fatal-All other} 0.375 1.00 1.0
## These are the discovered rules; the ones with lift greater than 1 are the
## meaningful ones. Draw them as a graph.
plot(guize,method = "graph")

## Association analysis for the "Fatal-First" subset:
## keep only the rows with that label and coerce them to transactions.
guizdata <- guanliandata[guanliandata$outSC == "Fatal-First", ]
guizdata <- as(guizdata, "transactions")

## Frequent items: bar plot of the 30 most frequent ones.
par(cex = 0.8, family = "STKaiti")
itemFrequencyPlot(
  guizdata,
  topN = 30,
  main = "Fatal-First数据频繁的项"
)

## Mine association rules whose right-hand side is fixed to the subset label;
## every other item may only appear on the left-hand side.
guize <- apriori(
  guizdata,
  parameter = list(
    supp = 0.3,   # minimum support
    conf = 0.3,   # minimum confidence
    minlen = 3    # at least 3 items per rule (lhs + rhs)
  ),
  appearance = list(
    rhs = c("outSC=Fatal-First"),
    default = "lhs"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 144
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[98 item(s), 480 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [10 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Overview of the mined rules (rule lengths and quality measures).
summary(guize)
## set of 10 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 8 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.0 3.0 3.0 3.2 3.0 4.0
##
## summary of quality measures:
## support confidence lift
## Min. :0.3250 Min. :1 Min. :1
## 1st Qu.:0.3333 1st Qu.:1 1st Qu.:1
## Median :0.4167 Median :1 Median :1
## Mean :0.4317 Mean :1 Mean :1
## 3rd Qu.:0.5000 3rd Qu.:1 3rd Qu.:1
## Max. :0.6667 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 480 0.3 0.3
## List every mined rule with its support, confidence and lift.
inspect(guize)
## lhs rhs support confidence lift
## [1] {factorgroup=生长因子,
## illnessday=(5,15]} => {outSC=Fatal-First} 0.3250000 1 1
## [2] {factorgroup=趋化因子,
## illnessday=(5,15]} => {outSC=Fatal-First} 0.3250000 1 1
## [3] {sex=F,
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1 1
## [4] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1 1
## [5] {illnessday=(5,15],
## sex=F} => {outSC=Fatal-First} 0.5000000 1 1
## [6] {sex=M,
## diseaPH= 1} => {outSC=Fatal-First} 0.5000000 1 1
## [7] {illnessday=(5,15],
## sex=M} => {outSC=Fatal-First} 0.5000000 1 1
## [8] {illnessday=(5,15],
## diseaPH= 1} => {outSC=Fatal-First} 0.6666667 1 1
## [9] {illnessday=(5,15],
## sex=F,
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1 1
## [10] {illnessday=(5,15],
## sex=M,
## diseaPH= 1} => {outSC=Fatal-First} 0.5000000 1 1
## Draw the rules as a graph.
plot(guize, method = "graph")

## With the right-hand side restricted to outSC=Fatal-First the rules can be
## considered meaningless for this data: every transaction in the subset
## carries that item, so confidence and lift are always 1.
## Re-run the mining without restricting where items may appear.
guize <- apriori(
  guizdata,
  parameter = list(
    supp = 0.3,   # minimum support
    conf = 0.3,   # minimum confidence
    minlen = 3    # at least 3 items per rule (lhs + rhs)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 144
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[98 item(s), 480 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [38 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Overview of the mined rules (rule lengths and quality measures).
summary(guize)
## set of 38 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 30 8
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.211 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3250 Min. :0.3250 Min. :1.000
## 1st Qu.:0.3333 1st Qu.:0.7500 1st Qu.:1.000
## Median :0.4167 Median :1.0000 Median :1.000
## Mean :0.4285 Mean :0.8658 Mean :1.237
## 3rd Qu.:0.5000 3rd Qu.:1.0000 3rd Qu.:1.500
## Max. :0.6667 Max. :1.0000 Max. :2.000
##
## mining info:
## data ntransactions support confidence
## guizdata 480 0.3 0.3
## List every mined rule with its support, confidence and lift.
inspect(guize)
## lhs rhs support confidence lift
## [1] {factorgroup=生长因子,
## illnessday=(5,15]} => {outSC=Fatal-First} 0.3250000 1.0000000 1.0
## [2] {factorgroup=生长因子,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3250000 1.0000000 1.0
## [3] {illnessday=(5,15],
## outSC=Fatal-First} => {factorgroup=生长因子} 0.3250000 0.3250000 1.0
## [4] {factorgroup=趋化因子,
## illnessday=(5,15]} => {outSC=Fatal-First} 0.3250000 1.0000000 1.0
## [5] {factorgroup=趋化因子,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3250000 1.0000000 1.0
## [6] {illnessday=(5,15],
## outSC=Fatal-First} => {factorgroup=趋化因子} 0.3250000 0.3250000 1.0
## [7] {sex=F,
## diseaPH= 2} => {illnessday=(5,15]} 0.3333333 1.0000000 1.0
## [8] {illnessday=(5,15],
## diseaPH= 2} => {sex=F} 0.3333333 1.0000000 2.0
## [9] {illnessday=(5,15],
## sex=F} => {diseaPH= 2} 0.3333333 0.6666667 2.0
## [10] {sex=F,
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1.0000000 1.0
## [11] {diseaPH= 2,
## outSC=Fatal-First} => {sex=F} 0.3333333 1.0000000 2.0
## [12] {sex=F,
## outSC=Fatal-First} => {diseaPH= 2} 0.3333333 0.6666667 2.0
## [13] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1.0000000 1.0
## [14] {diseaPH= 2,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3333333 1.0000000 1.0
## [15] {illnessday=(5,15],
## outSC=Fatal-First} => {diseaPH= 2} 0.3333333 0.3333333 1.0
## [16] {sex=M,
## diseaPH= 1} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [17] {illnessday=(5,15],
## sex=M} => {diseaPH= 1} 0.5000000 1.0000000 1.5
## [18] {illnessday=(5,15],
## diseaPH= 1} => {sex=M} 0.5000000 0.7500000 1.5
## [19] {sex=M,
## diseaPH= 1} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [20] {sex=M,
## outSC=Fatal-First} => {diseaPH= 1} 0.5000000 1.0000000 1.5
## [21] {diseaPH= 1,
## outSC=Fatal-First} => {sex=M} 0.5000000 0.7500000 1.5
## [22] {illnessday=(5,15],
## sex=M} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [23] {sex=M,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [24] {illnessday=(5,15],
## outSC=Fatal-First} => {sex=M} 0.5000000 0.5000000 1.0
## [25] {illnessday=(5,15],
## sex=F} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [26] {sex=F,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [27] {illnessday=(5,15],
## outSC=Fatal-First} => {sex=F} 0.5000000 0.5000000 1.0
## [28] {illnessday=(5,15],
## diseaPH= 1} => {outSC=Fatal-First} 0.6666667 1.0000000 1.0
## [29] {diseaPH= 1,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.6666667 1.0000000 1.0
## [30] {illnessday=(5,15],
## outSC=Fatal-First} => {diseaPH= 1} 0.6666667 0.6666667 1.0
## [31] {illnessday=(5,15],
## sex=F,
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1.0000000 1.0
## [32] {sex=F,
## diseaPH= 2,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3333333 1.0000000 1.0
## [33] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Fatal-First} => {sex=F} 0.3333333 1.0000000 2.0
## [34] {illnessday=(5,15],
## sex=F,
## outSC=Fatal-First} => {diseaPH= 2} 0.3333333 0.6666667 2.0
## [35] {illnessday=(5,15],
## sex=M,
## diseaPH= 1} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [36] {sex=M,
## diseaPH= 1,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [37] {illnessday=(5,15],
## sex=M,
## outSC=Fatal-First} => {diseaPH= 1} 0.5000000 1.0000000 1.5
## [38] {illnessday=(5,15],
## diseaPH= 1,
## outSC=Fatal-First} => {sex=M} 0.5000000 0.7500000 1.5
## The same rules, ordered by lift.
inspect(sort(guize,by = "lift"))
## lhs rhs support confidence lift
## [1] {illnessday=(5,15],
## diseaPH= 2} => {sex=F} 0.3333333 1.0000000 2.0
## [2] {illnessday=(5,15],
## sex=F} => {diseaPH= 2} 0.3333333 0.6666667 2.0
## [3] {diseaPH= 2,
## outSC=Fatal-First} => {sex=F} 0.3333333 1.0000000 2.0
## [4] {sex=F,
## outSC=Fatal-First} => {diseaPH= 2} 0.3333333 0.6666667 2.0
## [5] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Fatal-First} => {sex=F} 0.3333333 1.0000000 2.0
## [6] {illnessday=(5,15],
## sex=F,
## outSC=Fatal-First} => {diseaPH= 2} 0.3333333 0.6666667 2.0
## [7] {illnessday=(5,15],
## sex=M} => {diseaPH= 1} 0.5000000 1.0000000 1.5
## [8] {illnessday=(5,15],
## diseaPH= 1} => {sex=M} 0.5000000 0.7500000 1.5
## [9] {sex=M,
## outSC=Fatal-First} => {diseaPH= 1} 0.5000000 1.0000000 1.5
## [10] {diseaPH= 1,
## outSC=Fatal-First} => {sex=M} 0.5000000 0.7500000 1.5
## [11] {illnessday=(5,15],
## sex=M,
## outSC=Fatal-First} => {diseaPH= 1} 0.5000000 1.0000000 1.5
## [12] {illnessday=(5,15],
## diseaPH= 1,
## outSC=Fatal-First} => {sex=M} 0.5000000 0.7500000 1.5
## [13] {factorgroup=生长因子,
## illnessday=(5,15]} => {outSC=Fatal-First} 0.3250000 1.0000000 1.0
## [14] {factorgroup=生长因子,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3250000 1.0000000 1.0
## [15] {illnessday=(5,15],
## outSC=Fatal-First} => {factorgroup=生长因子} 0.3250000 0.3250000 1.0
## [16] {factorgroup=趋化因子,
## illnessday=(5,15]} => {outSC=Fatal-First} 0.3250000 1.0000000 1.0
## [17] {factorgroup=趋化因子,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3250000 1.0000000 1.0
## [18] {illnessday=(5,15],
## outSC=Fatal-First} => {factorgroup=趋化因子} 0.3250000 0.3250000 1.0
## [19] {sex=F,
## diseaPH= 2} => {illnessday=(5,15]} 0.3333333 1.0000000 1.0
## [20] {sex=F,
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1.0000000 1.0
## [21] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1.0000000 1.0
## [22] {diseaPH= 2,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3333333 1.0000000 1.0
## [23] {illnessday=(5,15],
## outSC=Fatal-First} => {diseaPH= 2} 0.3333333 0.3333333 1.0
## [24] {sex=M,
## diseaPH= 1} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [25] {sex=M,
## diseaPH= 1} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [26] {illnessday=(5,15],
## sex=M} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [27] {sex=M,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [28] {illnessday=(5,15],
## outSC=Fatal-First} => {sex=M} 0.5000000 0.5000000 1.0
## [29] {illnessday=(5,15],
## sex=F} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [30] {sex=F,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## [31] {illnessday=(5,15],
## outSC=Fatal-First} => {sex=F} 0.5000000 0.5000000 1.0
## [32] {illnessday=(5,15],
## diseaPH= 1} => {outSC=Fatal-First} 0.6666667 1.0000000 1.0
## [33] {diseaPH= 1,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.6666667 1.0000000 1.0
## [34] {illnessday=(5,15],
## outSC=Fatal-First} => {diseaPH= 1} 0.6666667 0.6666667 1.0
## [35] {illnessday=(5,15],
## sex=F,
## diseaPH= 2} => {outSC=Fatal-First} 0.3333333 1.0000000 1.0
## [36] {sex=F,
## diseaPH= 2,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.3333333 1.0000000 1.0
## [37] {illnessday=(5,15],
## sex=M,
## diseaPH= 1} => {outSC=Fatal-First} 0.5000000 1.0000000 1.0
## [38] {sex=M,
## diseaPH= 1,
## outSC=Fatal-First} => {illnessday=(5,15]} 0.5000000 1.0000000 1.0
## These are the discovered rules; the ones with lift greater than 1 are the
## meaningful ones. Draw them as a graph.
plot(guize,method = "graph")

## Association analysis for the "Fatal-Last" subset.
## Print the available subset labels for reference.
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the "Fatal-Last" rows and coerce them to transactions.
guizdata <- guanliandata[guanliandata$outSC == "Fatal-Last", ]
guizdata <- as(guizdata, "transactions")

## Frequent items: bar plot of the 30 most frequent ones.
par(cex = 0.8, family = "STKaiti")
itemFrequencyPlot(
  guizdata,
  topN = 30,
  main = "Fatal-Last数据频繁的项"
)

## Mine association rules whose right-hand side is fixed to the subset label;
## every other item may only appear on the left-hand side.
guize <- apriori(
  guizdata,
  parameter = list(
    supp = 0.3,   # minimum support
    conf = 0.3,   # minimum confidence
    minlen = 3    # at least 3 items per rule (lhs + rhs)
  ),
  appearance = list(
    rhs = c("outSC=Fatal-Last"),
    default = "lhs"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 144
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[101 item(s), 480 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [6 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Overview of the mined rules (rule lengths and quality measures).
summary(guize)
## set of 6 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 5 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.167 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3333 Min. :1 Min. :1
## 1st Qu.:0.3333 1st Qu.:1 1st Qu.:1
## Median :0.3333 Median :1 Median :1
## Mean :0.3611 Mean :1 Mean :1
## 3rd Qu.:0.3333 3rd Qu.:1 3rd Qu.:1
## Max. :0.5000 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 480 0.3 0.3
## List every mined rule with its support, confidence and lift.
inspect(guize)
## lhs rhs support confidence lift
## [1] {illnessday=(15,25],
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1 1
## [2] {sex=M,
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1 1
## [3] {sex=F,
## diseaPH= 4} => {outSC=Fatal-Last} 0.3333333 1 1
## [4] {illnessday=(35,45],
## sex=F} => {outSC=Fatal-Last} 0.3333333 1 1
## [5] {illnessday=(15,25],
## sex=M} => {outSC=Fatal-Last} 0.5000000 1 1
## [6] {illnessday=(15,25],
## sex=M,
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1 1
## Draw the rules as a graph.
plot(guize, method = "graph")

## With the right-hand side restricted to outSC=Fatal-Last the rules can be
## considered meaningless for this data: every transaction in the subset
## carries that item, so confidence and lift are always 1.
## Re-run the mining without restricting where items may appear.
guize <- apriori(
  guizdata,
  parameter = list(
    supp = 0.3,   # minimum support
    conf = 0.3,   # minimum confidence
    minlen = 3    # at least 3 items per rule (lhs + rhs)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 144
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[101 item(s), 480 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [22 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Overview of the mined rules (rule lengths and quality measures).
summary(guize)
## set of 22 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 18 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.182 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3333 Min. :0.6667 Min. :1.000
## 1st Qu.:0.3333 1st Qu.:0.7500 1st Qu.:1.250
## Median :0.3333 Median :1.0000 Median :2.000
## Mean :0.3561 Mean :0.9091 Mean :1.727
## 3rd Qu.:0.3333 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :0.5000 Max. :1.0000 Max. :2.000
##
## mining info:
## data ntransactions support confidence
## guizdata 480 0.3 0.3
## List every mined rule with its support, confidence and lift.
inspect(guize)
## lhs rhs support confidence lift
## [1] {illnessday=(15,25],
## diseaPH= 2} => {sex=M} 0.3333333 1.0000000 2
## [2] {sex=M,
## diseaPH= 2} => {illnessday=(15,25]} 0.3333333 1.0000000 2
## [3] {illnessday=(15,25],
## sex=M} => {diseaPH= 2} 0.3333333 0.6666667 2
## [4] {illnessday=(15,25],
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [5] {diseaPH= 2,
## outSC=Fatal-Last} => {illnessday=(15,25]} 0.3333333 1.0000000 2
## [6] {illnessday=(15,25],
## outSC=Fatal-Last} => {diseaPH= 2} 0.3333333 0.6666667 2
## [7] {sex=M,
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [8] {diseaPH= 2,
## outSC=Fatal-Last} => {sex=M} 0.3333333 1.0000000 2
## [9] {sex=M,
## outSC=Fatal-Last} => {diseaPH= 2} 0.3333333 0.6666667 2
## [10] {illnessday=(35,45],
## sex=F} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [11] {illnessday=(35,45],
## outSC=Fatal-Last} => {sex=F} 0.3333333 1.0000000 2
## [12] {sex=F,
## outSC=Fatal-Last} => {illnessday=(35,45]} 0.3333333 0.6666667 2
## [13] {sex=F,
## diseaPH= 4} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [14] {diseaPH= 4,
## outSC=Fatal-Last} => {sex=F} 0.3333333 1.0000000 2
## [15] {sex=F,
## outSC=Fatal-Last} => {diseaPH= 4} 0.3333333 0.6666667 2
## [16] {illnessday=(15,25],
## sex=M} => {outSC=Fatal-Last} 0.5000000 1.0000000 1
## [17] {illnessday=(15,25],
## outSC=Fatal-Last} => {sex=M} 0.5000000 1.0000000 2
## [18] {sex=M,
## outSC=Fatal-Last} => {illnessday=(15,25]} 0.5000000 1.0000000 2
## [19] {illnessday=(15,25],
## sex=M,
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [20] {illnessday=(15,25],
## diseaPH= 2,
## outSC=Fatal-Last} => {sex=M} 0.3333333 1.0000000 2
## [21] {sex=M,
## diseaPH= 2,
## outSC=Fatal-Last} => {illnessday=(15,25]} 0.3333333 1.0000000 2
## [22] {illnessday=(15,25],
## sex=M,
## outSC=Fatal-Last} => {diseaPH= 2} 0.3333333 0.6666667 2
## The same rules, ordered by lift.
inspect(sort(guize,by = "lift"))
## lhs rhs support confidence lift
## [1] {illnessday=(15,25],
## diseaPH= 2} => {sex=M} 0.3333333 1.0000000 2
## [2] {sex=M,
## diseaPH= 2} => {illnessday=(15,25]} 0.3333333 1.0000000 2
## [3] {illnessday=(15,25],
## sex=M} => {diseaPH= 2} 0.3333333 0.6666667 2
## [4] {diseaPH= 2,
## outSC=Fatal-Last} => {illnessday=(15,25]} 0.3333333 1.0000000 2
## [5] {illnessday=(15,25],
## outSC=Fatal-Last} => {diseaPH= 2} 0.3333333 0.6666667 2
## [6] {diseaPH= 2,
## outSC=Fatal-Last} => {sex=M} 0.3333333 1.0000000 2
## [7] {sex=M,
## outSC=Fatal-Last} => {diseaPH= 2} 0.3333333 0.6666667 2
## [8] {illnessday=(35,45],
## outSC=Fatal-Last} => {sex=F} 0.3333333 1.0000000 2
## [9] {sex=F,
## outSC=Fatal-Last} => {illnessday=(35,45]} 0.3333333 0.6666667 2
## [10] {diseaPH= 4,
## outSC=Fatal-Last} => {sex=F} 0.3333333 1.0000000 2
## [11] {sex=F,
## outSC=Fatal-Last} => {diseaPH= 4} 0.3333333 0.6666667 2
## [12] {illnessday=(15,25],
## outSC=Fatal-Last} => {sex=M} 0.5000000 1.0000000 2
## [13] {sex=M,
## outSC=Fatal-Last} => {illnessday=(15,25]} 0.5000000 1.0000000 2
## [14] {illnessday=(15,25],
## diseaPH= 2,
## outSC=Fatal-Last} => {sex=M} 0.3333333 1.0000000 2
## [15] {sex=M,
## diseaPH= 2,
## outSC=Fatal-Last} => {illnessday=(15,25]} 0.3333333 1.0000000 2
## [16] {illnessday=(15,25],
## sex=M,
## outSC=Fatal-Last} => {diseaPH= 2} 0.3333333 0.6666667 2
## [17] {illnessday=(15,25],
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [18] {sex=M,
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [19] {illnessday=(35,45],
## sex=F} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [20] {sex=F,
## diseaPH= 4} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## [21] {illnessday=(15,25],
## sex=M} => {outSC=Fatal-Last} 0.5000000 1.0000000 1
## [22] {illnessday=(15,25],
## sex=M,
## diseaPH= 2} => {outSC=Fatal-Last} 0.3333333 1.0000000 1
## These are the rules that were found; rules with a lift greater than 1
## are the meaningful ones.
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Association analysis for the "Healthy-First" data
## Print osc_group for reference (presumably the distinct outSC labels
## that the filter below selects from -- confirm where it is defined)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the rows whose outSC is "Healthy-First", coerce them to an
## arules transactions object, and print its summary.
guizdata <- as(
  guanliandata[guanliandata$outSC == "Healthy-First", ],
  "transactions"
)
summary(guizdata)
## transactions as itemMatrix in sparse format with
## 400 rows (elements/itemsets/transactions) and
## 116 columns (items) and a density of 0.02521552
##
## most frequent items:
## outSC=Healthy-First factorgroup=趋化因子 factorgroup=生长因子
## 400 130 130
## factorgroup=白介素 factorgroup=肿瘤坏死因子 (Other)
## 70 20 420
##
## element (itemset/transaction) length distribution:
## sizes
## 2 3
## 30 370
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 3.000 2.925 3.000 3.000
##
## includes extended item information - examples:
## labels variables levels
## 1 factor=ANG factor ANG
## 2 factor=BDNF factor BDNF
## 3 factor=BLC factor BLC
##
## includes extended transaction information - examples:
## transactionID
## 1 1601
## 2 1602
## 3 1603
## Frequent items: plot the 30 most frequent items of this subset.
## The STKaiti font family is set so the Chinese labels can be rendered.
par(family = "STKaiti", cex = 0.8)
itemFrequencyPlot(guizdata, main = "Healthy-First数据频繁的项", topN = 30)

## Mine association rules whose right-hand side is fixed to the
## outSC=Healthy-First item; every other item may only appear on the LHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.05,    ## minimum support
    confidence = 0.1,  ## minimum confidence
    minlen = 2         ## minimum rule length (LHS + RHS)
  ),
  appearance = list(rhs = "outSC=Healthy-First", default = "lhs")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.05 2
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 20
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[87 item(s), 400 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [4 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the rule set: number of rules, rule-length distribution and
## the quality measures (support, confidence, lift)
summary(guize)
## set of 4 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift
## Min. :0.0500 Min. :1 Min. :1
## 1st Qu.:0.1437 1st Qu.:1 1st Qu.:1
## Median :0.2500 Median :1 Median :1
## Mean :0.2188 Mean :1 Mean :1
## 3rd Qu.:0.3250 3rd Qu.:1 3rd Qu.:1
## Max. :0.3250 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 400 0.05 0.1
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence
## [1] {factorgroup=肿瘤坏死因子} => {outSC=Healthy-First} 0.050 1
## [2] {factorgroup=白介素} => {outSC=Healthy-First} 0.175 1
## [3] {factorgroup=生长因子} => {outSC=Healthy-First} 0.325 1
## [4] {factorgroup=趋化因子} => {outSC=Healthy-First} 0.325 1
## lift
## [1] 1
## [2] 1
## [3] 1
## [4] 1
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Interpreting the rules: when the right-hand side is pinned to
## outSC=Healthy-First, the rules can be regarded as meaningless for this
## data, because confidence and lift are both 1 (every transaction in the
## subset carries that item). Mine again without constraining the RHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.05,    ## minimum support
    confidence = 0.1,  ## minimum confidence
    minlen = 2         ## minimum rule length (LHS + RHS)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.1 0.1 1 none FALSE TRUE 5 0.05 2
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 20
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[87 item(s), 400 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the unconstrained rule set: number of rules, rule-length
## distribution and the quality measures (support, confidence, lift)
summary(guize)
## set of 7 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 7
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence lift
## Min. :0.0500 Min. :0.1750 Min. :1
## 1st Qu.:0.1750 1st Qu.:0.3250 1st Qu.:1
## Median :0.3250 Median :1.0000 Median :1
## Mean :0.2429 Mean :0.6893 Mean :1
## 3rd Qu.:0.3250 3rd Qu.:1.0000 3rd Qu.:1
## Max. :0.3250 Max. :1.0000 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 400 0.05 0.1
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support
## [1] {factorgroup=肿瘤坏死因子} => {outSC=Healthy-First} 0.050
## [2] {factorgroup=白介素} => {outSC=Healthy-First} 0.175
## [3] {outSC=Healthy-First} => {factorgroup=白介素} 0.175
## [4] {factorgroup=生长因子} => {outSC=Healthy-First} 0.325
## [5] {outSC=Healthy-First} => {factorgroup=生长因子} 0.325
## [6] {factorgroup=趋化因子} => {outSC=Healthy-First} 0.325
## [7] {outSC=Healthy-First} => {factorgroup=趋化因子} 0.325
## confidence lift
## [1] 1.000 1
## [2] 1.000 1
## [3] 0.175 1
## [4] 1.000 1
## [5] 0.325 1
## [6] 1.000 1
## [7] 0.325 1
## List the mined rules again, ordered by lift
inspect(sort(guize,by = "lift"))
## lhs rhs support
## [1] {factorgroup=肿瘤坏死因子} => {outSC=Healthy-First} 0.050
## [2] {factorgroup=白介素} => {outSC=Healthy-First} 0.175
## [3] {outSC=Healthy-First} => {factorgroup=白介素} 0.175
## [4] {factorgroup=生长因子} => {outSC=Healthy-First} 0.325
## [5] {outSC=Healthy-First} => {factorgroup=生长因子} 0.325
## [6] {factorgroup=趋化因子} => {outSC=Healthy-First} 0.325
## [7] {outSC=Healthy-First} => {factorgroup=趋化因子} 0.325
## confidence lift
## [1] 1.000 1
## [2] 1.000 1
## [3] 0.175 1
## [4] 1.000 1
## [5] 0.325 1
## [6] 1.000 1
## [7] 0.325 1
## These are the rules that were found; rules with a lift greater than 1
## are the meaningful ones.
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Association analysis for the "Mild-All other" data
## Print osc_group for reference (presumably the distinct outSC labels
## that the filter below selects from -- confirm where it is defined)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the rows whose outSC is "Mild-All other", coerce them to an
## arules transactions object, and print its summary.
guizdata <- as(
  guanliandata[guanliandata$outSC == "Mild-All other", ],
  "transactions"
)
summary(guizdata)
## transactions as itemMatrix in sparse format with
## 480 rows (elements/itemsets/transactions) and
## 116 columns (items) and a density of 0.05969828
##
## most frequent items:
## diseaPH= 2 outSC=Mild-All other sex=M
## 480 480 400
## illnessday=(15,25] age=(35,45] (Other)
## 320 240 1404
##
## element (itemset/transaction) length distribution:
## sizes
## 6 7
## 36 444
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.000 7.000 7.000 6.925 7.000 7.000
##
## includes extended item information - examples:
## labels variables levels
## 1 factor=ANG factor ANG
## 2 factor=BDNF factor BDNF
## 3 factor=BLC factor BLC
##
## includes extended transaction information - examples:
## transactionID
## 1 2001
## 2 2002
## 3 2003
## Frequent items: plot the 30 most frequent items of this subset.
## The STKaiti font family is set so the Chinese labels can be rendered.
par(family = "STKaiti", cex = 0.8)
itemFrequencyPlot(guizdata, main = "Mild-All other数据频繁的项", topN = 30)

## Mine association rules whose right-hand side is fixed to the
## outSC=Mild-All other item; every other item may only appear on the LHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,     ## minimum support
    confidence = 0.3,  ## minimum confidence
    minlen = 3         ## minimum rule length (LHS + RHS)
  ),
  appearance = list(rhs = "outSC=Mild-All other", default = "lhs")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 144
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[96 item(s), 480 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [14 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the rule set: number of rules, rule-length distribution and
## the quality measures (support, confidence, lift)
summary(guize)
## set of 14 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 10 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.286 3.750 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3250 Min. :1 Min. :1
## 1st Qu.:0.3333 1st Qu.:1 1st Qu.:1
## Median :0.3333 Median :1 Median :1
## Mean :0.4274 Mean :1 Mean :1
## 3rd Qu.:0.5000 3rd Qu.:1 3rd Qu.:1
## Max. :0.8333 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 480 0.3 0.3
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence lift
## [1] {factorgroup=生长因子,
## diseaPH= 2} => {outSC=Mild-All other} 0.3250000 1 1
## [2] {factorgroup=趋化因子,
## diseaPH= 2} => {outSC=Mild-All other} 0.3250000 1 1
## [3] {illnessday=(5,15],
## sex=M} => {outSC=Mild-All other} 0.3333333 1 1
## [4] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1 1
## [5] {illnessday=(15,25],
## age=(35,45]} => {outSC=Mild-All other} 0.3333333 1 1
## [6] {age=(35,45],
## sex=M} => {outSC=Mild-All other} 0.3333333 1 1
## [7] {age=(35,45],
## diseaPH= 2} => {outSC=Mild-All other} 0.5000000 1 1
## [8] {illnessday=(15,25],
## sex=M} => {outSC=Mild-All other} 0.5000000 1 1
## [9] {illnessday=(15,25],
## diseaPH= 2} => {outSC=Mild-All other} 0.6666667 1 1
## [10] {sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.8333333 1 1
## [11] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1 1
## [12] {illnessday=(15,25],
## age=(35,45],
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1 1
## [13] {age=(35,45],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1 1
## [14] {illnessday=(15,25],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.5000000 1 1
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Interpreting the rules: when the right-hand side is pinned to
## outSC=Mild-All other, the rules can be regarded as meaningless for this
## data, because confidence and lift are both 1 (every transaction in the
## subset carries that item). Mine again without constraining the RHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,     ## minimum support
    confidence = 0.3,  ## minimum confidence
    minlen = 3         ## minimum rule length (LHS + RHS)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 144
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[96 item(s), 480 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [58 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the unconstrained rule set: number of rules, rule-length
## distribution and the quality measures (support, confidence, lift)
summary(guize)
## set of 58 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 42 16
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.276 4.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3250 Min. :0.325 Min. :0.8000
## 1st Qu.:0.3333 1st Qu.:0.600 1st Qu.:1.0000
## Median :0.3333 Median :1.000 Median :1.0000
## Mean :0.4129 Mean :0.792 Mean :0.9897
## 3rd Qu.:0.5000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :0.8333 Max. :1.000 Max. :1.2000
##
## mining info:
## data ntransactions support confidence
## guizdata 480 0.3 0.3
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence lift
## [1] {factorgroup=生长因子,
## diseaPH= 2} => {outSC=Mild-All other} 0.3250000 1.0000000 1.0
## [2] {factorgroup=生长因子,
## outSC=Mild-All other} => {diseaPH= 2} 0.3250000 1.0000000 1.0
## [3] {diseaPH= 2,
## outSC=Mild-All other} => {factorgroup=生长因子} 0.3250000 0.3250000 1.0
## [4] {factorgroup=趋化因子,
## diseaPH= 2} => {outSC=Mild-All other} 0.3250000 1.0000000 1.0
## [5] {factorgroup=趋化因子,
## outSC=Mild-All other} => {diseaPH= 2} 0.3250000 1.0000000 1.0
## [6] {diseaPH= 2,
## outSC=Mild-All other} => {factorgroup=趋化因子} 0.3250000 0.3250000 1.0
## [7] {illnessday=(5,15],
## sex=M} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [8] {illnessday=(5,15],
## diseaPH= 2} => {sex=M} 0.3333333 1.0000000 1.2
## [9] {sex=M,
## diseaPH= 2} => {illnessday=(5,15]} 0.3333333 0.4000000 1.2
## [10] {illnessday=(5,15],
## sex=M} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [11] {illnessday=(5,15],
## outSC=Mild-All other} => {sex=M} 0.3333333 1.0000000 1.2
## [12] {sex=M,
## outSC=Mild-All other} => {illnessday=(5,15]} 0.3333333 0.4000000 1.2
## [13] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [14] {illnessday=(5,15],
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [15] {diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(5,15]} 0.3333333 0.3333333 1.0
## [16] {illnessday=(15,25],
## age=(35,45]} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [17] {age=(35,45],
## diseaPH= 2} => {illnessday=(15,25]} 0.3333333 0.6666667 1.0
## [18] {illnessday=(15,25],
## diseaPH= 2} => {age=(35,45]} 0.3333333 0.5000000 1.0
## [19] {illnessday=(15,25],
## age=(35,45]} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [20] {age=(35,45],
## outSC=Mild-All other} => {illnessday=(15,25]} 0.3333333 0.6666667 1.0
## [21] {illnessday=(15,25],
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.5000000 1.0
## [22] {age=(35,45],
## sex=M} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [23] {age=(35,45],
## diseaPH= 2} => {sex=M} 0.3333333 0.6666667 0.8
## [24] {sex=M,
## diseaPH= 2} => {age=(35,45]} 0.3333333 0.4000000 0.8
## [25] {age=(35,45],
## sex=M} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [26] {age=(35,45],
## outSC=Mild-All other} => {sex=M} 0.3333333 0.6666667 0.8
## [27] {sex=M,
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.4000000 0.8
## [28] {age=(35,45],
## diseaPH= 2} => {outSC=Mild-All other} 0.5000000 1.0000000 1.0
## [29] {age=(35,45],
## outSC=Mild-All other} => {diseaPH= 2} 0.5000000 1.0000000 1.0
## [30] {diseaPH= 2,
## outSC=Mild-All other} => {age=(35,45]} 0.5000000 0.5000000 1.0
## [31] {illnessday=(15,25],
## sex=M} => {diseaPH= 2} 0.5000000 1.0000000 1.0
## [32] {illnessday=(15,25],
## diseaPH= 2} => {sex=M} 0.5000000 0.7500000 0.9
## [33] {sex=M,
## diseaPH= 2} => {illnessday=(15,25]} 0.5000000 0.6000000 0.9
## [34] {illnessday=(15,25],
## sex=M} => {outSC=Mild-All other} 0.5000000 1.0000000 1.0
## [35] {illnessday=(15,25],
## outSC=Mild-All other} => {sex=M} 0.5000000 0.7500000 0.9
## [36] {sex=M,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.5000000 0.6000000 0.9
## [37] {illnessday=(15,25],
## diseaPH= 2} => {outSC=Mild-All other} 0.6666667 1.0000000 1.0
## [38] {illnessday=(15,25],
## outSC=Mild-All other} => {diseaPH= 2} 0.6666667 1.0000000 1.0
## [39] {diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.6666667 0.6666667 1.0
## [40] {sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.8333333 1.0000000 1.0
## [41] {sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.8333333 1.0000000 1.0
## [42] {diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.8333333 0.8333333 1.0
## [43] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [44] {illnessday=(5,15],
## sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [45] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.3333333 1.0000000 1.2
## [46] {sex=M,
## diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(5,15]} 0.3333333 0.4000000 1.2
## [47] {illnessday=(15,25],
## age=(35,45],
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [48] {illnessday=(15,25],
## age=(35,45],
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [49] {age=(35,45],
## diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.3333333 0.6666667 1.0
## [50] {illnessday=(15,25],
## diseaPH= 2,
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.5000000 1.0
## [51] {age=(35,45],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [52] {age=(35,45],
## sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [53] {age=(35,45],
## diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.3333333 0.6666667 0.8
## [54] {sex=M,
## diseaPH= 2,
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.4000000 0.8
## [55] {illnessday=(15,25],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.5000000 1.0000000 1.0
## [56] {illnessday=(15,25],
## sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.5000000 1.0000000 1.0
## [57] {illnessday=(15,25],
## diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.5000000 0.7500000 0.9
## [58] {sex=M,
## diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.5000000 0.6000000 0.9
## List the mined rules again, ordered by lift
inspect(sort(guize,by = "lift"))
## lhs rhs support confidence lift
## [1] {illnessday=(5,15],
## diseaPH= 2} => {sex=M} 0.3333333 1.0000000 1.2
## [2] {sex=M,
## diseaPH= 2} => {illnessday=(5,15]} 0.3333333 0.4000000 1.2
## [3] {illnessday=(5,15],
## outSC=Mild-All other} => {sex=M} 0.3333333 1.0000000 1.2
## [4] {sex=M,
## outSC=Mild-All other} => {illnessday=(5,15]} 0.3333333 0.4000000 1.2
## [5] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.3333333 1.0000000 1.2
## [6] {sex=M,
## diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(5,15]} 0.3333333 0.4000000 1.2
## [7] {factorgroup=生长因子,
## diseaPH= 2} => {outSC=Mild-All other} 0.3250000 1.0000000 1.0
## [8] {factorgroup=生长因子,
## outSC=Mild-All other} => {diseaPH= 2} 0.3250000 1.0000000 1.0
## [9] {diseaPH= 2,
## outSC=Mild-All other} => {factorgroup=生长因子} 0.3250000 0.3250000 1.0
## [10] {factorgroup=趋化因子,
## diseaPH= 2} => {outSC=Mild-All other} 0.3250000 1.0000000 1.0
## [11] {factorgroup=趋化因子,
## outSC=Mild-All other} => {diseaPH= 2} 0.3250000 1.0000000 1.0
## [12] {diseaPH= 2,
## outSC=Mild-All other} => {factorgroup=趋化因子} 0.3250000 0.3250000 1.0
## [13] {illnessday=(5,15],
## sex=M} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [14] {illnessday=(5,15],
## sex=M} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [15] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [16] {illnessday=(5,15],
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [17] {diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(5,15]} 0.3333333 0.3333333 1.0
## [18] {illnessday=(15,25],
## age=(35,45]} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [19] {age=(35,45],
## diseaPH= 2} => {illnessday=(15,25]} 0.3333333 0.6666667 1.0
## [20] {illnessday=(15,25],
## diseaPH= 2} => {age=(35,45]} 0.3333333 0.5000000 1.0
## [21] {illnessday=(15,25],
## age=(35,45]} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [22] {age=(35,45],
## outSC=Mild-All other} => {illnessday=(15,25]} 0.3333333 0.6666667 1.0
## [23] {illnessday=(15,25],
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.5000000 1.0
## [24] {age=(35,45],
## sex=M} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [25] {age=(35,45],
## sex=M} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [26] {age=(35,45],
## diseaPH= 2} => {outSC=Mild-All other} 0.5000000 1.0000000 1.0
## [27] {age=(35,45],
## outSC=Mild-All other} => {diseaPH= 2} 0.5000000 1.0000000 1.0
## [28] {diseaPH= 2,
## outSC=Mild-All other} => {age=(35,45]} 0.5000000 0.5000000 1.0
## [29] {illnessday=(15,25],
## sex=M} => {diseaPH= 2} 0.5000000 1.0000000 1.0
## [30] {illnessday=(15,25],
## sex=M} => {outSC=Mild-All other} 0.5000000 1.0000000 1.0
## [31] {illnessday=(15,25],
## diseaPH= 2} => {outSC=Mild-All other} 0.6666667 1.0000000 1.0
## [32] {illnessday=(15,25],
## outSC=Mild-All other} => {diseaPH= 2} 0.6666667 1.0000000 1.0
## [33] {diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.6666667 0.6666667 1.0
## [34] {sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.8333333 1.0000000 1.0
## [35] {sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.8333333 1.0000000 1.0
## [36] {diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.8333333 0.8333333 1.0
## [37] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [38] {illnessday=(5,15],
## sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [39] {illnessday=(15,25],
## age=(35,45],
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [40] {illnessday=(15,25],
## age=(35,45],
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [41] {age=(35,45],
## diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.3333333 0.6666667 1.0
## [42] {illnessday=(15,25],
## diseaPH= 2,
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.5000000 1.0
## [43] {age=(35,45],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.3333333 1.0000000 1.0
## [44] {age=(35,45],
## sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.3333333 1.0000000 1.0
## [45] {illnessday=(15,25],
## sex=M,
## diseaPH= 2} => {outSC=Mild-All other} 0.5000000 1.0000000 1.0
## [46] {illnessday=(15,25],
## sex=M,
## outSC=Mild-All other} => {diseaPH= 2} 0.5000000 1.0000000 1.0
## [47] {illnessday=(15,25],
## diseaPH= 2} => {sex=M} 0.5000000 0.7500000 0.9
## [48] {sex=M,
## diseaPH= 2} => {illnessday=(15,25]} 0.5000000 0.6000000 0.9
## [49] {illnessday=(15,25],
## outSC=Mild-All other} => {sex=M} 0.5000000 0.7500000 0.9
## [50] {sex=M,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.5000000 0.6000000 0.9
## [51] {illnessday=(15,25],
## diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.5000000 0.7500000 0.9
## [52] {sex=M,
## diseaPH= 2,
## outSC=Mild-All other} => {illnessday=(15,25]} 0.5000000 0.6000000 0.9
## [53] {age=(35,45],
## diseaPH= 2} => {sex=M} 0.3333333 0.6666667 0.8
## [54] {sex=M,
## diseaPH= 2} => {age=(35,45]} 0.3333333 0.4000000 0.8
## [55] {age=(35,45],
## outSC=Mild-All other} => {sex=M} 0.3333333 0.6666667 0.8
## [56] {sex=M,
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.4000000 0.8
## [57] {age=(35,45],
## diseaPH= 2,
## outSC=Mild-All other} => {sex=M} 0.3333333 0.6666667 0.8
## [58] {sex=M,
## diseaPH= 2,
## outSC=Mild-All other} => {age=(35,45]} 0.3333333 0.4000000 0.8
## These are the rules that were found; rules with a lift greater than 1
## are the meaningful ones.
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Association analysis for the "Mild-First" data
## Print osc_group for reference (presumably the distinct outSC labels
## that the filter below selects from -- confirm where it is defined)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the rows whose outSC is "Mild-First" and coerce them to an
## arules transactions object.
guizdata <- as(
  guanliandata[guanliandata$outSC == "Mild-First", ],
  "transactions"
)
## Frequent items: plot the 30 most frequent items of this subset.
## The STKaiti font family is set so the Chinese labels can be rendered.
par(family = "STKaiti", cex = 0.8)
itemFrequencyPlot(guizdata, main = "Mild-First数据频繁的项", topN = 30)

## Mine association rules whose right-hand side is fixed to the
## outSC=Mild-First item; every other item may only appear on the LHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,     ## minimum support
    confidence = 0.3,  ## minimum confidence
    minlen = 3         ## minimum rule length (LHS + RHS)
  ),
  appearance = list(rhs = "outSC=Mild-First", default = "lhs")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 168
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[98 item(s), 560 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [3 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the rule set: number of rules, rule-length distribution and
## the quality measures (support, confidence, lift)
summary(guize)
## set of 3 rules
##
## rule length distribution (lhs + rhs):sizes
## 3
## 3
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3 3 3 3 3
##
## summary of quality measures:
## support confidence lift
## Min. :0.4286 Min. :1 Min. :1
## 1st Qu.:0.4286 1st Qu.:1 1st Qu.:1
## Median :0.4286 Median :1 Median :1
## Mean :0.4762 Mean :1 Mean :1
## 3rd Qu.:0.5000 3rd Qu.:1 3rd Qu.:1
## Max. :0.5714 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 560 0.3 0.3
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support
## [1] {illnessday=(5,15],diseaPH= 1} => {outSC=Mild-First} 0.4285714
## [2] {sex=M,diseaPH= 1} => {outSC=Mild-First} 0.4285714
## [3] {illnessday=(5,15],sex=M} => {outSC=Mild-First} 0.5714286
## confidence lift
## [1] 1 1
## [2] 1 1
## [3] 1 1
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Interpreting the rules: when the right-hand side is pinned to
## outSC=Mild-First, the rules can be regarded as meaningless for this
## data, because confidence and lift are both 1 (every transaction in the
## subset carries that item). Mine again without constraining the RHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,     ## minimum support
    confidence = 0.3,  ## minimum confidence
    minlen = 3         ## minimum rule length (LHS + RHS)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 168
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[98 item(s), 560 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the unconstrained rule set: number of rules, rule-length
## distribution and the quality measures (support, confidence, lift)
summary(guize)
## set of 9 rules
##
## rule length distribution (lhs + rhs):sizes
## 3
## 9
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3 3 3 3 3
##
## summary of quality measures:
## support confidence lift
## Min. :0.4286 Min. :0.6000 Min. :0.8400
## 1st Qu.:0.4286 1st Qu.:0.6000 1st Qu.:0.8400
## Median :0.4286 Median :0.8000 Median :1.0000
## Mean :0.4762 Mean :0.7778 Mean :0.9556
## 3rd Qu.:0.5714 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :0.5714 Max. :1.0000 Max. :1.1200
##
## mining info:
## data ntransactions support confidence
## guizdata 560 0.3 0.3
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support
## [1] {illnessday=(5,15],sex=M} => {outSC=Mild-First} 0.5714286
## [2] {illnessday=(5,15],outSC=Mild-First} => {sex=M} 0.5714286
## [3] {sex=M,outSC=Mild-First} => {illnessday=(5,15]} 0.5714286
## [4] {illnessday=(5,15],diseaPH= 1} => {outSC=Mild-First} 0.4285714
## [5] {illnessday=(5,15],outSC=Mild-First} => {diseaPH= 1} 0.4285714
## [6] {diseaPH= 1,outSC=Mild-First} => {illnessday=(5,15]} 0.4285714
## [7] {sex=M,diseaPH= 1} => {outSC=Mild-First} 0.4285714
## [8] {sex=M,outSC=Mild-First} => {diseaPH= 1} 0.4285714
## [9] {diseaPH= 1,outSC=Mild-First} => {sex=M} 0.4285714
## confidence lift
## [1] 1.0 1.00
## [2] 0.8 1.12
## [3] 0.8 1.12
## [4] 1.0 1.00
## [5] 0.6 0.84
## [6] 0.6 0.84
## [7] 1.0 1.00
## [8] 0.6 0.84
## [9] 0.6 0.84
## List the mined rules again, ordered by lift
inspect(sort(guize,by = "lift"))
## lhs rhs support
## [1] {illnessday=(5,15],outSC=Mild-First} => {sex=M} 0.5714286
## [2] {sex=M,outSC=Mild-First} => {illnessday=(5,15]} 0.5714286
## [3] {illnessday=(5,15],sex=M} => {outSC=Mild-First} 0.5714286
## [4] {illnessday=(5,15],diseaPH= 1} => {outSC=Mild-First} 0.4285714
## [5] {sex=M,diseaPH= 1} => {outSC=Mild-First} 0.4285714
## [6] {illnessday=(5,15],outSC=Mild-First} => {diseaPH= 1} 0.4285714
## [7] {diseaPH= 1,outSC=Mild-First} => {illnessday=(5,15]} 0.4285714
## [8] {sex=M,outSC=Mild-First} => {diseaPH= 1} 0.4285714
## [9] {diseaPH= 1,outSC=Mild-First} => {sex=M} 0.4285714
## confidence lift
## [1] 0.8 1.12
## [2] 0.8 1.12
## [3] 1.0 1.00
## [4] 1.0 1.00
## [5] 1.0 1.00
## [6] 0.6 0.84
## [7] 0.6 0.84
## [8] 0.6 0.84
## [9] 0.6 0.84
## These are the rules that were found; rules with a lift greater than 1
## are the meaningful ones.
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Association analysis for the "Severe-All other" data
## Print osc_group for reference (presumably the distinct outSC labels
## that the filter below selects from -- confirm where it is defined)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the rows whose outSC is "Severe-All other" and coerce them to
## an arules transactions object.
guizdata <- as(
  guanliandata[guanliandata$outSC == "Severe-All other", ],
  "transactions"
)
## Frequent items: plot the 30 most frequent items of this subset.
## The STKaiti font family is set so the Chinese labels can be rendered.
par(family = "STKaiti", cex = 0.8)
itemFrequencyPlot(guizdata, main = "Severe-All other数据频繁的项", topN = 30)

## Mine association rules whose right-hand side is fixed to the
## outSC=Severe-All other item; every other item may only appear on the LHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,     ## minimum support
    confidence = 0.3,  ## minimum confidence
    minlen = 3         ## minimum rule length (LHS + RHS)
  ),
  appearance = list(rhs = "outSC=Severe-All other", default = "lhs")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 480
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[100 item(s), 1600 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [6 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the rule set: number of rules, rule-length distribution and
## the quality measures (support, confidence, lift)
summary(guize)
## set of 6 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 5 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.167 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3000 Min. :1 Min. :1
## 1st Qu.:0.3375 1st Qu.:1 1st Qu.:1
## Median :0.4500 Median :1 Median :1
## Mean :0.4333 Mean :1 Mean :1
## 3rd Qu.:0.4875 3rd Qu.:1 3rd Qu.:1
## Max. :0.6000 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 1600 0.3 0.3
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence lift
## [1] {age=(75,85],
## sex=M} => {outSC=Severe-All other} 0.30 1 1
## [2] {illnessday=(15,25],
## sex=M} => {outSC=Severe-All other} 0.30 1 1
## [3] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Severe-All other} 0.50 1 1
## [4] {illnessday=(5,15],
## sex=M} => {outSC=Severe-All other} 0.45 1 1
## [5] {sex=M,
## diseaPH= 2} => {outSC=Severe-All other} 0.60 1 1
## [6] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Severe-All other} 0.45 1 1
## Draw the rule set using the "graph" method of plot()
plot(guize,method = "graph")

## Interpreting the rules: when the right-hand side is pinned to
## outSC=Severe-All other, the rules can be regarded as meaningless for this
## data, because confidence and lift are both 1 (every transaction in the
## subset carries that item). Mine again without constraining the RHS.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,     ## minimum support
    confidence = 0.3,  ## minimum confidence
    minlen = 3         ## minimum rule length (LHS + RHS)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 480
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[100 item(s), 1600 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [22 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the unconstrained rule set: number of rules, rule-length
## distribution and the quality measures (support, confidence, lift)
summary(guize)
## set of 22 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 18 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.182 3.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3000 Min. :0.3529 Min. :0.8824
## 1st Qu.:0.3375 1st Qu.:0.7500 1st Qu.:1.0000
## Median :0.4500 Median :0.9000 Median :1.0588
## Mean :0.4364 Mean :0.8392 Mean :1.1402
## 3rd Qu.:0.4875 3rd Qu.:1.0000 3rd Qu.:1.3655
## Max. :0.6000 Max. :1.0000 Max. :1.5000
##
## mining info:
## data ntransactions support confidence
## guizdata 1600 0.3 0.3
## List every mined rule together with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence lift
## [1] {age=(75,85],
## sex=M} => {outSC=Severe-All other} 0.30 1.0000000 1.0000000
## [2] {age=(75,85],
## outSC=Severe-All other} => {sex=M} 0.30 1.0000000 1.1764706
## [3] {sex=M,
## outSC=Severe-All other} => {age=(75,85]} 0.30 0.3529412 1.1764706
## [4] {illnessday=(15,25],
## sex=M} => {outSC=Severe-All other} 0.30 1.0000000 1.0000000
## [5] {illnessday=(15,25],
## outSC=Severe-All other} => {sex=M} 0.30 0.7500000 0.8823529
## [6] {sex=M,
## outSC=Severe-All other} => {illnessday=(15,25]} 0.30 0.3529412 0.8823529
## [7] {illnessday=(5,15],
## diseaPH= 2} => {sex=M} 0.45 0.9000000 1.0588235
## [8] {illnessday=(5,15],
## sex=M} => {diseaPH= 2} 0.45 1.0000000 1.4285714
## [9] {sex=M,
## diseaPH= 2} => {illnessday=(5,15]} 0.45 0.7500000 1.5000000
## [10] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Severe-All other} 0.50 1.0000000 1.0000000
## [11] {illnessday=(5,15],
## outSC=Severe-All other} => {diseaPH= 2} 0.50 1.0000000 1.4285714
## [12] {diseaPH= 2,
## outSC=Severe-All other} => {illnessday=(5,15]} 0.50 0.7142857 1.4285714
## [13] {illnessday=(5,15],
## sex=M} => {outSC=Severe-All other} 0.45 1.0000000 1.0000000
## [14] {illnessday=(5,15],
## outSC=Severe-All other} => {sex=M} 0.45 0.9000000 1.0588235
## [15] {sex=M,
## outSC=Severe-All other} => {illnessday=(5,15]} 0.45 0.5294118 1.0588235
## [16] {sex=M,
## diseaPH= 2} => {outSC=Severe-All other} 0.60 1.0000000 1.0000000
## [17] {diseaPH= 2,
## outSC=Severe-All other} => {sex=M} 0.60 0.8571429 1.0084034
## [18] {sex=M,
## outSC=Severe-All other} => {diseaPH= 2} 0.60 0.7058824 1.0084034
## [19] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Severe-All other} 0.45 1.0000000 1.0000000
## [20] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Severe-All other} => {sex=M} 0.45 0.9000000 1.0588235
## [21] {illnessday=(5,15],
## sex=M,
## outSC=Severe-All other} => {diseaPH= 2} 0.45 1.0000000 1.4285714
## [22] {sex=M,
## diseaPH= 2,
## outSC=Severe-All other} => {illnessday=(5,15]} 0.45 0.7500000 1.5000000
## List the same rules again, ordered by lift (highest lift first)
inspect(sort(guize,by = "lift"))
## lhs rhs support confidence lift
## [1] {sex=M,
## diseaPH= 2} => {illnessday=(5,15]} 0.45 0.7500000 1.5000000
## [2] {sex=M,
## diseaPH= 2,
## outSC=Severe-All other} => {illnessday=(5,15]} 0.45 0.7500000 1.5000000
## [3] {illnessday=(5,15],
## sex=M} => {diseaPH= 2} 0.45 1.0000000 1.4285714
## [4] {illnessday=(5,15],
## outSC=Severe-All other} => {diseaPH= 2} 0.50 1.0000000 1.4285714
## [5] {diseaPH= 2,
## outSC=Severe-All other} => {illnessday=(5,15]} 0.50 0.7142857 1.4285714
## [6] {illnessday=(5,15],
## sex=M,
## outSC=Severe-All other} => {diseaPH= 2} 0.45 1.0000000 1.4285714
## [7] {age=(75,85],
## outSC=Severe-All other} => {sex=M} 0.30 1.0000000 1.1764706
## [8] {sex=M,
## outSC=Severe-All other} => {age=(75,85]} 0.30 0.3529412 1.1764706
## [9] {illnessday=(5,15],
## diseaPH= 2} => {sex=M} 0.45 0.9000000 1.0588235
## [10] {illnessday=(5,15],
## outSC=Severe-All other} => {sex=M} 0.45 0.9000000 1.0588235
## [11] {sex=M,
## outSC=Severe-All other} => {illnessday=(5,15]} 0.45 0.5294118 1.0588235
## [12] {illnessday=(5,15],
## diseaPH= 2,
## outSC=Severe-All other} => {sex=M} 0.45 0.9000000 1.0588235
## [13] {diseaPH= 2,
## outSC=Severe-All other} => {sex=M} 0.60 0.8571429 1.0084034
## [14] {sex=M,
## outSC=Severe-All other} => {diseaPH= 2} 0.60 0.7058824 1.0084034
## [15] {age=(75,85],
## sex=M} => {outSC=Severe-All other} 0.30 1.0000000 1.0000000
## [16] {illnessday=(15,25],
## sex=M} => {outSC=Severe-All other} 0.30 1.0000000 1.0000000
## [17] {illnessday=(5,15],
## diseaPH= 2} => {outSC=Severe-All other} 0.50 1.0000000 1.0000000
## [18] {illnessday=(5,15],
## sex=M} => {outSC=Severe-All other} 0.45 1.0000000 1.0000000
## [19] {sex=M,
## diseaPH= 2} => {outSC=Severe-All other} 0.60 1.0000000 1.0000000
## [20] {illnessday=(5,15],
## sex=M,
## diseaPH= 2} => {outSC=Severe-All other} 0.45 1.0000000 1.0000000
## [21] {sex=M,
## outSC=Severe-All other} => {illnessday=(15,25]} 0.30 0.3529412 0.8823529
## [22] {illnessday=(15,25],
## outSC=Severe-All other} => {sex=M} 0.30 0.7500000 0.8823529
## These are the rules that were found; rules whose lift is greater than 1
## are the meaningful ones
## Draw the rule set with the "graph" plotting method
plot(guize,method = "graph")

## Association analysis for the "Severe-First" outcome group
## (osc_group is printed only to show the available group labels)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the rows of this group. %in% is used instead of == because ==
## yields NA for a missing outSC, and an NA row index would insert an all-NA
## row into the subset that is still counted as a transaction.
guizdata <- guanliandata[guanliandata$outSC %in% "Severe-First", ]
## Recode the selected rows as arules transactions
guizdata <- as(guizdata, "transactions")
## Frequent items: bar chart of the 30 most frequent items in this group
par(family = "STKaiti", cex = 0.8)
itemFrequencyPlot(
  guizdata,
  main = "Severe-First数据频繁的项",
  topN = 30
)

## Mine association rules whose right-hand side is fixed to the group label;
## every other item may only appear on the left-hand side
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,    ## minimum support
    confidence = 0.3, ## minimum confidence
    minlen = 3        ## at least 3 items per rule (lhs + rhs)
  ),
  appearance = list(
    rhs = c("outSC=Severe-First"),
    default = "lhs"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 264
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[98 item(s), 880 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [7 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the mined rule set: rule count, rule lengths, quality measures
summary(guize)
## set of 7 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 5 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.286 3.500 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3636 Min. :1 Min. :1
## 1st Qu.:0.3636 1st Qu.:1 1st Qu.:1
## Median :0.5455 Median :1 Median :1
## Mean :0.5065 Mean :1 Mean :1
## 3rd Qu.:0.5455 3rd Qu.:1 3rd Qu.:1
## Max. :0.8182 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 880 0.3 0.3
## List every mined rule with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence lift
## [1] {illnessday=(5,15],
## age=(75,85]} => {outSC=Severe-First} 0.3636364 1 1
## [2] {age=(75,85],
## sex=M} => {outSC=Severe-First} 0.3636364 1 1
## [3] {illnessday=(5,15],
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1 1
## [4] {sex=M,
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1 1
## [5] {illnessday=(5,15],
## sex=M} => {outSC=Severe-First} 0.8181818 1 1
## [6] {illnessday=(5,15],
## age=(75,85],
## sex=M} => {outSC=Severe-First} 0.3636364 1 1
## [7] {illnessday=(5,15],
## sex=M,
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1 1
## Draw the rule set with the "graph" plotting method
plot(guize,method = "graph")

## With the right-hand side restricted to outSC=Severe-First, the rules found
## above can be regarded as uninformative for this data: every rule has
## confidence 1 and lift 1. So mine again with no restriction on where an
## item may appear.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,    ## minimum support
    confidence = 0.3, ## minimum confidence
    minlen = 3        ## at least 3 items per rule (lhs + rhs)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 264
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[98 item(s), 880 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [29 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the mined rule set: rule count, rule lengths, quality measures
summary(guize)
## set of 29 rules
##
## rule length distribution (lhs + rhs):sizes
## 3 4
## 21 8
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 3.000 3.000 3.276 4.000 4.000
##
## summary of quality measures:
## support confidence lift
## Min. :0.3636 Min. :0.4444 Min. :0.9167
## 1st Qu.:0.3636 1st Qu.:0.6667 1st Qu.:1.0000
## Median :0.5455 Median :1.0000 Median :1.2222
## Mean :0.4922 Mean :0.8602 Mean :1.1054
## 3rd Qu.:0.5455 3rd Qu.:1.0000 3rd Qu.:1.2222
## Max. :0.8182 Max. :1.0000 Max. :1.2222
##
## mining info:
## data ntransactions support confidence
## guizdata 880 0.3 0.3
## List every mined rule with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence lift
## [1] {illnessday=(5,15],
## age=(75,85]} => {sex=M} 0.3636364 1.0000000 1.2222222
## [2] {age=(75,85],
## sex=M} => {illnessday=(5,15]} 0.3636364 1.0000000 1.2222222
## [3] {illnessday=(5,15],
## sex=M} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [4] {illnessday=(5,15],
## age=(75,85]} => {outSC=Severe-First} 0.3636364 1.0000000 1.0000000
## [5] {age=(75,85],
## outSC=Severe-First} => {illnessday=(5,15]} 0.3636364 1.0000000 1.2222222
## [6] {illnessday=(5,15],
## outSC=Severe-First} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [7] {age=(75,85],
## sex=M} => {outSC=Severe-First} 0.3636364 1.0000000 1.0000000
## [8] {age=(75,85],
## outSC=Severe-First} => {sex=M} 0.3636364 1.0000000 1.2222222
## [9] {sex=M,
## outSC=Severe-First} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [10] {illnessday=(5,15],
## diseaPH= 1} => {sex=M} 0.5454545 1.0000000 1.2222222
## [11] {sex=M,
## diseaPH= 1} => {illnessday=(5,15]} 0.5454545 1.0000000 1.2222222
## [12] {illnessday=(5,15],
## sex=M} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## [13] {illnessday=(5,15],
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1.0000000 1.0000000
## [14] {diseaPH= 1,
## outSC=Severe-First} => {illnessday=(5,15]} 0.5454545 0.7500000 0.9166667
## [15] {illnessday=(5,15],
## outSC=Severe-First} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## [16] {sex=M,
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1.0000000 1.0000000
## [17] {diseaPH= 1,
## outSC=Severe-First} => {sex=M} 0.5454545 0.7500000 0.9166667
## [18] {sex=M,
## outSC=Severe-First} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## [19] {illnessday=(5,15],
## sex=M} => {outSC=Severe-First} 0.8181818 1.0000000 1.0000000
## [20] {illnessday=(5,15],
## outSC=Severe-First} => {sex=M} 0.8181818 1.0000000 1.2222222
## [21] {sex=M,
## outSC=Severe-First} => {illnessday=(5,15]} 0.8181818 1.0000000 1.2222222
## [22] {illnessday=(5,15],
## age=(75,85],
## sex=M} => {outSC=Severe-First} 0.3636364 1.0000000 1.0000000
## [23] {illnessday=(5,15],
## age=(75,85],
## outSC=Severe-First} => {sex=M} 0.3636364 1.0000000 1.2222222
## [24] {age=(75,85],
## sex=M,
## outSC=Severe-First} => {illnessday=(5,15]} 0.3636364 1.0000000 1.2222222
## [25] {illnessday=(5,15],
## sex=M,
## outSC=Severe-First} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [26] {illnessday=(5,15],
## sex=M,
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1.0000000 1.0000000
## [27] {illnessday=(5,15],
## diseaPH= 1,
## outSC=Severe-First} => {sex=M} 0.5454545 1.0000000 1.2222222
## [28] {sex=M,
## diseaPH= 1,
## outSC=Severe-First} => {illnessday=(5,15]} 0.5454545 1.0000000 1.2222222
## [29] {illnessday=(5,15],
## sex=M,
## outSC=Severe-First} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## List the same rules again, ordered by lift (highest lift first)
inspect(sort(guize,by = "lift"))
## lhs rhs support confidence lift
## [1] {illnessday=(5,15],
## age=(75,85]} => {sex=M} 0.3636364 1.0000000 1.2222222
## [2] {age=(75,85],
## sex=M} => {illnessday=(5,15]} 0.3636364 1.0000000 1.2222222
## [3] {age=(75,85],
## outSC=Severe-First} => {illnessday=(5,15]} 0.3636364 1.0000000 1.2222222
## [4] {age=(75,85],
## outSC=Severe-First} => {sex=M} 0.3636364 1.0000000 1.2222222
## [5] {illnessday=(5,15],
## diseaPH= 1} => {sex=M} 0.5454545 1.0000000 1.2222222
## [6] {sex=M,
## diseaPH= 1} => {illnessday=(5,15]} 0.5454545 1.0000000 1.2222222
## [7] {illnessday=(5,15],
## outSC=Severe-First} => {sex=M} 0.8181818 1.0000000 1.2222222
## [8] {sex=M,
## outSC=Severe-First} => {illnessday=(5,15]} 0.8181818 1.0000000 1.2222222
## [9] {illnessday=(5,15],
## age=(75,85],
## outSC=Severe-First} => {sex=M} 0.3636364 1.0000000 1.2222222
## [10] {age=(75,85],
## sex=M,
## outSC=Severe-First} => {illnessday=(5,15]} 0.3636364 1.0000000 1.2222222
## [11] {illnessday=(5,15],
## diseaPH= 1,
## outSC=Severe-First} => {sex=M} 0.5454545 1.0000000 1.2222222
## [12] {sex=M,
## diseaPH= 1,
## outSC=Severe-First} => {illnessday=(5,15]} 0.5454545 1.0000000 1.2222222
## [13] {illnessday=(5,15],
## sex=M} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [14] {illnessday=(5,15],
## outSC=Severe-First} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [15] {sex=M,
## outSC=Severe-First} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [16] {illnessday=(5,15],
## sex=M,
## outSC=Severe-First} => {age=(75,85]} 0.3636364 0.4444444 1.2222222
## [17] {illnessday=(5,15],
## age=(75,85]} => {outSC=Severe-First} 0.3636364 1.0000000 1.0000000
## [18] {age=(75,85],
## sex=M} => {outSC=Severe-First} 0.3636364 1.0000000 1.0000000
## [19] {illnessday=(5,15],
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1.0000000 1.0000000
## [20] {sex=M,
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1.0000000 1.0000000
## [21] {illnessday=(5,15],
## sex=M} => {outSC=Severe-First} 0.8181818 1.0000000 1.0000000
## [22] {illnessday=(5,15],
## age=(75,85],
## sex=M} => {outSC=Severe-First} 0.3636364 1.0000000 1.0000000
## [23] {illnessday=(5,15],
## sex=M,
## diseaPH= 1} => {outSC=Severe-First} 0.5454545 1.0000000 1.0000000
## [24] {illnessday=(5,15],
## sex=M} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## [25] {diseaPH= 1,
## outSC=Severe-First} => {illnessday=(5,15]} 0.5454545 0.7500000 0.9166667
## [26] {illnessday=(5,15],
## outSC=Severe-First} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## [27] {diseaPH= 1,
## outSC=Severe-First} => {sex=M} 0.5454545 0.7500000 0.9166667
## [28] {sex=M,
## outSC=Severe-First} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## [29] {illnessday=(5,15],
## sex=M,
## outSC=Severe-First} => {diseaPH= 1} 0.5454545 0.6666667 0.9166667
## These are the rules that were found; rules whose lift is greater than 1
## are the meaningful ones
## Draw the rule set with the "graph" plotting method
plot(guize,method = "graph")

## Association analysis for the "Severe-Last" outcome group
## (osc_group is printed only to show the available group labels)
osc_group
## [1] "Fatal-All other" "Fatal-First" "Fatal-Last"
## [4] "Healthy-First" "Mild-All other" "Mild-First"
## [7] "Severe-All other" "Severe-First" "Severe-Last"
## Keep only the rows of this group. %in% is used instead of == because ==
## yields NA for a missing outSC, and an NA row index would insert an all-NA
## row into the subset that is still counted as a transaction.
guizdata <- guanliandata[guanliandata$outSC %in% "Severe-Last", ]
## Recode the selected rows as arules transactions
guizdata <- as(guizdata, "transactions")
## Frequent items: bar chart of the 30 most frequent items in this group
par(family = "STKaiti", cex = 0.8)
itemFrequencyPlot(
  guizdata,
  main = "Severe-Last数据频繁的项",
  topN = 30
)

## Mine association rules whose right-hand side is fixed to the group label;
## every other item may only appear on the left-hand side
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,    ## minimum support
    confidence = 0.3, ## minimum confidence
    minlen = 3        ## at least 3 items per rule (lhs + rhs)
  ),
  appearance = list(
    rhs = c("outSC=Severe-Last"),
    default = "lhs"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 264
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[102 item(s), 880 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [3 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the mined rule set: rule count, rule lengths, quality measures
summary(guize)
## set of 3 rules
##
## rule length distribution (lhs + rhs):sizes
## 3
## 3
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3 3 3 3 3
##
## summary of quality measures:
## support confidence lift
## Min. :0.3636 Min. :1 Min. :1
## 1st Qu.:0.3636 1st Qu.:1 1st Qu.:1
## Median :0.3636 Median :1 Median :1
## Mean :0.4242 Mean :1 Mean :1
## 3rd Qu.:0.4545 3rd Qu.:1 3rd Qu.:1
## Max. :0.5455 Max. :1 Max. :1
##
## mining info:
## data ntransactions support confidence
## guizdata 880 0.3 0.3
## List every mined rule with its support, confidence and lift
inspect(guize)
## lhs rhs support confidence
## [1] {age=(75,85],sex=M} => {outSC=Severe-Last} 0.3636364 1
## [2] {illnessday=(25,35],sex=M} => {outSC=Severe-Last} 0.3636364 1
## [3] {sex=M,diseaPH= 4} => {outSC=Severe-Last} 0.5454545 1
## lift
## [1] 1
## [2] 1
## [3] 1
## Draw the rule set with the "graph" plotting method
plot(guize,method = "graph")

## With the right-hand side restricted to outSC=Severe-Last, the rules found
## above can be regarded as uninformative for this data: every rule has
## confidence 1 and lift 1. So mine again with no restriction on where an
## item may appear.
guize <- apriori(
  guizdata,
  parameter = list(
    support = 0.3,    ## minimum support
    confidence = 0.3, ## minimum confidence
    minlen = 3        ## at least 3 items per rule (lhs + rhs)
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.3 3
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 264
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[102 item(s), 880 transaction(s)] done [0.00s].
## sorting and recoding items ... [7 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [9 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Summarise the mined rule set: rule count, rule lengths, quality measures
summary(guize)
## set of 9 rules
##
## rule length distribution (lhs + rhs):sizes
## 3
## 9
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3 3 3 3 3 3
##
## summary of quality measures:
## support confidence lift
## Min. :0.3636 Min. :0.4444 Min. :0.9778
## 1st Qu.:0.3636 1st Qu.:0.6667 1st Qu.:1.0000
## Median :0.3636 Median :0.8571 Median :1.0000
## Mean :0.4242 Mean :0.8014 Mean :1.0550
## 3rd Qu.:0.5455 3rd Qu.:1.0000 3rd Qu.:1.0476
## Max. :0.5455 Max. :1.0000 Max. :1.2222
##
## mining info:
## data ntransactions support confidence
## guizdata 880 0.3 0.3
## List every mined rule with its support, confidence and lift
inspect(guize)
## lhs rhs
## [1] {age=(75,85],sex=M} => {outSC=Severe-Last}
## [2] {age=(75,85],outSC=Severe-Last} => {sex=M}
## [3] {sex=M,outSC=Severe-Last} => {age=(75,85]}
## [4] {illnessday=(25,35],sex=M} => {outSC=Severe-Last}
## [5] {illnessday=(25,35],outSC=Severe-Last} => {sex=M}
## [6] {sex=M,outSC=Severe-Last} => {illnessday=(25,35]}
## [7] {sex=M,diseaPH= 4} => {outSC=Severe-Last}
## [8] {diseaPH= 4,outSC=Severe-Last} => {sex=M}
## [9] {sex=M,outSC=Severe-Last} => {diseaPH= 4}
## support confidence lift
## [1] 0.3636364 1.0000000 1.0000000
## [2] 0.3636364 1.0000000 1.2222222
## [3] 0.3636364 0.4444444 1.2222222
## [4] 0.3636364 1.0000000 1.0000000
## [5] 0.3636364 0.8000000 0.9777778
## [6] 0.3636364 0.4444444 0.9777778
## [7] 0.5454545 1.0000000 1.0000000
## [8] 0.5454545 0.8571429 1.0476190
## [9] 0.5454545 0.6666667 1.0476190
## List the same rules again, ordered by lift (highest lift first)
inspect(sort(guize,by = "lift"))
## lhs rhs
## [1] {age=(75,85],outSC=Severe-Last} => {sex=M}
## [2] {sex=M,outSC=Severe-Last} => {age=(75,85]}
## [3] {diseaPH= 4,outSC=Severe-Last} => {sex=M}
## [4] {sex=M,outSC=Severe-Last} => {diseaPH= 4}
## [5] {age=(75,85],sex=M} => {outSC=Severe-Last}
## [6] {illnessday=(25,35],sex=M} => {outSC=Severe-Last}
## [7] {sex=M,diseaPH= 4} => {outSC=Severe-Last}
## [8] {illnessday=(25,35],outSC=Severe-Last} => {sex=M}
## [9] {sex=M,outSC=Severe-Last} => {illnessday=(25,35]}
## support confidence lift
## [1] 0.3636364 1.0000000 1.2222222
## [2] 0.3636364 0.4444444 1.2222222
## [3] 0.5454545 0.8571429 1.0476190
## [4] 0.5454545 0.6666667 1.0476190
## [5] 0.3636364 1.0000000 1.0000000
## [6] 0.3636364 1.0000000 1.0000000
## [7] 0.5454545 1.0000000 1.0000000
## [8] 0.3636364 0.8000000 0.9777778
## [9] 0.3636364 0.4444444 0.9777778
## These are the rules that were found; rules whose lift is greater than 1
## are the meaningful ones
## Draw the rule set with the "graph" plotting method
plot(guize,method = "graph")
