Simple Arthritis Data for Association Rule Mining

The unblinded (解盲) medicine trial records three different improvement outcomes: None, Some, and Marked. The question is which combination of age and sex may lead to Marked improvement.

Load the data and convert numerical data to categorical data

library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
# Load the Arthritis dataset into the workspace (presumably supplied by the
# vcd package attached above), copy it to a working data frame, and preview
# the first rows.
data("Arthritis")
df <- Arthritis
head(df)
##   ID Treatment  Sex Age Improved
## 1 57   Treated Male  27     Some
## 2 46   Treated Male  29     None
## 3 77   Treated Male  30     None
## 4 17   Treated Male  32   Marked
## 5 36   Treated Male  46   Marked
## 6 23   Treated Male  58   Marked
# Discretise Age into three right-closed brackets: (0,40], (40,60], (60,80].
df[["AgeGroup"]] <- cut(df[["Age"]], breaks = c(0, 40, 60, 80))
# Keep only the categorical columns used for rule mining (ID and the raw
# numeric Age are dropped).
keeps <- c("Treatment", "Sex", "AgeGroup", "Improved")
newdata <- df[, keeps]

Finding The Rules

require(arules)
## Loading required package: arules
## Warning: package 'arules' was built under R version 3.3.3
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
# Mine association rules with Apriori. Only the three Improved outcomes may
# appear as the consequent (rhs); every other item is restricted to the
# antecedent (lhs).
rule <- apriori(
  newdata,
  # minimum rule length, minimum support and minimum confidence
  parameter = list(minlen = 3, supp = 0.1, conf = 0.7),
  appearance = list(
    default = "lhs",
    rhs = c("Improved=Some", "Improved=None", "Improved=Marked")
  )
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.7    0.1    1 none FALSE            TRUE       5     0.1      3
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 8 
## 
## set item appearances ...[3 item(s)] done [0.00s].
## set transactions ...[10 item(s), 84 transaction(s)] done [0.00s].
## sorting and recoding items ... [10 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [4 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Print the mined rules together with their support, confidence and lift.
inspect(rule)
##     lhs                    rhs                 support confidence     lift
## [1] {Treatment=Placebo,                                                   
##      Sex=Male}          => {Improved=None}   0.1190476  0.9090909 1.818182
## [2] {Treatment=Treated,                                                   
##      AgeGroup=(40,60]}  => {Improved=Marked} 0.1666667  0.7777778 2.333333
## [3] {Treatment=Placebo,                                                   
##      AgeGroup=(40,60]}  => {Improved=None}   0.2023810  0.7391304 1.478261
## [4] {Treatment=Treated,                                                   
##      Sex=Female,                                                          
##      AgeGroup=(40,60]}  => {Improved=Marked} 0.1309524  0.7857143 2.357143
# Rank the rules from highest to lowest lift, then print the ranked set.
sort.rule <- sort(rule, by = "lift", decreasing = TRUE)
inspect(sort.rule)
##     lhs                    rhs                 support confidence     lift
## [1] {Treatment=Treated,                                                   
##      Sex=Female,                                                          
##      AgeGroup=(40,60]}  => {Improved=Marked} 0.1309524  0.7857143 2.357143
## [2] {Treatment=Treated,                                                   
##      AgeGroup=(40,60]}  => {Improved=Marked} 0.1666667  0.7777778 2.333333
## [3] {Treatment=Placebo,                                                   
##      Sex=Male}          => {Improved=None}   0.1190476  0.9090909 1.818182
## [4] {Treatment=Placebo,                                                   
##      AgeGroup=(40,60]}  => {Improved=None}   0.2023810  0.7391304 1.478261

Visualization of the Association

require(arulesViz)
## Loading required package: arulesViz
## Warning: package 'arulesViz' was built under R version 3.3.3
# Default arulesViz plot of the lift-sorted rules.
plot(sort.rule)

# Graph view of the rules; the figure title previously misspelled
# "Arthritis" as "Arithritis".
plot(
  sort.rule,
  method = "graph",
  control = list(
    edgeCol = 2,
    type = "items",
    main = "Arthritis Treatment Study"
  )
)

# Grouped view of the rules, drawn with colour 4.
plot(sort.rule, method = "grouped", control = list(col = 4))

# Parallel-coordinates view using a four-colour rainbow palette; the title
# spelling is corrected here as well.
plot(
  sort.rule,
  method = "paracoord",
  control = list(col = rainbow(4), main = "Arthritis Treatment Study")
)

# Mine frequent itemsets with Eclat: at least two items per set and a
# minimum support of 0.2.
itemsets <- eclat(
  newdata,
  parameter = list(support = 0.2, minlen = 2)
)
## Eclat
## 
## parameter specification:
##  tidLists support minlen maxlen            target   ext
##     FALSE     0.2      2     10 frequent itemsets FALSE
## 
## algorithmic control:
##  sparse sort verbose
##       7   -2    TRUE
## 
## Absolute minimum support count: 16 
## 
## create itemset ... 
## set transactions ...[10 item(s), 84 transaction(s)] done [0.00s].
## sorting and recoding items ... [8 item(s)] done [0.00s].
## creating bit matrix ... [8 row(s), 84 column(s)] done [0.00s].
## writing  ... [15 set(s)] done [0.00s].
## Creating S4 object  ... done [0.00s].
# Graph view of the frequent itemsets; the figure title previously misspelled
# "Arthritis" as "Arithritis".
plot(
  itemsets,
  method = "graph",
  control = list(
    edgeCol = 2,
    type = "items",
    main = "Arthritis Treatment Study"
  )
)