library(KoNLP)
library(arules)
library(arulesViz)
# Read the paper titles (one title per line) from a UTF-8 encoded text file.
f <- file("http://dl.dropbox.com/u/8686172/paper_title_utf8.txt", encoding = "UTF-8")
# Close the connection even when reading fails, so it is never left dangling.
fl <- tryCatch(readLines(f), finally = close(f))
# Turn every title line into one candidate transaction: the nouns that
# KoNLP's extractNoun() finds in that line.
tran <- Map(extractNoun, fl)
# Drop duplicated noun sets, then duplicated nouns inside each set.
# lapply() (not sapply()) keeps the result a list even when every set happens
# to have the same length; sapply() would collapse it into a vector/matrix.
tran <- unique(tran)
tran <- lapply(tran, unique)
# Keep only Hangul nouns that are 2 to 4 characters long.
tran <- lapply(tran, function(nouns) {
  Filter(function(noun) {
    nchar(noun) <= 4 && nchar(noun) > 1 && is.hangul(noun)
  }, nouns)
})
# A transaction needs at least two items to contribute to a rule.
tran <- Filter(function(nouns) {
  length(nouns) >= 2
}, tran)
# Label the transactions Tr1, Tr2, ...; seq_along() stays empty when no
# transaction survives the filters (1:length() would produce c(1, 0)).
names(tran) <- paste0("Tr", seq_along(tran))
wordtran <- as(tran, "transactions")
# Mine association rules containing at least two items, with a minimum
# support of 0.02 and a minimum confidence of 0.9.
ares <- apriori(
  wordtran,
  parameter = list(support = 0.02, confidence = 0.9, minlen = 2)
)
##
## parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## 0.9 0.1 1 none FALSE TRUE 0.02 2 10
## target ext
## rules FALSE
##
## algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09) (c) 1996-2004 Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[1566 item(s), 1261 transaction(s)] done [0.00s].
## sorting and recoding items ... [52 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [29 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# remove subsets
# Order the rules by lift so that the comparison below always favours the
# rule that is ranked first.
ares.sorted <- sort(ares, by = "lift")
# Pairwise subset relation between the sorted rules. as.matrix() forces a
# dense logical matrix: recent arules releases return a sparse matrix here,
# which cannot hold the NA mask applied on the next line.
subset.matrix <- as.matrix(is.subset(ares.sorted, ares.sorted))
# Mask the diagonal and the lower triangle so each rule is only compared with
# the rules ranked above it.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- NA
# A rule is redundant when at least one higher-ranked rule is a subset of it.
redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
which(redundant)
## [1] 27
ares.pruned <- ares.sorted[!redundant]
# Various plots
# Default arulesViz plot of the mined rules.
plot(ares)
# Grouped matrix view. The method name is "grouped"; the previous spelling
# "groupped" was rejected with "Unknown method".
plot(ares, method = "grouped")
# Graph-based views: the default graph, then the variant with type = "items".
plot(ares, method = "graph")
plot(ares, method = "graph", control = list(type = "items"))
# Parallel coordinates plot with reordering enabled.
plot(ares, method = "paracoord", control = list(reorder = TRUE))