Multicultural paper title analysis

library(KoNLP)
library(arules)
library(arulesViz)

# Read the raw title lines from the remote UTF-8 text file.
f <- file("http://dl.dropbox.com/u/8686172/paper_title_utf8.txt", encoding = "UTF-8")
# Close the connection even if the read fails, so it is never leaked.
fl <- tryCatch(readLines(f), finally = close(f))
# Turn every input line into a set of words and convert the result to an
# arules "transactions" object.
#
# lapply() is used instead of sapply(): sapply() simplifies its result to a
# matrix/vector whenever all elements happen to have the same length, which
# would silently break the list-based steps below.
tran <- Map(extractNoun, fl)
tran <- unique(tran)          # drop duplicated word lists
tran <- lapply(tran, unique)  # each word at most once per transaction
# Keep words of 2-4 characters for which is.hangul() is TRUE. The test is
# applied one word at a time via Filter(), so the scalar `&&` is appropriate.
tran <- lapply(tran, function(x) {
    Filter(function(y) {
        nchar(y) <= 4 && nchar(y) > 1 && is.hangul(y)
    }, x)
})
# A transaction needs at least two items to contribute to a rule.
tran <- Filter(function(x) {
    length(x) >= 2
}, tran)
# sprintf() + seq_along() yields character(0) for an empty list, whereas
# 1:length(tran) would produce the bogus sequence c(1, 0).
names(tran) <- sprintf("Tr%d", seq_along(tran))
wordtran <- as(tran, "transactions")
# Mine association rules: at least 2 items per rule, minimum support 0.02,
# minimum confidence 0.9.
ares <- apriori(
    wordtran,
    parameter = list(
        support = 0.02,
        confidence = 0.9,
        minlen = 2
    )
)
## 
## parameter specification:
##  confidence minval smax arem  aval originalSupport support minlen maxlen
##         0.9    0.1    1 none FALSE            TRUE    0.02      2     10
##  target   ext
##   rules FALSE
## 
## algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## apriori - find association rules with the apriori algorithm
## version 4.21 (2004.05.09)        (c) 1996-2004   Christian Borgelt
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[1566 item(s), 1261 transaction(s)] done [0.00s].
## sorting and recoding items ... [52 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [29 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

# Remove redundant rules: with rules ordered by decreasing lift, a rule is
# flagged when some higher-ranked rule is a subset of it.
ares.sorted <- sort(ares, by = "lift")
# Coerce to a dense logical matrix: newer arules versions return a sparse
# matrix from is.subset(), which does not accept the NA assignment below.
subset.matrix <- as.matrix(is.subset(ares.sorted, ares.sorted))
# Ignore the diagonal and lower triangle so each pair is only compared in the
# "higher-ranked rule vs. lower-ranked rule" direction.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- NA
redundant <- colSums(subset.matrix, na.rm = TRUE) >= 1
which(redundant)
## [1] 27

ares.pruned <- ares.sorted[!redundant]

다양한 플로팅

# Plot the mined rule set using the plot method's default settings.
plot(ares)

plot of chunk plot


# Grouped-matrix view of the rules. The method name is "grouped"; the
# previous spelling "groupped" was rejected with "Unknown method".
plot(ares, method = "grouped")

# Draw the rule set with the "graph" plotting method.
plot(ares, method = "graph")

plot of chunk plot


# Same "graph" method, passing type = "items" through the control list.
# NOTE(review): presumably this makes individual items the graph vertices;
# confirm that the installed arulesViz version still accepts this option.
plot(ares, method = "graph", control = list(type = "items"))

plot of chunk plot


# Draw the rule set with the "paracoord" method, with reorder = TRUE passed
# through the control list.
plot(ares, method = "paracoord", control = list(reorder = TRUE))

plot of chunk plot