1. 연관 규칙 분석

1.1 키워드 추출

library(rJava)
library(KoNLP)
library(rvest)
# Read the sample news article, one element per line. The file is opened
# through an explicit connection so the EUC-KR text is re-encoded on input.
con <- file("data/sample_news.txt", encoding = "EUC-KR")
# readLines() closes a connection it opened itself but does not destroy it,
# so release it explicitly -- even if reading fails.
x <- tryCatch(readLines(con), finally = close(con))
x

library(KoNLP)
# Extract the nouns of every input line; the result is a list with one
# character vector per line of `x`.
noun <- lapply(x, extractNoun)
# Drop single-character tokens. The filter must run over the extracted nouns
# (`noun`), not over an undefined object.
noun <- lapply(noun, function(words) words[nchar(words) > 1])
# Flatten to a single vector and count how often each noun occurs.
nounVec <- unlist(noun)
nounFreq <- table(nounVec)

# The 20 most frequent nouns serve as the keyword set.
keywords <- names(head(sort(nounFreq, decreasing = TRUE), 20))

1.2 키워드 행렬 만들기

library(makeDTM)
# Wrap the lines read into `x` in a data frame whose text column is named
# "txt", which is the column name handed to makeDTM() via TEXT.name below.
# Kept as character rather than factor so the text is processed as strings.
docs <- data.frame(txt = x, stringsAsFactors = FALSE)
# Build the document-by-keyword matrix restricted to the extracted keywords.
dtm <- makeDTM(docs, key = keywords, TEXT.name = "txt")
# Data-frame view of the matrix, printed for inspection.
dtm.df <- as.data.frame(dtm)
dtm.df

1.3 출현 여부만 기록

# Binarise the keyword matrix: a cell becomes 1 when its value is greater than
# the mean of the per-column means of `dtm`, and 0 otherwise.
dtm.abovemean <- ifelse(
  dtm > mean(apply(dtm, 2, mean)),
  1,
  0
)
dtm.abovemean

library(arules)
# Mine association rules from the binarised matrix with the support and
# confidence parameters set to 0.3 and 0.8 respectively.
rules <- apriori(
  dtm.abovemean,
  parameter = list(supp = 0.3, conf = 0.8)
)
# Print the rules in sort()'s default order.
inspect(sort(rules))

1.4 특정 규칙 보기

# Narrow the rule set to rules whose left-hand side contains an item that
# partially matches "경제" (%pin%) and whose confidence is above 0.7.
rules2 <- subset(
  rules,
  subset = lhs %pin% "경제" & confidence > 0.7
)
# Print the selected rules in sort()'s default order.
inspect(sort(rules2))

1.5 시각화 — 안됨

library(arulesViz)
# Draw the mined rules as a graph, requesting blue nodes.
# NOTE(review): the section heading marks this step as not working; the cause
# is not visible here. Possibly `nodeCol` is not accepted by the plotting
# engine in the installed arulesViz version -- confirm against its docs.
plot(
  rules,
  method = "graph",
  nodeCol = "blue"
)