# Install (only when missing) and attach every package this script needs.
#
# Availability is tested with requireNamespace(): the first argument of
# installed.packages() is a library path (lib.loc), not a package name, so
# installed.packages("pkg") never finds the package and would trigger a
# reinstall on every run.
#
# Packages are processed one at a time, in the listed order.
# NOTE(review): KoNLP may no longer be available from CRAN -- confirm that
# install.packages("KoNLP") still works in the target environment.
required_packages <- c(
  "rJava",
  "memoise",
  "KoNLP",
  "tm",
  "wordcloud",
  "dplyr",
  "stringr",
  "RColorBrewer" # colour palettes used to colour the words
)
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# Text mining based on the example from the book.
# Read the tweet data from a UTF-8 encoded CSV file with a header row.
twitter <- read.csv(
  "twitter.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
)
head(twitter)
# View() opens a data viewer, so only call it in an interactive session.
if (interactive()) {
  View(head(twitter))
}
# Give the Korean column headers short ASCII names.
twitter <- dplyr::rename(
  twitter,
  no = 번호,
  id = 계정이름,
  date = 작성일,
  tw = 내용
)

# txt <- stringr::str_replace_all(txt,"\\W"," ")
# head(txt)
# txt <- stringr::str_replace_all(txt,"[^[:alpha:]]"," ")
# txt

# Strip special characters from the tweet text.
# Pass 1: every non-word character (regex \W) in the tw column becomes a space.
twitter[["tw"]] <- stringr::str_replace_all(
  string = twitter[["tw"]],
  pattern = "\\W",
  replacement = " "
)
head(twitter[["tw"]])
# Pass 2: every non-alphabetic character becomes a space.
# NOTE: from this assignment on, `twitter` holds the cleaned character vector
# rather than the data frame.
twitter <- stringr::str_replace_all(
  string = twitter[["tw"]],
  pattern = "[^[:alpha:]]",
  replacement = " "
)


# Extract nouns from the tweets.

# Step 3. Keep nouns only: run KoNLP::extractNoun on each tweet.
# lapply() always returns a list with one element per tweet, whereas sapply()
# can collapse the result to a vector or matrix depending on how many nouns
# each tweet yields. unname() drops any element names so the list is purely
# positional.
nouns <- unname(lapply(twitter, KoNLP::extractNoun))

# Flatten the list of extracted nouns into one character vector and tabulate
# how often each word occurs.
wordcount <- table(unlist(nouns))

# Frequency table as a data frame (columns: word, freq), keeping only words
# that are at least two characters long.
df_word <- wordcount %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  dplyr::rename(word = Var1, freq = Freq) %>%
  dplyr::filter(nchar(word) >= 2)

# The 20 most frequent words, highest count first.
top20 <- head(dplyr::arrange(df_word, dplyr::desc(freq)), 20)
top20


# Bar chart of word frequencies.
library(ggplot2)

# Words sorted by ascending frequency; after coord_flip() the most frequent
# word ends up at the top of the chart. The variable is deliberately not
# called `order`, which would shadow base::order().
word_order <- arrange(top20, freq)$word

# Upper bound of the frequency axis. ggplot2 drops values that fall outside
# the axis limits, so the bound grows with the data instead of being fixed;
# 2500 remains the minimum.
freq_upper <- max(2500, top20$freq * 1.1)

ggplot(data = top20, aes(x = word, y = freq)) +
  ylim(0, freq_upper) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = word_order) +
  geom_text(aes(label = freq), hjust = -0.3)


# Draw the word cloud.
# Words need a frequency of at least 10 to be eligible and at most 200 words
# are drawn. random.order = FALSE places words by decreasing frequency,
# rot.per is the share of rotated words, and scale gives the largest and
# smallest text size. Colours come from the 8-colour "Dark2" palette.
wordcloud::wordcloud(
  words = df_word[["word"]],
  freq = df_word[["freq"]],
  scale = c(6, 0.2),
  min.freq = 10,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.1,
  colors = RColorBrewer::brewer.pal(n = 8, name = "Dark2")
)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmlmKCJySmF2YSIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoInJKYXZhIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygickphdmEiKQ0KbGlicmFyeShySmF2YSkNCmlmKCJtZW1vaXNlIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygibWVtb2lzZSIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIm1lbW9pc2UiKQ0KbGlicmFyeShtZW1vaXNlKQ0KaWYoIktvTkxQIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygiS29OTFAiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJLb05MUCIpDQpsaWJyYXJ5KEtvTkxQKQ0KaWYoInRtIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygidG0iKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJ0bSIpDQpsaWJyYXJ5KHRtKQ0KaWYoIndvcmRjbG91ZCIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIndvcmRjbG91ZCIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIndvcmRjbG91ZCIpDQpsaWJyYXJ5KHdvcmRjbG91ZCkNCmlmKCJkcGx5ciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoImRwbHlyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQ0KbGlicmFyeShkcGx5cikNCmlmKCJzdHJpbmdyIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygic3RyaW5nciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoInN0cmluZ3IiKQ0KbGlicmFyeShzdHJpbmdyKQ0KIyDquIDsnpDsl5Ag7IOJIOyehe2YgOykjA0KaWYoIlJDb2xvckJyZXdlciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIlJDb2xvckJyZXdlciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIlJDb2xvckJyZXdlciIpDQpsaWJyYXJ5KFJDb2xvckJyZXdlcikNCuyxheyXkCDsnojripQg64K07Jqp7Jy866GcIHRleHQgbWluaW5nDQp0d2l0dGVyIDwtIHJlYWQuY3N2KCJ0d2l0dGVyLmNzdiIsIGhlYWRlcj0gVCwgc3RyaW5nc0FzRmFjdG9ycyA9IEYsIGZpbGVFbmNvZGluZyA9ICJVVEYtOCIpDQpoZWFkKHR3aXR0ZXIpDQpWaWV3KGhlYWQodHdpdHRlcikpDQp0d2l0dGVyIDwtIGRwbHlyOjpyZW5hbWUoDQogIHR3aXR0ZXIsDQogIG5vID0g67KI7Zi4LA0KICBpZCA9IOqzhOygleydtOumhCwNCiAgZGF0ZSA9IOyekeyEseydvCwNCiAgdHcgPeuCtOyaqQ0KKSANCg0KIyB0eHQgPC0gc3RyaW5ncjo6c3RyX3JlcGxhY2VfYWxsKHR4dCwiXFxXIiwiICIpDQojIGhlYWQodHh0KQ0KIyB0eHQgPC0gc3RyaW5ncjo6c3RyX3JlcGxhY2VfYWxsKHR4dCwiW15bOmFscGhhOl1dIiwiICIpDQojIHR4dA0KDQoj7Yq57IiY66y47J6QIOygnOqxsA0KdHdpdHRlciR0dyA8LSBzdHJpbmdyOjpzdHJfcmVwbGFjZV9hbGwodHdpdHRlciR0dywiXFxXIiwiICIpDQpoZWFkKHR3aXR0ZXIkdHcpDQp0d2l0dGVyIDwtIHN0cmluZ3I6OnN0cl9yZXBsYWNlX2FsbCh0d2l0dGVyJHR3LCJbXls6YWxwaGE6XV0iLCIgIikNCg0KDQoj
IO2KuOycl+yXkOyEnCDrqoXsgqwg7LaU7LacDQoNCiMgc3RlcCAzLiDrqoXsgqzrp4wg7LaU7LacDQpub3VucyA8LSANCiAgc2FwcGx5KA0KICAgIHR3aXR0ZXIsDQogICAgS29OTFA6OmV4dHJhY3ROb3VuLA0KICAgIFVTRS5OQU1FUyA9IEYNCiAgKQ0KDQojIOy2lOy2nO2VnCDrqoXsgqwgbGlzdOulvCDrrLjsnpDsl7Qg67Kh7YSw66GcIOuzgO2ZmCwg64uo7Ja067OEIOu5iOuPhO2RnCDsnpHshLENCndvcmRjb3VudCA8LSB0YWJsZSh1bmxpc3Qobm91bnMpKQ0KZGZfd29yZCA8LSBhcy5kYXRhLmZyYW1lKHdvcmRjb3VudCxzdHJpbmdzQXNGYWN0b3JzID0gRikNCmRmX3dvcmQgPC0gZHBseXI6OnJlbmFtZShkZl93b3JkLA0KICAgICAgICAgICAgICAgICAgICAgICAgIHdvcmQgPSBWYXIxLA0KICAgICAgICAgICAgICAgICAgICAgICAgIGZyZXEgPSBGcmVxKQ0KDQojIOuRkCDquIDsnpAg7J207IOBIOuLqOyWtOunjCDstpTstpwNCmRmX3dvcmQgPC0gZmlsdGVyKGRmX3dvcmQsbmNoYXIod29yZCk+PTIpDQp0b3AyMCA8LSBkZl93b3JkICU+JSANCiAgYXJyYW5nZShkZXNjKGZyZXEpKSAlPiUgDQogIGhlYWQoMjApDQp0b3AyMA0KDQoNCiMg64uo7Ja0IOu5iOuPhCDrp4nrjIAg6re4656Y7ZSEIOunjOuTpOq4sA0KbGlicmFyeShnZ3Bsb3QyKQ0KDQpvcmRlciA8LSBhcnJhbmdlKHRvcDIwLCBmcmVxKSR3b3JkDQoNCmdncGxvdChkYXRhID0gdG9wMjAsIGFlcyh4ID0gd29yZCwgeSA9IGZyZXEpKSArIHlsaW0oMCwyNTAwKSArIA0KICBnZW9tX2NvbCgpKw0KICBjb29yZF9mbGlwKCkrDQogIHNjYWxlX3hfZGlzY3JldGUobGltaXQgPSBvcmRlcikgKw0KICBnZW9tX3RleHQoYWVzKGxhYmVsID0gZnJlcSksaGp1c3QgPSAtMC4zKQ0KDQoNCiMg7JuM65OcIO2BtOudvOyasOuTnCDrp4zrk6TquLANCg0Kd29yZGNsb3VkKHdvcmRzID0gZGZfd29yZCR3b3JkLA0KICAgICAgICAgIGZyZXEgPSBkZl93b3JkJGZyZXEsDQogICAgICAgICAgbWluLmZyZXEgPSAxMCwNCiAgICAgICAgICBtYXgud29yZHMgPSAyMDAsDQogICAgICAgICAgcmFuZG9tLm9yZGVyID0gRiwNCiAgICAgICAgICByb3QucGVyPSAuMSwNCiAgICAgICAgICBzY2FsZSA9IGMoNiwwLjIpLA0KICAgICAgICAgIGNvbG9ycyA9IGJyZXdlci5wYWwoOCwiRGFyazIiKSkNCmBgYA0K