# Packages required by this script, in the order they are attached.
required_packages <- c(
  "rJava", "memoise", "stringr", "KoNLP",
  "wordcloud", "RColorBrewer", "dplyr", "readxl"
)

# Install only what is missing. The package names are looked up in the row
# names of installed.packages() for the default library paths. Calling
# installed.packages("pkg") would instead treat "pkg" as a library directory
# (`lib.loc`), find nothing, and trigger a reinstall on every run.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach each package; library() stops with an error if one is still
# unavailable after the install step.
for (pkg in required_packages) {
  library(pkg, character.only = TRUE)
}

# Work from the course data folder so the workbook can be opened by its bare
# file name.
setwd('C:\\Users\\Administrator\\rlang_weekend2\\Data_R_181013')

# Read the first sheet of the workbook; for a workbook with several sheets,
# change the sheet number to 2, 3, 4, ...
verbatim <- read_excel(path = "Verbatim.xlsx", sheet = 1)

# Preview the first rows.
head(x = verbatim)

# Call KoNLP's useSejongDic(), then build the KoNLP dictionary from the
# 'sejong' and 'woorimalsam' external dictionaries.
KoNLP::useSejongDic()
KoNLP::buildDictionary(ext_dic = c('sejong', 'woorimalsam'))

# `verbatim` arrives here as the data frame returned by read_excel(). Handing
# a data frame straight to str_replace_all() does not clean the individual
# cells: the columns are coerced as whole objects. Flatten every cell into a
# plain character vector first and drop empty cells.
# NOTE(review): this keeps the text of ALL columns; if only one column holds
# the free-text answers, select that column here instead - confirm against
# the workbook layout.
verbatim <- as.character(unlist(verbatim, use.names = FALSE))
verbatim <- verbatim[!is.na(verbatim)]

# Replace every non-word character with a space.
verbatim <- str_replace_all(verbatim, "\\W", " ")
head(verbatim)

# Extract the nouns from `verbatim` with KoNLP's extractNoun().
# The script previously also ran sapply(verbatim, extractNoun, ...) and then
# immediately overwrote its result with this call; that discarded pass only
# duplicated the extraction work, so just the assignment that takes effect
# is kept.
nouns <- extractNoun(verbatim)
# Count how often each extracted noun occurs.
wordcount <- table(unlist(nouns))

# Convert the frequency table to a data frame. The argument has to be spelled
# `stringsAsFactors`: a misspelled name is silently absorbed by `...`, which
# leaves the words as a factor, and nchar() below rejects factors.
df_word <- as.data.frame(wordcount, stringsAsFactors = FALSE)
df_word <- dplyr::rename(df_word, word = Var1, freq = Freq)

# Keep only words that are at least two characters long.
df_word <- dplyr::filter(df_word, nchar(word) >= 2)

# The 20 most frequent words, highest count first.
# (`arranage` was a typo for dplyr's arrange(); the misspelled name is not a
# function, so the pipeline could not run.)
top20 <- df_word %>%
  arrange(desc(freq)) %>%
  head(20)
LS0tDQp0aXRsZTogIlZlcmJhdGltIFByYWN0aXZlIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQpgYGB7cn0NCmlmKCJySmF2YSIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoInJKYXZhIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygickphdmEiKQ0KbGlicmFyeShySmF2YSkNCmlmKCJtZW1vaXNlIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygibWVtb2lzZSIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIm1lbW9pc2UiKQ0KbGlicmFyeShtZW1vaXNlKQ0KaWYoInN0cmluZ3IiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJzdHJpbmdyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygic3RyaW5nciIpDQpsaWJyYXJ5KHN0cmluZ3IpDQppZigiS29OTFAiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJLb05MUCIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIktvTkxQIikNCmxpYnJhcnkoS29OTFApDQppZigid29yZGNsb3VkIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygid29yZGNsb3VkIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygid29yZGNsb3VkIikNCmxpYnJhcnkod29yZGNsb3VkKQ0KaWYoIlJDb2xvckJyZXdlciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIlJDb2xvckJyZXdlciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIlJDb2xvckJyZXdlciIpDQpsaWJyYXJ5KFJDb2xvckJyZXdlcikNCmlmKCJkcGx5ciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoImRwbHlyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQ0KbGlicmFyeSgiZHBseXIiKQ0KDQppbnN0YWxsLnBhY2thZ2VzKCJyZWFkeGwiKQ0KbGlicmFyeSgicmVhZHhsIikNCg0Kc2V0d2QoJ0M6XFxVc2Vyc1xcQWRtaW5pc3RyYXRvclxccmxhbmdfd2Vla2VuZDJcXERhdGFfUl8xODEwMTMnKQ0KdmVyYmF0aW0gIDwtIHJlYWRfZXhjZWwoIlZlcmJhdGltLnhsc3giLCAxKQ0KIyDrp4zslb0gc2hlZXTqsIAg7Jes65+s6rCc7J2066m0IOyIq+yekOunjCwgMiwgMywgNOuhnCDrs4Dqsr3qsIDriqUNCmhlYWQodmVyYmF0aW0pDQoNCnVzZVNlam9uZ0RpYygpDQpLb05MUDo6YnVpbGREaWN0aW9uYXJ5KA0KICBleHRfZGljID0gYygnc2Vqb25nJywnd29vcmltYWxzYW0nKQ0KKQ0KDQp2ZXJiYXRpbSA8LSBzdHJfcmVwbGFjZV9hbGwodmVyYmF0aW0sIlxcVyIsIiAiKQ0KaGVhZCh2ZXJiYXRpbSkNCg0Kbm91bnMgPC0gc2FwcGx5KA0KICAgICAgICAgICAgICAgIHZlcmJhdGltLA0KICAgICAgICAgICAgICAgIGV4dHJhY3ROb3VuLA0KICAgICAgICAgICAgICAgIFVTRS5OQU1FUyA9IEYNCikNCg0Kbm91bnMgPC0gZXh0cmFjdE5vdW4odmVyYmF0aW0pDQp3b3JkY291bnQgPC0gdGFibGUodW5saXN0KG5vdW5zKSkNCmRmX3dvcmQgPC0gYXMuZGF0YS5mcmFtZSh3b3JkY291bnQsc3RyaW5nQXNGYWN0b3IgPSBGKQ0KZGZfd29yZCA8LSBkcGx5cjo6cmVuYW1lKGRmX3dvcmQsDQogICAgICAgICAgICAgICAgICAgICAg
ICAgd29yZCA9IFZhcjEsDQogICAgICAgICAgICAgICAgICAgICAgICAgZnJlcSA9IEZyZXEpDQojIGhlYWQoZGZfd29yZCkNCmRmX3dvcmQgPC0gZmlsdGVyKGRmX3dvcmQsIG5jaGFyKHdvcmQpPj0yKQ0KDQp0b3AyMCA8LSBkZl93b3JkICU+JSANCiAgYXJyYW5hZ2UoZGVzYyhmcmVxKSkgJT4lIA0KICBoZWFkKDIwKQ0KDQoNCmBgYA0KDQo=