# Packages this script depends on, listed in the order they are attached.
required_packages <- c(
  "rJava", "memoise", "KoNLP", "tm",
  "wordcloud", "dplyr", "stringr", "RColorBrewer"
)

# Install only the packages that are not installed yet.
# The first argument of installed.packages() is `lib.loc` (a library
# directory), not a package name, so the lookup has to go through the row
# names of the full installed-package matrix.
# NOTE(review): KoNLP may not be installable from CRAN with a plain
# install.packages() call - confirm the intended installation source.
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

library(rJava)
library(memoise)
library(KoNLP)
library(tm)
library(wordcloud)
library(dplyr)
library(stringr)
# RColorBrewer supplies the colour palette applied to the words.
library(RColorBrewer)

# Load the Sejong dictionary used by KoNLP.
KoNLP::useSejongDic()

# step 1. Read the target file (load the data).
# Use the local copy when it is present and fall back to the remote copy
# otherwise, so a run needs either the file or network access, not both.
jeju_path <- "jeju.txt"
jeju_url <- "https://www.dropbox.com/s/qg1r6x4zhvzbqqt/jeju.txt?dl=1"
txt <- if (file.exists(jeju_path)) {
  readLines(jeju_path)
} else {
  readLines(jeju_url)
}
head(txt, 10)

# step 2. Remove special characters.
# First blank out non-word characters, then everything that is not a letter
# (digits, underscores, ...), replacing each with a space.
txt <- stringr::str_replace_all(txt, "\\W", " ")
head(txt)
txt <- stringr::str_replace_all(txt, "[^[:alpha:]]", " ")
txt
# step 3. Extract nouns only.
# lapply() always returns a list with one element per input line, whereas
# sapply() may simplify its result depending on the input.
nouns <- lapply(txt, KoNLP::extractNoun)

class(nouns)
head(nouns, 10)

# Flatten to a single character vector, then strip any remaining
# non-alphabetic characters and the spaces that replacement leaves behind.
# The cleanup runs on the flattened vector; applied to the list itself, the
# list would first be coerced to strings instead of being cleaned per noun.
cdata <- unlist(nouns)
cdata <- stringr::str_replace_all(cdata, "[^[:alpha:]]", " ")
cdata <- gsub(" ", "", cdata)

# step 3-1. Delete specific words.
# Every line of the stop-word file is used as a gsub() pattern.
gsubTxt <- readLines("제주도여행코스gsub(1).txt")
cnt <- length(gsubTxt)
print(paste("삭제하려는 단어의 수:", cnt))
# Iterate over the patterns themselves so that an empty stop-word file means
# zero passes (1:cnt would run over 1 and 0 in that case).
for (pattern in gsubTxt) {
  cdata <- gsub(pattern, "", cdata)
}
cdata

# Drop one-character entries: keep only words of two or more characters.
# which() also discards any NA produced by nchar().
cdata <- cdata[which(nchar(cdata) >= 2)]

# Keep a copy of the cleaned nouns on disk, one word per line.
writeLines(cdata, "jeju_2.txt")
length(cdata)

# step 4. Build the per-word frequency table.
# Counting table(unlist(...)) leaves the table dimension unnamed, which is
# what makes as.data.frame() later produce the columns Var1 and Freq.
wordcount <- table(unlist(cdata))
head(sort(wordcount, decreasing = TRUE), 30)
head(wordcount, 10)

# Pie chart of the ten most frequent words: sort in decreasing order and
# take the first 10 (head() alone would only return 6).
top10 <- head(sort(wordcount, decreasing = TRUE), 10)
pie(
  top10,
  col = rainbow(10),
  radius = 1,
  main = "제주도 추천 여행 코스 TOP10"
)


# Turn the frequency table into a data frame with one row per word.
df_word <- as.data.frame(wordcount, stringsAsFactors = FALSE)

# step 5. Rename the variables.
names(df_word) # Var1 and Freq are generated automatically

# step 6. Keep only words with two or more characters.
df_word <- df_word %>%
  dplyr::rename(word = Var1, freq = Freq) %>%
  dplyr::filter(nchar(word) >= 2)

df_word

# step 7. Sort by frequency (descending) and keep the top 20 words.
top_20 <- head(dplyr::arrange(df_word, dplyr::desc(freq)), 20)
top_20

# Draw the word cloud from the word/frequency data frame: words appearing at
# least twice, at most 200 of them, most frequent words placed first, 10% of
# the words rotated, coloured with the 8-colour "Dark2" Brewer palette.
with(
  df_word,
  wordcloud::wordcloud(
    words = word,
    freq = freq,
    scale = c(4, 0.3),
    min.freq = 2,
    max.words = 200,
    random.order = FALSE,
    rot.per = 0.1,
    colors = RColorBrewer::brewer.pal(8, "Dark2")
  )
)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCg0KYGBge3J9DQppZigickphdmEiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJySmF2YSIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoInJKYXZhIikNCmxpYnJhcnkockphdmEpDQppZigibWVtb2lzZSIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIm1lbW9pc2UiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJtZW1vaXNlIikNCmxpYnJhcnkobWVtb2lzZSkNCmlmKCJLb05MUCIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIktvTkxQIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygiS29OTFAiKQ0KbGlicmFyeShLb05MUCkNCmlmKCJ0bSIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoInRtIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygidG0iKQ0KbGlicmFyeSh0bSkNCmlmKCJ3b3JkY2xvdWQiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJ3b3JkY2xvdWQiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJ3b3JkY2xvdWQiKQ0KbGlicmFyeSh3b3JkY2xvdWQpDQppZigiZHBseXIiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJkcGx5ciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoImRwbHlyIikNCmxpYnJhcnkoZHBseXIpDQppZigic3RyaW5nciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoInN0cmluZ3IiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJzdHJpbmdyIikNCmxpYnJhcnkoc3RyaW5ncikNCiMg6riA7J6Q7JeQIOyDiSDsnoXtmIDspIwNCmlmKCJSQ29sb3JCcmV3ZXIiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJSQ29sb3JCcmV3ZXIiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJSQ29sb3JCcmV3ZXIiKQ0KbGlicmFyeShSQ29sb3JCcmV3ZXIpDQoNCktvTkxQOjp1c2VTZWpvbmdEaWMoKQ0KDQoNCnR4dCA8LSByZWFkTGluZXMoImh0dHBzOi8vd3d3LmRyb3Bib3guY29tL3MvcWcxcjZ4NHpodnpicXF0L2planUudHh0P2RsPTEiKQ0KaGVhZCh0eHQsMTApDQoNCg0KIyBzdGVwIDEuIOuMgOyDge2MjOydvCDsnb3slrTrk6TsnbTquLAuKOuNsOydtO2EsOulvCDroZzrlKntlZzri6QuKQ0KdHh0IDwtIHJlYWRMaW5lcygiamVqdS50eHQiKQ0KaGVhZCh0eHQpDQojIHN0ZXAgMi4g7Yq57IiY66y47J6QIOygnOqxsA0KdHh0IDwtIHN0cmluZ3I6OnN0cl9yZXBsYWNlX2FsbCh0eHQsIlxcVyIsIiAiKQ0KaGVhZCh0eHQpDQp0eHQgPC0gc3RyaW5ncjo6c3RyX3JlcGxhY2VfYWxsKHR4dCwiW15bOmFscGhhOl1dIiwiICIpDQp0eHQNCiMgc3RlcCAzLiDrqoXsgqzrp4wg7LaU7LacDQpub3VucyA8LSANCiAgc2FwcGx5KA0KICAgIHR4dCwNCiAgICBLb05MUDo6ZXh0cmFjdE5vdW4sDQogICAgVVNFLk5BTUVTID0gRg0KICApDQoNCmNsYXNzKG5vdW5zKQ0KaGVhZChub3VucywxMCkNCmNkYXRhIDwtIHVubGlzdChub3VucykNCg0KY2RhdGEgPC0gdW5saXN0KG5vdW5zKQ0KDQpjZGF0YSA8LSBz
dHJpbmdyOjpzdHJfcmVwbGFjZV9hbGwobm91bnMsIlteWzphbHBoYTpdXSIsIiAiKQ0KY2RhdGEgPC0gZ3N1YigiICIsIiIsY2RhdGEpDQoNCiMgc3RlcDMtMS4g7Yq57KCV64uo7Ja0IOyCreygnO2VmOq4sA0KDQpnc3ViVHh0IDwtIHJlYWRMaW5lcygi7KCc7KO864+E7Jes7ZaJ7L2U7IqkZ3N1YigxKS50eHQiKQ0KY250IDwtIGxlbmd0aChnc3ViVHh0KQ0KcHJpbnQocGFzdGUoIuyCreygnO2VmOugpOuKlCDri6jslrTsnZgg7IiYOiIsY250KSkNCmZvcihpIGluIDE6Y250KXsNCmNkYXRhIDwtIGdzdWIoZ3N1YlR4dFtpXSwiIiwgY2RhdGEpDQp9DQpjZGF0YQ0KIyDtlZwg6riA7J6QIOu9keyVhOuCtOq4sA0KY2RhdGEgPC0gRmlsdGVyKGZ1bmN0aW9uKHgpe25jaGFyKHgpPj0yfSxjZGF0YSkNCndyaXRlKHVubGlzdChjZGF0YSksImplanVfMi50eHQiKQ0Kbm91bnMgPC0gcmVhZC50YWJsZSgiamVqdV8yLnR4dCIpDQpucm93KG5vdW5zKQ0Kd29yZG91bnQgPC0gdGFibGUobm91bnMpDQpoZWFkKHNvcnQod29yZGNvdW50LGRlY3JlYXNpbmcgPSBUKSwzMCkNCg0KIyBzdGVwIDQuIOuLqOyWtOuzhCDruYjrj4TtkZwg7J6R7ISxDQp3b3JkY291bnQgPC0gdGFibGUodW5saXN0KG5vdW5zMikpDQpoZWFkKHdvcmRjb3VudCwxMCkNCnRvcDEwIDwtIGhlYWQoc29ydCh3b3JkY291bnQsZGVjcmVhc2luZyA9IDEwKSkNCnBpZSh0b3AxMCwNCiAgICBjb2w9cmFpbmJvdygxMCksDQogICAgcmFkaXVzPTEsDQogICAgbWFpbj0i7KCc7KO864+EIOy2lOyynCDsl6ztlokg7L2U7IqkIFRPUDEwIikNCg0KDQpkZl93b3JkIDwtIGFzLmRhdGEuZnJhbWUod29yZGNvdW50LCBzdHJpbmdzQXNGYWN0b3JzID0gRikNCg0KIyBzdGVwIDUuIOuzgOyImOuqhSDsiJjsoJUNCm5hbWVzKGRmX3dvcmQpICMg7J6Q64+Z7Jy866GcIFZhcjEsIEZyZXEg7IOd7ISx65CoDQpkZl93b3JkIDwtIGRwbHlyOjpyZW5hbWUoDQogIGRmX3dvcmQsDQogIHdvcmQgPSBWYXIxLA0KICBmcmVxID0gRnJlcQ0KKQ0KDQojIHN0ZXAgNi4g65GQIOq4gOyekCDsnbTsg4Eg64uo7Ja0IOy2lOy2nA0KZGZfd29yZCA8LSBkcGx5cjo6ZmlsdGVyKGRmX3dvcmQsIG5jaGFyKHdvcmQpPj0yKQ0KDQpkZl93b3JkDQoNCiMgc3RlcCA3LiDruYjrj4TsiJwg7KCV66CsIO2bhCDsg4HsnIQgMjDqsJwg64uo7Ja066eMIOy2lOy2nA0KdG9wXzIwIDwtIGRmX3dvcmQgJT4lIGRwbHlyOjphcnJhbmdlKGRlc2MoZnJlcSkpICU+JSBoZWFkKDIwKQ0KdG9wXzIwDQoNCndvcmRjbG91ZDo6d29yZGNsb3VkKA0KICB3b3JkcyA9IGRmX3dvcmQkd29yZCwNCiAgZnJlcSA9IGRmX3dvcmQkZnJlcSwNCiAgbWluLmZyZXEgPSAyLA0KICBtYXgud29yZHMgPSAyMDAsDQogIHJhbmRvbS5vcmRlciA9ICBGLA0KICByb3QucGVyID0gLjEsDQogIHNjYWxlID0gYyg0LDAuMyksDQogIGNvbG9ycyA9IGJyZXdlci5wYWwoOCwiRGFyazIiKQ0KKQ0KDQpgYGANCg==