# Install each required package only when it is missing, then attach it.
# The check uses requireNamespace() because the first argument of
# installed.packages() is `lib.loc` (a library directory), not a package name:
# `installed.packages("stringr")` looks inside a directory called "stringr",
# normally finds nothing, and therefore triggers a reinstall on every run.
# NOTE(review): KoNLP may no longer be distributed on CRAN, in which case
# install.packages("KoNLP") fails and a manual install is required - confirm.
if (!requireNamespace("stringr", quietly = TRUE)) install.packages("stringr")
library(stringr)
if (!requireNamespace("KoNLP", quietly = TRUE)) install.packages("KoNLP")
library(KoNLP)
if (!requireNamespace("wordcloud", quietly = TRUE)) install.packages("wordcloud")
library(wordcloud)
if (!requireNamespace("RColorBrewer", quietly = TRUE)) install.packages("RColorBrewer")
library(RColorBrewer)

# Load the raw text, register user dictionary words and extract nouns ----
# NOTE(review): hard-coded, machine-specific working directory; the three
# input files below are resolved relative to it.
setwd("C:\\Users\\Administrator\\rlang_weekend2\\Data_R_181006")
getwd()
tgt1 <- readLines("jeju.txt")
gsb <- readLines("jeju_gsb.txt")
mrg <- readLines("jeju_mrg.txt")
useSejongDic()
# Tag every user-supplied word as "ncn": non-predicative noun
# (as per KAIST mind map).
mrg <- data.frame(mrg, "ncn")
KoNLP::buildDictionary(
  ext_dic = c("sejong", "woorimalsam"),
  user_dic = mrg
)
# lapply() always returns an unnamed list here (one element per input line),
# whereas sapply() may silently simplify to a vector or matrix. unlist() below
# flattens either shape to the same character vector.
tgt1 <- lapply(tgt1, extractNoun, autoSpacing = TRUE)
# tgt1
tgt2 <- unlist(tgt1)
# Delete every character that is not alphabetic ([:alpha:]) from each token;
# tokens are kept (possibly shortened or emptied), not dropped.
tgt3 <- stringr::str_replace_all(tgt2, "[^[:alpha:]]", "")

# Helper functions

# Remove unwanted fragments from the extracted tokens.
# Suggested workflow: run the word cloud once without this function, note the
# noise that shows up, then adjust the patterns below.
#
# Every element has the following removed, in order: spaces, the punctuation
# characters ~!@#$%&*()_+=?<>, "[", characters in the range ㄱ-ㅎ, the
# characters ㅜ and ㅠ, and runs of digits.
#
# @param x Character vector to clean. Defaults to the global `tgt3`, so the
#   existing zero-argument call `gsub4()` keeps working.
# @return Character vector of the same length as `x` with all patterns removed.
gsub4 <- function(x = tgt3) {
  patterns <- c(
    " ", "[~!@#$%&*()_+=?<>]", "\\[",
    "[ㄱ-ㅎ]", "(ㅜ|ㅠ)", "\\d+"
  )
  # Each pass must work on the result of the previous pass; starting again
  # from the untouched input would leave only the last pattern applied.
  cleaned <- x
  for (pattern in patterns) {
    cleaned <- gsub(pattern, "", cleaned)
  }
  cleaned
}

# Normalise spelling variants of Jeju place names and drop filler fragments.
#
# The rules are applied strictly in the order listed; several depend on the
# result of an earlier rule (e.g. "용두" -> "용두암" followed by collapsing
# "용두암암" back to "용두암"), so do not reorder them.
#
# @param x Character vector to normalise. Defaults to the global `tgt4`, so
#   the existing zero-argument call `gsub5()` keeps working.
# @return Character vector of the same length as `x`.
gsub5 <- function(x = tgt4) {
  # Each entry is c(pattern, replacement).
  rules <- list(
    c("\\S*일출", "성산일출봉"),
    # This rule must consume the output of the rule above; reading the raw
    # input again here would throw the "일출" normalisation away.
    c("성산\\S*", "성산일출봉"),
    c("한라\\S*", "한라산"),
    c("랜드", "에코랜드"),
    c("에코에코랜드", "에코랜드"),
    c("주상\\S*", "주상절리"),
    c("\\S*절리", "주상절리"),
    c("용두", "용두암"),
    c("용두암암", "용두암"),
    c("폭포", ""),
    c("연폭", ""),
    c("천지", "천지연폭포"),
    c("공원", ""),
    c("관광", ""),
    c("산방\\S*", "산방산"),
    c("까지", ""),
    c("으로", ""),
    c("박물", ""),
    # NOTE(review): this also strips "일출" out of the "성산일출봉" produced by
    # the first two rules, leaving "성산봉" - confirm that is intended.
    c("일출", "")
  )
  tgt5 <- x
  for (rule in rules) {
    tgt5 <- gsub(rule[1], rule[2], tgt5)
  }
  tgt5
}

# Clean the tokens, count them and draw the word cloud ----
tgt4 <- gsub4()
tgt5 <- gsub5()
tgt5
# Remove every pattern listed in jeju_gsb.txt (read into `gsb` above).
# seq_len() makes the loop run zero times when the file is empty; `1:0` would
# iterate over c(1, 0) and gsub() with the resulting NA pattern turns every
# token into NA.
gsb_cnt <- length(gsb)
i <- 0
for (i in seq_len(gsb_cnt)) {
  tgt5 <- gsub(gsb[i], "", tgt5)
}
tgt6 <- Filter(function(x) nchar(x) >= 2, tgt5) # drop one-character tokens
# tgt6
tgt6 <- unlist(tgt6)
# Round-trip through a text file so the tokens come back as a data frame.
# NOTE(review): read.table() errors when the file has no lines, i.e. when no
# token survived the filter above - confirm whether that case can occur.
write(tgt6, "jeju_temp.txt")
tgt7 <- read.table("jeju_temp.txt")
# class(tgt7)
tgt8 <- table(tgt7)
# class(tgt8)
tgt9 <- head(sort(tgt8, decreasing = TRUE), 30)
tgt9

pal <- brewer.pal(8, "Dark2")
set.seed(1233) # if the layout does not match, try other seeds until it does
wordcloud(
  names(tgt8),
  freq = tgt8,
  scale = c(2.5, 0.1), # word size from 0.1 to 2.5
  rot.per = 0.25, # proportion of rotated words
  min.freq = 2, # only words occurring at least twice
  random.order = FALSE, # place high-frequency words in the centre
  random.color = TRUE,
  colors = pal
)

# 제주도 여행코스 (2) - 차트그리기 (Jeju travel course, part 2: drawing charts) ----

# Ten most frequent entries of tgt9, highest count first.
top10 <- head(sort(tgt9, decreasing = TRUE), n = 10)
top10

# 1) Plain pie chart with the default colours.
pie(top10, main = "제주도 여행코스 탑10")

# 2) Same data with a rainbow palette and full radius.
pie(top10, main = "제주도 여행코스 탑 10", radius = 1, col = rainbow(10))

# Share of each entry within the top ten, in percent with one decimal place.
pct <- round(top10 / sum(top10) * 100, 1)
names(top10)
# Two-line slice label: name on the first line, percentage on the second.
lab <- paste(names(top10), "\n", pct, "%")

# 3) Pie chart with the percentage labels and enlarged text.
pie(top10, labels = lab, main = "제주도 여행코스 탑 10", radius = 1,
    col = rainbow(10), cex = 2)

# 바차트 (bar chart) ----


# Bar chart of the top-ten counts. `bp` receives the value returned by
# barplot(), which is reused as the x position of the labels drawn below.
bp <- barplot(
  top10,
  main = "제주도 여행코스 탑 10",
  col = rainbow(10),
  cex.names = 1.8,
  las = 2,
  # Keep the original 0-30 axis for small counts, but grow it (with 10%
  # headroom) when a count is larger so tall bars and labels are not clipped.
  ylim = c(0, max(30, max(top10) * 1.1))
)

# Name / percentage label placed at the height of each bar.
text(
  x = bp,
  y = top10,
  labels = lab,
  col = "black",
  cex = 1.5
)

# Raw count ("건" = occurrences) placed 5 units below the height of each bar.
text(
  x = bp,
  y = top10 - 5,
  labels = paste(top10, "건"),
  col = "black",
  cex = 1.5
)

# 3D 파이차트 (3D pie chart) ----

# Install plotrix only when it is missing, instead of reinstalling on every
# run, then draw the top-ten counts as an exploded 3D pie chart.
if (!requireNamespace("plotrix", quietly = TRUE)) install.packages("plotrix")
library(plotrix)
plotrix::pie3D(
  top10,
  col = rainbow(10),
  cex = 1.0,
  labels = lab,
  explode = 0.1 # gap between the slices
)
LS0tDQp0aXRsZTogIuygnOyjvOuPhCBUZXh0IE1pbmluZ19Lb05scCINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyfQ0KaWYoInN0cmluZ3IiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJzdHJpbmdyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygic3RyaW5nciIpDQpsaWJyYXJ5KHN0cmluZ3IpDQppZigiS29OTFAiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJLb05MUCIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIktvTkxQIikNCmxpYnJhcnkoS29OTFApDQppZigid29yZGNsb3VkIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygid29yZGNsb3VkIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygid29yZGNsb3VkIikNCmxpYnJhcnkod29yZGNsb3VkKQ0KaWYoIlJDb2xvckJyZXdlciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIlJDb2xvckJyZXdlciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIlJDb2xvckJyZXdlciIpDQpsaWJyYXJ5KFJDb2xvckJyZXdlcikNCg0Kc2V0d2QoIkM6XFxVc2Vyc1xcQWRtaW5pc3RyYXRvclxccmxhbmdfd2Vla2VuZDJcXERhdGFfUl8xODEwMDYiKQ0KZ2V0d2QoKQ0KdGd0MSA8LSByZWFkTGluZXMoImplanUudHh0IikNCmdzYiA8LSByZWFkTGluZXMoImplanVfZ3NiLnR4dCIpDQptcmcgPC0gcmVhZExpbmVzKCJqZWp1X21yZy50eHQiKQ0KdXNlU2Vqb25nRGljKCkNCm1yZyA8LSBkYXRhLmZyYW1lKG1yZywibmNuIikgI+u5hOyEnOyIoO2YlSDrqoXsgqwgKGFzIHBlciBLQUlTVCBtaW5kIG1hcCkNCktvTkxQOjpidWlsZERpY3Rpb25hcnkoDQogIGV4dF9kaWMgPSBjKCdzZWpvbmcnLCd3b29yaW1hbHNhbScpLA0KICB1c2VyX2RpYyA9IG1yZw0KKQ0KdGd0MSA8LSBzYXBwbHkodGd0MSwgDQogICAgICAgICAgICAgICBleHRyYWN0Tm91biwgDQogICAgICAgICAgICAgICBVU0UuTkFNRVMgPSBGLCANCiAgICAgICAgICAgICAgIGF1dG9TcGFjaW5nPVQpDQojIHRndDENCnRndDIgPC0gdW5saXN0KHRndDEpDQp0Z3QzIDwtIHN0cmluZ3I6OnN0cl9yZXBsYWNlX2FsbCh0Z3QyLCdbXls6YWxwaGE6XV0nLCcnKSAgIyDsoJXqt5zsi53sl5Ag65Sw6528IO2VnOq4gOqzvCDslYztjIzrsrPsnLzroZwg7Iuc7J6R7ZWY7KeAIOyViuycvOuptCDsoJzsmbgNCg0KIyBmdW5jdGlvbuydhCDrp4zrk6YNCg0KIyDtlYTsmpTsl4bripQg64uo7Ja0IOygnOqxsCwg7J2864uoIGdzdWI0IGZ1bmN0aW9uIOyXhuydtCB3b3JkY2xvdWQg7Iuk7ZaJ7ZWcIO2bhCDtlYTsmpTsl4bripQg64uo7Ja066W8IO2ZleyduO2VmOqzoCDslYTrnpggZnVuY3Rpb27snYQg7J6R7ISxIOyInOyEnA0KZ3N1YjQgPC0gZnVuY3Rpb24oKXsNCiAgZ3NiIDwtIHJlYWRMaW5lcygiamVqdV9nc2IudHh0IikNCiAgZ3NiDQogIGdzYiA8LSAgYygNCiAgICAnICcsICdbfiFAIyQlJiooKV8rPT88Pl0nLCJcXFsiLA0KICAgICdb44SxLeOFjl0nLCco44WcfOOFoCknLCJcXGQrIg0KICApDQogIGkgPC0gMA0K
ICBmb3IoaSBpbiAxOmxlbmd0aChnc2IpKXsNCiAgICB0Z3Q0IDwtIGdzdWIoZ3NiW2ldLCIiLHRndDMpDQogIH0NCiAgcmV0dXJuKHRndDQpDQp9DQoNCmdzdWI1IDwtIGZ1bmN0aW9uKCl7DQogIHRndDUgPSBnc3ViKCJcXFMq7J287LacIiwgIuyEseyCsOydvOy2nOu0iSIsIHRndDQpDQogIHRndDUgPSBnc3ViKCLshLHsgrBcXFMqIiwgIuyEseyCsOydvOy2nOu0iSIsIHRndDQpDQogIHRndDUgPSBnc3ViKCLtlZzrnbxcXFMqIiwgIu2VnOudvOyCsCIsIHRndDUpDQogIHRndDUgPSBnc3ViKCLrnpzrk5wiLCAi7JeQ7L2U656c65OcIiwgdGd0NSkNCiAgdGd0NSA9IGdzdWIoIuyXkOy9lOyXkOy9lOuenOuTnCIsICLsl5DsvZTrnpzrk5wiLCB0Z3Q1KQ0KICB0Z3Q1ID0gZ3N1Yigi7KO87IOBXFxTKiIsICLso7zsg4HsoIjrpqwiLCB0Z3Q1KQ0KICB0Z3Q1ID0gZ3N1YigiXFxTKuygiOumrCIsICLso7zsg4HsoIjrpqwiLCB0Z3Q1KQ0KICB0Z3Q1ID0gZ3N1Yigi7Jqp65GQIiwgIuyaqeuRkOyVlCIsIHRndDUpDQogIHRndDUgPSBnc3ViKCLsmqnrkZDslZTslZQiLCAi7Jqp65GQ7JWUIiwgdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+2Pre2PrCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+yXsO2PrScsJycsdGd0NSkNCiAgdGd0NSA9IGdzdWIoIuyynOyngCIsICLsspzsp4Dsl7Dtj63tj6wiLCB0Z3Q1KQ0KICB0Z3Q1PC0gZ3N1Yign6rO17JuQJywnJyx0Z3Q1KQ0KICB0Z3Q1PC0gZ3N1Yign6rSA6rSRJywnJyx0Z3Q1KQ0KICB0Z3Q1PC0gZ3N1Yigi7IKw67CpXFxTKiIsIuyCsOuwqeyCsCIsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+q5jOyngCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+ycvOuhnCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+uwleusvCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+ydvOy2nCcsJycsdGd0NSkNCiAgcmV0dXJuICh0Z3Q1KQ0KfQ0KDQp0Z3Q0IDwtIGdzdWI0KCkNCnRndDUgPC0gZ3N1YjUoKQ0KdGd0NQ0KZ3NiX2NudCA8LSBsZW5ndGgoZ3NiKQ0KaSA8LSAwDQpmb3IoaSBpbiAxOmdzYl9jbnQpew0KICB0Z3Q1IDwtIGdzdWIoZ3NiW2ldLCIiLHRndDUpDQp9DQp0Z3Q2IDwtIEZpbHRlcihmdW5jdGlvbih4KXtuY2hhcih4KSA+PTJ9LHRndDUpICMg7ZWcIOq4gOyekCDsoJzsmbgNCiMgdGd0Ng0KdGd0NiA8LSB1bmxpc3QodGd0NikNCndyaXRlKHRndDYsICdqZWp1X3RlbXAudHh0JykNCnRndDcgPC0gcmVhZC50YWJsZSgnamVqdV90ZW1wLnR4dCcpDQojIGNsYXNzKHRndDcpDQp0Z3Q4IDwtIHRhYmxlKHRndDcpDQojIGNsYXNzKHRndDgpDQp0Z3Q5IDwtIGhlYWQoc29ydCh0Z3Q4LGRlY3JlYXNpbmcgPSBUKSwzMCkNCnRndDkNCg0KcGFsIDwtIGJyZXdlci5wYWwoOCwiRGFyazIiKQ0Kc2V0LnNlZWQoMTIzMykgIyDrqqjslpHsnbQg7J287LmYIOyViO2VmOuptCDsiKvsnpDrpbwg67CU6r+U6rCA66mwIOuqqOyWkSDssL7quLANCndvcmRjbG91ZCgNCiAgbmFtZXModGd0OCksDQogIGZyZXEgPSB0Z3Q4
LA0KICBzY2FsZSA9IGMoMi41LDAuMSksICMg64uo7Ja07YGs6riwIDAuMSB+IDIuNQ0KICByb3QucGVyID0wLjI1LCAjIO2ajOyghOu5hOycqA0KICBtaW4uZnJlcSA9IDIsICMg7LWc7KCAIOu5iOuPhOyImCAy7ZqM7J207IOBDQogIHJhbmRvbS5vcmRlciA9IEYsICMg6rOg67mI64+EIOuLqOyWtCDspJHslZnrsLDsuZgNCiAgcmFuZG9tLmNvbG9yID0gVCwNCiAgY29sb3JzID0gcGFsDQopDQpgYGANCiMjIyDsoJzso7zrj4Qg7Jes7ZaJ7L2U7IqkICgyKSAtIOywqO2KuOq3uOumrOq4sA0KYGBge3J9DQp0b3AxMCA8LSBoZWFkKHNvcnQodGd0OSwgZGVjcmVhc2luZyA9IFQpLDEwKQ0KdG9wMTANCnBpZSh0b3AxMCxtYWluPSLsoJzso7zrj4Qg7Jes7ZaJ7L2U7IqkIO2DkTEwIikNCg0KcGllKHRvcDEwLA0KICAgIGNvbCA9IHJhaW5ib3coMTApLA0KICAgIHJhZGl1cyA9IDEsDQogICAgbWFpbiA9ICLsoJzso7zrj4Qg7Jes7ZaJ7L2U7IqkIO2DkSAxMCINCikNCnBjdCA8LSByb3VuZCh0b3AxMC9zdW0odG9wMTApKjEwMCwgMSkNCm5hbWVzKHRvcDEwKQ0KbGFiIDwtIHBhc3RlKG5hbWVzKHRvcDEwKSwnXG4nLHBjdCwiJSIpDQpwaWUodG9wMTAsDQogICAgY29sID0gcmFpbmJvdygxMCksDQogICAgcmFkaXVzID0gMSwNCiAgICBjZXggPSAyLA0KICAgIGxhYmVscyA9IGxhYiwNCiAgICBtYWluID0gIuygnOyjvOuPhCDsl6ztlonsvZTsiqQg7YORIDEwIg0KKQ0KYGBgDQoNCiMjIyDrsJTssKjtirgNCmBgYHtyfQ0KDQpicCA8LSBiYXJwbG90KA0KICAgICAgICAgICAgICB0b3AxMCwNCiAgICAgICAgICAgICAgbWFpbiA9ICLsoJzso7zrj4Qg7Jes7ZaJ7L2U7IqkIO2DkSAxMCIsDQogICAgICAgICAgICAgIGNvbCA9IHJhaW5ib3coMTApLA0KICAgICAgICAgICAgICBjZXgubmFtZXMgPSAxLjgsDQogICAgICAgICAgICAgIGxhcyA9IDIsIA0KICAgICAgICAgICAgICB5bGltID0gYygwLCAzMCkNCikNCg0KdGV4dCh4ID0gYnAsDQogICAgIHkgPSB0b3AxMCwNCiAgICAgbGFiZWxzID0gbGFiLCANCiAgICAgY29sID0gJ2JsYWNrJywNCiAgICAgY2V4ID0gMS41DQogICAgICkNCg0KdGV4dCgNCiAgICB4ID0gYnAsDQogICAgeSA9IHRvcDEwLTUsDQogICAgbGFiZWxzID0gcGFzdGUodG9wMTAsIuqxtCIpLA0KICAgIGNvbCA9ICdibGFjaycsDQogICAgY2V4ID0gMS41DQopDQpgYGANCg0KDQojIyMgM0Qg7YyM7J207LCo7Yq4DQpgYGB7cn0NCmluc3RhbGwucGFja2FnZXMoInBsb3RyaXgiKQ0KbGlicmFyeShwbG90cml4KQ0KcGxvdHJpeDo6cGllM0QoDQogIHRvcDEwLA0KICBjb2wgPSByYWluYm93KDEwKSwNCiAgY2V4ID0gMS4wLA0KICBsYWJlbHMgPSBsYWIsIA0KICBleHBsb2RlID0gMC4xDQopDQoNCmBgYA0K