# Install (only when missing) and attach the packages this script uses.
#
# The first argument of installed.packages() is `lib.loc` (a library
# directory), not a package name, so a check of the form
# `"pkg" %in% installed.packages("pkg")` searches a directory called "pkg",
# finds nothing, and triggers a reinstall on every run. requireNamespace()
# tests for the package itself.
for (pkg in c("stringr", "KoNLP", "wordcloud", "RColorBrewer")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}
# getwd()
# Input files, read one entry per line:
#   jeju.txt       - text that is passed to the noun extractor
#   jeju__mrg.txt  - terms added to the KoNLP user dictionary
#   jeju__gsb.txt  - regex patterns stripped from the tokens later on
tgt1 <- readLines("jeju.txt")
mrg <- readLines("jeju__mrg.txt")
gsb <- readLines("jeju__gsb.txt")
# class(mrg)
useSejongDic()
# Tag every user-dictionary term as "ncn" (non-predicative noun).
mrg <- data.frame(mrg, "ncn")
KoNLP::buildDictionary(
  ext_dic = c("sejong", "woorimalsam"),
  user_dic = mrg
)
# lapply() always returns a list, whereas sapply() may hand back a vector,
# a matrix or a list depending on how many nouns each line yields.
tgt1 <- lapply(tgt1, extractNoun, autoSpacing = TRUE)
# Flatten to a single character vector of nouns, in input order.
tgt2 <- unlist(tgt1)
# Drop every character that is not alphabetic.
tgt3 <- stringr::str_replace_all(tgt2, "[^[:alpha:]]", "")
# Remove noise (spaces, punctuation, stray Hangul jamo, digits) from tokens.
#
# Arguments:
#   x        - character vector to clean; defaults to the global `tgt3`, so
#              the zero-argument call `gsub4()` keeps working.
#   patterns - regular expressions to delete, applied in order.
# Returns a character vector the same length as `x`.
#
# Each pattern is applied to the result of the previous one. Substituting
# on the untouched input every iteration would keep only the effect of the
# last pattern.
gsub4 <- function(x = tgt3,
                  patterns = c(
                    ' ', '[~!@#$%&*()_+=?<>]', "\\[",
                    '[ㄱ-ㅎ]', '(ㅜ|ㅠ)', "\\d+"
                  )) {
  cleaned <- x
  for (k in seq_along(patterns)) {
    cleaned <- gsub(patterns[k], "", cleaned)
  }
  cleaned
}
# Normalise place-name variants to one canonical spelling and delete a few
# filler fragments.
#
# Arguments:
#   x - character vector of tokens; defaults to the global `tgt4`, so the
#       zero-argument call `gsub5()` keeps working.
# Returns a character vector the same length as `x`.
#
# Rules run in order and each one sees the output of the previous rule.
# The second rule must also read the running result; reading the raw input
# there throws away the first rule.
gsub5 <- function(x = tgt4) {
  rules <- list(
    c("\\S*일출", "성산일출봉"),
    c("성산\\S*", "성산일출봉"),
    c("한라\\S*", "한라산"),
    # Prefix first, then collapse the doubled prefix on tokens that
    # already carried it. The same two-step trick is used for "용두".
    c("랜드", "에코랜드"),
    c("에코에코랜드", "에코랜드"),
    c("주상\\S*", "주상절리"),
    c("\\S*절리", "주상절리"),
    c("용두", "용두암"),
    c("용두암암", "용두암"),
    # NOTE(review): "폭포" and "연폭" are deleted before "천지" is expanded,
    # so an input of "천지연폭포" ends up as "천지연폭포연" - confirm whether
    # "천지\\S*" was intended.
    c("폭포", ""),
    c("연폭", ""),
    c("천지", "천지연폭포"),
    c("공원", ""),
    c("관광", ""),
    c("산방\\S*", "산방산"),
    c("까지", ""),
    c("으로", ""),
    c("박물", "")
  )
  Reduce(function(tokens, rule) gsub(rule[1], rule[2], tokens), rules, x)
}
tgt4 <- gsub4()
tgt4
tgt5 <- gsub5()
# Delete every pattern listed in `gsb` from the tokens. seq_len() yields an
# empty sequence for an empty pattern list; `1:0` would run the loop with
# indices 1 and 0, and gsub(NA, ...) would turn every token into NA.
gsb__cnt <- length(gsb)
for (i in seq_len(gsb__cnt)) {
  tgt5 <- gsub(gsb[i], "", tgt5)
}
# Keep tokens of at least two characters; which() drops NA comparisons.
tgt6 <- tgt5[which(nchar(tgt5) >= 2)]
# tgt6
# Round-trip the tokens through a file (one token per line) and count how
# often each token occurs.
write(tgt6, 'jeju__temp.txt')
tgt7 <- read.table('jeju__temp.txt')
# class(tgt7)
tgt8 <- table(tgt7)
# class(tgt8)
# The 30 most frequent tokens.
tgt9 <- head(sort(tgt8, decreasing = TRUE), 30)
tgt9
# Fixed seed so the word layout and random colours are reproducible.
set.seed(1234)
pal <- brewer.pal(8, "Dark2")
# Word cloud over all counted tokens.
wordcloud(
  words = names(tgt8),
  freq = tgt8,
  scale = c(2.5, 0.1),   # word size ranges from 0.1 to 2.5
  min.freq = 2,          # only words that occur at least twice
  rot.per = 0.25,        # share of rotated words
  random.order = FALSE,  # place high-frequency words in the centre
  random.color = TRUE,
  colors = pal
)
### Jeju travel course (2) - drawing charts
# Ten most frequent tokens out of the top-30 table.
top10 <- head(sort(tgt9, decreasing = TRUE), 10)
top10
pie(top10, main = "제주도 여행코스 탑 10")
pie(top10,
    col = rainbow(10),
    radius = 1,
    main = "제주도 여행코스 탑 10"
    )
# Share of each token within the top 10, in percent with one decimal.
pct <- round(top10 / sum(top10) * 100, 1)
names(top10)
# Two-line label: token name, then its percentage.
lab <- paste(names(top10), '\n', pct, "%")
pie(top10,
    col = rainbow(10),
    radius = 1,
    cex = 0.8,
    labels = lab,
    main = "제주도 여행코스 탑 10"
)
## Bar chart
# Keep the y axis at 0..30 for small counts, but let it grow with the data
# so that bars and their labels are not clipped when a count exceeds 30.
bp <- barplot(
  top10,
  main = "제주도 여행코스 탑 10",
  col = rainbow(10),
  cex.names = 1.8,
  las = 2,
  ylim = c(0, max(30, max(top10) * 1.2))
)
# Name/percentage label at the top of each bar.
text(x = bp,
     y = top10,
     labels = lab,
     col = "black",
     cex = 1.0)
# Raw count, drawn 5 units below the top of each bar.
text(x = bp,
     y = top10 - 5,
     labels = paste(top10, "건"),
     col = "black",
     cex = 0.7)
## 3D pie chart
# Install plotrix only when it is missing; an unconditional
# install.packages() call would reinstall it on every run.
if (!requireNamespace("plotrix", quietly = TRUE)) {
  install.packages("plotrix")
}
library(plotrix)
plotrix::pie3D(
  top10,
  col = rainbow(10),
  cex = 1.0,
  labels = lab,
  explode = 0.1
)
# tm is used for English text; KoNLP is used when analysing Korean text
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KaWYoInN0cmluZ3IiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJzdHJpbmdyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygic3RyaW5nciIpDQpsaWJyYXJ5KHN0cmluZ3IpDQppZigiS29OTFAiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJLb05MUCIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIktvTkxQIikNCmxpYnJhcnkoS29OTFApDQppZigid29yZGNsb3VkIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygid29yZGNsb3VkIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygid29yZGNsb3VkIikNCmxpYnJhcnkod29yZGNsb3VkKQ0KaWYoIlJDb2xvckJyZXdlciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIlJDb2xvckJyZXdlciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIlJDb2xvckJyZXdlciIpDQpsaWJyYXJ5KFJDb2xvckJyZXdlcikNCiMgZ2V0d2QoKQ0KdGd0MSA8LSByZWFkTGluZXMoImplanUudHh0IikNCm1yZyA8LSByZWFkTGluZXMoImplanVfX21yZy50eHQiKQ0KZ3NiIDwtIHJlYWRMaW5lcygiamVqdV9fZ3NiLnR4dCIpDQojIGNsYXNzKG1yZykNCnVzZVNlam9uZ0RpYygpDQptcmcgPC0gZGF0YS5mcmFtZShtcmcsIm5jbiIpICMg67mE7ISc7Iig7ZiV66qF7IKsDQpLb05MUDo6YnVpbGREaWN0aW9uYXJ5KA0KICBleHRfZGljID0gYygnc2Vqb25nJywnd29vcmltYWxzYW0nKSwNCiAgdXNlcl9kaWMgPSBtcmcNCikNCnRndDEgPC0gc2FwcGx5KHRndDEsIA0KICAgICAgICAgICAgICAgZXh0cmFjdE5vdW4sDQogICAgICAgICAgICAgICBVU0UuTkFNRVMgPSBGLA0KICAgICAgICAgICAgICAgYXV0b1NwYWNpbmc9VCkNCnRndDIgPC0gdW5saXN0KHRndDEpDQp0Z3QzIDwtIHN0cmluZ3I6OnN0cl9yZXBsYWNlX2FsbCh0Z3QyLCdbXls6YWxwaGE6XV0nLCcnKQ0KZ3N1YjQgPC0gZnVuY3Rpb24oKXsNCiAgZ3NiIDwtIHJlYWRMaW5lcygiamVqdV9fZ3NiLnR4dCIpDQogIGdzYg0KICBnc2IgPC0gYygNCiAgICAnICcsICdbfiFAIyQlJiooKV8rPT88Pl0nLCJcXFsiLA0KICAgICdb44SxLeOFjl0nLCco44WcfOOFoCknLCJcXGQrIg0KICApDQogIGkgPC0gMA0KICBmb3IoaSBpbiAxOmxlbmd0aChnc2IpKXsNCiAgICB0Z3Q0IDwtIGdzdWIoZ3NiW2ldLCIiLHRndDMpDQogIH0NCiAgcmV0dXJuICh0Z3Q0KQ0KfQ0KZ3N1YjUgPC0gZnVuY3Rpb24oKXsNCiAgdGd0NSA9IGdzdWIoIlxcUyrsnbzstpwiLCAi7ISx7IKw7J287Lac67SJIiwgdGd0NCkNCiAgdGd0NSA9IGdzdWIoIuyEseyCsFxcUyoiLCAi7ISx7IKw7J287Lac67SJIiwgdGd0NCkNCiAgI3RndDUgPSBnc3ViKCLshLHsgrBcXFMqIiwgIuyEseyCsOydvOy2nOu0iSIsIHRndDUpDQogIHRndDUgPSBnc3ViKCLtlZzrnbxcXFMqIiwgIu2VnOudvOyCsCIsIHRndDUpDQogIHRndDUgPSBnc3ViKCLrnpzrk5wiLCAi7JeQ7L2U656c65Oc
IiwgdGd0NSkNCiAgdGd0NSA9IGdzdWIoIuyXkOy9lOyXkOy9lOuenOuTnCIsICLsl5DsvZTrnpzrk5wiLCB0Z3Q1KQ0KICB0Z3Q1ID0gZ3N1Yigi7KO87IOBXFxTKiIsICLso7zsg4HsoIjrpqwiLCB0Z3Q1KQ0KICB0Z3Q1ID0gZ3N1YigiXFxTKuygiOumrCIsICLso7zsg4HsoIjrpqwiLCB0Z3Q1KQ0KICB0Z3Q1ID0gZ3N1Yigi7Jqp65GQIiwgIuyaqeuRkOyVlCIsIHRndDUpDQogIHRndDUgPSBnc3ViKCLsmqnrkZDslZTslZQiLCAi7Jqp65GQ7JWUIiwgdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+2Pre2PrCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+yXsO2PrScsJycsdGd0NSkNCiAgdGd0NSA9IGdzdWIoIuyynOyngCIsICLsspzsp4Dsl7Dtj63tj6wiLCB0Z3Q1KQ0KICB0Z3Q1PC0gZ3N1Yign6rO17JuQJywnJyx0Z3Q1KQ0KICB0Z3Q1PC0gZ3N1Yign6rSA6rSRJywnJyx0Z3Q1KQ0KICB0Z3Q1PC0gZ3N1Yigi7IKw67CpXFxTKiIsIuyCsOuwqeyCsCIsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+q5jOyngCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+ycvOuhnCcsJycsdGd0NSkNCiAgdGd0NTwtIGdzdWIoJ+uwleusvCcsJycsdGd0NSkNCiAgcmV0dXJuICh0Z3Q1KQ0KfQ0KDQp0Z3Q0IDwtIGdzdWI0KCkNCnRndDQNCnRndDUgPC0gZ3N1YjUoKQ0KZ3NiX19jbnQgPC0gbGVuZ3RoKGdzYikNCmkgPC0gMA0KZm9yKGkgaW4gMTpnc2JfX2NudCl7DQogIHRndDUgPC0gZ3N1Yihnc2JbaV0sIiIsdGd0NSkNCn0NCnRndDYgPC0gRmlsdGVyKGZ1bmN0aW9uKHgpe25jaGFyKHgpID49IDJ9LHRndDUpDQojIHRndDYNCnRndDYgPC0gdW5saXN0KHRndDYpDQp3cml0ZSh0Z3Q2LCdqZWp1X190ZW1wLnR4dCcpDQp0Z3Q3IDwtIHJlYWQudGFibGUoJ2planVfX3RlbXAudHh0JykNCiMgY2xhc3ModGd0NykNCnRndDggPC0gdGFibGUodGd0NykNCiMgY2xhc3ModGd0OCkNCnRndDkgPC0gaGVhZChzb3J0KHRndDgsZGVjcmVhc2luZyA9IFQpLDMwKQ0KdGd0OQ0KcGFsIDwtIGJyZXdlci5wYWwoOCwiRGFyazIiKQ0Kc2V0LnNlZWQoMTIzNCkNCndvcmRjbG91ZCgNCiAgbmFtZXModGd0OCksDQogIGZyZXEgPSB0Z3Q4LA0KICBzY2FsZSA9IGMoMi41LDAuMSksICMg64uo7Ja07YGs6riwIDAuMSB+IDIuNQ0KICByb3QucGVyID0gMC4yNSwgIyDtmozsoITruYTsnKgNCiAgbWluLmZyZXEgPSAyLCAjIOy1nOyggCDruYjrj4TsiJggMu2ajOydtOyDgQ0KICByYW5kb20ub3JkZXIgPSBGLCAjIOqzoOu5iOuPhCDri6jslrQg7KSR7JWZ67Cw7LmYDQogIHJhbmRvbS5jb2xvciA9IFQsDQogIGNvbG9ycyA9IHBhbA0KKQ0KDQojIyMg7KCc7KO864+EIOyXrO2Wiey9lOyKpCAoMikgLSDssKjtirjqt7jrpqzquLANCg0KdG9wMTAgPC0gaGVhZChzb3J0KHRndDksZGVjcmVhc2luZyA9IFQpLDEwKQ0KdG9wMTANCnBpZSh0b3AxMCwgbWFpbiA9ICLsoJzso7zrj4Qg7Jes7ZaJ7L2U7IqkIO2DkSAxMCIpDQpwaWUodG9wMTAsDQogICAgY29sID0gcmFpbmJvdygxMCksDQog
ICAgcmFkaXVzID0gMSwNCiAgICBtYWluID0gIuygnOyjvOuPhCDsl6ztlonsvZTsiqQg7YORIDEwIg0KICAgICkNCnBjdCA8LSByb3VuZCh0b3AxMC9zdW0odG9wMTApKjEwMCwgMSkNCm5hbWVzKHRvcDEwKQ0KbGFiIDwtIHBhc3RlKG5hbWVzKHRvcDEwKSwnXG4nLHBjdCwiJSIpDQpwaWUodG9wMTAsDQogICAgY29sID0gcmFpbmJvdygxMCksDQogICAgcmFkaXVzID0gMSwNCiAgICBjZXggPSAwLjgsDQogICAgbGFiZWxzID0gbGFiLA0KICAgIG1haW4gPSAi7KCc7KO864+EIOyXrO2Wiey9lOyKpCDtg5EgMTAiDQopDQoNCiMjIOuwlOywqO2KuA0KYnAgPC0gYmFycGxvdCgNCiAgdG9wMTAsDQogIG1haW4gPSAi7KCc7KO864+EIOyXrO2Wiey9lOyKpCDtg5EgMTAiLA0KICBjb2wgPSByYWluYm93KDEwKSwNCiAgY2V4Lm5hbWVzID0gMS44LA0KICBsYXMgPSAyLA0KICB5bGltID0gYygwLDMwKQ0KKQ0KdGV4dCh4ID0gYnAsDQogICAgIHkgPSB0b3AxMCwNCiAgICAgbGFiZWxzID0gbGFiLA0KICAgICBjb2wgPSAiYmxhY2siLA0KICAgICBjZXggPSAxLjApDQp0ZXh0KHggPSBicCwNCiAgICAgeSA9IHRvcDEwLTUsDQogICAgIGxhYmVscyA9IHBhc3RlKHRvcDEwLCLqsbQiKSwNCiAgICAgY29sID0gImJsYWNrIiwNCiAgICAgY2V4ID0gMC43KQ0KDQojIyAzRCDtjIzsnbTssKjtirgNCmluc3RhbGwucGFja2FnZXMoInBsb3RyaXgiKQ0KbGlicmFyeShwbG90cml4KQ0KcGxvdHJpeDo6cGllM0QoDQogIHRvcDEwLA0KICBjb2wgPSByYWluYm93KDEwKSwNCiAgY2V4ID0gMS4wLA0KICBsYWJlbHMgPSBsYWIsDQogIGV4cGxvZGUgPSAwLjENCikNCg0KIyB0beydgCDsmIHslrQsIGtvbmxwIO2VnOq4gCDrtoTshJ3tlaAg65WMIOyCrOyaqQ0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KYGBgDQo=