# Install the text-mining packages only when they are missing, then attach
# them. requireNamespace() searches the regular library paths; the earlier
# installed.packages("tm") call passed "tm" as `lib.loc`, so the lookup came
# back empty and tm was reinstalled on effectively every run.
if (!requireNamespace("tm", quietly = TRUE)) {
  install.packages("tm")
}
if (!requireNamespace("wordcloud", quietly = TRUE)) {
  install.packages("wordcloud")
}
library(tm)
library(wordcloud)
# Download the review text; readLines() yields one character element per line.
# NOTE(review): no encoding is declared here; the later lookups use Korean
# terms, so confirm the file's encoding matches the session locale.
tgt <- readLines("https://www.dropbox.com/s/mh5h6ulvtr1ri6z/Shilla__Hotel__Review.txt?dl=1")

# Wrap every line as its own document in a volatile corpus.
tgt1 <- VCorpus(VectorSource(tgt))

# Normalise the text step by step.
tgt1 <- tm_map(tgt1, stripWhitespace) # whitespace handling
# tolower() is a plain character function, so it is wrapped in
# content_transformer() to keep each element a text document. This replaces
# the former trailing tm_map(tgt1, PlainTextDocument) repair step, which
# rebuilt the documents and reset their metadata.
tgt1 <- tm_map(tgt1, content_transformer(tolower)) # lower-case alphabetic text
tgt1 <- tm_map(tgt1, removePunctuation) # strip punctuation characters

# English stop words to drop from every document.
gsb <- stopwords("english")
tgt1 <- tm_map(tgt1, removeWords, gsb)
# Build the term-document matrix from the cleaned corpus.
tgt2 <- TermDocumentMatrix(tgt1)

# Print the terms whose frequency is at least 0.5, and the terms whose
# correlation with "신라호텔" is at least 0.5.
findFreqTerms(tgt2, lowfreq = 0.5)
findAssocs(tgt2, terms = "신라호텔", corlimit = 0.5)

# Convert to a dense matrix and preview its first rows.
tgt3 <- as.matrix(tgt2)
head(tgt3)

# Total count per term (row sums), highest first; printed for inspection.
tgt4 <- sort(rowSums(tgt3), decreasing = TRUE)
tgt4
# Eight-colour "Dark2" palette used to colour the words.
pal <- brewer.pal(n = 8, name = "Dark2")

# Fix the RNG seed before drawing the cloud.
set.seed(1234)

# Draw the word cloud from the per-term totals in tgt4.
wordcloud(
  words = names(tgt4),
  freq = tgt4,
  min.freq = 2,          # minimum frequency: at least 2 occurrences
  scale = c(2.5, 0.1),   # word size from 0.1 up to 2.5
  rot.per = 0.25,        # rotation ratio
  random.order = FALSE,  # high-frequency words placed at the centre
  random.color = TRUE,
  colors = pal
)
# Add a caption box to the current plot at (0.3, 1.0): bold red text on a
# white background with a red frame. `fill` and `border` are NA, so no
# coloured key box is shown next to the label.
# The stray trailing comma of the original call (an empty extra argument
# that bound positionally to `col`) has been removed.
legend(
  x = 0.3,
  y = 1.0,
  legend = "서울신라호텔 이용후기",
  cex = 0.8,
  fill = NA,
  border = NA,
  bg = "white",
  text.col = "red",
  text.font = 2,
  box.col = "red"
)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KaWYoInRtIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygidG0iKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJ0bSIpDQpsaWJyYXJ5KHRtKQ0KbGlicmFyeSh3b3JkY2xvdWQpDQp0Z3QgPC0gcmVhZExpbmVzKCJodHRwczovL3d3dy5kcm9wYm94LmNvbS9zL21oNWg2dWx2dHIxcmk2ei9TaGlsbGFfX0hvdGVsX19SZXZpZXcudHh0P2RsPTEiKQ0KdGd0MSA8LSBWQ29ycHVzKFZlY3RvclNvdXJjZSh0Z3QpKQ0KdGd0MSA8LSB0bV9tYXAodGd0MSwgc3RyaXBXaGl0ZXNwYWNlKSAjIOqzteuwseyymOumrA0KdGd0MSA8LSB0bV9tYXAodGd0MSwgdG9sb3dlcikgIyDslYztjIzrsrPsnbTrqbQg7IaM66y47J6QDQp0Z3QxIDwtIHRtX21hcCh0Z3QxLCByZW1vdmVQdW5jdHVhdGlvbikgIyDrp4jsuajtkZwsIOqzteuwsSwg7IS466+47L2c66GgLCDsvZzroaDsoJzqsbANCmdzYiA8LSBjKHN0b3B3b3JkcygNCiAgKCdlbmdsaXNoJykNCikpDQp0Z3QxIDwtIHRtX21hcCh0Z3QxLHJlbW92ZVdvcmRzLGdzYikNCnRndDEgPC0gdG1fbWFwKHRndDEsIFBsYWluVGV4dERvY3VtZW50KQ0KdGd0MiA8LSBUZXJtRG9jdW1lbnRNYXRyaXgodGd0MSkNCmZpbmRGcmVxVGVybXModGd0MiwgMC41KQ0KZmluZEFzc29jcyh0Z3QyLCAi7Iug65287Zi47YWUIiwgMC41KQ0KdGd0MyA8LSBhcy5tYXRyaXgodGd0MikNCmhlYWQodGd0MykNCnRndDQgPC0gc29ydChyb3dTdW1zKHRndDMpLGRlY3JlYXNpbmcgPSBUKQ0KdGd0NA0KcGFsIDwtIGJyZXdlci5wYWwoOCwiRGFyazIiKQ0Kc2V0LnNlZWQoMTIzNCkNCndvcmRjbG91ZCgNCiAgbmFtZXModGd0NCksDQogIGZyZXEgPSB0Z3Q0LA0KICBzY2FsZSA9IGMoMi41LDAuMSksICMg64uo7Ja07YGs6riwIDAuMSB+IDIuNQ0KICByb3QucGVyID0gMC4yNSwgIyDtmozsoITruYTsnKgNCiAgbWluLmZyZXEgPSAyLCAjIOy1nOyggCDruYjrj4TsiJggMu2ajOydtOyDgQ0KICByYW5kb20ub3JkZXIgPSBGLCAjIOqzoOu5iOuPhCDri6jslrQg7KSR7JWZ67Cw7LmYDQogIHJhbmRvbS5jb2xvciA9IFQsDQogIGNvbG9ycyA9IHBhbA0KKQ0KbGVnZW5kKA0KICAwLjMsMS4wLA0KICAi7ISc7Jq47Iug65287Zi47YWUIOydtOyaqe2bhOq4sCIsDQogIGNleCA9IDAuOCwNCiAgZmlsbCA9IE5BLA0KICBib3JkZXIgPSBOQSwNCiAgYmcgPSAnd2hpdGUnLA0KICB0ZXh0LmNvbCA9ICdyZWQnLA0KICB0ZXh0LmZvbnQgPSAyLA0KICBib3guY29sID0gJ3JlZCcsDQogICkNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQpgYGANCg0K