패키지 준비

# Install any missing dependency, then attach it.
# requireNamespace() is used for the availability check because
# installed.packages("pkg") treats its first argument as a library path
# (lib.loc), not a package name, so that form of check never finds the
# package and re-runs install.packages() on every execution.
required_pkgs <- c(
  "rJava", "memoise", "KoNLP", "tm", "wordcloud",
  "dplyr", "ggplot2", "stringr", "RColorBrewer"
)
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Select the Sejong dictionary for KoNLP.
KoNLP::useSejongDic()
# Show the current working directory.
getwd()

step 1. 데이터를 로딩한다

# Read the input file line by line into a character vector.
txt <- readLines(con = "hiphop.txt")
# Preview the first six lines.
head(x = txt, n = 6L)

step 2. 특수문자를 제거

# Replace every non-word character (regex \W) with a single space.
txt <- stringr::str_replace_all(
  string = txt,
  pattern = "\\W",
  replacement = " "
)
# Display the intermediate result.
txt
# Replace every character that is not alphabetic with a single space.
txt <- stringr::str_replace_all(
  string = txt,
  pattern = "[^[:alpha:]]",
  replacement = " "
)

step 3. 명사만 추출

# Apply extractNoun() to each line of text.
# lapply() always returns a list with one element per input line, whereas
# sapply() collapses the result to a vector or matrix whenever every line
# happens to yield the same number of values, making the type of `nouns`
# depend on the data.
nouns <- lapply(txt, extractNoun)

step 4. 단어별로 빈도표 작성

# Flatten the extracted nouns and count how often each one occurs.
wordcount <- table(unlist(x = nouns))
# Convert the counts to a data frame, keeping the words as character strings.
df_word <- as.data.frame(
  x = wordcount,
  stringsAsFactors = FALSE
)

step 5. 변수명 수정

# Show the current column names.
names(df_word)
# Rename the columns: Var1 -> word, Freq -> freq.
df_word <- df_word %>%
  dplyr::rename(word = Var1, freq = Freq)
df_word

step 6. 2글자 이상만 추출

# Keep only the rows whose word is at least two characters long.
df_word <- df_word %>%
  dplyr::filter(nchar(word) >= 2)
df_word

step 7. 빈도순 정렬 후 상위 20단어만 추출

# Sort by descending frequency and keep the first 20 rows.
top_20 <- head(dplyr::arrange(df_word, desc(freq)), 20)
top_20

step 8. Word cloud 만들기

# Draw a word cloud from the word/frequency columns of df_word, coloured
# with the 8-colour "Dark2" palette from RColorBrewer.
wordcloud::wordcloud(
  words = df_word$word,
  freq = df_word$freq,
  scale = c(4, 0.3),
  min.freq = 2,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.1,
  colors = RColorBrewer::brewer.pal(n = 8, name = "Dark2")
)
LS0tDQp0aXRsZTogIu2eme2VqSDqsIDsgqwg7YWN7Iqk7Yq4IOuniOydtOuLnSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQojIyMg7Yyo7YKk7KeAIOykgOu5hA0KDQpgYGB7cn0NCmlmKCJySmF2YSIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoInJKYXZhIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygickphdmEiKQ0KbGlicmFyeShySmF2YSkNCmlmKCJtZW1vaXNlIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygibWVtb2lzZSIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIm1lbW9pc2UiKQ0KbGlicmFyeShtZW1vaXNlKQ0KaWYoIktvTkxQIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygiS29OTFAiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJLb05MUCIpDQpsaWJyYXJ5KEtvTkxQKQ0KaWYoInRtIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygidG0iKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJ0bSIpDQpsaWJyYXJ5KHRtKQ0KaWYoIndvcmRjbG91ZCIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIndvcmRjbG91ZCIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIndvcmRjbG91ZCIpDQpsaWJyYXJ5KHdvcmRjbG91ZCkNCmlmKCJkcGx5ciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoImRwbHlyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCmlmKCJzdHJpbmdyIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygic3RyaW5nciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoInN0cmluZ3IiKQ0KbGlicmFyeShzdHJpbmdyKQ0KaWYoIlJDb2xvckJyZXdlciIgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoIlJDb2xvckJyZXdlciIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIlJDb2xvckJyZXdlciIpDQpsaWJyYXJ5KFJDb2xvckJyZXdlcikNCg0KS29OTFA6OnVzZVNlam9uZ0RpYygpDQpnZXR3ZCgpDQpgYGANCg0KIyMjIyBzZXRwIDEuIOuNsOydtO2EsOulvCDroZzrlKntlZzri6QNCmBgYHtyfQ0KdHh0IDwtIHJlYWRMaW5lcygiaGlwaG9wLnR4dCIpDQpoZWFkKHR4dCkNCmBgYA0KIyMjIyBzdGVwIDIuIO2KueyImOusuOyekOulvCDsoJzqsbANCmBgYHtyfQ0KdHh0IDwtIHN0cmluZ3I6OnN0cl9yZXBsYWNlX2FsbCh0eHQsICJcXFciLCAiICIpICMg7Yq57IiY66y47J6QIOygnOqxsCwg7ZWc6riA66eMIOuCqOq4sOuKlOqxtCDrjIDrrLjsnpAgVw0KdHh0DQp0eHQgPC0gc3RyaW5ncjo6c3RyX3JlcGxhY2VfYWxsKHR4dCwiW15bOmFscGhhOl1dIiwiICIpIA0KYGBgDQojIyMjIHN0ZXAgMy4g66qF7IKs66eMIOy2lOy2nA0KYGBge3J9DQpub3VucyA8LQ0KICBzYXBwbHkoDQogICAgdHh0LA0KICAgIGV4dHJhY3ROb3VuLA0KICAgIFVTRS5OQU1FUyA9IEYNCiAgKQ0KDQpgYGANCiMjIyMgc3RlcCA0LiDri6jslrTrs4TroZwg67mI64+E7ZGcIOyekeyEsQ0KYGBge3J9DQp3b3JkY291bnQgPC0gdGFibGUodW5saXN0KG5v
dW5zKSkNCmRmX3dvcmQgPC0gYXMuZGF0YS5mcmFtZSh3b3JkY291bnQsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGKQ0KYGBgDQoNCiMjIyMgc3RlcCA1LiDrs4DsiJjrqoUg7IiY7KCVDQpgYGB7cn0NCm5hbWVzKGRmX3dvcmQpDQpkZl93b3JkIDwtIGRwbHlyOjpyZW5hbWUoDQogIGRmX3dvcmQsDQogIHdvcmQgPSBWYXIxLA0KICBmcmVxID0gRnJlcQ0KKQ0KZGZfd29yZA0KYGBgDQojIyMjIHN0ZXAgNi4gMuq4gOyekCDsnbTsg4Hrp4wg7LaU7LacDQpgYGB7cn0NCmRmX3dvcmQgPC0gZHBseXI6OmZpbHRlcihkZl93b3JkLCBuY2hhcih3b3JkKT49MikNCmRmX3dvcmQNCmBgYA0KIyMjIyBzdGVwIDcuIOu5iOuPhOyInCDsoJXroKwg7ZuEIOyDgeychCAyMOuLqOyWtOunjCDstpTstpwNCmBgYHtyfQ0KdG9wXzIwIDwtIGRmX3dvcmQgJT4lIA0KICBkcGx5cjo6YXJyYW5nZShkZXNjKGZyZXEpKSAlPiUgDQogIGhlYWQoMjApDQp0b3BfMjANCmBgYA0KIyMjIyBzdGVwIDguIFdvcmQgY2xvdWQg66eM65Ok6riwDQpgYGB7cn0NCndvcmRjbG91ZDo6d29yZGNsb3VkKA0KICB3b3JkcyA9IGRmX3dvcmQkd29yZCwNCiAgZnJlcSA9IGRmX3dvcmQkZnJlcSwNCiAgbWluLmZyZXEgPSAyLA0KICBtYXgud29yZHMgPSAyMDAsDQogIHJhbmRvbS5vcmRlciA9IEYsDQogIHJvdC5wZXIgPSAuMSwNCiAgc2NhbGUgPSBjKDQsIDAuMyksDQogIGNvbG9ycyA9IGJyZXdlci5wYWwoOCwgIkRhcmsyIikNCikNCmBgYA0KDQoNCg0K