레퍼럴

데이터 준비하기

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
library(stringr)
# Read the referral workbook from the working directory.
referral <- readxl::read_excel(path = "referral.xlsx")

# Work on a copy so the freshly imported object stays untouched.
referral_copy <- referral

# Print a per-column overview of the working copy.
summary(referral_copy)

##   travelrule           market            context         
##  Length:3           Length:3           Length:3          
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character

# Replace every non-word character (regex \W) in the context column with a
# single space before the text is handed to the noun extractor.
context_raw <- referral_copy$context
referral_copy$context <- stringr::str_replace_all(
  string = context_raw,
  pattern = "\\W",
  replacement = " "
)

단어 빈도표 만들기

library(KoNLP)

## Checking user defined dictionary!

library(dplyr)
# Extract nouns from the cleaned context column with KoNLP.
nouns <- extractNoun(referral_copy$context)

# Flatten the extraction result and count how often each noun occurs.
wordcount <- table(unlist(nouns))

# Convert the frequency table to a data frame, keeping words as character
# strings. FALSE is spelled out because `F` is an ordinary variable that can
# be reassigned, whereas FALSE is a reserved word.
df_word <- as.data.frame(wordcount, stringsAsFactors = FALSE)

# Give the default Var1/Freq columns descriptive names.
df_word <- dplyr::rename(df_word,
                  word = Var1,
                  freq = Freq)

두 글자 이상으로 된 단어를 추출하고, 빈도 순으로 정렬해 가장 많이 사용된 단어 30개를 추출

# Keep only words that are at least two characters long.
df_word <- df_word %>%
  filter(nchar(word) >= 2)

# Sort by descending frequency and keep the first 30 rows.
top30 <- head(arrange(df_word, desc(freq)), 30)

# Print the resulting table.
top30

##            word freq
## 1          비트   14
## 2          프로    6
## 3        오케이    4
## 4           BTX    3
## 5        고팍스    3
## 6  마이키핀월렛    3
## 7      바이낸스    3
## 8      바이비트    3
## 9          보라    3
## 10         블록    3
## 11       비트겟    3
## 12         에이    3
## 13         지닥    3
## 14   캐셔레스트    3
## 15         코빗    3
## 16     코어닥스    3
## 17   코인베이스    3
## 18       코인원    3
## 19       크라켄    3
## 20   포블게이트    3
## 21       프라뱅    3
## 22       플라이    3
## 23       후오비    3
## 24          FTX    2
## 25       글로벌    2
## 26         빗썸    2
## 27       빗크몬    2
## 28       빙엑스    2
## 29       업비트    2
## 30         엑스    2

단어 빈도 막대 그래프 만들기

library(ggplot2)
# Words ordered by ascending frequency; after coord_flip() this puts the most
# frequent word at the top of the chart.
order1 <- arrange(top30, freq)$word

# Horizontal bar chart of word frequencies with the count printed next to
# each bar.
ggplot(data = top30, aes(x = word, y = freq)) +
  ylim(0, 20) +                               # fixed frequency-axis range
  geom_col() +
  coord_flip() +                              # words on the vertical axis
  # `limits` is spelled out in full instead of relying on partial argument
  # matching of `limit`.
  scale_x_discrete(limits = order1) +
  geom_text(aes(label = freq), hjust = -0.3)  # label just past the bar end

워드 클라우드 만들기

library(wordcloud)

## Loading required package: RColorBrewer

library(RColorBrewer)
# Take the five darkest shades of the 9-class "Blues" palette.
pal <- brewer.pal(9, "Blues")[5:9]

# Fix the RNG seed so the word layout is reproducible between runs.
set.seed(1234)

# Draw the word cloud. The first argument is named `words` in full (the
# original `word =` only worked through partial argument matching), and
# FALSE replaces the reassignable alias `F`.
wordcloud(words = df_word$word,
          freq = df_word$freq,
          min.freq = 2,            # skip words that occur fewer than 2 times
          max.words = 200,         # plot at most 200 words
          random.order = FALSE,    # plot words in decreasing frequency
          rot.per = .1,            # proportion of words rotated 90 degrees
          scale = c(6, 0.5),       # range of word sizes
          colors = pal)

레퍼럴

Na Hyeon Su

03 DEC 2022

데이터 준비하기

단어 빈도표 만들기

두 글자 이상으로 된 단어를 추출하고, 빈도 순으로 정렬해 가장 많이 사용된 단어 30개를 추출

단어 빈도 막대 그래프 만들기

워드 클라우드 만들기