1 워드클라우드 만들어보기

1.1 필요한 패키지 장착

# Packages needed to build the word cloud.
# The attach order is kept as-is because later packages mask earlier ones
# (see the dplyr masking messages below).
# NOTE(review): ggiraphExtra, tibble, stringi and devtools are not referenced
# by any code visible in this file — confirm whether they are still needed.
library(KoNLP)
## Warning: package 'KoNLP' was built under R version 3.4.1
## Checking user defined dictionary!
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.1
library(ggiraphExtra)
## Warning: package 'ggiraphExtra' was built under R version 3.4.1
library(tibble)
## Warning: package 'tibble' was built under R version 3.4.3
library(stringi)
## Warning: package 'stringi' was built under R version 3.4.1
library(devtools)
## Warning: package 'devtools' was built under R version 3.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.4.1
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.1
# Set up the NIA dictionary for KoNLP before extracting nouns; per the
# output below this builds a dictionary of 983012 words.
useNIADic()
## Backup was just finished!
## 983012 words dictionary was built.

1.2 데이터 불러오기

# Load the text of the 2018 presidential New Year's address; readLines()
# returns a character vector with one element per line of the file.
newyear <- readLines("E:/data/president.txt")
## Warning in readLines("E:/data/president.txt"): incomplete final line found
## on 'E:/data/president.txt'
# (The warning above only means the file does not end with a newline.)
# Check that the text was read in correctly by printing the first lines.
head(newyear)
## [1] "“내 삶이 나아지는 나라” "
## [2] ""
## [3] "존경하는 국민 여러분, 지난 일 년, 저는 평범함이 가장 위대하다는 것을 하루하루 느꼈습니다. 촛불광장에서 저는 군중이 아닌 한 사람 한 사람의 평범한 국민을 보았습니다. 어머니에서 아들로, 아버지에서 딸로 이어지는 역사가 그 어떤 거대한 역사의 흐름보다 중요하다는 것을 깨달았습니다. 한겨울 내내 촛불을 든 후 다시 일상을 충실히 살아가는 평범한 가족들을 보면서 저는 우리의 미래를 낙관할 수 있습니다. "
## [4] ""
## [5] ""
## [6] "우리가 민주주의의 역사를 다시 쓸 수 있었던 것은 그렇게 평범한 사람, 평범한 가족의 용기있는 삶이 우리 주변에 항상 존재하고 있었기 때문입니다. 저는 그것이 너무나 자랑스럽습니다. 덕분에 우리는 오늘 희망을 다시 이야기할 수 있게 되었습니다. "

1.3 데이터 전처리 하기

# Remove the blank lines that head(newyear) revealed.
# trimws() strips leading/trailing spaces, tabs and line breaks, so a line is
# dropped when nothing is left after trimming. This covers "" and " " as well
# as lines made of several spaces or tabs. Subsetting directly also avoids the
# NA-sentinel round trip, which left an `na.action` attribute on the result.
# The kept lines themselves are not modified (their trailing spaces remain).
new_year <- newyear[nzchar(trimws(newyear))]
head(new_year)
## [1] "“내 삶이 나아지는 나라” "
## [2] "존경하는 국민 여러분, 지난 일 년, 저는 평범함이 가장 위대하다는 것을 하루하루 느꼈습니다. 촛불광장에서 저는 군중이 아닌 한 사람 한 사람의 평범한 국민을 보았습니다. 어머니에서 아들로, 아버지에서 딸로 이어지는 역사가 그 어떤 거대한 역사의 흐름보다 중요하다는 것을 깨달았습니다. 한겨울 내내 촛불을 든 후 다시 일상을 충실히 살아가는 평범한 가족들을 보면서 저는 우리의 미래를 낙관할 수 있습니다. "
## [3] "우리가 민주주의의 역사를 다시 쓸 수 있었던 것은 그렇게 평범한 사람, 평범한 가족의 용기있는 삶이 우리 주변에 항상 존재하고 있었기 때문입니다. 저는 그것이 너무나 자랑스럽습니다. 덕분에 우리는 오늘 희망을 다시 이야기할 수 있게 되었습니다. "
## [4] "국민들께서는 자신의 소중한 일상을 국가에 내어주었습니다. 나라를 바로 세울 힘을 주었습니다. 이제 국가는 국민들에게 응답해야 합니다. 더 정의롭고, 더 평화롭고, 더 안전하고, 더 행복한 삶을 약속해야 합니다. 그것이 바로 나라다운 나라입니다. "
## [5] "2018년 새해, 정부와 저의 목표는 국민들의 평범한 일상을 지키고, 더 나아지게 만드는 것입니다. 국민의 뜻과 요구를 나침반으로 삼겠습니다. 국민들께서 삶의 변화를 체감할 수 있게 하겠습니다."
## [6] "국민 여러분, 제가 대통령이 되어 가장 먼저 한 일은 집무실에 일자리 상황판을 설치한 것입니다. ‘사람중심 경제’라는 국정철학을 실천하기 위해서였습니다. 일자리는 우리 경제의 근간이자 개개인의 삶의 기반입니다. ‘사람중심 경제’의 핵심에 일자리가 있습니다."

1.4 단어(명사) 추출하기

# Strip special characters: every non-word character ("\\W") becomes a space
# before the text is handed to the noun extractor.
new_year <- new_year %>% str_replace_all("\\W", " ")

# Extract the nouns from each line of the address.
nouns <- KoNLP::extractNoun(new_year)

# The extractor's result is a list, so flatten it and tabulate how often
# each word occurs.
wordcount <- table(unlist(nouns))

# Convert the frequency table to a data frame and give its columns the
# names `word` and `freq`.
df.word <- wordcount %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  rename(word = Var1, freq = Freq)

# Keep only words of two or more characters, most frequent first.
word.freq <- arrange(filter(df.word, nchar(word) >= 2), desc(freq))

# Show the ten most frequent words.
head(word.freq, n = 10)
##        word freq
## 1      국민   56
## 2      정부   26
## 3      우리   20
## 4    일자리   14
## 5      평화   14
## 6      사회   10
## 7      지원   10
## 8    한반도   10
## 9      경제    9
## 10 대한민국    9

1.5 워드클라우드 그리기

# Word colours: the last four colours of the 8-colour "Dark2" palette.
# brewer.pal(8, ...) returns exactly 8 colours, so the previous index range
# 5:9 ran one past the end and added an NA entry. wordcloud() colours words
# from least to most frequent along `colors`, so that trailing NA was given
# to the most frequent words, and an NA colour is not drawn.
pal <- brewer.pal(8, "Dark2")[5:8]

# Fix the random seed; without it the layout changes on every draw.
set.seed(1004)

# Draw the word cloud: words seen at least twice, at most 100 of them,
# most frequent words placed first (centre), 10% of words rotated.
wordcloud::wordcloud(words = word.freq$word, freq = word.freq$freq,
                     min.freq = 2, max.words = 100,
                     random.order = FALSE, rot.per = 0.1,
                     scale = c(5, 0.3),
                     colors = pal)

[참고] : Do it R

[참고] : 문재인대통령 신년사 (인터넷)