# Packages needed to build the word cloud
library(KoNLP)
## Warning: package 'KoNLP' was built under R version 3.4.1
## Checking user defined dictionary!
library(stringr)
## Warning: package 'stringr' was built under R version 3.4.1
library(ggiraphExtra)
## Warning: package 'ggiraphExtra' was built under R version 3.4.1
library(tibble)
## Warning: package 'tibble' was built under R version 3.4.3
library(stringi)
## Warning: package 'stringi' was built under R version 3.4.1
library(devtools)
## Warning: package 'devtools' was built under R version 3.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.4.1
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.4.1
useNIADic()
## Backup was just finished!
## 983012 words dictionary was built.
# Load the text of the 2018 presidential New Year's address
# readLines() returns the file as a character vector, one element per line.
newyear <- readLines("E:/data/president.txt")
## Warning in readLines("E:/data/president.txt"): incomplete final line found
## on 'E:/data/president.txt'
# Check that the file was read in correctly
head(newyear)
## [1] "“내 삶이 나아지는 나라” "
## [2] ""
## [3] "존경하는 국민 여러분, 지난 일 년, 저는 평범함이 가장 위대하다는 것을 하루하루 느꼈습니다. 촛불광장에서 저는 군중이 아닌 한 사람 한 사람의 평범한 국민을 보았습니다. 어머니에서 아들로, 아버지에서 딸로 이어지는 역사가 그 어떤 거대한 역사의 흐름보다 중요하다는 것을 깨달았습니다. 한겨울 내내 촛불을 든 후 다시 일상을 충실히 살아가는 평범한 가족들을 보면서 저는 우리의 미래를 낙관할 수 있습니다. "
## [4] ""
## [5] ""
## [6] "우리가 민주주의의 역사를 다시 쓸 수 있었던 것은 그렇게 평범한 사람, 평범한 가족의 용기있는 삶이 우리 주변에 항상 존재하고 있었기 때문입니다. 저는 그것이 너무나 자랑스럽습니다. 덕분에 우리는 오늘 희망을 다시 이야기할 수 있게 되었습니다. "
# Drop the empty and whitespace-only lines that showed up in head(newyear).
# Logical subsetting keeps a plain character vector: no NA round trip and no
# na.action attribute left behind by na.omit(). trimws() strips spaces, tabs
# and line-end characters, so lines made of several spaces or tabs are removed
# as well, not only the exact strings "" and " ".
new_year <- newyear[nzchar(trimws(newyear))]
head(new_year)
## [1] "“내 삶이 나아지는 나라” "
## [2] "존경하는 국민 여러분, 지난 일 년, 저는 평범함이 가장 위대하다는 것을 하루하루 느꼈습니다. 촛불광장에서 저는 군중이 아닌 한 사람 한 사람의 평범한 국민을 보았습니다. 어머니에서 아들로, 아버지에서 딸로 이어지는 역사가 그 어떤 거대한 역사의 흐름보다 중요하다는 것을 깨달았습니다. 한겨울 내내 촛불을 든 후 다시 일상을 충실히 살아가는 평범한 가족들을 보면서 저는 우리의 미래를 낙관할 수 있습니다. "
## [3] "우리가 민주주의의 역사를 다시 쓸 수 있었던 것은 그렇게 평범한 사람, 평범한 가족의 용기있는 삶이 우리 주변에 항상 존재하고 있었기 때문입니다. 저는 그것이 너무나 자랑스럽습니다. 덕분에 우리는 오늘 희망을 다시 이야기할 수 있게 되었습니다. "
## [4] "국민들께서는 자신의 소중한 일상을 국가에 내어주었습니다. 나라를 바로 세울 힘을 주었습니다. 이제 국가는 국민들에게 응답해야 합니다. 더 정의롭고, 더 평화롭고, 더 안전하고, 더 행복한 삶을 약속해야 합니다. 그것이 바로 나라다운 나라입니다. "
## [5] "2018년 새해, 정부와 저의 목표는 국민들의 평범한 일상을 지키고, 더 나아지게 만드는 것입니다. 국민의 뜻과 요구를 나침반으로 삼겠습니다. 국민들께서 삶의 변화를 체감할 수 있게 하겠습니다."
## [6] "국민 여러분, 제가 대통령이 되어 가장 먼저 한 일은 집무실에 일자리 상황판을 설치한 것입니다. ‘사람중심 경제’라는 국정철학을 실천하기 위해서였습니다. 일자리는 우리 경제의 근간이자 개개인의 삶의 기반입니다. ‘사람중심 경제’의 핵심에 일자리가 있습니다."
# Replace every non-word character (regex \W) with a single space
new_year <- new_year %>%
  str_replace_all("\\W", " ")
# Run the KoNLP noun extractor over the cleaned lines
nouns <- new_year %>%
  KoNLP::extractNoun()
# The result above comes back as a list, so convert it into a table
# Flatten `nouns` into a single vector and tally each distinct value
wordcount <- table(unlist(nouns))
# Convert the tally to a data frame and rename its columns to word / freq
df.word <- wordcount %>%
  as.data.frame(stringsAsFactors = FALSE) %>%
  rename(word = Var1, freq = Freq)
# Keep words of two or more characters, most frequent first
word.freq <- df.word %>%
  filter(nchar(word) >= 2) %>%
  arrange(desc(freq))
# Show the ten most frequent words
head(word.freq, 10)
## word freq
## 1 국민 56
## 2 정부 26
## 3 우리 20
## 4 일자리 14
## 5 평화 14
## 6 사회 10
## 7 지원 10
## 8 한반도 10
## 9 경제 9
## 10 대한민국 9
# Word colours: colours 5 to 8 of the 8-colour "Dark2" palette.
# brewer.pal(8, ...) returns exactly 8 values, so the index must stop at 8;
# going to 9 adds an NA colour, and wordcloud() assigns colours from least to
# most frequent, which puts that NA on the most frequent words.
pal <- brewer.pal(8, "Dark2")[5:8]
# Fix the random seed; without it the layout changes on every draw
set.seed(1004)
# Draw the word cloud from the word / freq columns of word.freq
with(
  word.freq,
  wordcloud::wordcloud(
    words = word,
    freq = freq,
    scale = c(5, 0.3),
    min.freq = 2,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.1,
    colors = pal
  )
)
# [Reference] : Do it R
# [Reference] : President Moon Jae-in's New Year's address (internet)