Homework: 국정원 트윗 텍스트 마이닝
# Install any missing dependency, then attach it.
#
# The earlier check, `"pkg" %in% installed.packages("pkg")`, handed the package
# name to installed.packages() as its first argument `lib.loc`, i.e. as a
# library directory to scan, instead of looking the package up among the
# installed packages. requireNamespace() tests availability directly, so
# install.packages() is only called when the package really is missing.
# Packages are attached in the same order as before; ggplot2 now gets the same
# install-if-missing guard as the others.
required_pkgs <- c(
  "rJava", "memoise", "KoNLP", "tm", "wordcloud",
  "dplyr", "ggplot2", "stringr", "RColorBrewer"
)
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # character.only = TRUE makes library() use the value of `pkg`, not the
  # literal name "pkg".
  library(pkg, character.only = TRUE)
}
# Load the Sejong dictionary through KoNLP.
KoNLP::useSejongDic()
# Report the current working directory, then switch to the data folder.
# NOTE(review): the path is absolute and machine-specific (a Windows
# Administrator profile); the relative "twitter.csv" read further down depends
# on this directory change, so the script will not run unchanged elsewhere.
getwd()
setwd("C:\\Users\\Administrator\\rlang_weekend2\\Data_R_180929")
# Load the tweet data.
# Reads "twitter.csv" from the current working directory as UTF-8 text, using
# the first row as column names and keeping text columns as character vectors
# rather than factors. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
twitter <- read.csv(
  "twitter.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
)
# Rename the columns.
# Show the column names as read from the file.
colnames(twitter)
# Map the Korean headers to short ASCII names: no, id, date, tw.
twitter <- twitter %>%
  dplyr::rename(no = 번호, id = 계정이름, date = 작성일, tw = 내용)

# Replace every non-word character in the tweet text with a space, then show
# the first few cleaned tweets.
twitter[["tw"]] <- str_replace_all(
  string = twitter[["tw"]],
  pattern = "\\W",
  replacement = " "
)
head(twitter[["tw"]])
# 단어 빈도표 만들기
LS0tDQp0aXRsZTogIuq1reygleybkCDtirjsnJcg7YWN7Iqk7Yq4IOuniOydtOuLnShwYWdlIDI3MykiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpIb21ld29yazog6rWt7KCV7JuQIO2KuOyclyDthY3siqTtirgg66eI7J2064udDQoNCmBgYHtyfQ0KaWYoInJKYXZhIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygickphdmEiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJySmF2YSIpDQpsaWJyYXJ5KHJKYXZhKQ0KaWYoIm1lbW9pc2UiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJtZW1vaXNlIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygibWVtb2lzZSIpDQpsaWJyYXJ5KG1lbW9pc2UpDQppZigiS29OTFAiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJLb05MUCIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoIktvTkxQIikNCmxpYnJhcnkoS29OTFApDQppZigidG0iICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJ0bSIpID09IEZBTFNFKWluc3RhbGwucGFja2FnZXMoInRtIikNCmxpYnJhcnkodG0pDQppZigid29yZGNsb3VkIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygid29yZGNsb3VkIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygid29yZGNsb3VkIikNCmxpYnJhcnkod29yZGNsb3VkKQ0KaWYoImRwbHlyIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygiZHBseXIiKSA9PSBGQUxTRSlpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpDQpsaWJyYXJ5KGRwbHlyKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KaWYoInN0cmluZ3IiICVpbiUgaW5zdGFsbGVkLnBhY2thZ2VzKCJzdHJpbmdyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygic3RyaW5nciIpDQpsaWJyYXJ5KHN0cmluZ3IpDQppZigiUkNvbG9yQnJld2VyIiAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygiUkNvbG9yQnJld2VyIikgPT0gRkFMU0UpaW5zdGFsbC5wYWNrYWdlcygiUkNvbG9yQnJld2VyIikNCmxpYnJhcnkoUkNvbG9yQnJld2VyKQ0KDQoNCktvTkxQOjp1c2VTZWpvbmdEaWMoKQ0KZ2V0d2QoKQ0Kc2V0d2QoIkM6XFxVc2Vyc1xcQWRtaW5pc3RyYXRvclxccmxhbmdfd2Vla2VuZDJcXERhdGFfUl8xODA5MjkiKQ0KDQojIOuNsOydtO2EsCDqsIDsoLjsmKTquLANCnR3aXR0ZXIgPC0gcmVhZC5jc3YgKCJ0d2l0dGVyLmNzdiIsDQogICAgICAgICAgICAgICAgICAgICBoZWFkZXIgPSBULA0KICAgICAgICAgICAgICAgICAgICAgc3RyaW5nc0FzRmFjdG9ycyA9IEYsDQogICAgICAgICAgICAgICAgICAgICBmaWxlRW5jb2RpbmcgPSAiVVRGLTgiDQogICAgICAgICAgICAgICAgICAgICApDQojIOuzgOyImOuqhSDrs4Dqsr0NCm5hbWVzKHR3aXR0ZXIpDQp0d2l0dGVyIDwtIGRwbHlyOjpyZW5hbWUodHdpdHRlciwNCiAgICAgICAgICAgICAgICAgICAgICAgICBubyA9IOuyiO2YuCwNCiAgICAgICAgICAgICAgICAgICAgICAgICBpZCA9IOqzhOygleydtOumhCwgDQogICAgICAgICAgICAgICAgICAgICAgICAgZGF0ZSA9IOyekeyEseydvCwNCiAg
ICAgICAgICAgICAgICAgICAgICAgICB0dyA9IOuCtOyaqQ0KICAgICAgICAgICAgICAgICAgICAgICAgICkNCg0KdHdpdHRlciR0dyA8LSBzdHJfcmVwbGFjZV9hbGwodHdpdHRlciR0dywgIlxcVyIsICIgIikNCmhlYWQodHdpdHRlciR0dykNCg0KIyDri6jslrQg67mI64+E7ZGcIOunjOuTpOq4sA0KDQpgYGANCg0K