# Load required packages
library(httr) #html들고오기
library(rvest) #웹스크랩핑
library(jsonlite)#json파일다루기
library(stringr)
library("tm")
## 필요한 패키지를 로딩중입니다: NLP
##
## 다음의 패키지를 부착합니다: 'NLP'
## The following object is masked from 'package:httr':
##
## content
library("wordcloud")
## 필요한 패키지를 로딩중입니다: RColorBrewer
library("RColorBrewer")
library("ggplot2")
##
## 다음의 패키지를 부착합니다: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library("dplyr")
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##페이지 들고오기 ###1월부터 9월까지
cou<-c()
ti<-NA
ti_ti<-data.frame()
soi<-NA
kti<-data.frame()
j<-20 ##몇개를 들고오는가
k<-1 ##시작월
for (k in 1:9){
for (i in 1:j) {
ti<-NA
soi<-NA
s<-k
httr::GET(url = "https://section.blog.naver.com/ajax/SearchList.nhn",
query = list("countPerPage" = "7","currentPage" = i, "endDate"= paste0("2020-0",s,"-29"), "keyword"= "심리학", "orderBy"= "sim", "startDate"= paste0("2020-0",s,"-01"), "type" = "post"),
add_headers("referer" = "https://section.blog.naver.com/Search/Post.nh")) %>%
httr::content(as = "text") %>%
str_remove(pattern = '\\)\\]\\}\',') %>%
jsonlite::fromJSON() -> naverBlog
data <- naverBlog$result$searchList
cou <- dplyr::bind_rows(cou, data)
ti<-cou$noTagTitle
tii<-as.data.frame(ti)
tii<-as.character(tii)
soi<-tii
}
cat( k,"월완료","\n")
}
## 1 월완료
## 2 월완료
## 3 월완료
## 4 월완료
## 5 월완료
## 6 월완료
## 7 월완료
## 8 월완료
## 9 월완료
soi_9<-soi
###10월부터 12월까지
k<-10
Nblog<-c()
ti<-NA
ti_ti<-data.frame()
soi<-NA
kti<-data.frame()
j<-20
for (k in 10:12){
for (i in 1:j) {
ti<-NA
soi<-NA
s<-k
httr::GET(url = "https://section.blog.naver.com/ajax/SearchList.nhn",
query = list("countPerPage" = "7","currentPage" = i, "endDate"= paste0("2019-",s,"-29"), "keyword"= "심리학", "orderBy"= "sim", "startDate"= paste0("2019-",s,"-01"), "type" = "post"),
add_headers("referer" = "https://section.blog.naver.com/Search/Post.nh")) %>%
httr::content(as = "text") %>%
str_remove(pattern = '\\)\\]\\}\',') %>%
jsonlite::fromJSON() -> naverBlog
data <- naverBlog$result$searchList
Nblog <- dplyr::bind_rows(Nblog, data)
ti<-Nblog$noTagTitle
tii<-as.data.frame(ti)
tii<-as.character(tii)
soi<-tii
}
cat( k,"월완료","\n")
}
## 10 월완료
## 11 월완료
## 12 월완료
soi_12<-soi
##데이터 합치기
soi_1_12<-c(soi_9, soi_12)
soi_1_12<-as.data.frame(soi_1_12)
soi_1_12<-as.character(soi_1_12)
nnn1<-soi_1_12
##이상치제거 1
nnn1<-nnn1%>%str_replace_all(string=.,pattern = "c", replacement = '' )%>%
str_replace_all(string = ., pattern = "\n", replacement = '')%>%
str_replace_all(string = ., pattern = ",", replacement = '')%>%
str_replace_all(string = ., pattern = "심리학", replacement = '')
##구두점 날리기
TT<-nnn1%>%VectorSource(.)%>%
Corpus(.)%>%
tm_map(., removePunctuation)
## Warning in tm_map.SimpleCorpus(., removePunctuation): transformation drops
## documents
###음절단위 자르기
TT<-TermDocumentMatrix(TT)
TT<-as.matrix(TT)
##빈도로 나타내기
TTT<-sort(rowSums(TT),decreasing=TRUE)
TTT<-data.frame(word=names(TTT), freq=TTT)
print(head(TTT))
## word freq
## 책 책 104
## 상담과 상담과 82
## 과 과 79
## 위한 위한 57
## 을 을 55
## 이 이 49
###시각화
wordcloud(words = TTT$word, freq = TTT$freq, min.freq = 10, random.order = F, colors = brewer.pal(8, "Set2"))