library('progress')
library('XML')
library('stringr')
library('rvest')
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
##
## xml
마지막 회차를 last에 저장한다. html_nodes()는 Chrome 기준으로 F12 키(개발자 도구)를 통해 확인한 CSS 선택자에 해당하는 노드를 객체로 가져오는 함수이며, html_text()는 그 노드의 내용을 텍스트로 가져오는 함수이다.
# Look up the most recent draw number on the search result page and store it
# in `last`; the later chunks iterate over the draws `last:1`.
url <- 'https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=%EB%A1%9C%EB%98%90&oquery=%EB%A1%9C%EB%98%90&tqi=TmrKidpVuFdsssc0EvVssssssUd-075322'
line <- read_html(url, encoding = "UTF-8")
p1 <- html_nodes(line, css = "._lotto-btn-current em") %>% html_text()
if (length(p1) == 0L) {
  stop("no node matched '._lotto-btn-current em'; cannot determine the latest draw",
       call. = FALSE)
}
# Take the first run of digits rather than a fixed 3-character prefix, so the
# draw number is read correctly whether it has 2, 3, or 4+ digits.
# NOTE(review): assumes the matched text starts with the draw number - confirm
# against the live page markup.
last <- as.numeric(regmatches(p1[1], regexpr("[0-9]+", p1[1])))
if (length(last) != 1L) {
  stop("could not parse a draw number from: ", p1[1], call. = FALSE)
}
last
## [1] 842
apply문을 통해 데이터를 가져오는 코드
# One search URL per draw, from the latest draw (`last`) down to draw 1.
url <- paste0('https://search.naver.com/search.naver?sm=tab_drt&where=nexearch&query=', last:1, '%ED%9A%8C%EB%A1%9C%EB%98%90')
system.time({
  # vapply() iterates the URL vector directly (no data.frame -> matrix
  # coercion) and enforces that every page yields exactly six numeric values.
  # USE.NAMES = FALSE keeps the result free of dimnames, so after t() `a` is a
  # plain (number of draws) x 6 matrix.
  a <- t(vapply(url, function(x) {
    page <- read_html(x, encoding = "UTF-8")
    box_text <- html_nodes(page, css = ".num_box") %>% html_text()
    # Only the first matched node is used; its space-separated tokens 1..6 are
    # converted to numbers.
    as.numeric(strsplit(str_trim(box_text), split = " ")[[1]][1:6])
  }, numeric(6), USE.NAMES = FALSE))
})
## user system elapsed
## 134.82 5.78 380.43
for문을 통해 데이터를 가져오는 코드
system.time({
  # Preallocate the result instead of growing it with rbind() on every
  # iteration (which copies the whole matrix each time).
  # Columns 1-6 hold the parsed numbers, column 7 the draw number.
  rounds <- last:1
  b <- matrix(NA_real_, nrow = length(rounds), ncol = 7L)
  for (k in seq_along(rounds)) {
    i <- rounds[k]
    url <- paste0('https://search.naver.com/search.naver?sm=tab_drt&where=nexearch&query=', i, '%ED%9A%8C%EB%A1%9C%EB%98%90')
    lines <- read_html(url, encoding = "UTF-8")
    keep <- html_nodes(lines, css = ".num_box") %>% html_text()
    # Only the first matched node is used; its space-separated tokens 1..6 are
    # converted to numbers.
    b[k, ] <- c(as.numeric(strsplit(str_trim(keep), split = " ")[[1]][1:6]), i)
  }
})
## user system elapsed
## 133.77 5.81 354.31
병렬 처리를 통해 데이터를 가져오는 코드로, spec은 detectCores()가 반환한 CPU 코어 수이며 클러스터의 워커 수로 사용된다.
library(parallel)
library(foreach)
# Core count reported by detectCores(); passed to makeCluster() below as the
# number of workers.
spec <- parallel::detectCores()
spec
## [1] 8
# Start one worker per detected core and scrape the draws in parallel.
cl <- makeCluster(spec)
url <- paste0('https://search.naver.com/search.naver?sm=tab_drt&where=nexearch&query=', last:1, '%ED%9A%8C%EB%A1%9C%EB%98%90')
# `finally` guarantees the workers are shut down even when a request or a
# parse step fails on one of them; otherwise the worker processes would
# linger until the R session ends.
tryCatch(
  system.time({
    # Functions are namespace-qualified because the worker processes do not
    # have rvest/stringr attached.
    # Each worker call returns six numbers; the rows are stacked into a
    # (number of draws) x 6 data frame with default column names X1..X6.
    c <- data.frame(do.call(rbind, parLapply(cl, url, function(x) {
      page <- xml2::read_html(x, encoding = "UTF-8")
      box_text <- rvest::html_text(rvest::html_nodes(page, css = ".num_box"))
      as.numeric(strsplit(stringr::str_trim(box_text), split = " ")[[1]][1:6])
    })))
  }),
  finally = stopCluster(cl)
)
## user system elapsed
## 0.02 0.00 55.11
# Preview the first six rows of the apply()-based result.
head(a, n = 6L)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 14 26 32 36 39 42
## [2,] 5 11 14 30 33 38
## [3,] 2 4 11 28 29 43
## [4,] 3 9 11 12 13 19
## [5,] 9 14 17 33 36 38
## [6,] 2 25 28 30 33 45
# Preview the first six rows of the for-loop result (column 7 is the draw number).
head(b, n = 6L)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 14 26 32 36 39 42 842
## [2,] 5 11 14 30 33 38 841
## [3,] 2 4 11 28 29 43 840
## [4,] 3 9 11 12 13 19 839
## [5,] 9 14 17 33 36 38 838
## [6,] 2 25 28 30 33 45 837
# Preview the first six rows of the parallel result.
head(c, n = 6L)
## X1 X2 X3 X4 X5 X6
## 1 14 26 32 36 39 42
## 2 5 11 14 30 33 38
## 3 2 4 11 28 29 43
## 4 3 9 11 12 13 19
## 5 9 14 17 33 36 38
## 6 2 25 28 30 33 45