rstudio/chromote 패키지를 활용하여 웹페이지의 일부 데이터를 가져오는 연습
#remotes::install_github("rstudio/chromote")
#install.packages('dplyr')
#install.packages('utf8')
library(chromote,quietly=TRUE)
library(dplyr,quietly=TRUE)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(utf8,quietly=TRUE)
# Replay page that the first demo scrapes.
page_url <- "https://view.shoppinglive.naver.com/replays/74801"
session <- ChromoteSession$new()
# Navigation, the load-event wait and the extra five-second pause are kept in
# one braced expression so they are evaluated together; invisible() keeps the
# value of the expression from being auto-printed.
invisible({
  session$Page$navigate(page_url, wait_ = FALSE)
  session$Page$loadEventFired()
  Sys.sleep(5)
})
# JavaScript that reads the text of the first tag counter on the page.
view_count_js <- 'document.querySelector("#root > div > div > div > div > div > div > div > div.TagItemLayout_wrap_1tXSl > a:nth-child(1) > span").innerHTML'
# Poll up to five times, one second apart, printing a dot per failed attempt,
# until the evaluation yields a value.
attempts_left <- 5
repeat {
  if (attempts_left <= 0) break
  evaluation <- session$Runtime$evaluate(view_count_js)
  if (!is.null(evaluation$result$value)) break
  cat(".")
  attempts_left <- attempts_left - 1
  Sys.sleep(1)
}
print(evaluation$result$value)
## [1] "48,000"
session$close()
## [1] TRUE
# Drop every global this demo created.
rm(attempts_left, page_url, evaluation, session, view_count_js)
코멘트는 최대 300개까지만 업데이트된다. 또한 실시간으로 올라오는 코멘트만 볼 수 있다.
library(chromote, quietly = TRUE)
# Replay page whose comment panel is sampled below.
replay_url <- "https://view.shoppinglive.naver.com/replays/74801"
session <- ChromoteSession$new()
# Navigation, the load-event wait and the extra pause are kept in one braced
# expression so they are evaluated together; invisible() suppresses printing.
invisible({
  session$Page$navigate(replay_url, wait_ = FALSE)
  session$Page$loadEventFired()
  Sys.sleep(5)
})
cat("Let's wait for 2 minutes...")
## Let's wait for 2 minutes...
Sys.sleep(60 * 2)
cat("\nOK.")
##
## OK.
# Count the comment <span> elements currently present in the comment panel.
count_selector <- "#root > div > div > div > div > div > div > div > div.Comment_wrap_3ylpz > div > div:nth-child(1) > div > span"
count_js <- sprintf('document.querySelectorAll("%s").length', count_selector)
n_comments <- session$Runtime$evaluate(count_js)$result$value
n_comments
## [1] 11
# seq_len() gives an empty sequence when there are no comments; 1:0 would
# instead produce c(1, 0) and query two non-existent rows. A missing count
# (NULL) is treated as zero.
positions <- seq_len(if (is.null(n_comments)) 0L else n_comments)
item_js <- sprintf('document.querySelector("#root > div > div > div > div > div > div > div > div.Comment_wrap_3ylpz > div > div:nth-child(1) > div:nth-child(%d) > span").innerHTML', positions)
# Evaluate one JavaScript expression in the page and return its value as a
# UTF-8 formatted string, or NA when the evaluation produced no value.
fetch_text <- function(js) {
  value <- session$Runtime$evaluate(js)$result$value
  if (is.null(value)) {
    return(NA_character_)
  }
  utf8::utf8_format(value)
}
# vapply() guarantees a character vector even for zero comments.
comment_text <- vapply(item_js, fetch_text, character(1), USE.NAMES = FALSE)
# Show at most the first ten comments; with fewer than ten available the table
# simply has fewer rows instead of NA padding.
shown <- head(comment_text, 10)
knitr::kable(data.frame(no = seq_along(shown), comment = shown))
| no | comment |
|---|---|
| 1 | 🖐🖐🖐🖐 |
| 2 | grn과 함께하는 34,900원 분홍이 초록이 세트 다이어트 시작합니다! |
| 3 | 이제 봄 오니까 필수죠 |
| 4 | hello |
| 5 | 봄 왔어요 |
| 6 | 방가 |
| 7 | 안녕하세용 |
| 8 | 혜택 기대돼요 |
| 9 | 혜택 기대돼요 |
| 10 | 안녕하세요~ |
session$close()
## [1] TRUE
# Drop every global this demo created.
rm(shown, comment_text, fetch_text, item_js, positions, n_comments, count_js,
   count_selector, replay_url, session)
#' Open a new chromote session.
#'
#' @return The object returned by `ChromoteSession$new()`.
createBrowser <- function() {
  ChromoteSession$new()
}
#' Navigate a session to a URL and wait for the page to settle.
#'
#' Issues the navigation with `wait_ = FALSE`, waits for the page's load
#' event, then sleeps for `waitForSec` additional seconds.
#'
#' @param b Session object exposing `$Page$navigate()` and
#'   `$Page$loadEventFired()`.
#' @param link URL to open.
#' @param waitForSec Seconds to sleep after the load event.
#' @return `NULL`, invisibly.
visitLink <- function(b, link, waitForSec = 5) {
  b$Page$navigate(link, wait_ = FALSE)
  b$Page$loadEventFired()
  Sys.sleep(waitForSec)
  invisible(NULL)
}
#' Read the `duration` property of the page's player <video> element.
#'
#' @param b Session object exposing `$Runtime$evaluate()`.
#' @return The evaluated value coerced with `as.numeric()`; `numeric(0)` when
#'   the evaluation carries no value.
getVideoLength <- function(b) {
  video_selector <- "#root > div > div > div > div > div > div > div > div.NativePlayer_wrap_2FTTe > video"
  duration_js <- paste0('document.querySelector("', video_selector, '").duration')
  reply <- b$Runtime$evaluate(duration_js)
  as.numeric(reply$result$value)
}
#' Read the `currentTime` property of the page's player <video> element.
#'
#' @param b Session object exposing `$Runtime$evaluate()`.
#' @return The evaluated value coerced with `as.numeric()`; `numeric(0)` when
#'   the evaluation carries no value.
getCurrentVideoTime <- function(b) {
  video_selector <- "#root > div > div > div > div > div > div > div > div.NativePlayer_wrap_2FTTe > video"
  position_js <- paste0('document.querySelector("', video_selector, '").currentTime')
  reply <- b$Runtime$evaluate(position_js)
  as.numeric(reply$result$value)
}
#' Read the innerHTML of the first element matching a CSS selector.
#'
#' Evaluates `document.querySelector("<css_code>").innerHTML` in the page and
#' retries, one second apart, while the evaluation yields no value.
#'
#' @param b Session object exposing `$Runtime$evaluate()`.
#' @param css_code CSS selector, inserted verbatim between double quotes in
#'   the generated JavaScript.
#' @param trial Maximum number of attempts. With `trial <= 0` no attempt is
#'   made and `NULL` is returned.
#' @param verbose When `TRUE`, prints a dot per failed attempt and a final
#'   newline.
#' @return The evaluated value, or `NULL` if every attempt came back empty.
extractData <- function(b, css_code, trial = 5, verbose = TRUE) {
  # The expression does not change between attempts, so build it once.
  selector_js <- sprintf('document.querySelector("%s").innerHTML', css_code)
  # Start from NULL so a non-positive `trial` returns NULL instead of failing
  # on an undefined variable.
  value <- NULL
  while (trial > 0) {
    value <- b$Runtime$evaluate(selector_js)$result$value
    if (!is.null(value)) break
    if (verbose) cat(".")
    trial <- trial - 1
    # Only pause when another attempt will actually follow.
    if (trial > 0) Sys.sleep(1)
  }
  if (verbose) cat("\n")
  value
}
#' Poll the comment panel once per second and collect a snapshot per poll.
#'
#' On every tick the number of comment <span> elements is read; when it is
#' positive, the innerHTML of each row's <strong> (stored as `user`) and
#' <span> (stored as `comment`) is fetched and appended as one data frame.
#' Polling stops after `waitFor + 1` ticks, or earlier once the video's
#' current time has reached the duration read at the start.
#'
#' @param b Session object exposing `$Runtime$evaluate()`; also passed to
#'   `getVideoLength()` and `getCurrentVideoTime()`.
#' @param waitFor Upper bound for the tick counter (ticks run from 0 to
#'   `waitFor`, one second apart).
#' @return A list of data frames with columns `user` and `comment`, one per
#'   tick on which at least one comment was counted; an empty list otherwise.
getComments <- function(b, waitFor = 60 * 2) {
  count_js <- sprintf(
    'document.querySelectorAll("%s").length',
    "#root > div > div > div > div > div > div > div > div.Comment_wrap_3ylpz > div > div:nth-child(1) > div > span"
  )
  comment_js_fmt <- 'document.querySelector("#root > div > div > div > div > div > div > div > div.Comment_wrap_3ylpz > div > div:nth-child(1) > div:nth-child(%d) > span").innerHTML'
  user_js_fmt <- 'document.querySelector("#root > div > div > div > div > div > div > div > div.Comment_wrap_3ylpz > div > div:nth-child(1) > div:nth-child(%d) > strong").innerHTML'

  # Evaluate one expression and return its value as a UTF-8 formatted string;
  # NA when the evaluation produced no value (e.g. the row has no such child).
  fetch_text <- function(js) {
    value <- b$Runtime$evaluate(js)$result$value
    if (is.null(value)) {
      return(NA_character_)
    }
    utf8::utf8_format(value)
  }

  # TRUE only when `x` is a single, non-missing number. as.numeric(NULL) is
  # numeric(0), which would make a bare `if (x > 0)` fail.
  is_known <- function(x) length(x) == 1L && !is.na(x)

  result <- list()
  video_length <- getVideoLength(b)
  timeTick <- 0
  while (timeTick <= waitFor) {
    n_comments <- as.numeric(b$Runtime$evaluate(count_js)$result$value)
    if (is_known(n_comments) && n_comments > 0) {
      positions <- seq_len(n_comments)
      comment <- vapply(sprintf(comment_js_fmt, positions), fetch_text,
                        character(1), USE.NAMES = FALSE)
      user <- vapply(sprintf(user_js_fmt, positions), fetch_text,
                     character(1), USE.NAMES = FALSE)
      result[[length(result) + 1L]] <- data.frame(user, comment)
    }

    # Stop early once playback has reached the end. When either reading is
    # unavailable the check is skipped and polling continues until `waitFor`.
    current_time <- getCurrentVideoTime(b)
    if (is_known(current_time) && is_known(video_length) &&
        current_time >= video_length) {
      break
    }

    timeTick <- timeTick + 1
    # No point sleeping after the final poll.
    if (timeTick > waitFor) break
    Sys.sleep(1)
  }
  result
}
#' Close a browser session.
#'
#' @param b Session object exposing `$close()`.
#' @return Whatever `b$close()` returns, invisibly.
exitBrowser <- function(b) {
  closed <- b$close()
  invisible(closed)
}
library(chromote)
# CSS paths of the first and second tag counters; they are reported below as
# the view count and the like count respectively.
view_selector <- "#root > div > div > div > div > div > div > div > div.TagItemLayout_wrap_1tXSl > a:nth-child(1) > span"
like_selector <- "#root > div > div > div > div > div > div > div > div.TagItemLayout_wrap_1tXSl > a:nth-child(2) > span"
replay_url <- "https://view.shoppinglive.naver.com/replays/74801"
# Open the page with the helper functions, then read both counters.
session <- createBrowser()
visitLink(session, replay_url)
result1 <- extractData(session, view_selector)
result2 <- extractData(session, like_selector)
cat("-- RESULT --\n")
## -- RESULT --
cat(" N of View: ", result1, "\n")
## N of View: 48,000
cat(" N of Like: ", result2, "\n")
## N of Like: 28,800
exitBrowser(session)
# result1 / result2 are intentionally left in the workspace, as before.
rm(view_selector, like_selector, replay_url, session)
replay_url <- "https://view.shoppinglive.naver.com/replays/74801"
session <- createBrowser()
visitLink(session, replay_url)
# Poll the comment panel for up to five minutes.
comments <- getComments(session, 60 * 5) #Naver Shopping Live ONLY! (5min)
# Stack the per-tick snapshots, drop rows repeated across snapshots, then
# count rows per (user, comment) pair.
# NOTE(review): the count is taken after distinct(), so numOfDup looks like it
# can only ever be 1 - confirm whether that is intended.
stacked <- bind_rows(comments)
unique_rows <- distinct(stacked)
per_pair <- group_by(unique_rows, user, comment)
comments <- ungroup(summarize(per_pair, numOfDup = n()))
## `summarise()` regrouping output by 'user' (override with `.groups` argument)
knitr::kable(head(comments, 20))
| user | comment | numOfDup |
|---|---|---|
| 3gd9 | ㄷ자이어트다이어트 | 1 |
| 3gd9 | 다이어트 | 1 |
| 3gd9 | 분홍이 | 1 |
| 3gd9 | 새해목표아닌가연ㅇ | 1 |
| 3gd9 | 이번에꼭!! | 1 |
| 3gd9 | 초록이 | 1 |
| 3gd9 | 해야돼요 | 1 |
| 3gd9 | 헐 | 1 |
| Be | 안녕하세요 | 1 |
| cake**** | 식사 전후 먹어야 하는 거네,, | 1 |
| Da y | 좋네요 | 1 |
| Elisabeth | 저렴하네요 오늘 | 1 |
| FLY | 믿을수있네요 | 1 |
| foxs**** | 괜찮네요 | 1 |
| gre | 다이어트식품이군요 | 1 |
| gre | 자주 보던 상품이네요 | 1 |
| imuuu | 좋아요 | 1 |
| isna**** | 오늘도 화이팅 하세요 ㅎㅎ | 1 |
| jang**** | 반가워요 | 1 |
| jang**** | 색상이 더 이뻐요 | 1 |
exitBrowser(session)
# Drop every global this demo created.
rm(comments, per_pair, unique_rows, stacked, session, replay_url)