API
# Set up the Rfacebook client and create the OAuth token used by every
# Graph API call below.
library(devtools)
# Install Rfacebook from GitHub only when it is not already available.
# Current devtools expects the repository as "owner/repo"; a second positional
# argument is interpreted as `ref`, so the owner must not be passed separately.
if (!requireNamespace("Rfacebook", quietly = TRUE)) {
  install_github("pablobarbera/Rfacebook", subdir = "Rfacebook")
}
# library() stops right here if the package is missing, whereas require()
# only returns FALSE and lets a later call fail with a less obvious error.
library(Rfacebook)
# The "xx" values are placeholders; substitute the app's real credentials.
fb_oauth <- fbOAuth(
  app_id = "xx",
  app_secret = "xx",
  extended_permissions = TRUE
)
DATA
- 以三立新闻facebook主页最近(2016/01/22)100条post下的comment数据为样本.
- 三立新闻的facebook id为110699089014688.
# Fetch up to 100 posts from the page. The page ID is passed as a string so
# that it reaches the request unchanged; a 15-digit numeric literal can be
# rendered in scientific notation depending on the session's `scipen` option.
sanli <- getPage("110699089014688", token = fb_oauth, n = 100)
post.id <- sanli$id

# Return the comment messages of a single post.
#
# x: ID of one post (an element of `post.id`).
# Value: the `comments$message` component of the getPost() result; NULL when
#   that component is absent.
# Uses the global `fb_oauth` token.
commentExtract <- function(x) {
  getPost(post = x, token = fb_oauth)$comments$message
}

# One list element per post, each holding that post's comment messages.
comments <- lapply(post.id, FUN = commentExtract)
Data cleaning
library(Rwordseg, quietly = TRUE)

# Flatten the per-post results into a single vector of comment messages.
comments <- unlist(comments)

# Segment one comment with segmentCN().
#
# x: a single comment message.
# Value: the result of segmentCN(x), or "" if segmentCN() signals an error,
#   so that one problematic comment does not abort the whole run.
cnsegment <- function(x) {
  tryCatch(
    segmentCN(x),
    error = function(e) ""
  )
}

segment.cn <- unlist(lapply(comments, cnsegment))
# Split any token that still contains spaces; strsplit() is vectorised, so no
# explicit loop over the tokens is needed.
word <- strsplit(segment.cn, " ", fixed = TRUE)
# Word frequencies, most frequent first.
v <- table(unlist(word))
v <- sort(v, decreasing = TRUE)
# head() does not pad with NA when there are fewer than 100 distinct words,
# unlike v[1:100].
head(v, 100)
head(v)
# as.vector() strips the table class. Passing the table itself would make
# data.frame() expand it into two columns (freq.Var1, freq.Freq), after which
# `d$freq` is an ambiguous partial match and evaluates to NULL.
d <- data.frame(
  word = names(v),
  freq = as.vector(v),
  stringsAsFactors = FALSE
)
visualization
# Draw a word cloud of at most 100 words from the frequency table `d`.
# library() fails immediately if wordcloud is not installed; require() would
# only return FALSE and let brewer.pal()/wordcloud() fail afterwards.
library(wordcloud)
dd <- d
# Set the font family and the background in one par() call so that `op`
# records the previous value of both and par(op) below restores both.
# NOTE(review): "STHeiti" is presumably chosen so that Chinese glyphs render;
# confirm the font exists on the target machine.
op <- par(family = "STHeiti", bg = "lightyellow")
# grayLevels = gray((dd$freq)/(max(dd$freq) + 140))
# wordcloud(dd$word, dd$freq, colors = grayLevels)
# NOTE(review): brewer.pal() is expected to come from RColorBrewer, attached
# together with wordcloud; confirm.
colors <- brewer.pal(8, "Dark2")
wordcloud(
  dd$word, dd$freq,
  max.words = 100,
  scale = c(10, 3),
  min.freq = -Inf,
  colors = colors,
  random.order = FALSE,
  random.color = FALSE,
  ordered.colors = FALSE
)
par(op)
