5月中旬台灣疫情再度升溫,確診人數持續增加,因此疫苗相關議題受到大眾矚目。其中,蔡總統於5/18在粉絲團發文表示,「聯亞」和「高端」兩家國內疫苗廠皆已進入臨床試驗第二期的收尾階段,並喊話希望在七月底前,就能供應第一波國產疫苗。另外,5/30高端證實政府將採購500萬劑疫苗並已完成簽約、6/10高端疫苗宣布二階解盲成功,這些事件引發了民眾對於國產疫苗的大量討論聲浪。由於高端的股價在這段期間變化甚大,因此還出現了質疑政府是否炒股行為等討論。基於上述原因,促使我們想要探討國人對於國產疫苗有甚麼樣的看法。
主要針對以下方向進行分析:
- 國產疫苗相關的討論議題有哪些?
- 討論國產疫苗的社群網路如何分布?
- 國產疫苗討論的意見領袖有誰?網友的推噓狀態如何?
系統參數設定
Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼安裝需要的packages
# echo = T,results = 'hide'
packages = c("dplyr", "tidytext", "stringr", "wordcloud2", "ggplot2",'readr','data.table','reshape2','wordcloud','tidyr','scales','plotly')
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)library(readr)
library(data.table)
library(ggplot2)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(jiebaR)## Loading required package: jiebaRD
library(tidyr)
library(tidytext)
library(stringr)
library(tm)## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(topicmodels)
library(purrr)##
## Attaching package: 'purrr'
## The following object is masked from 'package:data.table':
##
## transpose
library(igraph)##
## Attaching package: 'igraph'
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
require(RColorBrewer)## Loading required package: RColorBrewer
library(reshape2)##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
## The following objects are masked from 'package:data.table':
##
## dcast, melt
library(wordcloud2)
library(widyr)
library(ggraph)
library(plotly)##
## Attaching package: 'plotly'
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# 將三個版的資料合併
MetaData = fread('0611_articleMetaData.csv',encoding = 'UTF-8')
Reviews = fread('0611_articleReviews.csv',encoding = 'UTF-8')
# 再篩一次文章 2779 篇
keywords = c('國產疫苗','高端','聯亞','解盲')
toMatch = paste(keywords,collapse="|")
MetaData = with(MetaData, MetaData[grepl(toMatch,sentence)|grepl(toMatch,artTitle),])%>%
#處理sentence欄位
mutate(sentence=gsub("[\n]{2,}", "。", sentence)) %>%
mutate(sentence=gsub("\n", "", sentence)) %>%
mutate(sentence=gsub("http(s)?[-:\\/B-Yb-y0-9\\.]+", " ", sentence))
# 移除PTT貼新聞時會出現的格式用字
MetaData<- MetaData %>%
mutate(sentence=gsub("媒體來源|記者署名|完整新聞標題|完整新聞內文|完整新聞連結|(或短網址)|備註|備註請放最後面|違者新聞文章刪除", "", sentence))
# 挑選文章對應的留言
Reviews = left_join(MetaData, Reviews[,c("artUrl", "cmtContent", "artPoster", "cmtPoster", "cmtStatus")], by = "artUrl")MetaData %>%
mutate(artDate = as.Date(artDate)) %>%
group_by(artDate) %>%
summarise(count = n())%>%
ggplot(aes(artDate,count))+
geom_line(color="red")+
geom_point()MetaData %>%
mutate(artDate = as.Date(artDate)) %>%
group_by(artDate) %>%
filter(format(artDate,'%Y%m') %in% c(202105, 202106))%>%
summarise(count = n())%>%
ggplot(aes(artDate,count))+
geom_line(color="red")+
geom_point()+
# 加上標示日期的線
geom_vline(aes(xintercept = as.numeric(artDate[which(artDate == as.Date('2021-05-30'))
[1]])),colour = "yellow")+
geom_vline(aes(xintercept = as.numeric(artDate[which(artDate == as.Date('2021-06-10'))
[1]])),colour = "yellow") 可以看到自5月中旬之後討論逐漸增加,推測是因為疫情突然快速升溫再加上蔡總統於05/18在其粉絲團表示,「聯亞」和「高端」兩家國內疫苗廠皆已投入國產疫苗的研發,且已進入臨床試驗第二期的收尾階段,並喊話希望在七月底前,就能供應第一波國產疫苗,因此帶起了討論話題。
05/30,高端疫苗公開證實已與衛福部疾管署完成合約簽訂,衛福部將採購500萬劑的國產疫苗,此時出現許多質疑的聲浪。05/31,蔡總統針對外界質疑政府採購國產疫苗為炒股一事表示已進行內部清查,相關政務人員並無炒股,且財產申報都是公開透明、可以查證的,此番言論使得國產疫苗的討論達一高峰。
另外,高端疫苗於06/10宣布解盲成功,引起第二波討論高峰。
使用默認參數初始化一個斷詞引擎
jieba_tokenizer = worker(user="../dict/user_dict.txt", stop_word = "dict/stop_words.txt")
ptt_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
# 去掉字串長度爲1的詞彙
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}# 把文章和留言的斷詞結果併在一起
#MToken <- MetaData %>% unnest_tokens(word, sentence, token=ptt_tokenizer)
#RToken <- Reviews %>% unnest_tokens(word, cmtContent, token=ptt_tokenizer)
# 把資料併在一起
#data <- rbind(MToken[,c("artDate","artUrl", "word")],RToken[,c("artDate","artUrl", "word")]) 計算每篇文章各token出現次數
tokens <- MetaData %>%
unnest_tokens(word, sentence, token=ptt_tokenizer) %>%
filter((!str_detect(word, regex("[0-9a-zA-Z]"))) | str_detect(word, regex("[Aa][Zz]"))) %>%
count(artUrl, word) %>%
rename(count=n)
tokens %>% head(20)## artUrl word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html az 1
## 2: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 一下 1
## 3: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 大內 1
## 4: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 不好 1
## 5: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 不禁 1
## 6: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 中國 2
## 7: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 引述 1
## 8: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 只能 1
## 9: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 台灣 2
## 10: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 印度 1
## 11: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 危險 1
## 12: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 好多 1
## 13: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 好奇 1
## 14: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 有沒有 1
## 15: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 有害 1
## 16: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 行為 1
## 17: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 吹捧 2
## 18: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 官員 1
## 19: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 注射 1
## 20: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 阿三 1
dtm <-tokens %>% cast_dtm(artUrl, word, count)
dtm## <<DocumentTermMatrix (documents: 2779, terms: 23717)>>
## Non-/sparse entries: 143405/65766138
## Sparsity : 100%
## Maximal term length: 26
## Weighting : term frequency (tf)
inspect(dtm[1:10,1:10])## <<DocumentTermMatrix (documents: 10, terms: 10)>>
## Non-/sparse entries: 20/80
## Sparsity : 80%
## Maximal term length: 2
## Weighting : term frequency (tf)
## Sample :
## Terms
## Docs az 一下 大內 不好
## https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 1 1 1 1
## https://www.ptt.cc/bbs/Gossiping/M.1617764810.A.35C.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618128969.A.D6E.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618141623.A.EE3.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618288420.A.ECD.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618361319.A.A4C.html 1 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618363398.A.01A.html 5 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618839469.A.619.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1619682751.A.07C.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1619886717.A.AF1.html 0 0 0 0
## Terms
## Docs 不禁 中國 引述 只能
## https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 1 2 1 1
## https://www.ptt.cc/bbs/Gossiping/M.1617764810.A.35C.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618128969.A.D6E.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618141623.A.EE3.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618288420.A.ECD.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618361319.A.A4C.html 0 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618363398.A.01A.html 0 0 0 1
## https://www.ptt.cc/bbs/Gossiping/M.1618839469.A.619.html 0 6 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1619682751.A.07C.html 0 1 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1619886717.A.AF1.html 0 0 0 0
## Terms
## Docs 台灣 印度
## https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html 2 1
## https://www.ptt.cc/bbs/Gossiping/M.1617764810.A.35C.html 2 0
## https://www.ptt.cc/bbs/Gossiping/M.1618128969.A.D6E.html 3 0
## https://www.ptt.cc/bbs/Gossiping/M.1618141623.A.EE3.html 7 0
## https://www.ptt.cc/bbs/Gossiping/M.1618288420.A.ECD.html 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618361319.A.A4C.html 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1618363398.A.01A.html 3 0
## https://www.ptt.cc/bbs/Gossiping/M.1618839469.A.619.html 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1619682751.A.07C.html 2 0
## https://www.ptt.cc/bbs/Gossiping/M.1619886717.A.AF1.html 0 0
lda <- LDA(dtm, k = 4, control = list(seed = 2021))
# lda <- LDA(dtm, k = 2, control = list(seed = 2021,alpha = 2,delta=0.1),method = "Gibbs") #調整alpha即delta
lda## A LDA_VEM topic model with 4 topics.
topics_words <- tidy(lda, matrix = "beta") #注意,在tidy function裡面要使用"beta"來取出Phi矩陣。
colnames(topics_words) <- c("topic", "term", "phi")
topics_words## # A tibble: 94,868 x 3
## topic term phi
## <int> <chr> <dbl>
## 1 1 az 0.00214
## 2 2 az 0.00662
## 3 3 az 0.00374
## 4 4 az 0.000125
## 5 1 一下 0.00231
## 6 2 一下 0.00205
## 7 3 一下 0.000228
## 8 4 一下 0.00268
## 9 1 大內 0.000223
## 10 2 大內 0.000547
## # ... with 94,858 more rows
terms依照各主題的phi值由大到小排序,列出前10大代表字
topics_words %>%
group_by(topic) %>%
top_n(10, phi) %>%
ungroup() %>%
mutate(top_words = reorder_within(term,phi,topic)) %>%
ggplot(aes(x = top_words, y = phi, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
scale_x_reordered() +
theme_grey(base_family = "STKaiti" ) #避免中文出現亂碼## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
嘗試2、3、4、6、10、15個主題數,將結果存起來,再做進一步分析。
(此部分需要跑一段時間,已經將跑完的檔案存成ldas_result.rdata,可以直接載入)
ldas = c()
# topics = c(2,3,4,6,10,15)
# for(topic in topics){
# start_time <- Sys.time()
# lda <- LDA(dtm, k = topic, control = list(seed = 2021))
# ldas =c(ldas,lda)
# print(paste(topic ,paste("topic(s) and use time is ", Sys.time() -start_time)))
# save(ldas,file = "ldas_result.rdata") # 將模型輸出成檔案
# }載入每個主題的LDA結果
load("ldas_result.rdata")topics = c(2,3,4,6,10,15)
data_frame(k = topics, perplex = map_dbl(ldas, topicmodels::perplexity)) %>%
ggplot(aes(k, perplex)) +
geom_point() +
geom_line() +
labs(title = "Evaluating LDA topic models",
subtitle = "Optimal number of topics (smaller is better)",
x = "Number of topics",
y = "Perplexity")## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## Please use `tibble()` instead.
create LDAvis所需的json function
此function是將前面使用 “LDA function”所建立的model,轉換為“LDAVis”套件的input格式。
topicmodels_json_ldavis <- function(fitted, doc_term){
require(LDAvis)
require(slam)
###以下function 用來解決,主題數多會出現NA的問題
### 參考 https://github.com/cpsievert/LDAvis/commit/c7234d71168b1e946a361bc00593bc5c4bf8e57e
ls_LDA = function (phi){
jensenShannon <- function(x, y) {
m <- 0.5 * (x + y)
lhs <- ifelse(x == 0, 0, x * (log(x) - log(m+1e-16)))
rhs <- ifelse(y == 0, 0, y * (log(y) - log(m+1e-16)))
0.5 * sum(lhs) + 0.5 * sum(rhs)
}
dist.mat <- proxy::dist(x = phi, method = jensenShannon)
pca.fit <- stats::cmdscale(dist.mat, k = 2)
data.frame(x = pca.fit[, 1], y = pca.fit[, 2])
}
# Find required quantities
phi <- as.matrix(posterior(fitted)$terms)
theta <- as.matrix(posterior(fitted)$topics)
vocab <- colnames(phi)
term_freq <- slam::col_sums(doc_term)
# Convert to json
json_lda <- LDAvis::createJSON(phi = phi, theta = theta,
vocab = vocab,
doc.length = as.vector(table(doc_term$i)),
term.frequency = term_freq, mds.method = ls_LDA)
return(json_lda)
}the_lda = ldas[[3]]
json_res <- topicmodels_json_ldavis(the_lda,dtm)
serVis(json_res,open.browser = T)serVis(json_res, out.dir = "vis", open.browser = T)
writeLines(iconv(readLines("./vis/lda.json"), to = "UTF8"))從LDAvis分析結果中可以初度得知這四個主題的討論方向:
the_lda = ldas[[3]] ## 選定topic 為 4 的結果topics_words <- tidy(the_lda, matrix = "beta") #注意!在tidy function裡面要使用"beta"來取出Phi矩陣。
colnames(topics_words) <- c("topic", "term", "phi")
topics_words %>% arrange(desc(phi)) %>% head(10)## # A tibble: 10 x 3
## topic term phi
## <int> <chr> <dbl>
## 1 2 疫苗 0.0756
## 2 1 疫苗 0.0654
## 3 4 高端 0.0552
## 4 3 疫苗 0.0512
## 5 3 國產 0.0299
## 6 4 疫苗 0.0266
## 7 2 國產 0.0246
## 8 1 國產 0.0187
## 9 2 台灣 0.0181
## 10 1 試驗 0.0178
topics_words %>%
group_by(topic) %>%
top_n(10, phi) %>%
ungroup() %>%
ggplot(aes(x = reorder_within(term,phi,topic), y = phi, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
scale_x_reordered() +
theme_grey(base_family = "STKaiti" ) #避免中文出現亂碼## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
e.g., “疫苗”,“國產”,“有沒有”,“台灣”,“高端” 等等
removed_word = c("疫苗","國產","有沒有","台灣","高端")
topics_words %>%
filter(!term %in% removed_word) %>%
group_by(topic) %>%
top_n(10, phi) %>%
ungroup() %>%
ggplot(aes(x = reorder_within(term,phi,topic), y = phi, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
scale_x_reordered() +
theme_grey(base_family = "STKaiti" ) #避免中文出現亂碼## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
topics_name = c("國產疫苗的技術與進度","國產疫苗與國外疫苗","國產疫苗與政治","國產疫苗與股價")透過上述字詞,可將其分為以下主題:
# 在tidy function中使用參數"gamma"來取得 theta矩陣
topics <- tidy(lda, matrix="gamma") %>% # document topic gamma
group_by(document) %>%
top_n(1, wt=gamma)
topics## # A tibble: 2,779 x 3
## # Groups: document [2,779]
## document topic gamma
## <chr> <int> <dbl>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1618288420.A.ECD.html 1 0.992
## 2 https://www.ptt.cc/bbs/Gossiping/M.1618839469.A.619.html 1 1.00
## 3 https://www.ptt.cc/bbs/Gossiping/M.1620150204.A.25D.html 1 0.994
## 4 https://www.ptt.cc/bbs/Gossiping/M.1620904849.A.D3E.html 1 0.882
## 5 https://www.ptt.cc/bbs/Gossiping/M.1620905334.A.CDC.html 1 0.856
## 6 https://www.ptt.cc/bbs/Gossiping/M.1620905447.A.792.html 1 0.999
## 7 https://www.ptt.cc/bbs/Gossiping/M.1620906592.A.DE9.html 1 1.00
## 8 https://www.ptt.cc/bbs/Gossiping/M.1620977586.A.77C.html 1 0.989
## 9 https://www.ptt.cc/bbs/Gossiping/M.1621002993.A.61B.html 1 0.521
## 10 https://www.ptt.cc/bbs/Gossiping/M.1621163767.A.A74.html 1 0.742
## # ... with 2,769 more rows
# for every document we have a probability distribution of its contained topics
tmResult <- posterior(the_lda)
doc_pro <- tmResult$topics
document_topics <- doc_pro[MetaData$artUrl,]
document_topics_df =data.frame(document_topics)
colnames(document_topics_df) = topics_name
rownames(document_topics_df) = NULL
ptt_topic = cbind(MetaData,document_topics_df)
# 刪除commentNum、push、boo欄位
ptt_topic$commentNum = NULL
ptt_topic$push = NULL
ptt_topic$boo = NULL# 去除筆數少的月份
ptt_topic %>%
mutate(artDate = as.Date(artDate)) %>%
filter(!format(artDate,'%Y%m') %in% c(202104))%>%
group_by(artDate = format(artDate,'%Y%m')) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
melt(id.vars = "artDate")%>%
ggplot( aes(x=artDate, y=value, fill=variable)) +
geom_bar(stat = "identity") + ylab("value") +
scale_fill_manual(values=c("#cacaca","#a9c6de","#5588a3","#145374"))+
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme_grey(base_family = "STKaiti" ) #避免中文出現亂碼可以發現,國產疫苗與股價的相關討論於6月有了較大幅度的增加。
ptt_topic %>%
mutate(artDate = as.Date(artDate)) %>%
filter( format(artDate,'%Y%m') %in% c(202105, 202106))%>%
group_by(artDate = format(artDate,'%Y%m')) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
melt(id.vars = "artDate")%>%
group_by(artDate)%>%
mutate(total_value =sum(value))%>%
ggplot( aes(x=artDate, y=value/total_value, fill=variable)) +
geom_bar(stat = "identity") + ylab("proportion") +
scale_fill_manual(values=c("#cacaca","#a9c6de","#5588a3","#145374"))+
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme_grey(base_family = "STKaiti" ) #避免中文出現亂碼ptt_topic %>%
mutate(artDate = as.Date(artDate)) %>%
filter(!format(artDate,'%Y%m') %in% c(202104))%>%
group_by(artDate = format(artDate,'%m%d')) %>%
summarise_if(is.numeric, sum, na.rm = TRUE) %>%
melt(id.vars = "artDate")%>%
group_by(artDate)%>%
mutate(total_value =sum(value))%>%
ggplot( aes(x=artDate, y=value/total_value, fill=variable)) +
geom_bar(stat = "identity") + ylab("proportion") +
scale_fill_manual(values=c("#cacaca","#a9c6de","#5588a3","#145374"))+
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
theme_grey(base_family = "STKaiti" ) + #避免中文出現亂碼
geom_col(position="fill") -> pt
ggplotly(pt)從圖中的主題分佈我們觀察到:
phi_m <- topics_words %>% arrange(desc(phi)) %>% top_n(70)## Selecting by phi
dtm <-phi_m %>% cast_dtm(topic, term, phi)
dtmm<-as.matrix(dtm)
dim(dtmm)## [1] 4 50
# network=graph_from_incidence_matrix(dtmm)
# plot
# set.seed(3)
# plot(network, ylim=c(-1,1), xlim=c(-1,1), asp = 0,
# vertex.label.cex=0.7, vertex.size=10, vertex.label.family = "Heiti TC Light")removed_word = c("高端","疫苗","台灣","已經","有沒有","一定","目前")
phi_m <- topics_words %>%
filter(!term %in% removed_word) %>%
arrange(desc(phi)) %>%
top_n(70)## Selecting by phi
dtm <-phi_m %>% cast_dtm(topic, term, phi)
dtmm <- as.matrix(dtm)
dim(dtmm)## [1] 4 52
#set.seed(1)
network = graph_from_incidence_matrix(dtmm)
# plot
set.seed(3)
plot(network, ylim = c(-1,1), xlim = c(-1,1), asp = 0,
vertex.label.cex = 0.7, vertex.size = 10, vertex.label.family = "Heiti TC Light") ## Warning in text.default(x, y, labels = labels, col = label.color, family =
## label.family, : font family not found in Windows font database
從圖中可以印證以上的主題分類:
整理文章討論參與人
# 取出 cmtPoster(回覆者)、artPoster(發文者)、artUrl(文章連結)三個欄位
link <- Reviews %>%
select(cmtPoster, artPoster.x, artUrl)
reviewNetwork <- graph_from_data_frame(d=link, directed=T)## Warning in graph_from_data_frame(d = link, directed = T): In `d' `NA' elements
## were replaced with string "NA"
# 發文者數量 1659
length(unique(MetaData$artPoster))## [1] 1659
# 留言者數量 28963
length(unique(Reviews$cmtPoster))## [1] 28963
# 參與者總數量 29484
allPoster <- c(MetaData$artPoster, Reviews$cmtPoster)
length(unique(allPoster))## [1] 29484
# 整理所有出現過的使用者:
# 若曾發過文則標註爲:Poster;不曾發過文則標註爲:Replyer
userList <- data.frame(user = unique(allPoster)) %>%
mutate(type = ifelse(user%in%MetaData$artPoster, "poster", "replyer"))# 看一下留言數大概都多少(方便後面篩選)
MetaData %>%
filter(commentNum < 100) %>%
ggplot(aes(x=commentNum)) + geom_histogram()## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
大約在回覆次數30以上的文章數量就比較少了,因此回覆數可以先抓 30。
vaccine_poster = table(MetaData$artPoster) %>% sort %>% as.data.frame
colnames(vaccine_poster) = c("artPoster.x","freq")
vaccine_poster = vaccine_poster %>% filter(freq >= 5) # 發文次數 > 5
link <- Reviews %>%
filter(commentNum >= 30) %>% # 回覆數 > 30
filter(artPoster.x==vaccine_poster$artPoster.x) %>%
select(cmtPoster, artPoster.x, artUrl) %>%
unique()
# 篩選 link 中有出現的使用者
filtered_user <- userList %>%
filter(user%in%link$cmtPoster | user%in%link$artPoster.x) %>%
arrange(desc(type))set.seed(487)
# 先把關係的方向性拿掉,減少圖片中的不必要的資訊
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)
plot(reviewNetwork, vertex.size=3, edge.arrow.size=.2, vertex.label=NA)set.seed(487)
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
# 依使用者的身份來區分點的顏色:有發文的話是紅色,只有回覆文章的則是淺藍色
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "red", "lightblue")
# 顯示超過 20 個關聯的使用者帳號
plot(reviewNetwork, vertex.size = 3, edge.arrow.size=.2,
vertex.label=ifelse(degree(reviewNetwork) >= 20, V(reviewNetwork)$label, NA), vertex.label.font = 2)從圖中可以發現 Emacs、CavendishJr、f1317913、zzahoward 這四個帳號和他人有較多的關聯。
進一步分析 Emacs、CavendishJr、f1317913、zzahoward 這四位意見領袖
leader_data <- MetaData %>%
filter((artPoster == "Emacs")|(artPoster == "CavendishJr")|(artPoster == "f1317913")|(artPoster == "zzahoward"))
leader_data$artDate = as.Date(leader_data$artDate)
leader_data = leader_data %>% mutate(months = as.Date(cut(artDate, "days")))
leader_data_month = leader_data %>% group_by(months,artPoster) %>%
summarise(num=n()) %>% as.data.frame## `summarise()` has grouped output by 'months'. You can override using the `.groups` argument.
# 整合發文趨勢圖
leader_data_month %>% ggplot(aes(x= months,y=num,fill=artPoster)) +geom_bar(stat = "identity")+
facet_wrap(~artPoster, ncol = 2, scales = "fixed") 從發文趨勢圖中可以發現:
CavendishJr_data <- leader_data %>%
filter(artPoster == "CavendishJr")
CavendishJr_sentence <- CavendishJr_data %>%
select(artUrl,sentence)
CavendishJr_sentence <-strsplit(CavendishJr_sentence$sentence,"[。!;?!?;]")
# 將每個句子與所屬的文章連結配對起來,整理成 dataframe
CavendishJr_sentence <- data.frame(
artUrl = rep(CavendishJr_data$artUrl, sapply(CavendishJr_sentence, length)),
sentence = unlist(CavendishJr_sentence)) %>%
filter(!str_detect(sentence, regex("^(\t|\n| )*$")))
CavendishJr_sentence$sentence <- as.character(CavendishJr_sentence$sentence)
# 使用斷詞引擎,放入要用的詞典和停用字
jieba_tokenizer = worker(user="../dict/user_dict.txt", stop_word = "dict/stop_words.txt")
ptt_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}
# 進行斷詞,並計算各詞彙在各文章中出現的次數
CavendishJr_word <- CavendishJr_sentence %>%
unnest_tokens(word, sentence, token=ptt_tokenizer) %>%
filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
count(artUrl, word, sort = TRUE)
# 畫出文字雲
# CavendishJr_word %>%
# group_by(word) %>%
# summarise(sum = n()) %>%
# filter(sum > 1) %>%
# arrange(desc(sum)) %>%
# wordcloud2()對應作者CavendishJr的文章數量分析圖,其文章較多的期間是在2021/05 月疫情升溫之前,以及5月中旬疫情升溫之後,觀察文字雲後可以發現,這個作者主要關注的議題是國產疫苗的研發進度與目前的臨床試驗結果。
Emacs_data <- leader_data %>%
filter(artPoster == "Emacs")
Emacs_sentence <- Emacs_data %>%
select(artUrl,sentence)
Emacs_sentence <-strsplit(Emacs_sentence$sentence,"[。!;?!?;]")
# 將每個句子與所屬的文章連結配對起來,整理成 dataframe
Emacs_sentence <- data.frame(
artUrl = rep(Emacs_data$artUrl, sapply(Emacs_sentence, length)),
sentence = unlist(Emacs_sentence)) %>%
filter(!str_detect(sentence, regex("^(\t|\n| )*$")))
Emacs_sentence$sentence <- as.character(Emacs_sentence$sentence)
# 使用斷詞引擎,放入要用的詞典和停用字
jieba_tokenizer = worker(user="../dict/user_dict.txt", stop_word = "dict/stop_words.txt")
ptt_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}
# 進行斷詞,並計算各詞彙在各文章中出現的次數
Emacs_word <- Emacs_sentence %>%
unnest_tokens(word, sentence, token=ptt_tokenizer) %>%
filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
count(artUrl, word, sort = TRUE)
# 畫出文字雲
# Emacs_word %>%
# group_by(word) %>%
# summarise(sum = n()) %>%
# filter(sum > 1) %>%
# arrange(desc(sum)) %>%
# wordcloud2()# 計算兩個詞彙間的相關性
Emacs_word_cors <- Emacs_word %>%
group_by(word) %>%
filter(n() >= 3) %>%
pairwise_cor(word, artUrl, sort = TRUE)## Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
Emacs_word_cors %>%
filter(correlation > 0.3) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family='STXihei') +
theme_void() ## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
對應作者Emacs的文章數量分析圖,其文章較多發布在2021/05 月疫情升溫之後一直到現在的這段期間,結合文字雲和詞彙相關性分析後可以發現,這個作者主要關注的議題除了疫情的相關討論,e.g.,國產疫苗、疫苗、口罩等等,對國產疫苗與股票的相關議題也非常關注,e.g.炒作、不法、高層等等。
f1317913_data <- leader_data %>%
filter(artPoster == "f1317913")
f1317913_sentence <- f1317913_data %>%
select(artUrl,sentence)
f1317913_sentence <-strsplit(f1317913_sentence$sentence,"[。!;?!?;]")
# 將每個句子與所屬的文章連結配對起來,整理成 dataframe
f1317913_sentence <- data.frame(
artUrl = rep(f1317913_data$artUrl, sapply(f1317913_sentence, length)),
sentence = unlist(f1317913_sentence)) %>%
filter(!str_detect(sentence, regex("^(\t|\n| )*$")))
f1317913_sentence$sentence <- as.character(f1317913_sentence$sentence)
# 使用斷詞引擎,放入要用的詞典和停用字
jieba_tokenizer = worker(user="../dict/user_dict.txt", stop_word = "dict/stop_words.txt")
ptt_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}
# 進行斷詞,並計算各詞彙在各文章中出現的次數
f1317913_word <- f1317913_sentence %>%
unnest_tokens(word, sentence, token=ptt_tokenizer) %>%
filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
count(artUrl, word, sort = TRUE)
# # 畫出文字雲
# f1317913_word %>%
# group_by(word) %>%
# summarise(sum = n()) %>%
# filter(sum > 1) %>%
# arrange(desc(sum)) %>%
# wordcloud2()# 計算兩個詞彙間的相關性
f1317913_word_cors <- f1317913_word %>%
group_by(word) %>%
filter(n() >= 3) %>%
pairwise_cor(word, artUrl, sort = TRUE)
f1317913_word_cors %>%
filter(correlation > 0.3) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family='STXihei') +
theme_void() ## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
對應作者f1317913的文章數量分析圖,其文章較多的期間是在2021/05月中旬到2021/06 月初這段時間,結合文字雲和詞彙相關性分析後可以發現,這個作者主要關注的是國產疫苗的研發和臨床試驗相關的生技議題。
zzahoward_data <- leader_data %>%
filter(artPoster == "zzahoward")
zzahoward_sentence <- zzahoward_data %>%
select(artUrl,sentence)
zzahoward_sentence <-strsplit(zzahoward_sentence$sentence,"[。!;?!?;]")
# 將每個句子與所屬的文章連結配對起來,整理成 dataframe
zzahoward_sentence <- data.frame(
artUrl = rep(zzahoward_data$artUrl, sapply(zzahoward_sentence, length)),
sentence = unlist(zzahoward_sentence)) %>%
filter(!str_detect(sentence, regex("^(\t|\n| )*$")))
zzahoward_sentence$sentence <- as.character(zzahoward_sentence$sentence)
# 使用斷詞引擎,放入要用的詞典和停用字
jieba_tokenizer = worker(user="../dict/user_dict.txt", stop_word = "dict/stop_words.txt")
ptt_tokenizer <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
tokens <- tokens[nchar(tokens)>1]
return(tokens)
}
})
}
# 進行斷詞,並計算各詞彙在各文章中出現的次數
zzahoward_word <- zzahoward_sentence %>%
unnest_tokens(word, sentence, token=ptt_tokenizer) %>%
filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
count(artUrl, word, sort = TRUE)
# 畫出文字雲
# zzahoward_word %>%
# group_by(word) %>%
# summarise(sum = n()) %>%
# filter(sum > 1) %>%
# arrange(desc(sum)) %>%
# wordcloud2()對應作者zzahoward的文章數量分析圖,其文章較多的期間是在2021/05 月疫情升溫後一直到現在的這段時間,觀察文字雲後可以發現,這個作者主要關注的議題在於國產疫苗的效力問題,以及對於國外疫苗的相關討論。
# 把文章和topic合併
posts_Reviews <- merge(x = Reviews, y = topics, by.x = "artUrl", by.y="document")
head(posts_Reviews,3)## artUrl
## 1: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html
## 2: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html
## 3: https://www.ptt.cc/bbs/Gossiping/M.1617757128.A.F6D.html
## artTitle artDate artTime artPoster.x artCat
## 1: Re:[爆卦]高端疫苗一期數據公佈 2021/04/07 00:58:44 senafeld Gossiping
## 2: Re:[爆卦]高端疫苗一期數據公佈 2021/04/07 00:58:44 senafeld Gossiping
## 3: Re:[爆卦]高端疫苗一期數據公佈 2021/04/07 00:58:44 senafeld Gossiping
## commentNum push boo
## 1: 19 4 8
## 2: 19 4 8
## 3: 19 4 8
## sentence
## 1: 印度阿三我只是好奇ㄧ個問題。當初認為中國疫苗不好,。是因為認為第三期實驗有造假行為。危險有害國民健康。但現在國產義苗連第三期實驗都沒做。就ㄧ直吹捧有多好有多好 就要給人民注射。怎麼就沒有疑慮了?。那請問一下 這樣是不是 大內宣阿?。引述《seanww (seanmm)》之銘言:1劑標我也不贊成打中國疫苗,但聽到台灣官員自己吹捧國內疫苗有多好多好,我不禁笑了我也想選擇不打,但現在台灣不是只有AZ就是只有國產疫苗可打,想選擇BNT也沒機會了,只能等等看有沒有嬌生
## 2: 印度阿三我只是好奇ㄧ個問題。當初認為中國疫苗不好,。是因為認為第三期實驗有造假行為。危險有害國民健康。但現在國產義苗連第三期實驗都沒做。就ㄧ直吹捧有多好有多好 就要給人民注射。怎麼就沒有疑慮了?。那請問一下 這樣是不是 大內宣阿?。引述《seanww (seanmm)》之銘言:1劑標我也不贊成打中國疫苗,但聽到台灣官員自己吹捧國內疫苗有多好多好,我不禁笑了我也想選擇不打,但現在台灣不是只有AZ就是只有國產疫苗可打,想選擇BNT也沒機會了,只能等等看有沒有嬌生
## 3: 印度阿三我只是好奇ㄧ個問題。當初認為中國疫苗不好,。是因為認為第三期實驗有造假行為。危險有害國民健康。但現在國產義苗連第三期實驗都沒做。就ㄧ直吹捧有多好有多好 就要給人民注射。怎麼就沒有疑慮了?。那請問一下 這樣是不是 大內宣阿?。引述《seanww (seanmm)》之銘言:1劑標我也不贊成打中國疫苗,但聽到台灣官員自己吹捧國內疫苗有多好多好,我不禁笑了我也想選擇不打,但現在台灣不是只有AZ就是只有國產疫苗可打,想選擇BNT也沒機會了,只能等等看有沒有嬌生
## cmtContent artPoster.y cmtPoster cmtStatus topic
## 1: :大內宣不好嗎?支持國產, senafeld benza → 2
## 2: :是要黑幾次啊就EUA啊全世界都一樣好嗎 senafeld MDCCLXXVI 噓 2
## 3: :美國的EUA是要有三期才能過喔 senafeld koster → 2
## gamma
## 1: 0.9343108
## 2: 0.9343108
## 3: 0.9343108
# 將議題分成"國產疫苗的技術與進度"與"延伸的議題others"
posts_Reviews$topic[posts_Reviews$topic>1] <- 5
link <- posts_Reviews %>%
group_by(cmtPoster, artUrl) %>%
filter(n()>3) %>%
filter(commentNum > 900) %>%
filter(topic == 1 | topic == 5) %>%
select(cmtPoster, artPoster.x, artUrl, topic) %>%
unique()
link## # A tibble: 1,084 x 4
## # Groups: cmtPoster, artUrl [1,084]
## cmtPoster artPoster.x artUrl topic
## <chr> <chr> <chr> <dbl>
## 1 speady w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 2 Ghamu w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 3 RieX w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 4 kissa0924307 w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 5 alex8725 w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 6 rogergon w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 7 KDGC w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 8 ting74942 w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 9 menshuei w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## 10 seraphalpha w1230319 https://www.ptt.cc/bbs/Gossiping/M.1621898220~ 5
## # ... with 1,074 more rows
# 篩選 link 中有出現的使用者
filtered_user <- userList %>%
filter(user%in%link$cmtPoster | user%in%link$artPoster.x) %>%
arrange(desc(type))
filter_degree =50
# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)
# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "red", "lightblue")
# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$topic == "1", "palevioletred", "lightgreen")
# 畫出社群網路圖
set.seed(54320)
plot(reviewNetwork, vertex.size=3, edge.width=2, vertex.label.dist=3,
vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=2)
# 加入標示
par(family='STXihei')
legend("bottomright", c("發文者","回文者"), pch=21,
col="#777777", pt.bg=c("red","lightblue"), pt.cex=1, cex=1)## Warning in strwidth(legend, units = "user", cex = cex, font = text.font): font
## family not found in Windows font database
legend("topleft", c("國產疫苗的技術與進度","其他議題"),
col=c("palevioletred", "lightgreen"), lty=1, cex=1)可以從圖中發現,其中一個意見領袖f1317913附近多為紅色的線,說明該帳號經常參與國產疫苗的技術與進度相關討論,可以呼應前面的意見領袖分析文字雲。
filter_degree = 5 # 使用者degree
# 過濾留言者對發文者的推噓程度
link_pb <- Reviews %>%
filter(commentNum > 20) %>%
filter(cmtStatus!="→") %>%
group_by(cmtPoster, artUrl) %>%
filter( n() > 5) %>%
ungroup() %>%
select(cmtPoster, artPoster.x, artUrl, cmtStatus) %>%
unique()
# 接下來把網路圖畫出來
# 篩選link_pb中有出現的使用者
filtered_user <- userList %>%
filter(user%in%link_pb$cmtPoster | user%in%link_pb$artPoster.x) %>%
arrange(desc(type))
# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link_pb, v=filtered_user, directed=F)
# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "red", "lightblue")
# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$cmtStatus == "推", "lightgreen", "palevioletred")
# 畫出社群網路圖
set.seed(5432)
plot(reviewNetwork, vertex.size=2, edge.width=3, vertex.label.dist=1,
vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=2)
# 加入標示
par(family='STXihei')
legend("bottomright", c("發文者","回文者"), pch=21,
col="#777777", pt.bg=c("red","lightblue"), pt.cex=1, cex=1) ## Warning in strwidth(legend, units = "user", cex = cex, font = text.font): font
## family not found in Windows font database
legend("topleft", c("推","噓"),
col=c("lightgreen", "palevioletred"), lty=1, cex=1)可以看到推之比例大於噓許多,我們認為有兩種原因,第一個是針對版主轉貼的新聞事件進行討論,所以比較少噓。另外就是,版主的文章本身便帶有嘲諷的意味,因此鄉民以推文表示認同。
library(networkD3)## Warning: package 'networkD3' was built under R version 4.0.5
links <- link_pb
nodes <- filtered_user
nodes$id = 0:(length(nodes$user) - 1)
# 整理資料格式
nodes_complete <- data.frame(nodeID = unique(c(links$cmtPoster, links$artPoster.x)))
nodes_complete$group <- nodes$type[match(nodes_complete$nodeID, nodes$user)]
links$source <- match(links$cmtPoster, nodes_complete$nodeID) - 1
links$target <- match(links$artPoster.x, nodes_complete$nodeID) - 1
# 畫圖
# forceNetwork(Links = links, Nodes = nodes_complete, Source = "source",
# Target = "target", NodeID = "nodeID", Group = "group",
# opacity = 0.8, fontSize = 10, zoom = TRUE,legend = TRUE, opacityNoHover = TRUE,
#
# colourScale = "d3.scaleOrdinal(d3.schemeCategory10);",
# linkColour = ifelse(links$cmtStatus == "推", "palegreen","lightcoral") # 設定推噓顏色
# )