系統參數設定

Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## [1] "zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8"

安裝需要的packages

packages = c("readr", "dplyr", "jiebaR", "tidyr", "tidytext", "igraph", "topicmodels", "ggplot2", "stringr")
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)

讀進library

library(dplyr)
library(stringr)
library(tidytext)
library(wordcloud2)
library(data.table)
library(ggplot2)
library(reshape2)
library(topicmodels)
library(wordcloud)
library(tidyr)
library(readr)
library(scales)
require(jiebaR)
library(openxlsx)
require(readr)
require(ggraph)
require(igraph)
require(widyr)
library(plotly)
library(grid)
library(purrr)
require(RColorBrewer)
library(tm)

##輸入資料

posts = fread('核四_articleMetaData.csv' ,encoding = 'UTF-8')
reviews = fread('核四_articleReviews.csv' ,encoding = 'UTF-8')

LDA 主題分類

文章斷句

# # 文章斷句("\n\n"取代成"。")
# mask_meta <- posts %>%
#               mutate(sentence=gsub("[\n]{2,}", "。", sentence))
# 
# # 以全形或半形 驚歎號、問號、分號 以及 全形句號 爲依據進行斷句
# mask_sentences <- strsplit(mask_meta$sentence,"[。!;?!?;]")
# 
# # 將每句句子,與他所屬的文章連結配對起來,整理成一個dataframe
# mask_sentences <- data.frame(
#                         artUrl = rep(mask_meta$artUrl, sapply(mask_sentences, length)),
#                         sentence = unlist(mask_sentences)
#                       ) %>%
#                       filter(!str_detect(sentence, regex("^(\t|\n| )*$")))
#                       # 如果有\t或\n就去掉
# 
# mask_sentences$sentence <- as.character(mask_sentences$sentence)
# mask_sentences

文章斷詞

# ## 文章斷詞
# # load mask_lexicon(特定要斷開的詞,像是user_dict)
# mask_lexicon <- scan(file = "自建字典.txt", what=character(),sep='\n',
#                    encoding='utf-8',fileEncoding='utf-8')
# # load stop words
# stop_words <- scan(file = "stop_words.txt", what=character(),sep='\n',
#                    encoding='utf-8',fileEncoding='utf-8')
# 
# # 使用默認參數初始化一個斷詞引擎
# jieba_tokenizer = worker()
# 
# # 使用口罩字典重新斷詞
# new_user_word(jieba_tokenizer, c(mask_lexicon))
# 
# # tokenize function
# chi_tokenizer <- function(t) {
#   lapply(t, function(x) {
#     if(nchar(x)>1){
#       tokens <- segment(x, jieba_tokenizer)
#       tokens <- tokens[!tokens %in% stop_words]
#       # 去掉字串長度爲1的詞彙
#       tokens <- tokens[nchar(tokens)>1]
#       return(tokens)
#     }
#   })
# }
# 
# 
# # 用剛剛初始化的斷詞器把sentence斷開
# tokens <- mask_sentences %>%
#   mutate(sentence = gsub("[[:punct:]]", "",sentence)) %>%
#   mutate(sentence = gsub("[0-9a-zA-Z]", "",sentence)) %>%
#   unnest_tokens(word, sentence, token=chi_tokenizer) %>%
#   count(artUrl, word) %>% # 計算每篇文章出現的字頻
#   rename(count=n)
# tokens
# 
# save.image(file = "data/token_result.rdata")

斷詞結果可以先存起來,就不用再重跑一次

load("data/token_result.rdata")

清理斷詞結果

。根據詞頻,選擇只出現3字以上的字
。整理成url,word,n的格式之後,就可以轉dtm
% group_by(word) %>% count() %>% filter(n > freq) %>% unlist()

mask_removed <- tokens %>% filter(word %in% reserved_word)

#mask_dtm 裡面 nrow:幾篇文章 ; ncol:幾個字 mask_dtm <- mask_removed %>% cast_dtm(artUrl, word, count)


### LDA 主題分析

將剛處理好的dtm放入LDA函式分析

```r
# LDA分成3個主題
mask_lda <- LDA(mask_dtm, k = 3, control = list(seed = 2021))

取出代表字詞(term)

# 看各群的常用詞彙
tidy(mask_lda, matrix = "beta") %>% # 取出topic term beta值
  group_by(topic) %>%
  top_n(10, beta) %>% # beta值前10的字
  ungroup() %>%
  mutate(topic = as.factor(topic),
         term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = topic)) +
  geom_col(show.legend = FALSE) +
  theme(text=element_text(family="黑體-繁 中黑", size = 14))+
  facet_wrap(~ topic, scales = "free") +
  coord_flip() +
  scale_x_reordered()

取出代表主題(topic)

每篇文章拿gamma值最大的topic當該文章的topic

# 在tidy function中使用參數"gamma"來取得 theta矩陣
mask_topics <- tidy(mask_lda, matrix="gamma") %>% # document topic gamma
                  group_by(document) %>%
                  top_n(1, wt=gamma)
mask_topics
## # A tibble: 1,028 x 3
## # Groups:   document [1,028]
##    document                                                 topic gamma
##    <chr>                                                    <int> <dbl>
##  1 https://www.ptt.cc/bbs/Gossiping/M.1614318736.A.9E9.html     1 0.535
##  2 https://www.ptt.cc/bbs/Gossiping/M.1614387520.A.DFF.html     1 0.555
##  3 https://www.ptt.cc/bbs/Gossiping/M.1614484619.A.6BB.html     1 0.973
##  4 https://www.ptt.cc/bbs/Gossiping/M.1614490289.A.91D.html     1 0.679
##  5 https://www.ptt.cc/bbs/Gossiping/M.1614490742.A.85F.html     1 0.513
##  6 https://www.ptt.cc/bbs/Gossiping/M.1614514844.A.F0B.html     1 0.752
##  7 https://www.ptt.cc/bbs/Gossiping/M.1614525639.A.B85.html     1 0.646
##  8 https://www.ptt.cc/bbs/Gossiping/M.1614652519.A.E1A.html     1 0.712
##  9 https://www.ptt.cc/bbs/Gossiping/M.1614653309.A.918.html     1 0.985
## 10 https://www.ptt.cc/bbs/Gossiping/M.1614657631.A.750.html     1 0.633
## # … with 1,018 more rows

資料內容探索

posts_topic <- merge(x = posts, y = mask_topics, by.x = "artUrl", by.y="document")

# 看一下各主題在說甚麼
set.seed(2021)
posts_topic %>% # 主題一
  filter(topic==1) %>%
  select(artTitle) %>%
  unique() %>%
  sample_n(5)
##                                          artTitle
## 1:    [新聞]蔡英文:重啟核四絕對不是選項2原因曝光
## 2:        Re:[問卦]核廢料補助1桶25430元你家給放嗎
## 3: Re:[新聞]快訊/核四確定重啟無望 最後一批120束
## 4:     [新聞]議員追問同意核四商轉?柯文哲嗆:愚人
## 5:          Re:[問卦]現在三讀通過重啟核四可以嗎?
posts_topic %>% # 主題二
  filter(topic==2) %>%
  select(artTitle) %>%
  unique() %>%
  sample_n(50)
##                                            artTitle
##  1:           [問卦]每次都是冬天關核電廠夏天開啟嗎?
##  2:         Re:[問卦]反核人士不小心用到核電怎麼辦?
##  3: Re:[新聞]蔡英文接見環保團體宣示:重啟核四不可能
##  4:                        [問卦]是不是該支持核四?
##  5:                [問卦]反核這麼關心後代子孫幹嘛?
##  6:    [問卦]為什麼不要求各縣市都得放核廢料就好了?
##  7:                          [問卦]興達核電廠如何?
##  8:          [問卦]為什麼反核不用對核廢料負責???
##  9:                [問卦]你們會支持蓋核電場嗎。。?
## 10:         Re:[問卦]台GG自己蓋核電廠會有人反對嗎??
## 11:                   [問卦]反核的入門條件是H組嗎@@
## 12:        [問卦]太陽能轉換效率要多少才能取代核電??
## 13:               Re:[問卦]地熱能其實也算是核電吧??
## 14:   Re:[新聞]日排放核廢水入海惹議陳吉仲:台灣玩不
## 15:             Re:[問卦]有可能2025核電直接歸零嗎?
## 16:              [問卦]掛反核福島旗配電時會開冷氣嗎
## 17:                    [問卦]震怒!核四的燃料棒呢?
## 18:               Re:[問卦]幫核電想一個新名字好嗎?
## 19:            [問卦]那個風力太陽能真的能補上核電嗎
## 20:                      [問卦]核電廠蓋台中可行嗎?
## 21:            [問卦]為什麼不考慮核能海外水淡化廠?
## 22:                   [問卦]核廢料放大潭三接可以嗎?
## 23:     Re:[新聞]民進黨的神主牌倒了看到核四民調結果
## 24:                    [問卦]反核的人現在在想什麼?
## 25:                  [問卦]為什麼投票是大潭不是核四
## 26:           [問卦]中共的核電廠都沒炸了是在怕什麼?
## 27:                [問卦]蓋核電廠不就有電又有水了?
## 28:          [問卦]#我是人我反核#用愛發電死哪去了?
## 29:           [問卦]核電廠還要在蓋幾座才能當主力啊?
## 30:             Re:[問卦]北部人支持蓋核四不是很好嗎
## 31:                  [問卦]火力or核四大家會選哪個?
## 32:             [問卦]這幾天難道是核電仔勝利的一天?
## 33:      Re:[問卦]台電的【核電廠輕油】是什麼意思啊?
## 34:                    [問卦]為什麼又突然吵核四了?
## 35:            [問卦]一縣市一核電廠,蓋在焚化爐旁邊
## 36:                 [問卦]疫苗血栓/核能爆炸怎麼選擇
## 37:               [問卦]怕核電廠爆炸,不怕疫苗出錯?
## 38:        [問卦]幾乎所有核電廠事故都起因於人為疏失
## 39:                  [問卦]反核和台獨是不是自相矛盾
## 40:                [問卦]反核的為何不列優先停電組?
## 41:      [問卦]為何擁核不像反核一樣有很多活動可參加
## 42:    [問卦]核能電廠蓋臺中輸電給台北用4不4最佳解??
## 43:                [問卦]太陽能發電快追到核電了耶?
## 44:                     [問卦]核四危險484該蓋核五了
## 45:                  [問卦]核電設在花蓮是不是最佳解
## 46:    [問卦]把核能火力包裝得潮一點環團就支持了吧?
## 47:                  [問卦]為何反核的都想開發藻礁?
## 48:          [問卦]核四會爆炸是不是在汙辱工程界阿??
## 49:              [問卦]一直哭夭的核廢料是要放你家?
## 50:     Re:[問卦]如果馬英九當年硬幹核四會被消費多久
##                                            artTitle
posts_topic %>% # 主題三
  filter(topic==3) %>%
  select(artTitle) %>%
  unique() %>%
  sample_n(5)
##                                      artTitle
## 1:           [問卦]反核為何容許核二,三運轉?
## 2:       [問卦]核廢料比照火力處理是不是最佳解
## 3: [問卦]為什麼不丟核彈或氫彈燒掉福島的核廢水
## 4:         [問卦]所以核廢料其實有辦法處理吧?
## 5:              [問卦]把核廢料射到金星會怎樣?

查看各主題中文章數量分佈

posts_topic_count <- 
  posts_topic %>%
  select(artDate,topic) %>%
  group_by(artDate,topic) %>% 
  summarise(count=n()) %>%
  ungroup()
## `summarise()` has grouped output by 'artDate'. You can override using the `.groups` argument.
posts_topic_count$artDate = posts_topic_count$artDate %>% as.Date("%Y/%m/%d")
posts_topic_count %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=as.factor(topic)))+
  geom_vline(xintercept = as.numeric(as.Date("2021-05-17")), col='black') +
  scale_x_date(labels = date_format("%m/%d"),
               limits = as.Date(c('2021-02-12','2021-05-31')),
              breaks ="10 day") +
  theme(text=element_text(family="黑體-繁 中黑", size = 8))+
  ggtitle('topic - 文章數量折線圖')
## Warning: Removed 6 row(s) containing missing values (geom_path).

posts_topic %>%
  group_by(artCat,topic) %>%
  summarise(sum = n())  %>%
  ggplot(aes(x= artCat,y=sum,fill=as.factor(topic))) +
  geom_col(position="dodge")
## `summarise()` has grouped output by 'artCat'. You can override using the `.groups` argument.

社群網路圖

資料合併

# 文章和留言
reviews <- reviews %>%
      select(artUrl, cmtPoster, cmtStatus, cmtContent)
posts_Reviews <- merge(x = posts, y = reviews, by = "artUrl")

# 把文章和topic
posts_Reviews <- merge(x = posts_Reviews, y = mask_topics, by.x = "artUrl", by.y="document")
head(posts_Reviews,3)
##                                                      artUrl
## 1: https://www.ptt.cc/bbs/Gossiping/M.1610157068.A.BA7.html
## 2: https://www.ptt.cc/bbs/Gossiping/M.1610157068.A.BA7.html
## 3: https://www.ptt.cc/bbs/Gossiping/M.1610157068.A.BA7.html
##                          artTitle    artDate  artTime artPoster    artCat
## 1: [問卦]現在冬天是不是不能反核了 2021/01/09 01:51:06     awwwe Gossiping
## 2: [問卦]現在冬天是不是不能反核了 2021/01/09 01:51:06     awwwe Gossiping
## 3: [問卦]現在冬天是不是不能反核了 2021/01/09 01:51:06     awwwe Gossiping
##    commentNum push boo
## 1:          4    2   0
## 2:          4    2   0
## 3:          4    2   0
##                                                                                                                                               sentence
## 1: 之前有鄉民說  冬天反核   夏天擁核\n\n\n現在冬天靠杯冷的\n\n需要開暖氣\n\n比冷氣更耗電\n\n現在是不是冬天也不適合反核了?\n\n現在什麼時後才適合反核?\n
## 2: 之前有鄉民說  冬天反核   夏天擁核\n\n\n現在冬天靠杯冷的\n\n需要開暖氣\n\n比冷氣更耗電\n\n現在是不是冬天也不適合反核了?\n\n現在什麼時後才適合反核?\n
## 3: 之前有鄉民說  冬天反核   夏天擁核\n\n\n現在冬天靠杯冷的\n\n需要開暖氣\n\n比冷氣更耗電\n\n現在是不是冬天也不適合反核了?\n\n現在什麼時後才適合反核?\n
##      cmtPoster cmtStatus         cmtContent topic     gamma
## 1:    antigidu        推 :覺青:核廢料放你家     2 0.9810969
## 2: iwinlottery        →  :可以用煤油暖爐啊     2 0.9810969
## 3:  jrxcombine        推  :你可以鑽木取火啊     2 0.9810969

取出 cmtPoster(回覆者)、artPoster(發文者)、artUrl(文章連結) 三個欄位

link <- posts_Reviews %>% select(cmtPoster, artPoster, artUrl)
head(link,3)
##      cmtPoster artPoster
## 1:    antigidu     awwwe
## 2: iwinlottery     awwwe
## 3:  jrxcombine     awwwe
##                                                      artUrl
## 1: https://www.ptt.cc/bbs/Gossiping/M.1610157068.A.BA7.html
## 2: https://www.ptt.cc/bbs/Gossiping/M.1610157068.A.BA7.html
## 3: https://www.ptt.cc/bbs/Gossiping/M.1610157068.A.BA7.html

基本網路圖

建立網路關係

reviewNetwork <- graph_from_data_frame(d=link, directed=T)
reviewNetwork
## IGRAPH 55409d3 DN-- 10906 52250 -- 
## + attr: name (v/c), artUrl (e/c)
## + edges from 55409d3 (vertex names):
##  [1] antigidu    ->awwwe       iwinlottery ->awwwe      
##  [3] jrxcombine  ->awwwe       benza       ->awwwe      
##  [5] iem2000     ->cowardlyman cc02040326  ->cowardlyman
##  [7] ufap        ->cowardlyman iem2000     ->cowardlyman
##  [9] niburger1001->cowardlyman iem2000     ->cowardlyman
## [11] jetzake     ->cowardlyman eterbless   ->cowardlyman
## [13] darkdogoblin->cowardlyman iWatch2     ->cowardlyman
## [15] hydra3179   ->cowardlyman edison      ->cowardlyman
## + ... omitted several edges

資料篩選

資料篩選的方式:

  • 文章:文章日期、留言數(commentNum)
  • link、node:degree
# 看一下留言數大概都多少(方便後面篩選)
posts %>%
#  filter(commentNum<100) %>%
  ggplot(aes(x=commentNum)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

依據發文數或回覆數篩選post和review

# 帳號發文篇數
post_count = posts %>%
  
  group_by(artPoster) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
post_count
## # A tibble: 639 x 2
##    artPoster   count
##    <chr>       <int>
##  1 kaky           13
##  2 kid725         12
##  3 devidevi       11
##  4 chirex         10
##  5 DustToDust     10
##  6 hamasakiayu    10
##  7 A6              9
##  8 cowardlyman     9
##  9 Nagasumi        9
## 10 Whitening       9
## # … with 629 more rows
# 帳號回覆總數
review_count = reviews %>%
  group_by(cmtPoster) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
review_count
## # A tibble: 10,624 x 2
##    cmtPoster  count
##    <chr>      <int>
##  1 swearflycc  1175
##  2 Anvec        676
##  3 jasonpig     451
##  4 u9005205     372
##  5 trywish      299
##  6 zeumax       272
##  7 ssisters     257
##  8 aa1052026    197
##  9 jma306       195
## 10 cfetan       173
## # … with 10,614 more rows
# 發文者
poster_select <- post_count %>% filter(count >= 2)
posts <- posts %>%  filter(posts$artPoster %in% poster_select$artPoster)

# 回覆者
reviewer_select <- review_count %>%  filter(count >= 20)
reviews <- reviews %>%  filter(reviews$cmtPoster %in% reviewer_select$cmtPoster)
# 檢視參與人數
length(unique(posts_Reviews$artPoster)) # 發文者數量 637
## [1] 637
length(unique(posts_Reviews$cmtPoster)) # 回覆者數量 10624
## [1] 10624
allPoster <- c(posts_Reviews$artPoster, posts_Reviews$cmtPoster) # 總參與人數 10906
length(unique(allPoster))
## [1] 10906

標記所有出現過得使用者

  • poster:只發過文、發過文+留過言
  • replyer:只留過言
userList <- data.frame(user=unique(allPoster)) %>%
              mutate(type=ifelse(user%in%posts$artPoster, "poster", "replyer"))

計算該日有多少文章+留言

postDate <- posts_Reviews %>%
  group_by(artDate) %>%
  summarise(sum=n()) %>%
  select(artDate,sum) %>%
  arrange(desc(sum))
postDate
## # A tibble: 90 x 2
##    artDate      sum
##    <chr>      <int>
##  1 2021/05/18  4158
##  2 2021/03/16  3732
##  3 2021/04/11  3178
##  4 2021/03/17  2838
##  5 2021/05/19  2685
##  6 2021/05/17  2068
##  7 2021/04/27  1994
##  8 2021/03/11  1865
##  9 2021/03/30  1826
## 10 2021/03/28  1798
## # … with 80 more rows

以日期篩選社群

文章和留言數目在 05/17 ~ 05/19 較多,且 05/18 為最多,挑出當天的文章和回覆看看 05/17 再度跳電,同日,宅神朱學恒昨晚貼出訪問前核四廠長王伯輝的影片, 王伯輝直言,當初就是看到台灣未來的缺電問題才規劃核四,如今錯誤的能源政策,才造成現在的缺電問題。

link <- posts_Reviews %>%
      group_by(cmtPoster, artUrl) %>% 
      filter(n()>3) %>% 
      #filter(commentNum > 200) %>%
      filter(artCat=="Gossiping") %>% 
      filter(artDate >= as.Date('2021-05-17')) %>%
      filter(artDate <= as.Date('2021-05-19')) %>%
      select(cmtPoster, artPoster, artUrl) %>% 
      unique()

#篩選在link裡面有出現的使用者
filtered_user <- userList %>%
          filter(user%in%link$cmtPoster | user%in%link$artPoster) %>%
          arrange(desc(type))

set.seed(2021)
reviewNetwork = degree(reviewNetwork) > 2
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")
plot(reviewNetwork, vertex.size=3, edge.arrow.size=0.3,vertex.label=NA)

為點加上帳號名字,用degree篩選要顯示出的使用者,以免圖形被密密麻麻的文字覆蓋

filter_degree = 10
set.seed(2021)

# 設定 node 的 label/ color
labels <- degree(reviewNetwork) # 算出每個點的degree
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")

plot(
  reviewNetwork, 
  vertex.size=3, 
  edge.width=3, 
  vertex.label.dist=1,
  vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=2)

上圖為 05/17~05/19 的社群網路圖

以主題篩選社群(1)

挑選出2021-03-16~17當天的文章, 篩選一篇文章回覆1次以上者,且文章留言數多餘50則, 文章主題歸類為1(缺電->核電相關), 欄位只取:cmtPoster(評論者), artPoster(發文者), artUrl(文章連結), topic(主題)

link <- posts_Reviews %>%
      group_by(cmtPoster, artUrl) %>% 
      filter(n()>3) %>% 
      filter(commentNum > 50) %>%
      filter(artCat=="Gossiping") %>% #Gossiping
      filter(artDate >= as.Date('2021-03-16')) %>%
      filter(artDate <= as.Date('2021-03-17')) %>%
      #filter(topic == 2 | topic == 3) %>% 
      select(cmtPoster, artPoster, artUrl, topic) %>% 
      unique()

#在所有的使用者裡面,篩選link中有出現的使用者
filtered_user <- userList %>%
          filter(user%in%link$cmtPoster | user%in%link$artPoster) %>%
          arrange(desc(type))
# 使用者經常參與的文章種類
filter_degree = 10

# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)

# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")

# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$topic == "1", "palevioletred",ifelse((E(reviewNetwork)$topic =="2"),"lightgreen","deepskyblue" ))

# 畫出社群網路圖(degree>7的才畫出來)
set.seed(2021)
plot(reviewNetwork, vertex.size=3, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=2)

# 加入標示
par(family="黑體-繁 中黑")
legend("bottomright", c("發文者","回文者"), pch=21,
  col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("政治相關","核電相關","核廢料相關"),
       col=c("palevioletred", "lightgreen","deepskyblue"), lty=1, cex=1,text.font=NULL)

以主題篩選社群(2)

挑選出2021-05-17~19當天的文章, 篩選一篇文章回覆1次以上者,且文章留言數多餘50則, 文章主題歸類為1(缺電->核電相關), 欄位只取:cmtPoster(評論者), artPoster(發文者), artUrl(文章連結), topic(主題)

link <- posts_Reviews %>%
      group_by(cmtPoster, artUrl) %>% 
      filter(n()>3) %>% 
      filter(commentNum > 50) %>%
      filter(artCat=="Gossiping") %>% #Gossiping
      filter(artDate >= as.Date('2021-05-17')) %>%
      filter(artDate <= as.Date('2021-05-19')) %>%
      #filter(topic == 1 | topic == 2) %>% 
      select(cmtPoster, artPoster, artUrl, topic) %>% 
      unique()

#在所有的使用者裡面,篩選link中有出現的使用者
filtered_user <- userList %>%
          filter(user%in%link$cmtPoster | user%in%link$artPoster) %>%
          arrange(desc(type))
# 使用者經常參與的文章種類
filter_degree = 10

# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)

# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")

# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$topic == "1", "palevioletred",ifelse((E(reviewNetwork)$topic =="2"),"lightgreen","deepskyblue" ))

# 畫出社群網路圖(degree>7的才畫出來)
set.seed(2021)
plot(reviewNetwork, vertex.size=3, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=2)

# 加入標示
par(family="黑體-繁 中黑")
legend("bottomright", c("發文者","回文者"), pch=21,
  col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("政治相關","核電相關","核廢料相關"),
       col=c("palevioletred", "lightgreen","deepskyblue"), lty=1, cex=1,text.font=NULL)

使用者是否受到歡迎 ALL

PTT的回覆有三種,推文、噓文、箭頭,我們只要看推噓就好,因此把箭頭清掉,這樣資料筆數較少,所以我們把篩選的條件放寬一些。

filter_degree = 7 # 使用者degree

# 過濾留言者對發文者的推噓程度
link <- posts_Reviews %>%
      filter(artCat=="Gossiping") %>% 
      filter(commentNum > 100) %>%
      filter(cmtStatus!="→") %>%
      group_by(cmtPoster, artUrl) %>%
      filter( n() > 2) %>%
      ungroup() %>% 
      select(cmtPoster, artPoster, artUrl, cmtStatus) %>% 
      unique()

# 接下來把網路圖畫出來,跟前面做的事都一樣,因此不再細述

# 篩選link中有出現的使用者
filtered_user <- userList %>%
          filter(user%in%link$cmtPoster | user%in%link$artPoster) %>%
          arrange(desc(type))

# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)

# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")


# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$cmtStatus == "推", "lightgreen", "palevioletred")

# 畫出社群網路圖
set.seed(2021)
plot(reviewNetwork, vertex.size=2, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=1)

# 加入標示
par(family="黑體-繁 中黑")
legend("bottomright", c("發文者","回文者"), pch=21,
  col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("推","噓"), 
       col=c("lightgreen","palevioletred"), lty=1, cex=1)

使用者是否受到歡迎 03/16~03/17

filter_degree = 5 # 使用者degree

# 過濾留言者對發文者的推噓程度
link <- posts_Reviews %>%
      filter(artCat=="Gossiping") %>% 
      filter(commentNum > 100) %>%
      filter(cmtStatus!="→") %>%
      group_by(cmtPoster, artUrl) %>%
      filter(artDate >= as.Date('2021-03-16')) %>%
      filter(artDate <= as.Date('2021-03-17')) %>%
      filter( n() > 2) %>%
      ungroup() %>% 
      select(cmtPoster, artPoster, artUrl, cmtStatus) %>% 
      unique()

# 接下來把網路圖畫出來,跟前面做的事都一樣,因此不再細述

# 篩選link中有出現的使用者
filtered_user <- userList %>%
          filter(user%in%link$cmtPoster | user%in%link$artPoster) %>%
          arrange(desc(type))

# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)

# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")


# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$cmtStatus == "推", "lightgreen", "palevioletred")

# 畫出社群網路圖
set.seed(2021)
plot(reviewNetwork, vertex.size=2, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=1)

# 加入標示
par(family="黑體-繁 中黑")
legend("bottomright", c("發文者","回文者"), pch=21,
  col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("推","噓"), 
       col=c("lightgreen","palevioletred"), lty=1, cex=1)

使用者是否受到歡迎 05/17~05/19

filter_degree = 5 # 使用者degree

# 過濾留言者對發文者的推噓程度
link <- posts_Reviews %>%
      filter(artCat=="Gossiping") %>% 
      filter(commentNum > 100) %>%
      filter(cmtStatus!="→") %>%
      group_by(cmtPoster, artUrl) %>%
      filter(artDate >= as.Date('2021-05-17')) %>%
      filter(artDate <= as.Date('2021-05-19')) %>%
      filter( n() > 2) %>%
      ungroup() %>% 
      select(cmtPoster, artPoster, artUrl, cmtStatus) %>% 
      unique()

# 接下來把網路圖畫出來,跟前面做的事都一樣,因此不再細述

# 篩選link中有出現的使用者
filtered_user <- userList %>%
          filter(user%in%link$cmtPoster | user%in%link$artPoster) %>%
          arrange(desc(type))

# 建立網路關係
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)

# 依據使用者身份對點進行上色
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")


# 依據回覆發生的文章所對應的主題,對他們的關聯線進行上色
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$cmtStatus == "推", "lightgreen", "palevioletred")

# 畫出社群網路圖
set.seed(2021)
plot(reviewNetwork, vertex.size=2, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > filter_degree, V(reviewNetwork)$label, NA),vertex.label.font=1)

# 加入標示
par(family="黑體-繁 中黑")
legend("bottomright", c("發文者","回文者"), pch=21,
  col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("推","噓"), 
       col=c("lightgreen","palevioletred"), lty=1, cex=1)