系統參數設定

my_locale <- Sys.getlocale("LC_ALL")
Sys.setlocale("LC_ALL", my_locale)
## Warning in Sys.setlocale("LC_ALL", my_locale): 作業系統回報無法實現設
## 定語區為 "LC_COLLATE=Chinese (Traditional)_Taiwan.950;LC_CTYPE=Chinese
## (Traditional)_Taiwan.950;LC_MONETARY=Chinese (Traditional)_Taiwan.
## 950;LC_NUMERIC=C;LC_TIME=Chinese (Traditional)_Taiwan.950" 的要求
## [1] ""
#Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼

安裝需要的packages

packages = c("dplyr", "tidytext", "stringr", "wordcloud2", "ggplot2",'readr','data.table','reshape2','wordcloud','tidyr','scales')
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.6.3
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 3.6.3
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.6.3
## Loading required package: RColorBrewer
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
library(readr)
library(scales)
## Warning: package 'scales' was built under R version 3.6.2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor

前言 台灣第15任總統、副總統選舉,於2020年1月11日舉行。在競選期間,不同世代的人之間的訴求和輿論,均牽動選情。民眾為了落實公民權的行使,以及表述各方立場,候選人及相關政黨議題不斷被分享,創造網路高聲量。網路取代傳統傳播大幅影響為政治競爭型態。因此,本組欲探究網路的「政治影響力」到底有多大?透過PTT資料進行候選人及政黨的詞頻計算找出關鍵字,以及輿情正向、反向的情緒分析。

載入總統候選人

Tsai_data = fread('Tsai_artWordFreq.csv',encoding = 'UTF-8')  #蔡英文
Hen_data = fread('Hen_artWordFreq.csv',encoding = 'UTF-8')  #韓國瑜
Song_data = fread('Song_artWordFreq.csv',encoding = 'UTF-8')  #宋楚瑜

載入政黨

DPP_data = fread('DPP_artWordFreq.csv',encoding = 'UTF-8') #民進黨
KMT_data = fread('KMT_artWordFreq.csv',encoding = 'UTF-8') #國民黨
NPP_data = fread('NPP_artWordFreq.csv',encoding = 'UTF-8') #時代力量
PFP_data = fread('PFP_artWordFreq.csv',encoding = 'UTF-8') #親民黨
TPP_data = fread('TPP_artWordFreq.csv',encoding = 'UTF-8') #民眾黨

查看資料前幾筆(已經整理成文章-詞彙-詞頻)

head(Tsai_data)
##                                artTitle    artDate  artTime
## 1: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 2: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 3: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 4: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 5: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 6: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
##                                                      artUrl     word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 時代力量    10
## 2: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     本土     9
## 3: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     支持     6
## 4: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     總統     6
## 5: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html   洪慈庸     4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     明確     4
head(Hen_data)
##                                        artTitle    artDate  artTime
## 1: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 2: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 3: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 4: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 5: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 6: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
##                                                      artUrl   word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 王金平    19
## 2: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 柯文哲     5
## 3: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html   市長     4
## 4: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html   組黨     4
## 5: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html   一事     4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 韓國瑜     4
head(Song_data)
##                                       artTitle    artDate  artTime
## 1: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 2: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 3: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 4: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 5: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 6: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
##                                                      artUrl   word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 親民黨     7
## 2: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   總統     6
## 3: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 宋楚瑜     4
## 4: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   選戰     4
## 5: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   鴻源     4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   加入     3

過濾特殊字元

###總統
Tsai_data = Tsai_data %>% 
  filter(!grepl('_',word))
Tsai_data = Tsai_data %>% 
  filter(!(word %in% c("https")))

Hen_data = Hen_data %>% 
  filter(!grepl('_',word))
Hen_data = Hen_data %>% 
  filter(!(word %in% c("https")))

Song_data = Song_data %>% 
  filter(!grepl('_',word))
Song_data = Song_data %>% 
  filter(!(word %in% c("https")))

###政黨
DPP_data = DPP_data %>% 
  filter(!grepl('_',word))
DPP_data = DPP_data %>% 
  filter(!(word %in% c("https")))

KMT_data = KMT_data %>% 
  filter(!grepl('_',word))
KMT_data = KMT_data %>% 
  filter(!(word %in% c("https")))

NPP_data = NPP_data %>% 
  filter(!grepl('_',word))
NPP_data = NPP_data %>% 
  filter(!(word %in% c("https")))

PFP_data = PFP_data %>% 
  filter(!grepl('_',word))
PFP_data = PFP_data %>% 
  filter(!(word %in% c("https")))

TPP_data = TPP_data %>% 
  filter(!grepl('_',word))
TPP_data = TPP_data %>% 
  filter(!(word %in% c("https")))

轉換日期格式

###總統
Tsai_data$artDate= Tsai_data$artDate %>% as.Date("%Y/%m/%d")
Hen_data$artDate= Hen_data$artDate %>% as.Date("%Y/%m/%d")
Song_data$artDate= Song_data$artDate %>% as.Date("%Y/%m/%d")

###政黨
DPP_data$artDate= DPP_data$artDate %>% as.Date("%Y/%m/%d")
KMT_data$artDate= KMT_data$artDate %>% as.Date("%Y/%m/%d")
NPP_data$artDate= NPP_data$artDate %>% as.Date("%Y/%m/%d")
PFP_data$artDate= PFP_data$artDate %>% as.Date("%Y/%m/%d")
TPP_data$artDate= TPP_data$artDate %>% as.Date("%Y/%m/%d")

計算所有字在文集中的總詞頻

###president
word_count_Tsai <- Tsai_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_Hen <- Hen_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_Song <- Song_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))

###party
word_count_DPP <- DPP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_KMT <- KMT_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_NPP <- NPP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_PFP <- PFP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_TPP <- TPP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_Tsai
## # A tibble: 17,428 x 2
##    word   count
##    <chr>  <int>
##  1 蔡英文 16604
##  2 台灣    9934
##  3 總統    8863
##  4 完整    6048
##  5 新聞    4732
##  6 民進黨  4424
##  7 韓國瑜  4399
##  8 記者    4236
##  9 表示    4081
## 10 媒體    3269
## # ... with 17,418 more rows
word_count_Hen
## # A tibble: 21,871 x 2
##    word   count
##    <chr>  <int>
##  1 韓國瑜 32443
##  2 總統    9112
##  3 完整    9070
##  4 新聞    7454
##  5 國民黨  6919
##  6 台灣    6869
##  7 記者    6510
##  8 表示    5555
##  9 媒體    5550
## 10 蔡英文  4482
## # ... with 21,861 more rows
word_count_Song
## # A tibble: 1,810 x 2
##    word   count
##    <chr>  <int>
##  1 宋楚瑜  1134
##  2 總統     636
##  3 親民黨   566
##  4 完整     355
##  5 台灣     334
##  6 柯文哲   323
##  7 新聞     266
##  8 表示     259
##  9 國民黨   248
## 10 記者     246
## # ... with 1,800 more rows

詞頻 三筆候選人的資料中可看到最常被提及的關鍵字為候選人的姓名,再者是競爭對手、政黨關鍵詞。蔡英文及韓國瑜常被媒體作為彼此論述的議題,因此可看到兩者的關鍵詞中出現對方的姓名、政黨。而在宋楚瑜則是出現柯文哲此關鍵字,原因可推究為當時親民黨常提出小黨合作等話題。

文字雲

Tsai_plot Hen_plot Song_plot

以LIWC字典判斷文集中的word屬於正面字還是負面字

# 正向字典txt檔
# 以,將字分隔
P <- read_file("liwc/positive.txt")

# 負向字典txt檔
N <- read_file("liwc/negative.txt")

將字串依,分割

#strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]

# 建立dataframe 有兩個欄位word,sentiments,word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")
LIWC = rbind(P, N)

總統候選人與LIWC情緒字典做join

word_count_Tsai %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 804 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   2835 positive 
##  2 問題   1720 negative 
##  3 希望   1525 positive 
##  4 自由   1030 positive 
##  5 重要    944 positive 
##  6 批評    640 negative 
##  7 相信    608 positive 
##  8 安全    474 positive 
##  9 朋友    457 positive 
## 10 決定    449 positive 
## # ... with 794 more rows
word_count_Hen %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 948 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   3314 positive 
##  2 希望   2024 positive 
##  3 問題   1998 negative 
##  4 自由   1020 positive 
##  5 重要    949 positive 
##  6 批評    845 negative 
##  7 朋友    759 positive 
##  8 相信    623 positive 
##  9 安全    594 positive 
## 10 遲到    568 negative 
## # ... with 938 more rows
word_count_Song %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 115 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持    141 positive 
##  2 希望    110 positive 
##  3 問題    110 negative 
##  4 自由     50 positive 
##  5 決定     39 positive 
##  6 重要     35 positive 
##  7 美人     34 positive 
##  8 尊重     34 positive 
##  9 批評     33 negative 
## 10 朋友     27 positive 
## # ... with 105 more rows

候選人正負向情緒字 在候選人正向關鍵字中最常出現的為「支持、希望、自由」,負向關鍵字為「問題、批評」多與選舉相關,而在韓國瑜的負向情緒字中出現「遲到」。

政黨與LIWC情緒字典做join

word_count_DPP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 609 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   1207 positive 
##  2 希望    727 positive 
##  3 問題    649 negative 
##  4 自由    385 positive 
##  5 重要    356 positive 
##  6 批評    327 negative 
##  7 攻擊    267 negative 
##  8 安全    227 positive 
##  9 相信    206 positive 
## 10 清楚    206 positive 
## # ... with 599 more rows
word_count_KMT %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 588 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持    970 positive 
##  2 希望    608 positive 
##  3 問題    533 negative 
##  4 自由    332 positive 
##  5 批評    295 negative 
##  6 決定    293 positive 
##  7 重要    287 positive 
##  8 安全    249 positive 
##  9 相信    199 positive 
## 10 和平    173 positive 
## # ... with 578 more rows
word_count_NPP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 597 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   1407 positive 
##  2 問題   1068 negative 
##  3 希望    788 positive 
##  4 重要    347 positive 
##  5 相信    284 positive 
##  6 清楚    283 positive 
##  7 違法    259 negative 
##  8 決定    256 positive 
##  9 朋友    243 positive 
## 10 關心    217 positive 
## # ... with 587 more rows
word_count_PFP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 64 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持     89 positive 
##  2 希望     45 positive 
##  3 問題     45 negative 
##  4 自由     28 positive 
##  5 朋友     23 positive 
##  6 安全     22 positive 
##  7 決定     22 positive 
##  8 擔心     21 negative 
##  9 美人     20 positive 
## 10 才能     16 positive 
## # ... with 54 more rows
word_count_TPP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 861 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 問題   2940 negative 
##  2 支持   2186 positive 
##  3 希望   1269 positive 
##  4 批評    892 negative 
##  5 自由    791 positive 
##  6 重要    773 positive 
##  7 相信    596 positive 
##  8 清楚    581 positive 
##  9 朋友    518 positive 
## 10 喜歡    517 positive 
## # ... with 851 more rows

政黨正負向情緒字 政黨的正負向關鍵字跟候選人相似。在親民黨的資料集中出現「美人」此用詞,經分析後發現為該政黨發言人姓名。另外,在各政黨的關鍵字數量中可以發現以民進黨、時代力量、民眾黨較多,推論為該政黨支持者較常使用網路進行討論,以致關鍵字數量高於國民黨、親民黨。

統計每天的文章正面字的次數與負面字的次數

###總統
sentiment_count_Tsai = Tsai_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_Hen = Hen_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_Song = Song_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
###政黨
sentiment_count_DPP = DPP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_KMT = KMT_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_NPP = NPP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_PFP = PFP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_TPP = TPP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_Tsai %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d")) 

sentiment_count_Hen %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d")) 

sentiment_count_Song %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d")) 

候選人正負向情緒 三位候選人的正負向情緒可以看出多數以正向情緒較高,可推究為選舉期間多數議題雖為爭議性取向,但在選詞上乃為正向。因此,整題觀察下可看到多為正向情緒。另外,蔡英文及韓國瑜較早提出參選計畫,網路討論聲量相較於宋楚瑜較早出現。

sentiment_count_Tsai %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Tsai$artDate == as.Date('2019/08/05'))[1]])),colour = "yellow",size=1)+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Tsai$artDate == as.Date('2020/01/11'))[1]])),colour = "yellow",size=1)

##8月5日負面情緒飆升

蔡英文正負向情緒 2019/8/5柯文哲組新政黨後,指控蔡英文總統身邊的人「每個人都貪汙」,引起與論譁然,總統府也立即要求柯文哲釐清及道歉。因此,當日出現較高的負向情緒。而2020/1/11當日蔡英文贏得該屆總統選舉,並且以高得票率打破先前競選紀錄。因此,正向情緒比負向情緒高出許多。

Tsai_data %>% 
  filter(artDate == as.Date('2019/08/05')) %>% 
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>20) %>%   ####過濾出現太少次的字
  wordcloud2()
##柯文哲表示蔡英文身邊的人都貪汙
Tsai_plot_0805

Tsai_plot_0805

sentiment_count_TPP %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Hen$artDate == as.Date('2019/08/05'))
[1]])),colour = "yellow",size=1) 

###同日台民黨、柯文哲的討論度也提高 

民眾黨正負向情緒 柯文哲組織「台灣民眾黨」搶攻立法院席次,然而是否有意要參選總統、會不會有「郭柯配」、「柯郭配」引起大家關注。以及柯文哲在2019/8/5上午先大罵韓國瑜發大財是喊口號,又批評蔡英文沒貪汙,但身邊每個人都貪汙,甚至砲轟深綠團體是假義和團。使得此日讓民眾黨輿論聲量達到最高峰。

sentiment_count_Hen %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Hen$artDate == as.Date('2019/12/29'))
[1]])),colour = "yellow",size=1) 

韓國瑜正負情緒 2019/12/29韓國瑜參與台灣總統大選電視辯論會,以及當日晚上參加台中造勢晚會,在大雨造勢、辯論會火力全開,聲量創一年新高。首度拋出「台灣六塊肌」政策,韓國瑜表示,台灣分為六大區塊,發展不同區域特色與產業,要讓台灣重新被國際社會重視,以及提出「滿天星」計劃,培養年輕人出國交換一年,軍公教警消每年帶職帶薪出國進修。然而,網友認為這些政策內容較為空泛。因此,當日出現負向情緒最高。

Hen_plot_1229

Hen_plot_1229

sentiment_count_Song %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Song$artDate == as.Date('2019/11/12'))
[1]])),colour = "yellow",size=1)

宋楚瑜正負向情緒 2019/11/12宋楚瑜提出參選2020總統,引發網路一片熱議,而對於宋楚瑜5度參選一事,許多網友表示真的是「活到老選到老」、「有選舉那年,就有宋楚瑜,這就是年年有瑜」、「用參選陪伴著我們長大」、「遲到但永不缺席的男子」。讓正向情緒達到高點。

Song_data %>% 
  filter(artDate == as.Date('2019/11/12')) %>% 
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>10) %>%   # 過濾出現太少次的字
  wordcloud2()
## 11/13 宋楚瑜宣布參選總統
sentiment_count_TPP %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_TPP$artDate == as.Date('2019/10/03'))
[1]])),colour = "yellow",size=1)

民眾黨正負向情緒 2019/10/3民眾黨提出推出不分區立委海選計畫,然而區域立委候選人名單被質疑具有「綠營背景」,遭郭台銘拒絕合照。因此,當日的負面情緒較高。

Tsai_data %>%
  inner_join(LIWC) %>%
  group_by(word,sentiment) %>%
  summarise(count = n()) %>% 
  data.frame() %>% 
  top_n(30,wt = count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(size=12))+
  coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector

Hen_data %>%
  inner_join(LIWC) %>%
  group_by(word,sentiment) %>%
  summarise(count = n()) %>% 
  data.frame() %>% 
  top_n(30,wt = count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(size=12))+
  coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector

Song_data %>%
  inner_join(LIWC) %>%
  group_by(word,sentiment) %>%
  summarise(count = n()) %>% 
  data.frame() %>% 
  top_n(30,wt = count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(size=12))+
  coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector

總統候選人標題及留言數分析

## 載入總統候選人標題文
Tsai_article = fread('Tsai_articleMetaData.csv',encoding = 'UTF-8')
Hen_article = fread('Hen_articleMetaData.csv',encoding = 'UTF-8')
commentNum_Tsai <- Tsai_article %>%
  filter(!is.na(as.numeric(commentNum)))%>%
 group_by(artDate) %>%
 summarise(comment_Tsai = sum(as.numeric(commentNum)))

articleNum_Tsai <- Tsai_article %>%
  filter(!is.na(as.numeric(commentNum))) %>%
  group_by(artDate) %>%
 summarise(article_Tsai = n())

commentNum_Hen <- Hen_article %>%
  filter(!is.na(as.numeric(commentNum)))%>%
 group_by(artDate) %>%
 summarise(comment_Hen = sum(as.numeric(commentNum)))

articleNum_Hen <- Hen_article %>%
  filter(!is.na(as.numeric(commentNum))) %>%
  group_by(artDate) %>%
 summarise(article_Hen = n())
commentNum_Hen %>%
  inner_join(articleNum_Hen) %>% 
  inner_join(commentNum_Tsai) %>%
  inner_join(articleNum_Tsai) %>%
  ggplot()+
  geom_line(aes(x=as.Date(artDate),y=(comment_Tsai),color="蔡英文留言數"))+
  geom_line(aes(x=as.Date(artDate),y=(comment_Hen),color="韓國瑜留言數"))+
  geom_line(aes(x=as.Date(artDate),y=(article_Tsai),color="蔡英文文章數"))+
  geom_line(aes(x=as.Date(artDate),y=(article_Hen),color="韓國瑜文章數"))+
  scale_colour_manual(values=c("#15851c","#ffb940","#0d31bf","#5dc2fc"))+
  scale_x_date(labels = date_format("%m/%d"))+
  scale_y_log10()
## Joining, by = "artDate"
## Joining, by = "artDate"
## Joining, by = "artDate"

###韓國瑜文章的留言數明顯多於蔡英文,討論度較高

候選人網路聲量 整體觀察下,網路聲量大小與選舉結果不成正比,韓國瑜的總留言數、總文章數大多超過蔡英文,依舊是政治人物中的聲量王,然而選舉的結果卻是敗選,可推竟網路聲量能居高不下,可能是由爭議造成的負面聲量堆疊而成。

commentNum_Hen %>%
  inner_join(articleNum_Hen) %>% 
  inner_join(commentNum_Tsai) %>%
  inner_join(articleNum_Tsai) %>%
  ggplot()+ 
  geom_line(aes(x=as.Date(artDate),y=(comment_Tsai / article_Tsai),color="Tsai"))+
  geom_line(aes(x=as.Date(artDate),y=(comment_Hen / article_Hen),color="Hen"))+
  scale_x_date(labels = date_format("%m/%d"))
## Joining, by = "artDate"
## Joining, by = "artDate"
## Joining, by = "artDate"

###但也因為討論韓國瑜的文章較多,因此韓國瑜文章的平均留言數並沒有比較多

候選人網路聲量 當我們用總言數/總文章數來看,韓國瑜的文章平均留言數並沒有比較多。雖然討論韓國瑜的文章較多,但留言數量並不多。