系統參數設定

my_locale <- Sys.getlocale("LC_ALL")
Sys.setlocale("LC_ALL", my_locale)

## Warning in Sys.setlocale("LC_ALL", my_locale): 作業系統回報無法實現設
## 定語區為 "LC_COLLATE=Chinese (Traditional)_Taiwan.950;LC_CTYPE=Chinese
## (Traditional)_Taiwan.950;LC_MONETARY=Chinese (Traditional)_Taiwan.
## 950;LC_NUMERIC=C;LC_TIME=Chinese (Traditional)_Taiwan.950" 的要求

## [1] ""

#Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼

安裝需要的packages

packages = c("dplyr", "tidytext", "stringr", "wordcloud2", "ggplot2",'readr','data.table','reshape2','wordcloud','tidyr','scales')
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(tidytext)

## Warning: package 'tidytext' was built under R version 3.6.3

library(wordcloud2)

## Warning: package 'wordcloud2' was built under R version 3.6.3

library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

library(ggplot2)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following objects are masked from 'package:data.table':
## 
##     dcast, melt

library(wordcloud)

## Warning: package 'wordcloud' was built under R version 3.6.3

## Loading required package: RColorBrewer

library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:reshape2':
## 
##     smiths

library(readr)
library(scales)

## Warning: package 'scales' was built under R version 3.6.2

## 
## Attaching package: 'scales'

## The following object is masked from 'package:readr':
## 
##     col_factor

前言台灣第15任總統、副總統選舉，於2020年1月11日舉行。在競選期間，不同世代的人之間的訴求和輿論，均牽動選情。民眾為了落實公民權的行使，以及表述各方立場，候選人及相關政黨議題不斷被分享，創造網路高聲量。網路取代傳統傳播大幅影響為政治競爭型態。因此，本組欲探究網路的「政治影響力」到底有多大？透過PTT資料進行候選人及政黨的詞頻計算找出關鍵字，以及輿情正向、反向的情緒分析。

載入總統候選人

Tsai_data = fread('Tsai_artWordFreq.csv',encoding = 'UTF-8')  #蔡英文
Hen_data = fread('Hen_artWordFreq.csv',encoding = 'UTF-8')  #韓國瑜
Song_data = fread('Song_artWordFreq.csv',encoding = 'UTF-8')  #宋楚瑜

載入政黨

DPP_data = fread('DPP_artWordFreq.csv',encoding = 'UTF-8') #民進黨
KMT_data = fread('KMT_artWordFreq.csv',encoding = 'UTF-8') #國民黨
NPP_data = fread('NPP_artWordFreq.csv',encoding = 'UTF-8') #時代力量
PFP_data = fread('PFP_artWordFreq.csv',encoding = 'UTF-8') #親民黨
TPP_data = fread('TPP_artWordFreq.csv',encoding = 'UTF-8') #民眾黨

查看資料前幾筆(已經整理成文章-詞彙-詞頻)

head(Tsai_data)

##                                artTitle    artDate  artTime
## 1: [新聞]洪慈庸：明確支持蔡英文連任2020 2019/08/01 03:04:10
## 2: [新聞]洪慈庸：明確支持蔡英文連任2020 2019/08/01 03:04:10
## 3: [新聞]洪慈庸：明確支持蔡英文連任2020 2019/08/01 03:04:10
## 4: [新聞]洪慈庸：明確支持蔡英文連任2020 2019/08/01 03:04:10
## 5: [新聞]洪慈庸：明確支持蔡英文連任2020 2019/08/01 03:04:10
## 6: [新聞]洪慈庸：明確支持蔡英文連任2020 2019/08/01 03:04:10
##                                                      artUrl     word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 時代力量    10
## 2: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     本土     9
## 3: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     支持     6
## 4: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     總統     6
## 5: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html   洪慈庸     4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html     明確     4

head(Hen_data)

##                                        artTitle    artDate  artTime
## 1: [新聞]王金平拒當韓國瑜副手原因曝光！　「一定 2019/08/01 18:52:07
## 2: [新聞]王金平拒當韓國瑜副手原因曝光！　「一定 2019/08/01 18:52:07
## 3: [新聞]王金平拒當韓國瑜副手原因曝光！　「一定 2019/08/01 18:52:07
## 4: [新聞]王金平拒當韓國瑜副手原因曝光！　「一定 2019/08/01 18:52:07
## 5: [新聞]王金平拒當韓國瑜副手原因曝光！　「一定 2019/08/01 18:52:07
## 6: [新聞]王金平拒當韓國瑜副手原因曝光！　「一定 2019/08/01 18:52:07
##                                                      artUrl   word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 王金平    19
## 2: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 柯文哲     5
## 3: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html   市長     4
## 4: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html   組黨     4
## 5: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html   一事     4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 韓國瑜     4

head(Song_data)

##                                       artTitle    artDate  artTime
## 1: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 2: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 3: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 4: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 5: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 6: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
##                                                      artUrl   word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 親民黨     7
## 2: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   總統     6
## 3: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 宋楚瑜     4
## 4: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   選戰     4
## 5: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   鴻源     4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html   加入     3

過濾特殊字元

###總統
Tsai_data = Tsai_data %>% 
  filter(!grepl('_',word))
Tsai_data = Tsai_data %>% 
  filter(!(word %in% c("https")))

Hen_data = Hen_data %>% 
  filter(!grepl('_',word))
Hen_data = Hen_data %>% 
  filter(!(word %in% c("https")))

Song_data = Song_data %>% 
  filter(!grepl('_',word))
Song_data = Song_data %>% 
  filter(!(word %in% c("https")))

###政黨
DPP_data = DPP_data %>% 
  filter(!grepl('_',word))
DPP_data = DPP_data %>% 
  filter(!(word %in% c("https")))

KMT_data = KMT_data %>% 
  filter(!grepl('_',word))
KMT_data = KMT_data %>% 
  filter(!(word %in% c("https")))

NPP_data = NPP_data %>% 
  filter(!grepl('_',word))
NPP_data = NPP_data %>% 
  filter(!(word %in% c("https")))

PFP_data = PFP_data %>% 
  filter(!grepl('_',word))
PFP_data = PFP_data %>% 
  filter(!(word %in% c("https")))

TPP_data = TPP_data %>% 
  filter(!grepl('_',word))
TPP_data = TPP_data %>% 
  filter(!(word %in% c("https")))

轉換日期格式

###總統
Tsai_data$artDate= Tsai_data$artDate %>% as.Date("%Y/%m/%d")
Hen_data$artDate= Hen_data$artDate %>% as.Date("%Y/%m/%d")
Song_data$artDate= Song_data$artDate %>% as.Date("%Y/%m/%d")

###政黨
DPP_data$artDate= DPP_data$artDate %>% as.Date("%Y/%m/%d")
KMT_data$artDate= KMT_data$artDate %>% as.Date("%Y/%m/%d")
NPP_data$artDate= NPP_data$artDate %>% as.Date("%Y/%m/%d")
PFP_data$artDate= PFP_data$artDate %>% as.Date("%Y/%m/%d")
TPP_data$artDate= TPP_data$artDate %>% as.Date("%Y/%m/%d")

計算所有字在文集中的總詞頻

###president
word_count_Tsai <- Tsai_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_Hen <- Hen_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_Song <- Song_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))

###party
word_count_DPP <- DPP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_KMT <- KMT_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_NPP <- NPP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_PFP <- PFP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_TPP <- TPP_data %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>3) %>%  # 過濾出現太少次的字
  arrange(desc(count))

word_count_Tsai

## # A tibble: 17,428 x 2
##    word   count
##    <chr>  <int>
##  1 蔡英文 16604
##  2 台灣    9934
##  3 總統    8863
##  4 完整    6048
##  5 新聞    4732
##  6 民進黨  4424
##  7 韓國瑜  4399
##  8 記者    4236
##  9 表示    4081
## 10 媒體    3269
## # ... with 17,418 more rows

word_count_Hen

## # A tibble: 21,871 x 2
##    word   count
##    <chr>  <int>
##  1 韓國瑜 32443
##  2 總統    9112
##  3 完整    9070
##  4 新聞    7454
##  5 國民黨  6919
##  6 台灣    6869
##  7 記者    6510
##  8 表示    5555
##  9 媒體    5550
## 10 蔡英文  4482
## # ... with 21,861 more rows

word_count_Song

## # A tibble: 1,810 x 2
##    word   count
##    <chr>  <int>
##  1 宋楚瑜  1134
##  2 總統     636
##  3 親民黨   566
##  4 完整     355
##  5 台灣     334
##  6 柯文哲   323
##  7 新聞     266
##  8 表示     259
##  9 國民黨   248
## 10 記者     246
## # ... with 1,800 more rows

詞頻三筆候選人的資料中可看到最常被提及的關鍵字為候選人的姓名，再者是競爭對手、政黨關鍵詞。蔡英文及韓國瑜常被媒體作為彼此論述的議題，因此可看到兩者的關鍵詞中出現對方的姓名、政黨。而在宋楚瑜則是出現柯文哲此關鍵字，原因可推究為當時親民黨常提出小黨合作等話題。

文字雲

Tsai_plot Hen_plot Song_plot

以LIWC字典判斷文集中的word屬於正面字還是負面字

# 正向字典txt檔
# 以,將字分隔
P <- read_file("liwc/positive.txt")

# 負向字典txt檔
N <- read_file("liwc/negative.txt")

將字串依,分割

#strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]

# 建立dataframe 有兩個欄位word,sentiments，word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")

LIWC = rbind(P, N)

總統候選人與LIWC情緒字典做join

word_count_Tsai %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 804 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   2835 positive 
##  2 問題   1720 negative 
##  3 希望   1525 positive 
##  4 自由   1030 positive 
##  5 重要    944 positive 
##  6 批評    640 negative 
##  7 相信    608 positive 
##  8 安全    474 positive 
##  9 朋友    457 positive 
## 10 決定    449 positive 
## # ... with 794 more rows

word_count_Hen %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 948 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   3314 positive 
##  2 希望   2024 positive 
##  3 問題   1998 negative 
##  4 自由   1020 positive 
##  5 重要    949 positive 
##  6 批評    845 negative 
##  7 朋友    759 positive 
##  8 相信    623 positive 
##  9 安全    594 positive 
## 10 遲到    568 negative 
## # ... with 938 more rows

word_count_Song %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 115 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持    141 positive 
##  2 希望    110 positive 
##  3 問題    110 negative 
##  4 自由     50 positive 
##  5 決定     39 positive 
##  6 重要     35 positive 
##  7 美人     34 positive 
##  8 尊重     34 positive 
##  9 批評     33 negative 
## 10 朋友     27 positive 
## # ... with 105 more rows

候選人正負向情緒字 在候選人正向關鍵字中最常出現的為「支持、希望、自由」，負向關鍵字為「問題、批評」多與選舉相關，而在韓國瑜的負向情緒字中出現「遲到」。

政黨與LIWC情緒字典做join

word_count_DPP %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 609 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   1207 positive 
##  2 希望    727 positive 
##  3 問題    649 negative 
##  4 自由    385 positive 
##  5 重要    356 positive 
##  6 批評    327 negative 
##  7 攻擊    267 negative 
##  8 安全    227 positive 
##  9 相信    206 positive 
## 10 清楚    206 positive 
## # ... with 599 more rows

word_count_KMT %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 588 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持    970 positive 
##  2 希望    608 positive 
##  3 問題    533 negative 
##  4 自由    332 positive 
##  5 批評    295 negative 
##  6 決定    293 positive 
##  7 重要    287 positive 
##  8 安全    249 positive 
##  9 相信    199 positive 
## 10 和平    173 positive 
## # ... with 578 more rows

word_count_NPP %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 597 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持   1407 positive 
##  2 問題   1068 negative 
##  3 希望    788 positive 
##  4 重要    347 positive 
##  5 相信    284 positive 
##  6 清楚    283 positive 
##  7 違法    259 negative 
##  8 決定    256 positive 
##  9 朋友    243 positive 
## 10 關心    217 positive 
## # ... with 587 more rows

word_count_PFP %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 64 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 支持     89 positive 
##  2 希望     45 positive 
##  3 問題     45 negative 
##  4 自由     28 positive 
##  5 朋友     23 positive 
##  6 安全     22 positive 
##  7 決定     22 positive 
##  8 擔心     21 negative 
##  9 美人     20 positive 
## 10 才能     16 positive 
## # ... with 54 more rows

word_count_TPP %>% inner_join(LIWC)

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

## # A tibble: 861 x 3
##    word  count sentiment
##    <chr> <int> <fct>    
##  1 問題   2940 negative 
##  2 支持   2186 positive 
##  3 希望   1269 positive 
##  4 批評    892 negative 
##  5 自由    791 positive 
##  6 重要    773 positive 
##  7 相信    596 positive 
##  8 清楚    581 positive 
##  9 朋友    518 positive 
## 10 喜歡    517 positive 
## # ... with 851 more rows

政黨正負向情緒字 政黨的正負向關鍵字跟候選人相似。在親民黨的資料集中出現「美人」此用詞，經分析後發現為該政黨發言人姓名。另外，在各政黨的關鍵字數量中可以發現以民進黨、時代力量、民眾黨較多，推論為該政黨支持者較常使用網路進行討論，以致關鍵字數量高於國民黨、親民黨。

統計每天的文章正面字的次數與負面字的次數

###總統
sentiment_count_Tsai = Tsai_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_Hen = Hen_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_Song = Song_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

###政黨
sentiment_count_DPP = DPP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_KMT = KMT_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_NPP = NPP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_PFP = PFP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_TPP = TPP_data %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count_Tsai %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))

sentiment_count_Hen %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))

sentiment_count_Song %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))

候選人正負向情緒 三位候選人的正負向情緒可以看出多數以正向情緒較高，可推究為選舉期間多數議題雖為爭議性取向，但在選詞上乃為正向。因此，整題觀察下可看到多為正向情緒。另外，蔡英文及韓國瑜較早提出參選計畫，網路討論聲量相較於宋楚瑜較早出現。

sentiment_count_Tsai %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Tsai$artDate == as.Date('2019/08/05'))[1]])),colour = "yellow",size=1)+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Tsai$artDate == as.Date('2020/01/11'))[1]])),colour = "yellow",size=1)

##8月5日負面情緒飆升

蔡英文正負向情緒 2019/8/5柯文哲組新政黨後，指控蔡英文總統身邊的人「每個人都貪汙」，引起與論譁然，總統府也立即要求柯文哲釐清及道歉。因此，當日出現較高的負向情緒。而2020/1/11當日蔡英文贏得該屆總統選舉，並且以高得票率打破先前競選紀錄。因此，正向情緒比負向情緒高出許多。

Tsai_data %>% 
  filter(artDate == as.Date('2019/08/05')) %>% 
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>20) %>%   ####過濾出現太少次的字
  wordcloud2()

##柯文哲表示蔡英文身邊的人都貪汙

Tsai_plot_0805

sentiment_count_TPP %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Hen$artDate == as.Date('2019/08/05'))
[1]])),colour = "yellow",size=1)

###同日台民黨、柯文哲的討論度也提高

民眾黨正負向情緒 柯文哲組織「台灣民眾黨」搶攻立法院席次，然而是否有意要參選總統、會不會有「郭柯配」、「柯郭配」引起大家關注。以及柯文哲在2019/8/5上午先大罵韓國瑜發大財是喊口號，又批評蔡英文沒貪汙，但身邊每個人都貪汙，甚至砲轟深綠團體是假義和團。使得此日讓民眾黨輿論聲量達到最高峰。

sentiment_count_Hen %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Hen$artDate == as.Date('2019/12/29'))
[1]])),colour = "yellow",size=1)

韓國瑜正負情緒 2019/12/29韓國瑜參與台灣總統大選電視辯論會，以及當日晚上參加台中造勢晚會，在大雨造勢、辯論會火力全開，聲量創一年新高。首度拋出「台灣六塊肌」政策，韓國瑜表示，台灣分為六大區塊，發展不同區域特色與產業，要讓台灣重新被國際社會重視，以及提出「滿天星」計劃，培養年輕人出國交換一年，軍公教警消每年帶職帶薪出國進修。然而，網友認為這些政策內容較為空泛。因此，當日出現負向情緒最高。

Hen_plot_1229

sentiment_count_Song %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Song$artDate == as.Date('2019/11/12'))
[1]])),colour = "yellow",size=1)

宋楚瑜正負向情緒 2019/11/12宋楚瑜提出參選2020總統，引發網路一片熱議，而對於宋楚瑜5度參選一事，許多網友表示真的是「活到老選到老」、「有選舉那年，就有宋楚瑜，這就是年年有瑜」、「用參選陪伴著我們長大」、「遲到但永不缺席的男子」。讓正向情緒達到高點。

Song_data %>% 
  filter(artDate == as.Date('2019/11/12')) %>% 
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count>10) %>%   # 過濾出現太少次的字
  wordcloud2()

## 11/13 宋楚瑜宣布參選總統

sentiment_count_TPP %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_TPP$artDate == as.Date('2019/10/03'))
[1]])),colour = "yellow",size=1)

民眾黨正負向情緒 2019/10/3民眾黨提出推出不分區立委海選計畫，然而區域立委候選人名單被質疑具有「綠營背景」，遭郭台銘拒絕合照。因此，當日的負面情緒較高。

Tsai_data %>%
  inner_join(LIWC) %>%
  group_by(word,sentiment) %>%
  summarise(count = n()) %>% 
  data.frame() %>% 
  top_n(30,wt = count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(size=12))+
  coord_flip()

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

Hen_data %>%
  inner_join(LIWC) %>%
  group_by(word,sentiment) %>%
  summarise(count = n()) %>% 
  data.frame() %>% 
  top_n(30,wt = count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(size=12))+
  coord_flip()

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

Song_data %>%
  inner_join(LIWC) %>%
  group_by(word,sentiment) %>%
  summarise(count = n()) %>% 
  data.frame() %>% 
  top_n(30,wt = count) %>%
  ungroup() %>% 
  mutate(word = reorder(word, count)) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(y = "Contribution to sentiment",
       x = NULL) +
  theme(text=element_text(size=12))+
  coord_flip()

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

總統候選人標題及留言數分析

## 載入總統候選人標題文
Tsai_article = fread('Tsai_articleMetaData.csv',encoding = 'UTF-8')
Hen_article = fread('Hen_articleMetaData.csv',encoding = 'UTF-8')

commentNum_Tsai <- Tsai_article %>%
  filter(!is.na(as.numeric(commentNum)))%>%
 group_by(artDate) %>%
 summarise(comment_Tsai = sum(as.numeric(commentNum)))

articleNum_Tsai <- Tsai_article %>%
  filter(!is.na(as.numeric(commentNum))) %>%
  group_by(artDate) %>%
 summarise(article_Tsai = n())

commentNum_Hen <- Hen_article %>%
  filter(!is.na(as.numeric(commentNum)))%>%
 group_by(artDate) %>%
 summarise(comment_Hen = sum(as.numeric(commentNum)))

articleNum_Hen <- Hen_article %>%
  filter(!is.na(as.numeric(commentNum))) %>%
  group_by(artDate) %>%
 summarise(article_Hen = n())

commentNum_Hen %>%
  inner_join(articleNum_Hen) %>% 
  inner_join(commentNum_Tsai) %>%
  inner_join(articleNum_Tsai) %>%
  ggplot()+
  geom_line(aes(x=as.Date(artDate),y=(comment_Tsai),color="蔡英文留言數"))+
  geom_line(aes(x=as.Date(artDate),y=(comment_Hen),color="韓國瑜留言數"))+
  geom_line(aes(x=as.Date(artDate),y=(article_Tsai),color="蔡英文文章數"))+
  geom_line(aes(x=as.Date(artDate),y=(article_Hen),color="韓國瑜文章數"))+
  scale_colour_manual(values=c("#15851c","#ffb940","#0d31bf","#5dc2fc"))+
  scale_x_date(labels = date_format("%m/%d"))+
  scale_y_log10()

## Joining, by = "artDate"
## Joining, by = "artDate"
## Joining, by = "artDate"

###韓國瑜文章的留言數明顯多於蔡英文，討論度較高

候選人網路聲量 整體觀察下，網路聲量大小與選舉結果不成正比，韓國瑜的總留言數、總文章數大多超過蔡英文，依舊是政治人物中的聲量王，然而選舉的結果卻是敗選，可推竟網路聲量能居高不下，可能是由爭議造成的負面聲量堆疊而成。

commentNum_Hen %>%
  inner_join(articleNum_Hen) %>% 
  inner_join(commentNum_Tsai) %>%
  inner_join(articleNum_Tsai) %>%
  ggplot()+ 
  geom_line(aes(x=as.Date(artDate),y=(comment_Tsai / article_Tsai),color="Tsai"))+
  geom_line(aes(x=as.Date(artDate),y=(comment_Hen / article_Hen),color="Hen"))+
  scale_x_date(labels = date_format("%m/%d"))

## Joining, by = "artDate"
## Joining, by = "artDate"
## Joining, by = "artDate"

###但也因為討論韓國瑜的文章較多，因此韓國瑜文章的平均留言數並沒有比較多

候選人網路聲量 當我們用總言數/總文章數來看，韓國瑜的文章平均留言數並沒有比較多。雖然討論韓國瑜的文章較多，但留言數量並不多。

社群媒體分析第八組讀書會

王品堯、陳芓僮、黃晟育、蔡尚亨、莊子霆、黃緯翔、游景筑

2020/03/31