my_locale <- Sys.getlocale("LC_ALL")
Sys.setlocale("LC_ALL", my_locale)
## Warning in Sys.setlocale("LC_ALL", my_locale): 作業系統回報無法實現設
## 定語區為 "LC_COLLATE=Chinese (Traditional)_Taiwan.950;LC_CTYPE=Chinese
## (Traditional)_Taiwan.950;LC_MONETARY=Chinese (Traditional)_Taiwan.
## 950;LC_NUMERIC=C;LC_TIME=Chinese (Traditional)_Taiwan.950" 的要求
## [1] ""
#Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
packages = c("dplyr", "tidytext", "stringr", "wordcloud2", "ggplot2",'readr','data.table','reshape2','wordcloud','tidyr','scales')
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.6.3
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 3.6.3
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.6.3
## Loading required package: RColorBrewer
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(readr)
library(scales)
## Warning: package 'scales' was built under R version 3.6.2
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
前言 台灣第15任總統、副總統選舉,於2020年1月11日舉行。在競選期間,不同世代的人之間的訴求和輿論,均牽動選情。民眾為了落實公民權的行使,以及表述各方立場,候選人及相關政黨議題不斷被分享,創造網路高聲量。網路取代傳統傳播大幅影響為政治競爭型態。因此,本組欲探究網路的「政治影響力」到底有多大?透過PTT資料進行候選人及政黨的詞頻計算找出關鍵字,以及輿情正向、反向的情緒分析。
Tsai_data = fread('Tsai_artWordFreq.csv',encoding = 'UTF-8') #蔡英文
Hen_data = fread('Hen_artWordFreq.csv',encoding = 'UTF-8') #韓國瑜
Song_data = fread('Song_artWordFreq.csv',encoding = 'UTF-8') #宋楚瑜
DPP_data = fread('DPP_artWordFreq.csv',encoding = 'UTF-8') #民進黨
KMT_data = fread('KMT_artWordFreq.csv',encoding = 'UTF-8') #國民黨
NPP_data = fread('NPP_artWordFreq.csv',encoding = 'UTF-8') #時代力量
PFP_data = fread('PFP_artWordFreq.csv',encoding = 'UTF-8') #親民黨
TPP_data = fread('TPP_artWordFreq.csv',encoding = 'UTF-8') #民眾黨
head(Tsai_data)
## artTitle artDate artTime
## 1: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 2: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 3: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 4: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 5: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## 6: [新聞]洪慈庸:明確支持蔡英文連任2020 2019/08/01 03:04:10
## artUrl word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 時代力量 10
## 2: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 本土 9
## 3: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 支持 6
## 4: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 總統 6
## 5: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 洪慈庸 4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1564657817.A.607.html 明確 4
head(Hen_data)
## artTitle artDate artTime
## 1: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 2: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 3: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 4: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 5: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## 6: [新聞]王金平拒當韓國瑜副手原因曝光! 「一定 2019/08/01 18:52:07
## artUrl word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 王金平 19
## 2: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 柯文哲 5
## 3: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 市長 4
## 4: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 組黨 4
## 5: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 一事 4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1564714689.A.199.html 韓國瑜 4
head(Song_data)
## artTitle artDate artTime
## 1: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 2: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 3: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 4: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 5: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## 6: [新聞]傳宋楚瑜明年第5度參加總統大選親民黨說 2019/08/15 05:02:27
## artUrl word count
## 1: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 親民黨 7
## 2: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 總統 6
## 3: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 宋楚瑜 4
## 4: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 選戰 4
## 5: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 鴻源 4
## 6: https://www.ptt.cc/bbs/Gossiping/M.1565874515.A.4D6.html 加入 3
###總統
Tsai_data = Tsai_data %>%
filter(!grepl('_',word))
Tsai_data = Tsai_data %>%
filter(!(word %in% c("https")))
Hen_data = Hen_data %>%
filter(!grepl('_',word))
Hen_data = Hen_data %>%
filter(!(word %in% c("https")))
Song_data = Song_data %>%
filter(!grepl('_',word))
Song_data = Song_data %>%
filter(!(word %in% c("https")))
###政黨
DPP_data = DPP_data %>%
filter(!grepl('_',word))
DPP_data = DPP_data %>%
filter(!(word %in% c("https")))
KMT_data = KMT_data %>%
filter(!grepl('_',word))
KMT_data = KMT_data %>%
filter(!(word %in% c("https")))
NPP_data = NPP_data %>%
filter(!grepl('_',word))
NPP_data = NPP_data %>%
filter(!(word %in% c("https")))
PFP_data = PFP_data %>%
filter(!grepl('_',word))
PFP_data = PFP_data %>%
filter(!(word %in% c("https")))
TPP_data = TPP_data %>%
filter(!grepl('_',word))
TPP_data = TPP_data %>%
filter(!(word %in% c("https")))
###總統
Tsai_data$artDate= Tsai_data$artDate %>% as.Date("%Y/%m/%d")
Hen_data$artDate= Hen_data$artDate %>% as.Date("%Y/%m/%d")
Song_data$artDate= Song_data$artDate %>% as.Date("%Y/%m/%d")
###政黨
DPP_data$artDate= DPP_data$artDate %>% as.Date("%Y/%m/%d")
KMT_data$artDate= KMT_data$artDate %>% as.Date("%Y/%m/%d")
NPP_data$artDate= NPP_data$artDate %>% as.Date("%Y/%m/%d")
PFP_data$artDate= PFP_data$artDate %>% as.Date("%Y/%m/%d")
TPP_data$artDate= TPP_data$artDate %>% as.Date("%Y/%m/%d")
###president
word_count_Tsai <- Tsai_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_Hen <- Hen_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_Song <- Song_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
###party
word_count_DPP <- DPP_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_KMT <- KMT_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_NPP <- NPP_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_PFP <- PFP_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_TPP <- TPP_data %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>3) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_Tsai
## # A tibble: 17,428 x 2
## word count
## <chr> <int>
## 1 蔡英文 16604
## 2 台灣 9934
## 3 總統 8863
## 4 完整 6048
## 5 新聞 4732
## 6 民進黨 4424
## 7 韓國瑜 4399
## 8 記者 4236
## 9 表示 4081
## 10 媒體 3269
## # ... with 17,418 more rows
word_count_Hen
## # A tibble: 21,871 x 2
## word count
## <chr> <int>
## 1 韓國瑜 32443
## 2 總統 9112
## 3 完整 9070
## 4 新聞 7454
## 5 國民黨 6919
## 6 台灣 6869
## 7 記者 6510
## 8 表示 5555
## 9 媒體 5550
## 10 蔡英文 4482
## # ... with 21,861 more rows
word_count_Song
## # A tibble: 1,810 x 2
## word count
## <chr> <int>
## 1 宋楚瑜 1134
## 2 總統 636
## 3 親民黨 566
## 4 完整 355
## 5 台灣 334
## 6 柯文哲 323
## 7 新聞 266
## 8 表示 259
## 9 國民黨 248
## 10 記者 246
## # ... with 1,800 more rows
詞頻 三筆候選人的資料中可看到最常被提及的關鍵字為候選人的姓名,再者是競爭對手、政黨關鍵詞。蔡英文及韓國瑜常被媒體作為彼此論述的議題,因此可看到兩者的關鍵詞中出現對方的姓名、政黨。而在宋楚瑜則是出現柯文哲此關鍵字,原因可推究為當時親民黨常提出小黨合作等話題。
# 正向字典txt檔
# 以,將字分隔
P <- read_file("liwc/positive.txt")
# 負向字典txt檔
N <- read_file("liwc/negative.txt")
#strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]
# 建立dataframe 有兩個欄位word,sentiments,word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")
LIWC = rbind(P, N)
word_count_Tsai %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 804 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 2835 positive
## 2 問題 1720 negative
## 3 希望 1525 positive
## 4 自由 1030 positive
## 5 重要 944 positive
## 6 批評 640 negative
## 7 相信 608 positive
## 8 安全 474 positive
## 9 朋友 457 positive
## 10 決定 449 positive
## # ... with 794 more rows
word_count_Hen %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 948 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 3314 positive
## 2 希望 2024 positive
## 3 問題 1998 negative
## 4 自由 1020 positive
## 5 重要 949 positive
## 6 批評 845 negative
## 7 朋友 759 positive
## 8 相信 623 positive
## 9 安全 594 positive
## 10 遲到 568 negative
## # ... with 938 more rows
word_count_Song %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 115 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 141 positive
## 2 希望 110 positive
## 3 問題 110 negative
## 4 自由 50 positive
## 5 決定 39 positive
## 6 重要 35 positive
## 7 美人 34 positive
## 8 尊重 34 positive
## 9 批評 33 negative
## 10 朋友 27 positive
## # ... with 105 more rows
候選人正負向情緒字 在候選人正向關鍵字中最常出現的為「支持、希望、自由」,負向關鍵字為「問題、批評」多與選舉相關,而在韓國瑜的負向情緒字中出現「遲到」。
word_count_DPP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 609 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 1207 positive
## 2 希望 727 positive
## 3 問題 649 negative
## 4 自由 385 positive
## 5 重要 356 positive
## 6 批評 327 negative
## 7 攻擊 267 negative
## 8 安全 227 positive
## 9 相信 206 positive
## 10 清楚 206 positive
## # ... with 599 more rows
word_count_KMT %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 588 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 970 positive
## 2 希望 608 positive
## 3 問題 533 negative
## 4 自由 332 positive
## 5 批評 295 negative
## 6 決定 293 positive
## 7 重要 287 positive
## 8 安全 249 positive
## 9 相信 199 positive
## 10 和平 173 positive
## # ... with 578 more rows
word_count_NPP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 597 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 1407 positive
## 2 問題 1068 negative
## 3 希望 788 positive
## 4 重要 347 positive
## 5 相信 284 positive
## 6 清楚 283 positive
## 7 違法 259 negative
## 8 決定 256 positive
## 9 朋友 243 positive
## 10 關心 217 positive
## # ... with 587 more rows
word_count_PFP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 64 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 支持 89 positive
## 2 希望 45 positive
## 3 問題 45 negative
## 4 自由 28 positive
## 5 朋友 23 positive
## 6 安全 22 positive
## 7 決定 22 positive
## 8 擔心 21 negative
## 9 美人 20 positive
## 10 才能 16 positive
## # ... with 54 more rows
word_count_TPP %>% inner_join(LIWC)
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
## # A tibble: 861 x 3
## word count sentiment
## <chr> <int> <fct>
## 1 問題 2940 negative
## 2 支持 2186 positive
## 3 希望 1269 positive
## 4 批評 892 negative
## 5 自由 791 positive
## 6 重要 773 positive
## 7 相信 596 positive
## 8 清楚 581 positive
## 9 朋友 518 positive
## 10 喜歡 517 positive
## # ... with 851 more rows
政黨正負向情緒字 政黨的正負向關鍵字跟候選人相似。在親民黨的資料集中出現「美人」此用詞,經分析後發現為該政黨發言人姓名。另外,在各政黨的關鍵字數量中可以發現以民進黨、時代力量、民眾黨較多,推論為該政黨支持者較常使用網路進行討論,以致關鍵字數量高於國民黨、親民黨。
###總統
sentiment_count_Tsai = Tsai_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_Hen = Hen_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_Song = Song_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
###政黨
sentiment_count_DPP = DPP_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_KMT = KMT_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_NPP = NPP_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_PFP = PFP_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_TPP = TPP_data %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count_Tsai %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))
sentiment_count_Hen %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))
sentiment_count_Song %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))
候選人正負向情緒 三位候選人的正負向情緒可以看出多數以正向情緒較高,可推究為選舉期間多數議題雖為爭議性取向,但在選詞上乃為正向。因此,整題觀察下可看到多為正向情緒。另外,蔡英文及韓國瑜較早提出參選計畫,網路討論聲量相較於宋楚瑜較早出現。
sentiment_count_Tsai %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Tsai$artDate == as.Date('2019/08/05'))[1]])),colour = "yellow",size=1)+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Tsai$artDate == as.Date('2020/01/11'))[1]])),colour = "yellow",size=1)
##8月5日負面情緒飆升
蔡英文正負向情緒 2019/8/5柯文哲組新政黨後,指控蔡英文總統身邊的人「每個人都貪汙」,引起與論譁然,總統府也立即要求柯文哲釐清及道歉。因此,當日出現較高的負向情緒。而2020/1/11當日蔡英文贏得該屆總統選舉,並且以高得票率打破先前競選紀錄。因此,正向情緒比負向情緒高出許多。
Tsai_data %>%
filter(artDate == as.Date('2019/08/05')) %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>20) %>% ####過濾出現太少次的字
wordcloud2()
##柯文哲表示蔡英文身邊的人都貪汙
Tsai_plot_0805
sentiment_count_TPP %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Hen$artDate == as.Date('2019/08/05'))
[1]])),colour = "yellow",size=1)
###同日台民黨、柯文哲的討論度也提高
民眾黨正負向情緒 柯文哲組織「台灣民眾黨」搶攻立法院席次,然而是否有意要參選總統、會不會有「郭柯配」、「柯郭配」引起大家關注。以及柯文哲在2019/8/5上午先大罵韓國瑜發大財是喊口號,又批評蔡英文沒貪汙,但身邊每個人都貪汙,甚至砲轟深綠團體是假義和團。使得此日讓民眾黨輿論聲量達到最高峰。
sentiment_count_Hen %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Hen$artDate == as.Date('2019/12/29'))
[1]])),colour = "yellow",size=1)
韓國瑜正負情緒 2019/12/29韓國瑜參與台灣總統大選電視辯論會,以及當日晚上參加台中造勢晚會,在大雨造勢、辯論會火力全開,聲量創一年新高。首度拋出「台灣六塊肌」政策,韓國瑜表示,台灣分為六大區塊,發展不同區域特色與產業,要讓台灣重新被國際社會重視,以及提出「滿天星」計劃,培養年輕人出國交換一年,軍公教警消每年帶職帶薪出國進修。然而,網友認為這些政策內容較為空泛。因此,當日出現負向情緒最高。
Hen_plot_1229
sentiment_count_Song %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_Song$artDate == as.Date('2019/11/12'))
[1]])),colour = "yellow",size=1)
宋楚瑜正負向情緒 2019/11/12宋楚瑜提出參選2020總統,引發網路一片熱議,而對於宋楚瑜5度參選一事,許多網友表示真的是「活到老選到老」、「有選舉那年,就有宋楚瑜,這就是年年有瑜」、「用參選陪伴著我們長大」、「遲到但永不缺席的男子」。讓正向情緒達到高點。
Song_data %>%
filter(artDate == as.Date('2019/11/12')) %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count>10) %>% # 過濾出現太少次的字
wordcloud2()
## 11/13 宋楚瑜宣布參選總統
sentiment_count_TPP %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count_TPP$artDate == as.Date('2019/10/03'))
[1]])),colour = "yellow",size=1)
民眾黨正負向情緒 2019/10/3民眾黨提出推出不分區立委海選計畫,然而區域立委候選人名單被質疑具有「綠營背景」,遭郭台銘拒絕合照。因此,當日的負面情緒較高。
Tsai_data %>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(count = n()) %>%
data.frame() %>%
top_n(30,wt = count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=12))+
coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
Hen_data %>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(count = n()) %>%
data.frame() %>%
top_n(30,wt = count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=12))+
coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
Song_data %>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(count = n()) %>%
data.frame() %>%
top_n(30,wt = count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=12))+
coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
總統候選人標題及留言數分析
## 載入總統候選人標題文
Tsai_article = fread('Tsai_articleMetaData.csv',encoding = 'UTF-8')
Hen_article = fread('Hen_articleMetaData.csv',encoding = 'UTF-8')
commentNum_Tsai <- Tsai_article %>%
filter(!is.na(as.numeric(commentNum)))%>%
group_by(artDate) %>%
summarise(comment_Tsai = sum(as.numeric(commentNum)))
articleNum_Tsai <- Tsai_article %>%
filter(!is.na(as.numeric(commentNum))) %>%
group_by(artDate) %>%
summarise(article_Tsai = n())
commentNum_Hen <- Hen_article %>%
filter(!is.na(as.numeric(commentNum)))%>%
group_by(artDate) %>%
summarise(comment_Hen = sum(as.numeric(commentNum)))
articleNum_Hen <- Hen_article %>%
filter(!is.na(as.numeric(commentNum))) %>%
group_by(artDate) %>%
summarise(article_Hen = n())
commentNum_Hen %>%
inner_join(articleNum_Hen) %>%
inner_join(commentNum_Tsai) %>%
inner_join(articleNum_Tsai) %>%
ggplot()+
geom_line(aes(x=as.Date(artDate),y=(comment_Tsai),color="蔡英文留言數"))+
geom_line(aes(x=as.Date(artDate),y=(comment_Hen),color="韓國瑜留言數"))+
geom_line(aes(x=as.Date(artDate),y=(article_Tsai),color="蔡英文文章數"))+
geom_line(aes(x=as.Date(artDate),y=(article_Hen),color="韓國瑜文章數"))+
scale_colour_manual(values=c("#15851c","#ffb940","#0d31bf","#5dc2fc"))+
scale_x_date(labels = date_format("%m/%d"))+
scale_y_log10()
## Joining, by = "artDate"
## Joining, by = "artDate"
## Joining, by = "artDate"
###韓國瑜文章的留言數明顯多於蔡英文,討論度較高
候選人網路聲量 整體觀察下,網路聲量大小與選舉結果不成正比,韓國瑜的總留言數、總文章數大多超過蔡英文,依舊是政治人物中的聲量王,然而選舉的結果卻是敗選,可推竟網路聲量能居高不下,可能是由爭議造成的負面聲量堆疊而成。
commentNum_Hen %>%
inner_join(articleNum_Hen) %>%
inner_join(commentNum_Tsai) %>%
inner_join(articleNum_Tsai) %>%
ggplot()+
geom_line(aes(x=as.Date(artDate),y=(comment_Tsai / article_Tsai),color="Tsai"))+
geom_line(aes(x=as.Date(artDate),y=(comment_Hen / article_Hen),color="Hen"))+
scale_x_date(labels = date_format("%m/%d"))
## Joining, by = "artDate"
## Joining, by = "artDate"
## Joining, by = "artDate"
###但也因為討論韓國瑜的文章較多,因此韓國瑜文章的平均留言數並沒有比較多
候選人網路聲量 當我們用總言數/總文章數來看,韓國瑜的文章平均留言數並沒有比較多。雖然討論韓國瑜的文章較多,但留言數量並不多。