台灣正式宣布的的第1例新冠肺炎確診,是在今年的1月21日 1. 資料取得及套件載入 載入的資料是由中山大學管理學院文字分析平台取得,在平台資料輸出區塊選擇「文章+詞彙+詞頻」選項,即可取得相同格式之csv檔案。

資料簡介 本資料為2019-09-01~2020-04-22,將PTT1八卦版透過文字分析平台整理,得到2311篇文章。 # 系統參數設定

分析主題 武漢肺炎疫情的影響造成外送平台的訂單增加,以此分析外送平台在疫情爆發的前後聲量和情緒有甚麼重大的變化

Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## Warning in Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8"): 作業系統
## 回報無法實現設定語區為 "zh_TW.UTF-8" 的要求
## [1] ""
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
library(tidytext)
library(wordcloud2)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
## 
##     dcast, melt
library(wordcloud)
## Loading required package: RColorBrewer
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
library(readr)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
require(jiebaR)
## Loading required package: jiebaR
## Loading required package: jiebaRD
require(widyr)
## Loading required package: widyr
require(NLP)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
require(ggraph)
## Loading required package: ggraph
require(igraph)
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union

八卦版

data_gossip = fread('gossip_article.csv',encoding = 'UTF-8')

轉換日期格式

data_gossip$artDate = data_gossip$artDate %>% as.Date("%Y/%m/%d")
data_gossip$word[which(data_gossip$word == "FOODPANDA")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "Foodpanda")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "富胖達")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "food胖達")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "熊貓")] = "foodpanda"

過濾特殊字元

data_gossip = data_gossip %>% 
  filter(!grepl('_',word))
#選取有提到foodpanda的文章
panda_url = data_gossip$artUrl[grepl("foodpanda", data_gossip$word)]

###斷詞

jieba_tokenizer = worker()
stop_words <- c("https", "com", "新聞", "完整", "沒有","有沒有","現在","八卦","jpg","imgur","news","http","內文","htm","ettoday","ETtoday","請問","蘇格蘭","網址","連結","記者","署名","來源","媒體","新聞標題","備註","表示","報導","今天","看到")
#篩選出foodpanda的文章並去除停用字
data_panda <- data_gossip %>% 
  filter(data_gossip$artUrl %in% panda_url)

data_panda <- data_panda %>% 
  filter(!(data_panda$word %in% stop_words))

日期折線圖 計算出每一天文章的發表數量,看出討論「foodpanda」的熱度。 資料處理

#討論篇數最多的前10天
p_date <- data_panda %>% 
  select(artDate, artUrl) %>% 
  distinct()
article_count_by_date <- p_date %>% 
  group_by(artDate) %>% 
  summarise(count = n())
article_count_by_date %>% 
  arrange(desc(count))%>% 
  top_n(20)
## Selecting by count
## # A tibble: 21 x 2
##    artDate    count
##    <date>     <int>
##  1 2019-10-14    61
##  2 2019-10-11    30
##  3 2019-10-15    30
##  4 2019-10-13    29
##  5 2019-10-16    25
##  6 2019-10-12    18
##  7 2020-01-15    16
##  8 2019-10-21    14
##  9 2019-10-17    13
## 10 2020-01-18    12
## # ... with 11 more rows
date_plot <- article_count_by_date %>% 
  ggplot(aes(x = artDate, y = count)) +
  geom_line(color = "purple", size = 1.5)  + 
  scale_x_date(labels = date_format("%Y/%m/%d")) + 
  geom_vline(xintercept = c(as.numeric(as.Date("2019-10-14")),
                            as.numeric(as.Date("2020-01-15")),
                            as.numeric(as.Date("2020-02-14"))), col='red', size = 1) + 
  ggtitle("「foodpanada」討論文章數") + 
  xlab("日期") + 
  ylab("數量") +
  theme(text = element_text(family = "Heiti TC Light"))
date_plot
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#我們可以發現聲量的表現有明顯的三個高峰期

查看三個高峰期,最常出現的詞彙

p_tokens_by_date <- data_panda %>% 
  count(artDate, word, sort = TRUE)
  
plot_merge <- p_tokens_by_date %>% 
  filter(word != "foodpanda"& word != "外送員"& word != "今天"& word != "很多") %>% 
  filter(artDate == as.Date("2019-10-14") | 
         artDate == as.Date("2020-01-15")| 
         artDate == as.Date("2020-02-14")) %>% 
  group_by(artDate) %>% 
  top_n(5, n) %>% 
  ungroup() %>% 
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x=word, y=n, fill = artDate)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  facet_wrap(~artDate, scales="free", ncol = 2) + 
  coord_flip()+
  theme(text = element_text(family = "Heiti TC Light"))
plot_merge
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

計算所有字在文集中的總詞頻

word_count_panda <- data_panda %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count > 50) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_panda 
## # A tibble: 152 x 2
##    word      count
##    <chr>     <int>
##  1 foodpanda  1978
##  2 外送員     1801
##  3 外送       1032
##  4 公司        355
##  5 業者        285
##  6 店家        276
##  7 外送平台    260
##  8 關係        240
##  9 平台        231
## 10 訂單        231
## # ... with 142 more rows
word_count_panda %>% wordcloud2()
#文章對應標題
data_panda %>% filter(artDate == as.Date("2019-10-11")) %>% distinct(data_panda,artTitle)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle
##                                          artTitle
## 1        [新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 2     Re:[新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 3   Re:[問卦]Ubereats484輸給Foodpanda了?<U+2708>
## 4    [問卦]外送現在才發生第1起撞死的484比小黃好了
## 5                         [問卦]foodpanda店內價??
## 6                            [問卦]熊貓外送的八卦
## 7                      Re:[問卦]foodpanda店內價??
## 8  Re:[新聞]【外送搏命1】熊貓外送員拚賺宵夜錢桃園
## 9                      [問卦]foodpanda被刷到剩1星
## 10     [新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 11  Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 12                       [問卦]熊貓外送員被撞死了
## 13                [新聞]外送員車禍foodpanda回應了
## 14             Re:[新聞]外送員車禍foodpanda回應了
## 15                [問卦]ubereat人員素質比較好一些
## 16            [問卦]foodpanda外送人員的騎車技術?
## 17     [新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 18  Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 19            Re:[問卦]真的有人騎腳踏車送外送嗎?
#文章對應標題
data_panda %>% filter(artDate == as.Date("2020-02-14")) %>% distinct(data_panda,artTitle)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle
##                                      artTitle
## 1 [新聞]怕老鳥帶壞新人?Foodpanda「隔離」菜鳥
## 2      [問卦]PChome聯手熊貓速度還可以到多快?
## 3    [問卦]訂ubereats的人是不是很不愛娶餐阿?
## 4     [問卦]遇到會隱形的熊貓人怎麼辦?(在線等
## 5                 [爆卦]熊貓foodpanda薪水詳解
## 6              Re:[爆卦]熊貓foodpanda薪水詳解
#文章對應標題
data_panda %>% filter(artDate == as.Date("2020-01-15")) %>% distinct(data_panda,artTitle)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle
##                                                     artTitle
## 1                           [問卦]熊貓明天罷工,大家會怕嗎?
## 2                               [問卦]熊貓有必要自相殘殺嗎?
## 3                            Re:[問卦]熊貓有必要自相殘殺嗎?
## 4                           [問卦]可憐哪!熊貓外送師被砍獎金
## 5                                   [問卦]熊貓開始罷工了嗎?
## 6                       Re:[問卦]欸欸欸!聽說台灣今天熊貓罷工
## 7                         [問卦]熊貓外送罷工有人被影響到嗎?
## 8                                   [問卦]今天中午能定熊貓嗎
## 9                                Re:[問卦]今天中午能定熊貓嗎
## 10              [新聞]控「年前毀約」!台中60名熊貓外送員市府
## 11    [新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 12                 [新聞]foodpanda罷工實測!點餐後30分就拿到
## 13 Re:[新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 14                [新聞]熊貓變更計薪制度內湖外送員控「變相減
## 15                    [問卦]社會上為何充滿仇視外送員的八卦?

計算 tf-idf

p_tokens_by_art <- data_panda %>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art <- p_tokens_by_art  %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
p_tokens_by_art <- left_join(p_tokens_by_art, p_total_words_by_art)
## Joining, by = "artTitle"
# 以每篇文章爲單位,計算每個詞彙在的tf-idf值
p_words_tf_idf <- p_tokens_by_art %>%
  bind_tf_idf(word, artTitle, n) 
## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.
p_words_tf_idf %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))
## # A tibble: 47,328 x 8
##    artTitle           word   artUrl                  n total     tf   idf tf_idf
##    <chr>              <chr>  <chr>               <int> <int>  <dbl> <dbl>  <dbl>
##  1 [問卦]2019台灣的代表性職業是~ 代表性 https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  2 [問卦]2019台灣的代表性職業是~ 幾萬人 https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  3 [問卦]Foodpanda改制導致~ 上中   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  4 [問卦]foodpanda是不是快~ 五次   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  5 [問卦]foodpanda是不是快~ 拜會   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  6 [問卦]foodpanda是不是快~ 間點   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  7 [問卦]foodpanda是不是快~ 愛用者 https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  8 [問卦]foodpanda是不是快~ 解惑   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  9 [問卦]小黃出事車隊沒事那外送為啥~ 人要   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
## 10 [問卦]小黃出事車隊沒事那外送為啥~ 給司   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
## # ... with 47,318 more rows

將同一篇的斷詞整理在一起

data_full = data_panda %>%
                group_by(artUrl) %>% 
                summarise(sentence = paste0(word, collapse = ""))
# remove stopwords
# unnest_tokens 使用的bigram分詞函數
# Input: a character vector
# Output: a list of character vectors of the same length
jieba_bigram <- function(t) {
  lapply(t, function(x) {
    if(nchar(x)>1){
      tokens <- segment(x, jieba_tokenizer)
      bigram<- ngrams(tokens, 2)
      bigram <- lapply(bigram, paste, collapse = " ")
      unlist(bigram)
    }
  })
}
# 執行bigram分詞
panda_bigram <- data_full %>%
  unnest_tokens(bigram, sentence, token = jieba_bigram)
panda_bigram
## # A tibble: 50,473 x 2
##    artUrl                                                   bigram          
##    <chr>                                                    <chr>           
##  1 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 凌晨 板橋       
##  2 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 板橋 一兩間     
##  3 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 一兩間 地方     
##  4 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 地方 沒想到     
##  5 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 沒想到 三點     
##  6 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 三點 foodpanda  
##  7 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html foodpanda 摩托車
##  8 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 摩托車 騎       
##  9 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 騎 路上         
## 10 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 路上 紅綠燈     
## # ... with 50,463 more rows
panda_bigram %>%
  count(bigram, sort = TRUE)
## # A tibble: 46,658 x 2
##    bigram               n
##    <chr>            <int>
##  1 外送 平台          126
##  2 為 什麼             49
##  3 foodpanda 外送      46
##  4 也 不               40
##  5 foodpanda 外送員    33
##  6 更 多               25
##  7 外送 foodpanda      23
##  8 外送員 foodpanda    23
##  9 美食 外送           20
## 10 擦 撞               20
## # ... with 46,648 more rows
panda_bigram
## # A tibble: 50,473 x 2
##    artUrl                                                   bigram          
##    <chr>                                                    <chr>           
##  1 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 凌晨 板橋       
##  2 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 板橋 一兩間     
##  3 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 一兩間 地方     
##  4 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 地方 沒想到     
##  5 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 沒想到 三點     
##  6 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 三點 foodpanda  
##  7 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html foodpanda 摩托車
##  8 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 摩托車 騎       
##  9 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 騎 路上         
## 10 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 路上 紅綠燈     
## # ... with 50,463 more rows
# 計算兩個詞彙同時出現的總次數
word_pairs <- p_tokens_by_art %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs 
## # A tibble: 4,815,650 x 3
##    item1     item2         n
##    <chr>     <chr>     <dbl>
##  1 外送      foodpanda   391
##  2 foodpanda 外送        391
##  3 外送員    foodpanda   373
##  4 foodpanda 外送員      373
##  5 外送員    外送        230
##  6 外送      外送員      230
##  7 公司      foodpanda   150
##  8 foodpanda 公司        150
##  9 真的      foodpanda   138
## 10 foodpanda 真的        138
## # ... with 4,815,640 more rows
# 計算兩個詞彙間的相關性
word_cors <- p_tokens_by_art%>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)
# 顯示相關性大於0.5的組合
set.seed(2020)

word_cors %>%
  filter(correlation > 0.5) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定,避免中文字顯示錯誤。
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# 設定幾個詞做爲seed words
seed_words <- c("udn", "來源","連結","網址","備註","內文","www","自由","針對","進行")
# 設定threshold爲0.5
threshold <- 0.5
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
remove_words
## [1] "udn"          "story"        "聯合報"       "ltn"          "自由"        
## [6] "net"          "www"          "breakingnews"
# 清除存在這些詞彙的組合
word_cors_new <- word_cors %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))

word_cors_new %>%
  filter(correlation > 0.4) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

因為只用bigram看不出前後文之間的insight,所以取前後五個做為觀察

# 執行ngram_11分詞
ngram_11 <- function(t) {
  lapply(t, function(x) {
    tokens <- segment(x, jieba_tokenizer)
    ngram <- ngrams(tokens, 11)
    ngram <- lapply(ngram, paste, collapse = " ")
    unlist(ngram)
  })
}
p_ngram_11<- data_full %>%
  unnest_tokens(ngram,sentence, token = ngram_11)

p_ngrams_11_separated <-p_ngram_11  %>%
  separate(ngram, paste0("word", c(1:11),sep=""), sep = " ")
p_ngrams_11_separated
## # A tibble: 43,007 x 12
##    artUrl   word1  word2 word3 word4 word5 word6 word7 word8 word9 word10 word11
##    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>  <chr> 
##  1 https:/~ 凌晨   板橋  一兩間~ 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上   紅綠燈
##  2 https:/~ 板橋   一兩間~ 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈 哇    
##  3 https:/~ 一兩間 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈~ 哇     塞外  
##  4 https:/~ 地方   沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外   送    
##  5 https:/~ 沒想到 三點  food~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外  送     東湖  
##  6 https:/~ 三點   food~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外  送    東湖   是否  
##  7 https:/~ foodp~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外  送    東湖  是否   專    
##  8 https:/~ 摩托車 騎    路上  紅綠燈~ 哇    塞外  送    東湖  是否  專     板本  
##  9 https:/~ 騎     路上  紅綠燈~ 哇    塞外  送    東湖  是否  專    板本   板    
## 10 https:/~ 路上   紅綠燈~ 哇    塞外  送    東湖  是否  專    板本  板     並非  
## # ... with 42,997 more rows
p_check_words <- p_ngrams_11_separated %>%
  filter((word6 == "foodpanda"))
p_check_words
## # A tibble: 377 x 12
##    artUrl    word1 word2 word3 word4 word5 word6 word7 word8 word9 word10 word11
##    <chr>     <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>  <chr> 
##  1 https://~ 板橋  一兩間~ 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈 哇    
##  2 https://~ 阿婆  同樣  身材  不知  餐點  food~ 好像  年輕人~ 粉紅  袖套   感覺  
##  3 https://~ 車手  來回  剛剛  時間  到達  food~ 初體驗~ 下雨  免運  訂單   預計  
##  4 https://~ 台中市~ 警察局~ 交通  警方  台中  food~ 時間  衝撞  大隊  近來   多起  
##  5 https://~ 阿等  那麼  再給  不爽  直接  food~ 有免  運費  頓時  覺得   廢到  
##  6 https://~ cdn2~ 態度  激辯  要求  傳訊  food~ 送上來~ 外送  food~ 第一次 碰到  
##  7 https://~ 要求  傳訊  food~ 送上來~ 外送  food~ 第一次~ 碰到  顧客  直接   遇到  
##  8 https://~ 現場  知道  每次  停車場~ 三台  food~ 優勢  不是  便宜  這點   漲到  
##  9 https://~ 白跑一趟~ 風險  上到  專櫃  裝成  food~ 戶戶  上去  以上  例子   合理  
## 10 https://~ 引發  熱烈  討論  回應  之前  food~ 幫送  知道  送上來~ 感謝   真的  
## # ... with 367 more rows
p_check_words_count <- p_check_words %>%
  melt(id.vars = "artUrl", measure.vars = paste0("word", c(1:11),sep="")) %>%
  rename(word=value) %>%
  filter(variable!="word6") %>%
  filter(!(word %in% stop_words), nchar(word)>1) %>%
  count(word, sort = TRUE)
p_check_words_count %>%
  arrange(desc(abs(n))) %>%
  head(15) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = n > 0)) +
  geom_col(show.legend = FALSE) +
  xlab("出現在「foodpanda」附近的字") +
  ylab("出現次數") +
  coord_flip()+ 
  theme(text = element_text(family = "Heiti TC Light"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

以LIWC字典判斷文集中的word屬於正面字還是負面字

# 正向字典txt檔
# 以,將字分隔
P <- read_file("./positive.txt")

# 負向字典txt檔
N <- read_file("./negative.txt")
#將字串依,分割
#strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]

# 建立dataframe 有兩個欄位word,sentiments,word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")
LIWC = rbind(P, N)

#以LIWC情緒字典分析

統計每天的文章正面字的次數與負面字的次數

sentiment_count = data_panda %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count %>% 
  arrange(desc(count))
## # A tibble: 302 x 3
## # Groups:   artDate [170]
##    artDate    sentiment count
##    <date>     <fct>     <int>
##  1 2019-10-14 negative    178
##  2 2019-10-15 negative    133
##  3 2019-10-14 positive    132
##  4 2019-10-11 negative    112
##  5 2019-10-15 positive     88
##  6 2019-10-13 negative     84
##  7 2020-02-12 negative     75
##  8 2019-10-11 positive     66
##  9 2019-10-16 negative     66
## 10 2019-10-16 positive     60
## # ... with 292 more rows
sentiment_count %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/14'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/02/12'))
[1]])),colour = "red") 

#geom_vline畫出vertical line,xintercept告訴他要在artDate欄位的哪一個row畫線
data_panda %>% 
  filter(artDate == as.Date('2019/10/14')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                                          artTitle
## 1                             [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2                                 Re:[問卦]其他外送員現在在想什麼
## 3                                     Re:[問卦]熊貓到底靠什麼賺錢
## 4                      [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5                             [問卦]北一女以後叫不到熊貓要吃什麼?
## 6                          Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7                    Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8                              [問卦]農地工廠不罰Uber熊貓罰很大?
## 9                         [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10                   [新聞]外送平台讓生意爆紅?業者親揭殘酷真相「
## 11                    [新聞]再傳外送員車禍!Foodpanda遭闖紅燈貨車
## 12                    Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13                            [問卦]現在風向484外送車禍=外送員錯?
## 14                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15                    [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16                                         [問卦]大家支持熊貓法嗎
## 17                    [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18                              Re:[問卦]foodpanda是不是擋人財路?
## 19                       [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20                       [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21                               [問卦]台灣外送的會車禍很正常吧?
## 22                      [新聞]今日第三起!雲林Foodpanda外送員車禍
## 23                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24                     [新聞]外送員屬僱傭關係健保署也加入戰場:得
## 25                         [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26                            [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27                             [新聞]勞部研擬外送員強制納保意外險
## 28                         [新聞]5天3死!外送員撞死違規過馬路老翁
## 29                               [問卦]所以現在外送員要加勞健保了
## 30                       [問卦]慣老闆沒有奴工用開始搞外送業的卦?
## 31                      [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 33                               [問卦]其實熊貓成立工會不就好惹??
## 34                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35                          Re:[新聞]勞部研擬外送員強制納保意外險
## 36                     [新聞]美食外送為僱傭律師:用牛刀管理將打擊
## 37                                         [問卦]外送為什麼被針對
## 38                                [問卦]給張麻子送foodpanda會怎樣
## 39                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40                    [新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 41                  Re:[新聞]外送員列入雇傭關係後月入10萬夢碎? 
## 42                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 43                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 44                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 45                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 46                         [新聞]外送爭議蘇貞昌:勞檢不再憂讒畏譏
## 47                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 48                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 49                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 50                                   [問卦]搞熊貓最後受傷的是經濟
## 51                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 52                   [新聞]外送事故5天3死!Lalamove外送員撞死違規
## 53                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 54                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 55                                   [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓! 勞檢驚揭業者兩點內規認證「無良
## 57                             [問卦]外送三雄大家偏好哪個????
## 58                    Re:[問卦]其實把外送搞「純平台」不就解決了?
## 59                     [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60                   [新聞]又見外送事故!外送員疑搶黃燈遭「闖紅燈
## 61                    Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓
data_panda %>% 
  filter(artDate == as.Date('2020/02/12')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                       artTitle
## 1   [問卦]居家自主隔離但是叫熊貓,會不會有問題
## 2 [新聞]雲林惡客狂棄訂單 20名熊貓外送員同時被
## 3 [新聞]影/「黃千千」訂的!雲林foodpanda出現2
## 4 [新聞]熊貓外送同時遭惡作劇 10餘外送員荒地「
## 5   [新聞]雲林foodpanda惡意棄單3天逾百件20外送

武漢肺炎前後期聲量比較

date_plot <- article_count_by_date %>% 
  ggplot(aes(x = artDate, y = count)) +
  geom_line(color = "purple", size = 1.5)  + 
  scale_x_date(labels = date_format("%Y/%m/%d")) + 
  geom_vline(xintercept = c(as.numeric(as.Date("2020-01-21"))), col='red', size = 1) + 
  ggtitle("「foodpanada」討論文章數") + 
  xlab("日期") + 
  ylab("數量") +
  theme(text = element_text(family = "Heiti TC Light"))
date_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#我們可以發現武漢肺炎之後foodpanda聲量也沒有特別提升
word_count_panda_new2 <- data_panda %>% 
  filter(artDate < as.Date("2020-01-21")) %>%
  select(word,count) %>% 
  group_by(word)%>% 
  summarise(count = sum(count))  %>%
  filter(count > 50) %>%  # 過濾出現太少次的字
  arrange(desc(count))%>% 
  filter(word !="外送"& word !="外送員")
word_count_panda_new2  
## # A tibble: 120 x 2
##    word      count
##    <chr>     <int>
##  1 foodpanda  1581
##  2 公司        313
##  3 業者        252
##  4 關係        235
##  5 外送平台    225
##  6 承攬        217
##  7 平台        202
##  8 店家        199
##  9 發生        198
## 10 機車        196
## # ... with 110 more rows
word_count_panda_new2 %>% wordcloud2()
word_count_panda_new2 <- data_panda %>% 
  filter(artDate >= as.Date("2020-01-21")) %>%
  select(word,count) %>% 
  group_by(word)%>% 
  summarise(count = sum(count))  %>%
  filter(count > 20) %>%  # 過濾出現太少次的字
  arrange(desc(count))%>% 
  filter(word !="外送"& word !="外送員")
word_count_panda_new2  
## # A tibble: 34 x 2
##    word      count
##    <chr>     <int>
##  1 foodpanda   397
##  2 訂單         82
##  3 店家         77
##  4 公司         42
##  5 訂餐         42
##  6 餐點         39
##  7 外送平台     35
##  8 業者         33
##  9 惡意         32
## 10 大家         30
## # ... with 24 more rows
word_count_panda_new2 %>% wordcloud2()

計算 tf-idf

p_tokens_by_art_new1 <- data_panda%>% 
  filter(artDate < as.Date("2020-01-21"))%>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art_new1 <-p_tokens_by_art_new1   %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
p_tokens_by_art_new1 <- left_join(p_tokens_by_art_new1, p_total_words_by_art_new1)
## Joining, by = "artTitle"
# 以每篇文章爲單位,計算每個詞彙在的tf-idf值
p_words_tf_idf_new1 <- p_tokens_by_art_new1 %>%
  bind_tf_idf(word, artTitle, n) 
## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.
p_words_tf_idf_new1 %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))
## # A tibble: 40,301 x 8
##    artTitle          word  artUrl                    n total     tf   idf tf_idf
##    <chr>             <chr> <chr>                 <int> <int>  <dbl> <dbl>  <dbl>
##  1 [問卦]2019台灣的代表性職業~ 代表性~ https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  2 [問卦]2019台灣的代表性職業~ 幾萬人~ https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  3 [問卦]Foodpanda改制導~ Band  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  4 [問卦]Foodpanda改制導~ 上中  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  5 [問卦]foodpanda是不是~ 五次  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  6 [問卦]foodpanda是不是~ 拜會  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  7 [問卦]foodpanda是不是~ 間點  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  8 [問卦]foodpanda是不是~ 愛用者~ https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  9 [問卦]foodpanda是不是~ 解惑  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
## 10 [問卦]Ubereats要國外交~ 帳單  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
## # ... with 40,291 more rows
# 計算兩個詞彙同時出現的總次數
word_pairs_new1 <- p_tokens_by_art_new1 %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs_new1
## # A tibble: 4,240,106 x 3
##    item1     item2         n
##    <chr>     <chr>     <dbl>
##  1 外送      foodpanda   323
##  2 foodpanda 外送        323
##  3 外送員    foodpanda   311
##  4 foodpanda 外送員      311
##  5 外送員    外送        196
##  6 外送      外送員      196
##  7 公司      foodpanda   130
##  8 foodpanda 公司        130
##  9 外送平台  foodpanda   114
## 10 foodpanda 外送平台    114
## # ... with 4,240,096 more rows
# 計算兩個詞彙間的相關性
word_cors_new1<- p_tokens_by_art_new1%>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)
# 顯示相關性大於0.5的組合
set.seed(2020)

word_cors_new1 %>%
  filter(correlation > 0.5) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定,避免中文字顯示錯誤。
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# 設定幾個詞做爲seed words
seed_words <- c("udn", "自由","net","story","針對")
# 設定threshold爲0.5
threshold <- 0.5
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
remove_words
## [1] "udn"          "story"        "聯合報"       "ltn"          "自由"        
## [6] "net"          "www"          "breakingnews" "透露"
# 清除存在這些詞彙的組合
word_cors_new_new1 <- word_cors_new1 %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))

word_cors_new1 %>%
  filter(correlation > 0.4) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

後期

p_tokens_by_art_new2 <- data_panda%>% 
  filter(artDate >= as.Date("2020-01-21"))%>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art_new2 <-p_tokens_by_art_new2   %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
p_tokens_by_art_new2 <- left_join(p_tokens_by_art_new2, p_total_words_by_art_new2)
## Joining, by = "artTitle"
# 以每篇文章爲單位,計算每個詞彙在的tf-idf值
p_words_tf_idf_new2 <- p_tokens_by_art_new2 %>%
  bind_tf_idf(word, artTitle, n) 
## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.
p_words_tf_idf_new2 %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))
## # A tibble: 7,027 x 8
##    artTitle        word   artUrl                     n total     tf   idf tf_idf
##    <chr>           <chr>  <chr>                  <int> <int>  <dbl> <dbl>  <dbl>
##  1 [問卦]民雄要有foodpa~ ayi    https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  2 [問卦]民雄要有foodpa~ bbs    https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  3 [問卦]民雄要有foodpa~ html   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  4 [問卦]民雄要有foodpa~ ptt    https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  5 [問卦]民雄要有foodpa~ 大學   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  6 [問卦]民雄要有foodpa~ 中正   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  7 [問卦]民雄要有foodpa~ 太神   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  8 [問卦]民雄要有foodpa~ 民雄連 https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  9 [問卦]民雄要有foodpa~ 挖靠   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
## 10 [問卦]民雄要有foodpa~ 發抖   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
## # ... with 7,017 more rows
# 計算兩個詞彙同時出現的總次數
word_pairs_new2 <- p_tokens_by_art_new2 %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs_new2
## # A tibble: 712,868 x 3
##    item1     item2         n
##    <chr>     <chr>     <dbl>
##  1 外送      foodpanda    68
##  2 foodpanda 外送         68
##  3 外送員    foodpanda    62
##  4 foodpanda 外送員       62
##  5 外送員    外送         34
##  6 外送      外送員       34
##  7 訂單      foodpanda    28
##  8 foodpanda 訂單         28
##  9 店家      foodpanda    27
## 10 真的      foodpanda    27
## # ... with 712,858 more rows
# 計算兩個詞彙間的相關性
word_cors_new2<- p_tokens_by_art_new2%>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)
word_cors_new2
## # A tibble: 132 x 3
##    item1     item2     correlation
##    <chr>     <chr>           <dbl>
##  1 有人      foodpanda     Inf    
##  2 大家      foodpanda     Inf    
##  3 foodpanda 有人          Inf    
##  4 foodpanda 大家          Inf    
##  5 餐點      外送員          0.350
##  6 外送員    餐點            0.350
##  7 外送員    公司            0.295
##  8 公司      外送員          0.295
##  9 店家      訂單            0.287
## 10 訂單      店家            0.287
## # ... with 122 more rows
# 顯示相關性大於0.5的組合
set.seed(2020)

word_cors_new2 %>%
  filter(correlation > 0.1) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定,避免中文字顯示錯誤。
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# 設定幾個詞做爲seed words
seed_words <- c("大家", "有人","不是")
# 設定threshold爲0.5
threshold <- 0.17
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
remove_words
##  [1] "問題" "不是" "勞工" "勞動" "PO"   "真的" "承攬" "有人" "討論" "知道"
## [11] "僱傭"
# 清除存在這些詞彙的組合
word_cors_new_new2 <- word_cors_new2 %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))

word_cors_new2 %>%
  filter(correlation > 0.1) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

武漢肺炎前後期情緒比較

sentiment_count %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/01/21'))
[1]])),colour = "red")

#geom_vline畫出vertical line,xintercept告訴他要在artDate欄位的哪一個row畫線
sentiment_count %>% 
  filter(artDate < as.Date("2020-01-21"))%>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/14'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/11'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/01/15'))
[1]])),colour = "red") 

#geom_vline畫出vertical line,xintercept告訴他要在artDate欄位的哪一個row畫線
data_panda %>% 
  filter(artDate == as.Date('2019/10/11')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                          artTitle
## 1        [新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 2     Re:[新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 3     Re:[新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 4   Re:[問卦]Ubereats484輸給Foodpanda了?<U+2708>
## 5    [問卦]外送現在才發生第1起撞死的484比小黃好了
## 6                         [問卦]foodpanda店內價??
## 7                            [問卦]熊貓外送的八卦
## 8                      Re:[問卦]foodpanda店內價??
## 9  Re:[新聞]【外送搏命1】熊貓外送員拚賺宵夜錢桃園
## 10                     [問卦]foodpanda被刷到剩1星
## 11     [新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 12  Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 13  Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 14                       [問卦]熊貓外送員被撞死了
## 15  Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 16  Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 17                [新聞]外送員車禍foodpanda回應了
## 18             Re:[新聞]外送員車禍foodpanda回應了
## 19                [問卦]ubereat人員素質比較好一些
## 20             Re:[新聞]外送員車禍foodpanda回應了
## 21     [新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 22             Re:[新聞]外送員車禍foodpanda回應了
## 23             Re:[新聞]外送員車禍foodpanda回應了
## 24             Re:[新聞]外送員車禍foodpanda回應了
## 25            [問卦]foodpanda外送人員的騎車技術?
## 26     [新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 27  Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 28  Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 29  Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 30            Re:[問卦]真的有人騎腳踏車送外送嗎?
data_panda %>% 
  filter(artDate == as.Date('2019/10/14')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                                          artTitle
## 1                             [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2                                 Re:[問卦]其他外送員現在在想什麼
## 3                                     Re:[問卦]熊貓到底靠什麼賺錢
## 4                      [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5                             [問卦]北一女以後叫不到熊貓要吃什麼?
## 6                          Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7                    Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8                              [問卦]農地工廠不罰Uber熊貓罰很大?
## 9                         [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10                   [新聞]外送平台讓生意爆紅?業者親揭殘酷真相「
## 11                    [新聞]再傳外送員車禍!Foodpanda遭闖紅燈貨車
## 12                    Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13                            [問卦]現在風向484外送車禍=外送員錯?
## 14                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15                    [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16                                         [問卦]大家支持熊貓法嗎
## 17                    [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18                              Re:[問卦]foodpanda是不是擋人財路?
## 19                       [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20                       [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21                               [問卦]台灣外送的會車禍很正常吧?
## 22                      [新聞]今日第三起!雲林Foodpanda外送員車禍
## 23                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24                     [新聞]外送員屬僱傭關係健保署也加入戰場:得
## 25                         [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26                            [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27                             [新聞]勞部研擬外送員強制納保意外險
## 28                         [新聞]5天3死!外送員撞死違規過馬路老翁
## 29                               [問卦]所以現在外送員要加勞健保了
## 30                       [問卦]慣老闆沒有奴工用開始搞外送業的卦?
## 31                      [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 33                               [問卦]其實熊貓成立工會不就好惹??
## 34                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35                          Re:[新聞]勞部研擬外送員強制納保意外險
## 36                     [新聞]美食外送為僱傭律師:用牛刀管理將打擊
## 37                                         [問卦]外送為什麼被針對
## 38                                [問卦]給張麻子送foodpanda會怎樣
## 39                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40                    [新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 41                  Re:[新聞]外送員列入雇傭關係後月入10萬夢碎? 
## 42                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 43                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 44                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 45                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 46                         [新聞]外送爭議蘇貞昌:勞檢不再憂讒畏譏
## 47                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 48                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 49                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 50                                   [問卦]搞熊貓最後受傷的是經濟
## 51                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 52                   [新聞]外送事故5天3死!Lalamove外送員撞死違規
## 53                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 54                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 55                                   [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓! 勞檢驚揭業者兩點內規認證「無良
## 57                             [問卦]外送三雄大家偏好哪個????
## 58                    Re:[問卦]其實把外送搞「純平台」不就解決了?
## 59                     [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60                   [新聞]又見外送事故!外送員疑搶黃燈遭「闖紅燈
## 61                    Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓
data_panda %>% 
  filter(artDate == as.Date('2020/01/15')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                                     artTitle
## 1                           [問卦]熊貓明天罷工,大家會怕嗎?
## 2                               [問卦]熊貓有必要自相殘殺嗎?
## 3                            Re:[問卦]熊貓有必要自相殘殺嗎?
## 4                           [問卦]可憐哪!熊貓外送師被砍獎金
## 5                                   [問卦]熊貓開始罷工了嗎?
## 6                       Re:[問卦]欸欸欸!聽說台灣今天熊貓罷工
## 7                         [問卦]熊貓外送罷工有人被影響到嗎?
## 8                                   [問卦]今天中午能定熊貓嗎
## 9                                Re:[問卦]今天中午能定熊貓嗎
## 10              [新聞]控「年前毀約」!台中60名熊貓外送員市府
## 11    [新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 12                 [新聞]foodpanda罷工實測!點餐後30分就拿到
## 13 Re:[新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 14 Re:[新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 15                [新聞]熊貓變更計薪制度內湖外送員控「變相減
## 16                    [問卦]社會上為何充滿仇視外送員的八卦?
sentiment_count %>% 
  filter(artDate >= as.Date("2020-01-21"))%>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/02/12'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/04/10'))
[1]])),colour = "red")
## Warning: Removed 90 rows containing missing values (geom_vline).

## Warning: Removed 90 rows containing missing values (geom_vline).

data_panda %>% 
  filter(artDate == as.Date('2019/10/14')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                                          artTitle
## 1                             [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2                                 Re:[問卦]其他外送員現在在想什麼
## 3                                     Re:[問卦]熊貓到底靠什麼賺錢
## 4                      [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5                             [問卦]北一女以後叫不到熊貓要吃什麼?
## 6                          Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7                    Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8                              [問卦]農地工廠不罰Uber熊貓罰很大?
## 9                         [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10                   [新聞]外送平台讓生意爆紅?業者親揭殘酷真相「
## 11                    [新聞]再傳外送員車禍!Foodpanda遭闖紅燈貨車
## 12                    Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13                            [問卦]現在風向484外送車禍=外送員錯?
## 14                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15                    [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16                                         [問卦]大家支持熊貓法嗎
## 17                    [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18                              Re:[問卦]foodpanda是不是擋人財路?
## 19                       [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20                       [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21                               [問卦]台灣外送的會車禍很正常吧?
## 22                      [新聞]今日第三起!雲林Foodpanda外送員車禍
## 23                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24                     [新聞]外送員屬僱傭關係健保署也加入戰場:得
## 25                         [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26                            [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27                             [新聞]勞部研擬外送員強制納保意外險
## 28                         [新聞]5天3死!外送員撞死違規過馬路老翁
## 29                               [問卦]所以現在外送員要加勞健保了
## 30                       [問卦]慣老闆沒有奴工用開始搞外送業的卦?
## 31                      [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 33                               [問卦]其實熊貓成立工會不就好惹??
## 34                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35                          Re:[新聞]勞部研擬外送員強制納保意外險
## 36                     [新聞]美食外送為僱傭律師:用牛刀管理將打擊
## 37                                         [問卦]外送為什麼被針對
## 38                                [問卦]給張麻子送foodpanda會怎樣
## 39                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40                    [新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 41                  Re:[新聞]外送員列入雇傭關係後月入10萬夢碎? 
## 42                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 43                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 44                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 45                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 46                         [新聞]外送爭議蘇貞昌:勞檢不再憂讒畏譏
## 47                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 48                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 49                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 50                                   [問卦]搞熊貓最後受傷的是經濟
## 51                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 52                   [新聞]外送事故5天3死!Lalamove外送員撞死違規
## 53                 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 54                      Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 55                                   [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓! 勞檢驚揭業者兩點內規認證「無良
## 57                             [問卦]外送三雄大家偏好哪個????
## 58                    Re:[問卦]其實把外送搞「純平台」不就解決了?
## 59                     [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60                   [新聞]又見外送事故!外送員疑搶黃燈遭「闖紅燈
## 61                    Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓
data_panda %>% 
  filter(artDate == as.Date('2020/04/10')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)
##                                      artTitle
## 1 [新聞]安心配送再升級!foodpanda發萬瓶酒精給
## 2   [新聞]5業者組外送國家隊救餐飲!UberEats、
## 3              [問卦]外送員也適用紓困方案嗎?
data_panda %>% 
  filter(artDate >= as.Date("2020-01-21")) %>% 
  distinct(artUrl, .keep_all = TRUE) %>%filter(word =="武漢"|word =="肺炎"|word =="武漢肺炎"|word =="COVID-19"|word =="covid19"|word =="covid-19")%>% 
  select(artTitle)
## [1] artTitle
## <0 rows> (or 0-length row.names)