外送平台文字分析

台灣正式宣布的的第1例新冠肺炎確診，是在今年的1月21日 1. 資料取得及套件載入載入的資料是由中山大學管理學院文字分析平台取得，在平台資料輸出區塊選擇「文章+詞彙+詞頻」選項，即可取得相同格式之csv檔案。

資料簡介本資料為2019-09-01～2020-04-22，將PTT1八卦版透過文字分析平台整理，得到2311篇文章。 # 系統參數設定

分析主題武漢肺炎疫情的影響造成外送平台的訂單增加，以此分析外送平台在疫情爆發的前後聲量和情緒有甚麼重大的變化

Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼

## Warning in Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8"): 作業系統
## 回報無法實現設定語區為 "zh_TW.UTF-8" 的要求

## [1] ""

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)
library(tidytext)
library(wordcloud2)
library(data.table)

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

library(ggplot2)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following objects are masked from 'package:data.table':
## 
##     dcast, melt

library(wordcloud)

## Loading required package: RColorBrewer

library(tidyr)

## 
## Attaching package: 'tidyr'

## The following object is masked from 'package:reshape2':
## 
##     smiths

library(readr)
library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:readr':
## 
##     col_factor

require(jiebaR)

## Loading required package: jiebaR

## Loading required package: jiebaRD

require(widyr)

## Loading required package: widyr

require(NLP)

## Loading required package: NLP

## 
## Attaching package: 'NLP'

## The following object is masked from 'package:ggplot2':
## 
##     annotate

require(ggraph)

## Loading required package: ggraph

require(igraph)

## Loading required package: igraph

## 
## Attaching package: 'igraph'

## The following object is masked from 'package:tidyr':
## 
##     crossing

## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

八卦版

data_gossip = fread('gossip_article.csv',encoding = 'UTF-8')

轉換日期格式

data_gossip$artDate = data_gossip$artDate %>% as.Date("%Y/%m/%d")

data_gossip$word[which(data_gossip$word == "FOODPANDA")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "Foodpanda")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "富胖達")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "food胖達")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "熊貓")] = "foodpanda"

過濾特殊字元

data_gossip = data_gossip %>% 
  filter(!grepl('_',word))

#選取有提到foodpanda的文章
panda_url = data_gossip$artUrl[grepl("foodpanda", data_gossip$word)]

###斷詞

jieba_tokenizer = worker()
stop_words <- c("https", "com", "新聞", "完整", "沒有","有沒有","現在","八卦","jpg","imgur","news","http","內文","htm","ettoday","ETtoday","請問","蘇格蘭","網址","連結","記者","署名","來源","媒體","新聞標題","備註","表示","報導","今天","看到")

#篩選出foodpanda的文章並去除停用字
data_panda <- data_gossip %>% 
  filter(data_gossip$artUrl %in% panda_url)

data_panda <- data_panda %>% 
  filter(!(data_panda$word %in% stop_words))

日期折線圖計算出每一天文章的發表數量，看出討論「foodpanda」的熱度。資料處理

#討論篇數最多的前10天
p_date <- data_panda %>% 
  select(artDate, artUrl) %>% 
  distinct()
article_count_by_date <- p_date %>% 
  group_by(artDate) %>% 
  summarise(count = n())
article_count_by_date %>% 
  arrange(desc(count))%>% 
  top_n(20)

## Selecting by count

## # A tibble: 21 x 2
##    artDate    count
##    <date>     <int>
##  1 2019-10-14    61
##  2 2019-10-11    30
##  3 2019-10-15    30
##  4 2019-10-13    29
##  5 2019-10-16    25
##  6 2019-10-12    18
##  7 2020-01-15    16
##  8 2019-10-21    14
##  9 2019-10-17    13
## 10 2020-01-18    12
## # ... with 11 more rows

date_plot <- article_count_by_date %>% 
  ggplot(aes(x = artDate, y = count)) +
  geom_line(color = "purple", size = 1.5)  + 
  scale_x_date(labels = date_format("%Y/%m/%d")) + 
  geom_vline(xintercept = c(as.numeric(as.Date("2019-10-14")),
                            as.numeric(as.Date("2020-01-15")),
                            as.numeric(as.Date("2020-02-14"))), col='red', size = 1) + 
  ggtitle("「foodpanada」討論文章數") + 
  xlab("日期") + 
  ylab("數量") +
  theme(text = element_text(family = "Heiti TC Light"))
date_plot

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#我們可以發現聲量的表現有明顯的三個高峰期

查看三個高峰期，最常出現的詞彙

p_tokens_by_date <- data_panda %>% 
  count(artDate, word, sort = TRUE)
  
plot_merge <- p_tokens_by_date %>% 
  filter(word != "foodpanda"& word != "外送員"& word != "今天"& word != "很多") %>% 
  filter(artDate == as.Date("2019-10-14") | 
         artDate == as.Date("2020-01-15")| 
         artDate == as.Date("2020-02-14")) %>% 
  group_by(artDate) %>% 
  top_n(5, n) %>% 
  ungroup() %>% 
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x=word, y=n, fill = artDate)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  facet_wrap(~artDate, scales="free", ncol = 2) + 
  coord_flip()+
  theme(text = element_text(family = "Heiti TC Light"))
plot_merge

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

計算所有字在文集中的總詞頻

word_count_panda <- data_panda %>%
  select(word,count) %>% 
  group_by(word) %>% 
  summarise(count = sum(count))  %>%
  filter(count > 50) %>%  # 過濾出現太少次的字
  arrange(desc(count))
word_count_panda

## # A tibble: 152 x 2
##    word      count
##    <chr>     <int>
##  1 foodpanda  1978
##  2 外送員     1801
##  3 外送       1032
##  4 公司        355
##  5 業者        285
##  6 店家        276
##  7 外送平台    260
##  8 關係        240
##  9 平台        231
## 10 訂單        231
## # ... with 142 more rows

word_count_panda %>% wordcloud2()

#文章對應標題
data_panda %>% filter(artDate == as.Date("2019-10-11")) %>% distinct(data_panda,artTitle)

## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle

##                                          artTitle
## 1        [新聞]女熊貓外送上樓！見宅男開門快被嚇死
## 2     Re:[新聞]女熊貓外送上樓！見宅男開門快被嚇死
## 3   Re:[問卦]Ubereats484輸給Foodpanda了？<U+2708>
## 4    [問卦]外送現在才發生第1起撞死的484比小黃好了
## 5                         [問卦]foodpanda店內價??
## 6                            [問卦]熊貓外送的八卦
## 7                      Re:[問卦]foodpanda店內價??
## 8  Re:[新聞]【外送搏命1】熊貓外送員拚賺宵夜錢桃園
## 9                      [問卦]foodpanda被刷到剩1星
## 10     [新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 11  Re:[新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 12                       [問卦]熊貓外送員被撞死了
## 13                [新聞]外送員車禍foodpanda回應了
## 14             Re:[新聞]外送員車禍foodpanda回應了
## 15                [問卦]ubereat人員素質比較好一些
## 16            [問卦]foodpanda外送人員的騎車技術？
## 17     [新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 18  Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 19            Re:[問卦]真的有人騎腳踏車送外送嗎？

#文章對應標題
data_panda %>% filter(artDate == as.Date("2020-02-14")) %>% distinct(data_panda,artTitle)

## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle

##                                      artTitle
## 1 [新聞]怕老鳥帶壞新人？Foodpanda「隔離」菜鳥
## 2      [問卦]PChome聯手熊貓速度還可以到多快？
## 3    [問卦]訂ubereats的人是不是很不愛娶餐阿？
## 4     [問卦]遇到會隱形的熊貓人怎麼辦？(在線等
## 5                 [爆卦]熊貓foodpanda薪水詳解
## 6              Re:[爆卦]熊貓foodpanda薪水詳解

#文章對應標題
data_panda %>% filter(artDate == as.Date("2020-01-15")) %>% distinct(data_panda,artTitle)

## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle

##                                                     artTitle
## 1                           [問卦]熊貓明天罷工，大家會怕嗎？
## 2                               [問卦]熊貓有必要自相殘殺嗎？
## 3                            Re:[問卦]熊貓有必要自相殘殺嗎？
## 4                           [問卦]可憐哪！熊貓外送師被砍獎金
## 5                                   [問卦]熊貓開始罷工了嗎？
## 6                       Re:[問卦]欸欸欸!聽說台灣今天熊貓罷工
## 7                         [問卦]熊貓外送罷工有人被影響到嗎？
## 8                                   [問卦]今天中午能定熊貓嗎
## 9                                Re:[問卦]今天中午能定熊貓嗎
## 10              [新聞]控「年前毀約」！台中60名熊貓外送員市府
## 11    [新聞]熊貓今罷送最多慢1小時…外送員：萬人響應卻無人棄單
## 12                 [新聞]foodpanda罷工實測！點餐後30分就拿到
## 13 Re:[新聞]熊貓今罷送最多慢1小時…外送員：萬人響應卻無人棄單
## 14                [新聞]熊貓變更計薪制度內湖外送員控「變相減
## 15                    [問卦]社會上為何充滿仇視外送員的八卦？

計算 tf-idf

p_tokens_by_art <- data_panda %>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art <- p_tokens_by_art  %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
p_tokens_by_art <- left_join(p_tokens_by_art, p_total_words_by_art)

## Joining, by = "artTitle"

# 以每篇文章爲單位，計算每個詞彙在的tf-idf值
p_words_tf_idf <- p_tokens_by_art %>%
  bind_tf_idf(word, artTitle, n)

## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.

p_words_tf_idf %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))

## # A tibble: 47,328 x 8
##    artTitle           word   artUrl                  n total     tf   idf tf_idf
##    <chr>              <chr>  <chr>               <int> <int>  <dbl> <dbl>  <dbl>
##  1 [問卦]2019台灣的代表性職業是~ 代表性 https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  2 [問卦]2019台灣的代表性職業是~ 幾萬人 https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  3 [問卦]Foodpanda改制導致~ 上中   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  4 [問卦]foodpanda是不是快~ 五次   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  5 [問卦]foodpanda是不是快~ 拜會   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  6 [問卦]foodpanda是不是快~ 間點   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  7 [問卦]foodpanda是不是快~ 愛用者 https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  8 [問卦]foodpanda是不是快~ 解惑   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
##  9 [問卦]小黃出事車隊沒事那外送為啥~ 人要   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
## 10 [問卦]小黃出事車隊沒事那外送為啥~ 給司   https://www.ptt.cc~     1    21 0.0476  6.62  0.315
## # ... with 47,318 more rows

將同一篇的斷詞整理在一起

data_full = data_panda %>%
                group_by(artUrl) %>% 
                summarise(sentence = paste0(word, collapse = ""))

# remove stopwords
# unnest_tokens 使用的bigram分詞函數
# Input: a character vector
# Output: a list of character vectors of the same length
jieba_bigram <- function(t) {
  lapply(t, function(x) {
    if(nchar(x)>1){
      tokens <- segment(x, jieba_tokenizer)
      bigram<- ngrams(tokens, 2)
      bigram <- lapply(bigram, paste, collapse = " ")
      unlist(bigram)
    }
  })
}

# 執行bigram分詞
panda_bigram <- data_full %>%
  unnest_tokens(bigram, sentence, token = jieba_bigram)
panda_bigram

## # A tibble: 50,473 x 2
##    artUrl                                                   bigram          
##    <chr>                                                    <chr>           
##  1 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 凌晨 板橋       
##  2 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 板橋 一兩間     
##  3 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 一兩間 地方     
##  4 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 地方 沒想到     
##  5 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 沒想到 三點     
##  6 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 三點 foodpanda  
##  7 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html foodpanda 摩托車
##  8 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 摩托車 騎       
##  9 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 騎 路上         
## 10 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 路上 紅綠燈     
## # ... with 50,463 more rows

panda_bigram %>%
  count(bigram, sort = TRUE)

## # A tibble: 46,658 x 2
##    bigram               n
##    <chr>            <int>
##  1 外送 平台          126
##  2 為 什麼             49
##  3 foodpanda 外送      46
##  4 也 不               40
##  5 foodpanda 外送員    33
##  6 更 多               25
##  7 外送 foodpanda      23
##  8 外送員 foodpanda    23
##  9 美食 外送           20
## 10 擦 撞               20
## # ... with 46,648 more rows

panda_bigram

## # A tibble: 50,473 x 2
##    artUrl                                                   bigram          
##    <chr>                                                    <chr>           
##  1 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 凌晨 板橋       
##  2 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 板橋 一兩間     
##  3 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 一兩間 地方     
##  4 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 地方 沒想到     
##  5 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 沒想到 三點     
##  6 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 三點 foodpanda  
##  7 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html foodpanda 摩托車
##  8 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 摩托車 騎       
##  9 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 騎 路上         
## 10 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 路上 紅綠燈     
## # ... with 50,463 more rows

# 計算兩個詞彙同時出現的總次數
word_pairs <- p_tokens_by_art %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs

## # A tibble: 4,815,650 x 3
##    item1     item2         n
##    <chr>     <chr>     <dbl>
##  1 外送      foodpanda   391
##  2 foodpanda 外送        391
##  3 外送員    foodpanda   373
##  4 foodpanda 外送員      373
##  5 外送員    外送        230
##  6 外送      外送員      230
##  7 公司      foodpanda   150
##  8 foodpanda 公司        150
##  9 真的      foodpanda   138
## 10 foodpanda 真的        138
## # ... with 4,815,640 more rows

# 計算兩個詞彙間的相關性
word_cors <- p_tokens_by_art%>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)

# 顯示相關性大於0.5的組合
set.seed(2020)

word_cors %>%
  filter(correlation > 0.5) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定，避免中文字顯示錯誤。
  theme_void()

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# 設定幾個詞做爲seed words
seed_words <- c("udn", "來源","連結","網址","備註","內文","www","自由","針對","進行")
# 設定threshold爲0.5
threshold <- 0.5
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
remove_words

## [1] "udn"          "story"        "聯合報"       "ltn"          "自由"        
## [6] "net"          "www"          "breakingnews"

# 清除存在這些詞彙的組合
word_cors_new <- word_cors %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))

word_cors_new %>%
  filter(correlation > 0.4) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

因為只用bigram看不出前後文之間的insight，所以取前後五個做為觀察

# 執行ngram_11分詞
ngram_11 <- function(t) {
  lapply(t, function(x) {
    tokens <- segment(x, jieba_tokenizer)
    ngram <- ngrams(tokens, 11)
    ngram <- lapply(ngram, paste, collapse = " ")
    unlist(ngram)
  })
}

p_ngram_11<- data_full %>%
  unnest_tokens(ngram,sentence, token = ngram_11)

p_ngrams_11_separated <-p_ngram_11  %>%
  separate(ngram, paste0("word", c(1:11),sep=""), sep = " ")
p_ngrams_11_separated

## # A tibble: 43,007 x 12
##    artUrl   word1  word2 word3 word4 word5 word6 word7 word8 word9 word10 word11
##    <chr>    <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>  <chr> 
##  1 https:/~ 凌晨   板橋  一兩間~ 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上   紅綠燈
##  2 https:/~ 板橋   一兩間~ 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈 哇    
##  3 https:/~ 一兩間 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈~ 哇     塞外  
##  4 https:/~ 地方   沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外   送    
##  5 https:/~ 沒想到 三點  food~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外  送     東湖  
##  6 https:/~ 三點   food~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外  送    東湖   是否  
##  7 https:/~ foodp~ 摩托車~ 騎    路上  紅綠燈~ 哇    塞外  送    東湖  是否   專    
##  8 https:/~ 摩托車 騎    路上  紅綠燈~ 哇    塞外  送    東湖  是否  專     板本  
##  9 https:/~ 騎     路上  紅綠燈~ 哇    塞外  送    東湖  是否  專    板本   板    
## 10 https:/~ 路上   紅綠燈~ 哇    塞外  送    東湖  是否  專    板本  板     並非  
## # ... with 42,997 more rows

p_check_words <- p_ngrams_11_separated %>%
  filter((word6 == "foodpanda"))
p_check_words

## # A tibble: 377 x 12
##    artUrl    word1 word2 word3 word4 word5 word6 word7 word8 word9 word10 word11
##    <chr>     <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>  <chr> 
##  1 https://~ 板橋  一兩間~ 地方  沒想到~ 三點  food~ 摩托車~ 騎    路上  紅綠燈 哇    
##  2 https://~ 阿婆  同樣  身材  不知  餐點  food~ 好像  年輕人~ 粉紅  袖套   感覺  
##  3 https://~ 車手  來回  剛剛  時間  到達  food~ 初體驗~ 下雨  免運  訂單   預計  
##  4 https://~ 台中市~ 警察局~ 交通  警方  台中  food~ 時間  衝撞  大隊  近來   多起  
##  5 https://~ 阿等  那麼  再給  不爽  直接  food~ 有免  運費  頓時  覺得   廢到  
##  6 https://~ cdn2~ 態度  激辯  要求  傳訊  food~ 送上來~ 外送  food~ 第一次 碰到  
##  7 https://~ 要求  傳訊  food~ 送上來~ 外送  food~ 第一次~ 碰到  顧客  直接   遇到  
##  8 https://~ 現場  知道  每次  停車場~ 三台  food~ 優勢  不是  便宜  這點   漲到  
##  9 https://~ 白跑一趟~ 風險  上到  專櫃  裝成  food~ 戶戶  上去  以上  例子   合理  
## 10 https://~ 引發  熱烈  討論  回應  之前  food~ 幫送  知道  送上來~ 感謝   真的  
## # ... with 367 more rows

p_check_words_count <- p_check_words %>%
  melt(id.vars = "artUrl", measure.vars = paste0("word", c(1:11),sep="")) %>%
  rename(word=value) %>%
  filter(variable!="word6") %>%
  filter(!(word %in% stop_words), nchar(word)>1) %>%
  count(word, sort = TRUE)
p_check_words_count %>%
  arrange(desc(abs(n))) %>%
  head(15) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = n > 0)) +
  geom_col(show.legend = FALSE) +
  xlab("出現在「foodpanda」附近的字") +
  ylab("出現次數") +
  coord_flip()+ 
  theme(text = element_text(family = "Heiti TC Light"))

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

以LIWC字典判斷文集中的word屬於正面字還是負面字

# 正向字典txt檔
# 以,將字分隔
P <- read_file("./positive.txt")

# 負向字典txt檔
N <- read_file("./negative.txt")

#將字串依,分割
#strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]

# 建立dataframe 有兩個欄位word,sentiments，word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")

LIWC = rbind(P, N)

#以LIWC情緒字典分析

統計每天的文章正面字的次數與負面字的次數

sentiment_count = data_panda %>%
  select(artDate,word,count) %>%
  inner_join(LIWC) %>% 
  group_by(artDate,sentiment) %>%
  summarise(count=sum(count))

## Joining, by = "word"

## Warning: Column `word` joining character vector and factor, coercing into
## character vector

sentiment_count %>% 
  arrange(desc(count))

## # A tibble: 302 x 3
## # Groups:   artDate [170]
##    artDate    sentiment count
##    <date>     <fct>     <int>
##  1 2019-10-14 negative    178
##  2 2019-10-15 negative    133
##  3 2019-10-14 positive    132
##  4 2019-10-11 negative    112
##  5 2019-10-15 positive     88
##  6 2019-10-13 negative     84
##  7 2020-02-12 negative     75
##  8 2019-10-11 positive     66
##  9 2019-10-16 negative     66
## 10 2019-10-16 positive     60
## # ... with 292 more rows

sentiment_count %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/14'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/02/12'))
[1]])),colour = "red")

#geom_vline畫出vertical line，xintercept告訴他要在artDate欄位的哪一個row畫線

data_panda %>% 
  filter(artDate == as.Date('2019/10/14')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                                          artTitle
## 1                             [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2                                 Re:[問卦]其他外送員現在在想什麼
## 3                                     Re:[問卦]熊貓到底靠什麼賺錢
## 4                      [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5                             [問卦]北一女以後叫不到熊貓要吃什麼?
## 6                          Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7                    Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8                              [問卦]農地工廠不罰Uber熊貓罰很大？
## 9                         [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10                   [新聞]外送平台讓生意爆紅？業者親揭殘酷真相「
## 11                    [新聞]再傳外送員車禍！Foodpanda遭闖紅燈貨車
## 12                    Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13                            [問卦]現在風向484外送車禍=外送員錯?
## 14                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15                    [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16                                         [問卦]大家支持熊貓法嗎
## 17                    [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18                              Re:[問卦]foodpanda是不是擋人財路?
## 19                       [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20                       [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21                               [問卦]台灣外送的會車禍很正常吧？
## 22                      [新聞]今日第三起！雲林Foodpanda外送員車禍
## 23                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24                     [新聞]外送員屬僱傭關係健保署也加入戰場：得
## 25                         [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26                            [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27                             [新聞]勞部研擬外送員強制納保意外險
## 28                         [新聞]5天3死！外送員撞死違規過馬路老翁
## 29                               [問卦]所以現在外送員要加勞健保了
## 30                       [問卦]慣老闆沒有奴工用開始搞外送業的卦？
## 31                      [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 33                               [問卦]其實熊貓成立工會不就好惹??
## 34                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35                          Re:[新聞]勞部研擬外送員強制納保意外險
## 36                     [新聞]美食外送為僱傭律師：用牛刀管理將打擊
## 37                                         [問卦]外送為什麼被針對
## 38                                [問卦]給張麻子送foodpanda會怎樣
## 39                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40                    [新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 41                  Re:[新聞]外送員列入雇傭關係後月入10萬夢碎？　
## 42                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 43                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 44                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 45                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 46                         [新聞]外送爭議蘇貞昌：勞檢不再憂讒畏譏
## 47                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 48                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 49                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 50                                   [問卦]搞熊貓最後受傷的是經濟
## 51                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 52                   [新聞]外送事故5天3死！Lalamove外送員撞死違規
## 53                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 54                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 55                                   [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓！　勞檢驚揭業者兩點內規認證「無良
## 57                             [問卦]外送三雄大家偏好哪個？？？？
## 58                    Re:[問卦]其實把外送搞「純平台」不就解決了？
## 59                     [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60                   [新聞]又見外送事故！外送員疑搶黃燈遭「闖紅燈
## 61                    Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓

data_panda %>% 
  filter(artDate == as.Date('2020/02/12')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                       artTitle
## 1   [問卦]居家自主隔離但是叫熊貓，會不會有問題
## 2 [新聞]雲林惡客狂棄訂單　20名熊貓外送員同時被
## 3 [新聞]影／「黃千千」訂的！雲林foodpanda出現2
## 4 [新聞]熊貓外送同時遭惡作劇　10餘外送員荒地「
## 5   [新聞]雲林foodpanda惡意棄單3天逾百件20外送

武漢肺炎前後期聲量比較

date_plot <- article_count_by_date %>% 
  ggplot(aes(x = artDate, y = count)) +
  geom_line(color = "purple", size = 1.5)  + 
  scale_x_date(labels = date_format("%Y/%m/%d")) + 
  geom_vline(xintercept = c(as.numeric(as.Date("2020-01-21"))), col='red', size = 1) + 
  ggtitle("「foodpanada」討論文章數") + 
  xlab("日期") + 
  ylab("數量") +
  theme(text = element_text(family = "Heiti TC Light"))
date_plot

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

#我們可以發現武漢肺炎之後foodpanda聲量也沒有特別提升

word_count_panda_new2 <- data_panda %>% 
  filter(artDate < as.Date("2020-01-21")) %>%
  select(word,count) %>% 
  group_by(word)%>% 
  summarise(count = sum(count))  %>%
  filter(count > 50) %>%  # 過濾出現太少次的字
  arrange(desc(count))%>% 
  filter(word !="外送"& word !="外送員")
word_count_panda_new2

## # A tibble: 120 x 2
##    word      count
##    <chr>     <int>
##  1 foodpanda  1581
##  2 公司        313
##  3 業者        252
##  4 關係        235
##  5 外送平台    225
##  6 承攬        217
##  7 平台        202
##  8 店家        199
##  9 發生        198
## 10 機車        196
## # ... with 110 more rows

word_count_panda_new2 %>% wordcloud2()

word_count_panda_new2 <- data_panda %>% 
  filter(artDate >= as.Date("2020-01-21")) %>%
  select(word,count) %>% 
  group_by(word)%>% 
  summarise(count = sum(count))  %>%
  filter(count > 20) %>%  # 過濾出現太少次的字
  arrange(desc(count))%>% 
  filter(word !="外送"& word !="外送員")
word_count_panda_new2

## # A tibble: 34 x 2
##    word      count
##    <chr>     <int>
##  1 foodpanda   397
##  2 訂單         82
##  3 店家         77
##  4 公司         42
##  5 訂餐         42
##  6 餐點         39
##  7 外送平台     35
##  8 業者         33
##  9 惡意         32
## 10 大家         30
## # ... with 24 more rows

word_count_panda_new2 %>% wordcloud2()

計算 tf-idf

p_tokens_by_art_new1 <- data_panda%>% 
  filter(artDate < as.Date("2020-01-21"))%>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art_new1 <-p_tokens_by_art_new1   %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
p_tokens_by_art_new1 <- left_join(p_tokens_by_art_new1, p_total_words_by_art_new1)

## Joining, by = "artTitle"

# 以每篇文章爲單位，計算每個詞彙在的tf-idf值
p_words_tf_idf_new1 <- p_tokens_by_art_new1 %>%
  bind_tf_idf(word, artTitle, n)

## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.

p_words_tf_idf_new1 %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))

## # A tibble: 40,301 x 8
##    artTitle          word  artUrl                    n total     tf   idf tf_idf
##    <chr>             <chr> <chr>                 <int> <int>  <dbl> <dbl>  <dbl>
##  1 [問卦]2019台灣的代表性職業~ 代表性~ https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  2 [問卦]2019台灣的代表性職業~ 幾萬人~ https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  3 [問卦]Foodpanda改制導~ Band  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  4 [問卦]Foodpanda改制導~ 上中  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  5 [問卦]foodpanda是不是~ 五次  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  6 [問卦]foodpanda是不是~ 拜會  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  7 [問卦]foodpanda是不是~ 間點  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  8 [問卦]foodpanda是不是~ 愛用者~ https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
##  9 [問卦]foodpanda是不是~ 解惑  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
## 10 [問卦]Ubereats要國外交~ 帳單  https://www.ptt.cc/b~     1    21 0.0476  6.37  0.303
## # ... with 40,291 more rows

# 計算兩個詞彙同時出現的總次數
word_pairs_new1 <- p_tokens_by_art_new1 %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs_new1

## # A tibble: 4,240,106 x 3
##    item1     item2         n
##    <chr>     <chr>     <dbl>
##  1 外送      foodpanda   323
##  2 foodpanda 外送        323
##  3 外送員    foodpanda   311
##  4 foodpanda 外送員      311
##  5 外送員    外送        196
##  6 外送      外送員      196
##  7 公司      foodpanda   130
##  8 foodpanda 公司        130
##  9 外送平台  foodpanda   114
## 10 foodpanda 外送平台    114
## # ... with 4,240,096 more rows

# 計算兩個詞彙間的相關性
word_cors_new1<- p_tokens_by_art_new1%>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)

# 顯示相關性大於0.5的組合
set.seed(2020)

word_cors_new1 %>%
  filter(correlation > 0.5) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定，避免中文字顯示錯誤。
  theme_void()

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# 設定幾個詞做爲seed words
seed_words <- c("udn", "自由","net","story","針對")
# 設定threshold爲0.5
threshold <- 0.5
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
remove_words

## [1] "udn"          "story"        "聯合報"       "ltn"          "自由"        
## [6] "net"          "www"          "breakingnews" "透露"

# 清除存在這些詞彙的組合
word_cors_new_new1 <- word_cors_new1 %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))

word_cors_new1 %>%
  filter(correlation > 0.4) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

後期

p_tokens_by_art_new2 <- data_panda%>% 
  filter(artDate >= as.Date("2020-01-21"))%>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art_new2 <-p_tokens_by_art_new2   %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
p_tokens_by_art_new2 <- left_join(p_tokens_by_art_new2, p_total_words_by_art_new2)

## Joining, by = "artTitle"

# 以每篇文章爲單位，計算每個詞彙在的tf-idf值
p_words_tf_idf_new2 <- p_tokens_by_art_new2 %>%
  bind_tf_idf(word, artTitle, n)

## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.

p_words_tf_idf_new2 %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))

## # A tibble: 7,027 x 8
##    artTitle        word   artUrl                     n total     tf   idf tf_idf
##    <chr>           <chr>  <chr>                  <int> <int>  <dbl> <dbl>  <dbl>
##  1 [問卦]民雄要有foodpa~ ayi    https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  2 [問卦]民雄要有foodpa~ bbs    https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  3 [問卦]民雄要有foodpa~ html   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  4 [問卦]民雄要有foodpa~ ptt    https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  5 [問卦]民雄要有foodpa~ 大學   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  6 [問卦]民雄要有foodpa~ 中正   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  7 [問卦]民雄要有foodpa~ 太神   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  8 [問卦]民雄要有foodpa~ 民雄連 https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
##  9 [問卦]民雄要有foodpa~ 挖靠   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
## 10 [問卦]民雄要有foodpa~ 發抖   https://www.ptt.cc/bb~     1    21 0.0476  5.11  0.243
## # ... with 7,017 more rows

# 計算兩個詞彙同時出現的總次數
word_pairs_new2 <- p_tokens_by_art_new2 %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs_new2

## # A tibble: 712,868 x 3
##    item1     item2         n
##    <chr>     <chr>     <dbl>
##  1 外送      foodpanda    68
##  2 foodpanda 外送         68
##  3 外送員    foodpanda    62
##  4 foodpanda 外送員       62
##  5 外送員    外送         34
##  6 外送      外送員       34
##  7 訂單      foodpanda    28
##  8 foodpanda 訂單         28
##  9 店家      foodpanda    27
## 10 真的      foodpanda    27
## # ... with 712,858 more rows

# 計算兩個詞彙間的相關性
word_cors_new2<- p_tokens_by_art_new2%>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)
word_cors_new2

## # A tibble: 132 x 3
##    item1     item2     correlation
##    <chr>     <chr>           <dbl>
##  1 有人      foodpanda     Inf    
##  2 大家      foodpanda     Inf    
##  3 foodpanda 有人          Inf    
##  4 foodpanda 大家          Inf    
##  5 餐點      外送員          0.350
##  6 外送員    餐點            0.350
##  7 外送員    公司            0.295
##  8 公司      外送員          0.295
##  9 店家      訂單            0.287
## 10 訂單      店家            0.287
## # ... with 122 more rows

# 顯示相關性大於0.5的組合
set.seed(2020)

word_cors_new2 %>%
  filter(correlation > 0.1) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定，避免中文字顯示錯誤。
  theme_void()

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

# 設定幾個詞做爲seed words
seed_words <- c("大家", "有人","不是")
# 設定threshold爲0.5
threshold <- 0.17
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
remove_words

##  [1] "問題" "不是" "勞工" "勞動" "PO"   "真的" "承攬" "有人" "討論" "知道"
## [11] "僱傭"

# 清除存在這些詞彙的組合
word_cors_new_new2 <- word_cors_new2 %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))

word_cors_new2 %>%
  filter(correlation > 0.1) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

武漢肺炎前後期情緒比較

sentiment_count %>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/01/21'))
[1]])),colour = "red")

#geom_vline畫出vertical line，xintercept告訴他要在artDate欄位的哪一個row畫線

sentiment_count %>% 
  filter(artDate < as.Date("2020-01-21"))%>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/14'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/11'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/01/15'))
[1]])),colour = "red")

#geom_vline畫出vertical line，xintercept告訴他要在artDate欄位的哪一個row畫線

data_panda %>% 
  filter(artDate == as.Date('2019/10/11')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                          artTitle
## 1        [新聞]女熊貓外送上樓！見宅男開門快被嚇死
## 2     Re:[新聞]女熊貓外送上樓！見宅男開門快被嚇死
## 3     Re:[新聞]女熊貓外送上樓！見宅男開門快被嚇死
## 4   Re:[問卦]Ubereats484輸給Foodpanda了？<U+2708>
## 5    [問卦]外送現在才發生第1起撞死的484比小黃好了
## 6                         [問卦]foodpanda店內價??
## 7                            [問卦]熊貓外送的八卦
## 8                      Re:[問卦]foodpanda店內價??
## 9  Re:[新聞]【外送搏命1】熊貓外送員拚賺宵夜錢桃園
## 10                     [問卦]foodpanda被刷到剩1星
## 11     [新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 12  Re:[新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 13  Re:[新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 14                       [問卦]熊貓外送員被撞死了
## 15  Re:[新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 16  Re:[新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 17                [新聞]外送員車禍foodpanda回應了
## 18             Re:[新聞]外送員車禍foodpanda回應了
## 19                [問卦]ubereat人員素質比較好一些
## 20             Re:[新聞]外送員車禍foodpanda回應了
## 21     [新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 22             Re:[新聞]外送員車禍foodpanda回應了
## 23             Re:[新聞]外送員車禍foodpanda回應了
## 24             Re:[新聞]外送員車禍foodpanda回應了
## 25            [問卦]foodpanda外送人員的騎車技術？
## 26     [新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 27  Re:[新聞]賣命！熊貓美食外送員遭撞慘死超驚悚畫
## 28  Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 29  Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 30            Re:[問卦]真的有人騎腳踏車送外送嗎？

data_panda %>% 
  filter(artDate == as.Date('2019/10/14')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                                          artTitle
## 1                             [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2                                 Re:[問卦]其他外送員現在在想什麼
## 3                                     Re:[問卦]熊貓到底靠什麼賺錢
## 4                      [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5                             [問卦]北一女以後叫不到熊貓要吃什麼?
## 6                          Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7                    Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8                              [問卦]農地工廠不罰Uber熊貓罰很大？
## 9                         [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10                   [新聞]外送平台讓生意爆紅？業者親揭殘酷真相「
## 11                    [新聞]再傳外送員車禍！Foodpanda遭闖紅燈貨車
## 12                    Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13                            [問卦]現在風向484外送車禍=外送員錯?
## 14                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15                    [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16                                         [問卦]大家支持熊貓法嗎
## 17                    [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18                              Re:[問卦]foodpanda是不是擋人財路?
## 19                       [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20                       [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21                               [問卦]台灣外送的會車禍很正常吧？
## 22                      [新聞]今日第三起！雲林Foodpanda外送員車禍
## 23                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24                     [新聞]外送員屬僱傭關係健保署也加入戰場：得
## 25                         [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26                            [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27                             [新聞]勞部研擬外送員強制納保意外險
## 28                         [新聞]5天3死！外送員撞死違規過馬路老翁
## 29                               [問卦]所以現在外送員要加勞健保了
## 30                       [問卦]慣老闆沒有奴工用開始搞外送業的卦？
## 31                      [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 33                               [問卦]其實熊貓成立工會不就好惹??
## 34                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35                          Re:[新聞]勞部研擬外送員強制納保意外險
## 36                     [新聞]美食外送為僱傭律師：用牛刀管理將打擊
## 37                                         [問卦]外送為什麼被針對
## 38                                [問卦]給張麻子送foodpanda會怎樣
## 39                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40                    [新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 41                  Re:[新聞]外送員列入雇傭關係後月入10萬夢碎？　
## 42                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 43                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 44                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 45                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 46                         [新聞]外送爭議蘇貞昌：勞檢不再憂讒畏譏
## 47                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 48                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 49                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 50                                   [問卦]搞熊貓最後受傷的是經濟
## 51                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 52                   [新聞]外送事故5天3死！Lalamove外送員撞死違規
## 53                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 54                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 55                                   [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓！　勞檢驚揭業者兩點內規認證「無良
## 57                             [問卦]外送三雄大家偏好哪個？？？？
## 58                    Re:[問卦]其實把外送搞「純平台」不就解決了？
## 59                     [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60                   [新聞]又見外送事故！外送員疑搶黃燈遭「闖紅燈
## 61                    Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓

data_panda %>% 
  filter(artDate == as.Date('2020/01/15')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                                     artTitle
## 1                           [問卦]熊貓明天罷工，大家會怕嗎？
## 2                               [問卦]熊貓有必要自相殘殺嗎？
## 3                            Re:[問卦]熊貓有必要自相殘殺嗎？
## 4                           [問卦]可憐哪！熊貓外送師被砍獎金
## 5                                   [問卦]熊貓開始罷工了嗎？
## 6                       Re:[問卦]欸欸欸!聽說台灣今天熊貓罷工
## 7                         [問卦]熊貓外送罷工有人被影響到嗎？
## 8                                   [問卦]今天中午能定熊貓嗎
## 9                                Re:[問卦]今天中午能定熊貓嗎
## 10              [新聞]控「年前毀約」！台中60名熊貓外送員市府
## 11    [新聞]熊貓今罷送最多慢1小時…外送員：萬人響應卻無人棄單
## 12                 [新聞]foodpanda罷工實測！點餐後30分就拿到
## 13 Re:[新聞]熊貓今罷送最多慢1小時…外送員：萬人響應卻無人棄單
## 14 Re:[新聞]熊貓今罷送最多慢1小時…外送員：萬人響應卻無人棄單
## 15                [新聞]熊貓變更計薪制度內湖外送員控「變相減
## 16                    [問卦]社會上為何充滿仇視外送員的八卦？

sentiment_count %>% 
  filter(artDate >= as.Date("2020-01-21"))%>%
  ggplot()+
  geom_line(aes(x=artDate,y=count,colour=sentiment))+
  scale_x_date(labels = date_format("%m/%d"))+
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/02/12'))
[1]])),colour = "red") +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/04/10'))
[1]])),colour = "red")

## Warning: Removed 90 rows containing missing values (geom_vline).

## Warning: Removed 90 rows containing missing values (geom_vline).

data_panda %>% 
  filter(artDate == as.Date('2019/10/14')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                                          artTitle
## 1                             [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2                                 Re:[問卦]其他外送員現在在想什麼
## 3                                     Re:[問卦]熊貓到底靠什麼賺錢
## 4                      [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5                             [問卦]北一女以後叫不到熊貓要吃什麼?
## 6                          Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7                    Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8                              [問卦]農地工廠不罰Uber熊貓罰很大？
## 9                         [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10                   [新聞]外送平台讓生意爆紅？業者親揭殘酷真相「
## 11                    [新聞]再傳外送員車禍！Foodpanda遭闖紅燈貨車
## 12                    Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13                            [問卦]現在風向484外送車禍=外送員錯?
## 14                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15                    [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16                                         [問卦]大家支持熊貓法嗎
## 17                    [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18                              Re:[問卦]foodpanda是不是擋人財路?
## 19                       [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20                       [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21                               [問卦]台灣外送的會車禍很正常吧？
## 22                      [新聞]今日第三起！雲林Foodpanda外送員車禍
## 23                   Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24                     [新聞]外送員屬僱傭關係健保署也加入戰場：得
## 25                         [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26                            [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27                             [新聞]勞部研擬外送員強制納保意外險
## 28                         [新聞]5天3死！外送員撞死違規過馬路老翁
## 29                               [問卦]所以現在外送員要加勞健保了
## 30                       [問卦]慣老闆沒有奴工用開始搞外送業的卦？
## 31                      [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 33                               [問卦]其實熊貓成立工會不就好惹??
## 34                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35                          Re:[新聞]勞部研擬外送員強制納保意外險
## 36                     [新聞]美食外送為僱傭律師：用牛刀管理將打擊
## 37                                         [問卦]外送為什麼被針對
## 38                                [問卦]給張麻子送foodpanda會怎樣
## 39                   Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40                    [新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 41                  Re:[新聞]外送員列入雇傭關係後月入10萬夢碎？　
## 42                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 43                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 44                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 45                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 46                         [新聞]外送爭議蘇貞昌：勞檢不再憂讒畏譏
## 47                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 48                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 49                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 50                                   [問卦]搞熊貓最後受傷的是經濟
## 51                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 52                   [新聞]外送事故5天3死！Lalamove外送員撞死違規
## 53                 Re:[新聞]不甩勞動部！foodpanda：不接受僱傭關係
## 54                      Re:[新聞]5天3死！外送員撞死違規過馬路老翁
## 55                                   [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓！　勞檢驚揭業者兩點內規認證「無良
## 57                             [問卦]外送三雄大家偏好哪個？？？？
## 58                    Re:[問卦]其實把外送搞「純平台」不就解決了？
## 59                     [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60                   [新聞]又見外送事故！外送員疑搶黃燈遭「闖紅燈
## 61                    Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓

data_panda %>% 
  filter(artDate == as.Date('2020/04/10')) %>% 
  distinct(artUrl, .keep_all = TRUE) %>% 
  select(artTitle)

##                                      artTitle
## 1 [新聞]安心配送再升級！foodpanda發萬瓶酒精給
## 2   [新聞]5業者組外送國家隊救餐飲！UberEats、
## 3              [問卦]外送員也適用紓困方案嗎？

data_panda %>% 
  filter(artDate >= as.Date("2020-01-21")) %>% 
  distinct(artUrl, .keep_all = TRUE) %>%filter(word =="武漢"|word =="肺炎"|word =="武漢肺炎"|word =="COVID-19"|word =="covid19"|word =="covid-19")%>% 
  select(artTitle)

## [1] artTitle
## <0 rows> (or 0-length row.names)