台灣正式宣布的的第1例新冠肺炎確診,是在今年的1月21日 1. 資料取得及套件載入 載入的資料是由中山大學管理學院文字分析平台取得,在平台資料輸出區塊選擇「文章+詞彙+詞頻」選項,即可取得相同格式之csv檔案。
資料簡介 本資料為2019-09-01~2020-04-22,將PTT1八卦版透過文字分析平台整理,得到2311篇文章。 # 系統參數設定
分析主題 武漢肺炎疫情的影響造成外送平台的訂單增加,以此分析外送平台在疫情爆發的前後聲量和情緒有甚麼重大的變化
Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## Warning in Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8"): 作業系統
## 回報無法實現設定語區為 "zh_TW.UTF-8" 的要求
## [1] ""
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(tidytext)
library(wordcloud2)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
library(wordcloud)
## Loading required package: RColorBrewer
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
library(readr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
require(jiebaR)
## Loading required package: jiebaR
## Loading required package: jiebaRD
require(widyr)
## Loading required package: widyr
require(NLP)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
require(ggraph)
## Loading required package: ggraph
require(igraph)
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
八卦版
data_gossip = fread('gossip_article.csv',encoding = 'UTF-8')
轉換日期格式
data_gossip$artDate = data_gossip$artDate %>% as.Date("%Y/%m/%d")
data_gossip$word[which(data_gossip$word == "FOODPANDA")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "Foodpanda")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "富胖達")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "food胖達")] = "foodpanda"
data_gossip$word[which(data_gossip$word == "熊貓")] = "foodpanda"
過濾特殊字元
data_gossip = data_gossip %>%
filter(!grepl('_',word))
#選取有提到foodpanda的文章
panda_url = data_gossip$artUrl[grepl("foodpanda", data_gossip$word)]
###斷詞
jieba_tokenizer = worker()
stop_words <- c("https", "com", "新聞", "完整", "沒有","有沒有","現在","八卦","jpg","imgur","news","http","內文","htm","ettoday","ETtoday","請問","蘇格蘭","網址","連結","記者","署名","來源","媒體","新聞標題","備註","表示","報導","今天","看到")
#篩選出foodpanda的文章並去除停用字
data_panda <- data_gossip %>%
filter(data_gossip$artUrl %in% panda_url)
data_panda <- data_panda %>%
filter(!(data_panda$word %in% stop_words))
日期折線圖 計算出每一天文章的發表數量,看出討論「foodpanda」的熱度。 資料處理
#討論篇數最多的前10天
p_date <- data_panda %>%
select(artDate, artUrl) %>%
distinct()
article_count_by_date <- p_date %>%
group_by(artDate) %>%
summarise(count = n())
article_count_by_date %>%
arrange(desc(count))%>%
top_n(20)
## Selecting by count
## # A tibble: 21 x 2
## artDate count
## <date> <int>
## 1 2019-10-14 61
## 2 2019-10-11 30
## 3 2019-10-15 30
## 4 2019-10-13 29
## 5 2019-10-16 25
## 6 2019-10-12 18
## 7 2020-01-15 16
## 8 2019-10-21 14
## 9 2019-10-17 13
## 10 2020-01-18 12
## # ... with 11 more rows
date_plot <- article_count_by_date %>%
ggplot(aes(x = artDate, y = count)) +
geom_line(color = "purple", size = 1.5) +
scale_x_date(labels = date_format("%Y/%m/%d")) +
geom_vline(xintercept = c(as.numeric(as.Date("2019-10-14")),
as.numeric(as.Date("2020-01-15")),
as.numeric(as.Date("2020-02-14"))), col='red', size = 1) +
ggtitle("「foodpanada」討論文章數") +
xlab("日期") +
ylab("數量") +
theme(text = element_text(family = "Heiti TC Light"))
date_plot
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
#我們可以發現聲量的表現有明顯的三個高峰期
查看三個高峰期,最常出現的詞彙
p_tokens_by_date <- data_panda %>%
count(artDate, word, sort = TRUE)
plot_merge <- p_tokens_by_date %>%
filter(word != "foodpanda"& word != "外送員"& word != "今天"& word != "很多") %>%
filter(artDate == as.Date("2019-10-14") |
artDate == as.Date("2020-01-15")|
artDate == as.Date("2020-02-14")) %>%
group_by(artDate) %>%
top_n(5, n) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(x=word, y=n, fill = artDate)) +
geom_col(show.legend = FALSE) +
labs(x = NULL, y = NULL) +
facet_wrap(~artDate, scales="free", ncol = 2) +
coord_flip()+
theme(text = element_text(family = "Heiti TC Light"))
plot_merge
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
word_count_panda <- data_panda %>%
select(word,count) %>%
group_by(word) %>%
summarise(count = sum(count)) %>%
filter(count > 50) %>% # 過濾出現太少次的字
arrange(desc(count))
word_count_panda
## # A tibble: 152 x 2
## word count
## <chr> <int>
## 1 foodpanda 1978
## 2 外送員 1801
## 3 外送 1032
## 4 公司 355
## 5 業者 285
## 6 店家 276
## 7 外送平台 260
## 8 關係 240
## 9 平台 231
## 10 訂單 231
## # ... with 142 more rows
word_count_panda %>% wordcloud2()
#文章對應標題
data_panda %>% filter(artDate == as.Date("2019-10-11")) %>% distinct(data_panda,artTitle)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle
## artTitle
## 1 [新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 2 Re:[新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 3 Re:[問卦]Ubereats484輸給Foodpanda了?<U+2708>
## 4 [問卦]外送現在才發生第1起撞死的484比小黃好了
## 5 [問卦]foodpanda店內價??
## 6 [問卦]熊貓外送的八卦
## 7 Re:[問卦]foodpanda店內價??
## 8 Re:[新聞]【外送搏命1】熊貓外送員拚賺宵夜錢桃園
## 9 [問卦]foodpanda被刷到剩1星
## 10 [新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 11 Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 12 [問卦]熊貓外送員被撞死了
## 13 [新聞]外送員車禍foodpanda回應了
## 14 Re:[新聞]外送員車禍foodpanda回應了
## 15 [問卦]ubereat人員素質比較好一些
## 16 [問卦]foodpanda外送人員的騎車技術?
## 17 [新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 18 Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 19 Re:[問卦]真的有人騎腳踏車送外送嗎?
#文章對應標題
data_panda %>% filter(artDate == as.Date("2020-02-14")) %>% distinct(data_panda,artTitle)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle
## artTitle
## 1 [新聞]怕老鳥帶壞新人?Foodpanda「隔離」菜鳥
## 2 [問卦]PChome聯手熊貓速度還可以到多快?
## 3 [問卦]訂ubereats的人是不是很不愛娶餐阿?
## 4 [問卦]遇到會隱形的熊貓人怎麼辦?(在線等
## 5 [爆卦]熊貓foodpanda薪水詳解
## 6 Re:[爆卦]熊貓foodpanda薪水詳解
#文章對應標題
data_panda %>% filter(artDate == as.Date("2020-01-15")) %>% distinct(data_panda,artTitle)
## Warning: Trying to compute distinct() for variables not found in the data:
## - `data_panda`
## This is an error, but only a warning is raised for compatibility reasons.
## The following variables will be used:
## - artTitle
## artTitle
## 1 [問卦]熊貓明天罷工,大家會怕嗎?
## 2 [問卦]熊貓有必要自相殘殺嗎?
## 3 Re:[問卦]熊貓有必要自相殘殺嗎?
## 4 [問卦]可憐哪!熊貓外送師被砍獎金
## 5 [問卦]熊貓開始罷工了嗎?
## 6 Re:[問卦]欸欸欸!聽說台灣今天熊貓罷工
## 7 [問卦]熊貓外送罷工有人被影響到嗎?
## 8 [問卦]今天中午能定熊貓嗎
## 9 Re:[問卦]今天中午能定熊貓嗎
## 10 [新聞]控「年前毀約」!台中60名熊貓外送員市府
## 11 [新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 12 [新聞]foodpanda罷工實測!點餐後30分就拿到
## 13 Re:[新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 14 [新聞]熊貓變更計薪制度內湖外送員控「變相減
## 15 [問卦]社會上為何充滿仇視外送員的八卦?
p_tokens_by_art <- data_panda %>%
filter(!str_detect(word, regex("[0-9]"))) %>%
count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art <- p_tokens_by_art %>%
group_by(artTitle) %>%
summarize(total = sum(n)) %>%
arrange(desc(total))
p_tokens_by_art <- left_join(p_tokens_by_art, p_total_words_by_art)
## Joining, by = "artTitle"
# 以每篇文章爲單位,計算每個詞彙在的tf-idf值
p_words_tf_idf <- p_tokens_by_art %>%
bind_tf_idf(word, artTitle, n)
## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.
p_words_tf_idf %>%
filter(total > 20) %>%
arrange(desc(tf_idf))
## # A tibble: 47,328 x 8
## artTitle word artUrl n total tf idf tf_idf
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 [問卦]2019台灣的代表性職業是~ 代表性 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 2 [問卦]2019台灣的代表性職業是~ 幾萬人 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 3 [問卦]Foodpanda改制導致~ 上中 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 4 [問卦]foodpanda是不是快~ 五次 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 5 [問卦]foodpanda是不是快~ 拜會 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 6 [問卦]foodpanda是不是快~ 間點 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 7 [問卦]foodpanda是不是快~ 愛用者 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 8 [問卦]foodpanda是不是快~ 解惑 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 9 [問卦]小黃出事車隊沒事那外送為啥~ 人要 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## 10 [問卦]小黃出事車隊沒事那外送為啥~ 給司 https://www.ptt.cc~ 1 21 0.0476 6.62 0.315
## # ... with 47,318 more rows
將同一篇的斷詞整理在一起
data_full = data_panda %>%
group_by(artUrl) %>%
summarise(sentence = paste0(word, collapse = ""))
# remove stopwords
# unnest_tokens 使用的bigram分詞函數
# Input: a character vector
# Output: a list of character vectors of the same length
jieba_bigram <- function(t) {
lapply(t, function(x) {
if(nchar(x)>1){
tokens <- segment(x, jieba_tokenizer)
bigram<- ngrams(tokens, 2)
bigram <- lapply(bigram, paste, collapse = " ")
unlist(bigram)
}
})
}
# 執行bigram分詞
panda_bigram <- data_full %>%
unnest_tokens(bigram, sentence, token = jieba_bigram)
panda_bigram
## # A tibble: 50,473 x 2
## artUrl bigram
## <chr> <chr>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 凌晨 板橋
## 2 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 板橋 一兩間
## 3 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 一兩間 地方
## 4 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 地方 沒想到
## 5 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 沒想到 三點
## 6 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 三點 foodpanda
## 7 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html foodpanda 摩托車
## 8 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 摩托車 騎
## 9 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 騎 路上
## 10 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 路上 紅綠燈
## # ... with 50,463 more rows
panda_bigram %>%
count(bigram, sort = TRUE)
## # A tibble: 46,658 x 2
## bigram n
## <chr> <int>
## 1 外送 平台 126
## 2 為 什麼 49
## 3 foodpanda 外送 46
## 4 也 不 40
## 5 foodpanda 外送員 33
## 6 更 多 25
## 7 外送 foodpanda 23
## 8 外送員 foodpanda 23
## 9 美食 外送 20
## 10 擦 撞 20
## # ... with 46,648 more rows
panda_bigram
## # A tibble: 50,473 x 2
## artUrl bigram
## <chr> <chr>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 凌晨 板橋
## 2 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 板橋 一兩間
## 3 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 一兩間 地方
## 4 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 地方 沒想到
## 5 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 沒想到 三點
## 6 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 三點 foodpanda
## 7 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html foodpanda 摩托車
## 8 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 摩托車 騎
## 9 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 騎 路上
## 10 https://www.ptt.cc/bbs/Gossiping/M.1567409639.A.C25.html 路上 紅綠燈
## # ... with 50,463 more rows
# 計算兩個詞彙同時出現的總次數
word_pairs <- p_tokens_by_art %>%
pairwise_count(word, artUrl, sort = TRUE)
word_pairs
## # A tibble: 4,815,650 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 外送 foodpanda 391
## 2 foodpanda 外送 391
## 3 外送員 foodpanda 373
## 4 foodpanda 外送員 373
## 5 外送員 外送 230
## 6 外送 外送員 230
## 7 公司 foodpanda 150
## 8 foodpanda 公司 150
## 9 真的 foodpanda 138
## 10 foodpanda 真的 138
## # ... with 4,815,640 more rows
# 計算兩個詞彙間的相關性
word_cors <- p_tokens_by_art%>%
group_by(word) %>%
filter(n() >= 20) %>%
pairwise_cor(word, artUrl, sort = TRUE)
# 顯示相關性大於0.5的組合
set.seed(2020)
word_cors %>%
filter(correlation > 0.5) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定,避免中文字顯示錯誤。
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# 設定幾個詞做爲seed words
seed_words <- c("udn", "來源","連結","網址","備註","內文","www","自由","針對","進行")
# 設定threshold爲0.5
threshold <- 0.5
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
.$item1 %>%
unique()
remove_words
## [1] "udn" "story" "聯合報" "ltn" "自由"
## [6] "net" "www" "breakingnews"
# 清除存在這些詞彙的組合
word_cors_new <- word_cors %>%
filter(!(item1 %in% remove_words|item2 %in% remove_words))
word_cors_new %>%
filter(correlation > 0.4) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
因為只用bigram看不出前後文之間的insight,所以取前後五個做為觀察
# 執行ngram_11分詞
ngram_11 <- function(t) {
lapply(t, function(x) {
tokens <- segment(x, jieba_tokenizer)
ngram <- ngrams(tokens, 11)
ngram <- lapply(ngram, paste, collapse = " ")
unlist(ngram)
})
}
p_ngram_11<- data_full %>%
unnest_tokens(ngram,sentence, token = ngram_11)
p_ngrams_11_separated <-p_ngram_11 %>%
separate(ngram, paste0("word", c(1:11),sep=""), sep = " ")
p_ngrams_11_separated
## # A tibble: 43,007 x 12
## artUrl word1 word2 word3 word4 word5 word6 word7 word8 word9 word10 word11
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 https:/~ 凌晨 板橋 一兩間~ 地方 沒想到~ 三點 food~ 摩托車~ 騎 路上 紅綠燈
## 2 https:/~ 板橋 一兩間~ 地方 沒想到~ 三點 food~ 摩托車~ 騎 路上 紅綠燈 哇
## 3 https:/~ 一兩間 地方 沒想到~ 三點 food~ 摩托車~ 騎 路上 紅綠燈~ 哇 塞外
## 4 https:/~ 地方 沒想到~ 三點 food~ 摩托車~ 騎 路上 紅綠燈~ 哇 塞外 送
## 5 https:/~ 沒想到 三點 food~ 摩托車~ 騎 路上 紅綠燈~ 哇 塞外 送 東湖
## 6 https:/~ 三點 food~ 摩托車~ 騎 路上 紅綠燈~ 哇 塞外 送 東湖 是否
## 7 https:/~ foodp~ 摩托車~ 騎 路上 紅綠燈~ 哇 塞外 送 東湖 是否 專
## 8 https:/~ 摩托車 騎 路上 紅綠燈~ 哇 塞外 送 東湖 是否 專 板本
## 9 https:/~ 騎 路上 紅綠燈~ 哇 塞外 送 東湖 是否 專 板本 板
## 10 https:/~ 路上 紅綠燈~ 哇 塞外 送 東湖 是否 專 板本 板 並非
## # ... with 42,997 more rows
p_check_words <- p_ngrams_11_separated %>%
filter((word6 == "foodpanda"))
p_check_words
## # A tibble: 377 x 12
## artUrl word1 word2 word3 word4 word5 word6 word7 word8 word9 word10 word11
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 https://~ 板橋 一兩間~ 地方 沒想到~ 三點 food~ 摩托車~ 騎 路上 紅綠燈 哇
## 2 https://~ 阿婆 同樣 身材 不知 餐點 food~ 好像 年輕人~ 粉紅 袖套 感覺
## 3 https://~ 車手 來回 剛剛 時間 到達 food~ 初體驗~ 下雨 免運 訂單 預計
## 4 https://~ 台中市~ 警察局~ 交通 警方 台中 food~ 時間 衝撞 大隊 近來 多起
## 5 https://~ 阿等 那麼 再給 不爽 直接 food~ 有免 運費 頓時 覺得 廢到
## 6 https://~ cdn2~ 態度 激辯 要求 傳訊 food~ 送上來~ 外送 food~ 第一次 碰到
## 7 https://~ 要求 傳訊 food~ 送上來~ 外送 food~ 第一次~ 碰到 顧客 直接 遇到
## 8 https://~ 現場 知道 每次 停車場~ 三台 food~ 優勢 不是 便宜 這點 漲到
## 9 https://~ 白跑一趟~ 風險 上到 專櫃 裝成 food~ 戶戶 上去 以上 例子 合理
## 10 https://~ 引發 熱烈 討論 回應 之前 food~ 幫送 知道 送上來~ 感謝 真的
## # ... with 367 more rows
p_check_words_count <- p_check_words %>%
melt(id.vars = "artUrl", measure.vars = paste0("word", c(1:11),sep="")) %>%
rename(word=value) %>%
filter(variable!="word6") %>%
filter(!(word %in% stop_words), nchar(word)>1) %>%
count(word, sort = TRUE)
p_check_words_count %>%
arrange(desc(abs(n))) %>%
head(15) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = n > 0)) +
geom_col(show.legend = FALSE) +
xlab("出現在「foodpanda」附近的字") +
ylab("出現次數") +
coord_flip()+
theme(text = element_text(family = "Heiti TC Light"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
# 正向字典txt檔
# 以,將字分隔
P <- read_file("./positive.txt")
# 負向字典txt檔
N <- read_file("./negative.txt")
#將字串依,分割
#strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]
# 建立dataframe 有兩個欄位word,sentiments,word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")
LIWC = rbind(P, N)
#以LIWC情緒字典分析
sentiment_count = data_panda %>%
select(artDate,word,count) %>%
inner_join(LIWC) %>%
group_by(artDate,sentiment) %>%
summarise(count=sum(count))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
sentiment_count %>%
arrange(desc(count))
## # A tibble: 302 x 3
## # Groups: artDate [170]
## artDate sentiment count
## <date> <fct> <int>
## 1 2019-10-14 negative 178
## 2 2019-10-15 negative 133
## 3 2019-10-14 positive 132
## 4 2019-10-11 negative 112
## 5 2019-10-15 positive 88
## 6 2019-10-13 negative 84
## 7 2020-02-12 negative 75
## 8 2019-10-11 positive 66
## 9 2019-10-16 negative 66
## 10 2019-10-16 positive 60
## # ... with 292 more rows
sentiment_count %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/14'))
[1]])),colour = "red") +
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/02/12'))
[1]])),colour = "red")
#geom_vline畫出vertical line,xintercept告訴他要在artDate欄位的哪一個row畫線
data_panda %>%
filter(artDate == as.Date('2019/10/14')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2 Re:[問卦]其他外送員現在在想什麼
## 3 Re:[問卦]熊貓到底靠什麼賺錢
## 4 [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5 [問卦]北一女以後叫不到熊貓要吃什麼?
## 6 Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8 [問卦]農地工廠不罰Uber熊貓罰很大?
## 9 [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10 [新聞]外送平台讓生意爆紅?業者親揭殘酷真相「
## 11 [新聞]再傳外送員車禍!Foodpanda遭闖紅燈貨車
## 12 Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13 [問卦]現在風向484外送車禍=外送員錯?
## 14 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15 [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16 [問卦]大家支持熊貓法嗎
## 17 [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18 Re:[問卦]foodpanda是不是擋人財路?
## 19 [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20 [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21 [問卦]台灣外送的會車禍很正常吧?
## 22 [新聞]今日第三起!雲林Foodpanda外送員車禍
## 23 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24 [新聞]外送員屬僱傭關係健保署也加入戰場:得
## 25 [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26 [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27 [新聞]勞部研擬外送員強制納保意外險
## 28 [新聞]5天3死!外送員撞死違規過馬路老翁
## 29 [問卦]所以現在外送員要加勞健保了
## 30 [問卦]慣老闆沒有奴工用開始搞外送業的卦?
## 31 [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 33 [問卦]其實熊貓成立工會不就好惹??
## 34 Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35 Re:[新聞]勞部研擬外送員強制納保意外險
## 36 [新聞]美食外送為僱傭律師:用牛刀管理將打擊
## 37 [問卦]外送為什麼被針對
## 38 [問卦]給張麻子送foodpanda會怎樣
## 39 Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40 [新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 41 Re:[新聞]外送員列入雇傭關係後月入10萬夢碎?
## 42 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 43 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 44 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 45 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 46 [新聞]外送爭議蘇貞昌:勞檢不再憂讒畏譏
## 47 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 48 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 49 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 50 [問卦]搞熊貓最後受傷的是經濟
## 51 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 52 [新聞]外送事故5天3死!Lalamove外送員撞死違規
## 53 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 54 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 55 [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓! 勞檢驚揭業者兩點內規認證「無良
## 57 [問卦]外送三雄大家偏好哪個????
## 58 Re:[問卦]其實把外送搞「純平台」不就解決了?
## 59 [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60 [新聞]又見外送事故!外送員疑搶黃燈遭「闖紅燈
## 61 Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓
data_panda %>%
filter(artDate == as.Date('2020/02/12')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [問卦]居家自主隔離但是叫熊貓,會不會有問題
## 2 [新聞]雲林惡客狂棄訂單 20名熊貓外送員同時被
## 3 [新聞]影/「黃千千」訂的!雲林foodpanda出現2
## 4 [新聞]熊貓外送同時遭惡作劇 10餘外送員荒地「
## 5 [新聞]雲林foodpanda惡意棄單3天逾百件20外送
武漢肺炎前後期聲量比較
date_plot <- article_count_by_date %>%
ggplot(aes(x = artDate, y = count)) +
geom_line(color = "purple", size = 1.5) +
scale_x_date(labels = date_format("%Y/%m/%d")) +
geom_vline(xintercept = c(as.numeric(as.Date("2020-01-21"))), col='red', size = 1) +
ggtitle("「foodpanada」討論文章數") +
xlab("日期") +
ylab("數量") +
theme(text = element_text(family = "Heiti TC Light"))
date_plot
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
#我們可以發現武漢肺炎之後foodpanda聲量也沒有特別提升
word_count_panda_new2 <- data_panda %>%
filter(artDate < as.Date("2020-01-21")) %>%
select(word,count) %>%
group_by(word)%>%
summarise(count = sum(count)) %>%
filter(count > 50) %>% # 過濾出現太少次的字
arrange(desc(count))%>%
filter(word !="外送"& word !="外送員")
word_count_panda_new2
## # A tibble: 120 x 2
## word count
## <chr> <int>
## 1 foodpanda 1581
## 2 公司 313
## 3 業者 252
## 4 關係 235
## 5 外送平台 225
## 6 承攬 217
## 7 平台 202
## 8 店家 199
## 9 發生 198
## 10 機車 196
## # ... with 110 more rows
word_count_panda_new2 %>% wordcloud2()
word_count_panda_new2 <- data_panda %>%
filter(artDate >= as.Date("2020-01-21")) %>%
select(word,count) %>%
group_by(word)%>%
summarise(count = sum(count)) %>%
filter(count > 20) %>% # 過濾出現太少次的字
arrange(desc(count))%>%
filter(word !="外送"& word !="外送員")
word_count_panda_new2
## # A tibble: 34 x 2
## word count
## <chr> <int>
## 1 foodpanda 397
## 2 訂單 82
## 3 店家 77
## 4 公司 42
## 5 訂餐 42
## 6 餐點 39
## 7 外送平台 35
## 8 業者 33
## 9 惡意 32
## 10 大家 30
## # ... with 24 more rows
word_count_panda_new2 %>% wordcloud2()
p_tokens_by_art_new1 <- data_panda%>%
filter(artDate < as.Date("2020-01-21"))%>%
filter(!str_detect(word, regex("[0-9]"))) %>%
count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art_new1 <-p_tokens_by_art_new1 %>%
group_by(artTitle) %>%
summarize(total = sum(n)) %>%
arrange(desc(total))
p_tokens_by_art_new1 <- left_join(p_tokens_by_art_new1, p_total_words_by_art_new1)
## Joining, by = "artTitle"
# 以每篇文章爲單位,計算每個詞彙在的tf-idf值
p_words_tf_idf_new1 <- p_tokens_by_art_new1 %>%
bind_tf_idf(word, artTitle, n)
## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.
p_words_tf_idf_new1 %>%
filter(total > 20) %>%
arrange(desc(tf_idf))
## # A tibble: 40,301 x 8
## artTitle word artUrl n total tf idf tf_idf
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 [問卦]2019台灣的代表性職業~ 代表性~ https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 2 [問卦]2019台灣的代表性職業~ 幾萬人~ https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 3 [問卦]Foodpanda改制導~ Band https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 4 [問卦]Foodpanda改制導~ 上中 https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 5 [問卦]foodpanda是不是~ 五次 https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 6 [問卦]foodpanda是不是~ 拜會 https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 7 [問卦]foodpanda是不是~ 間點 https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 8 [問卦]foodpanda是不是~ 愛用者~ https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 9 [問卦]foodpanda是不是~ 解惑 https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## 10 [問卦]Ubereats要國外交~ 帳單 https://www.ptt.cc/b~ 1 21 0.0476 6.37 0.303
## # ... with 40,291 more rows
# 計算兩個詞彙同時出現的總次數
word_pairs_new1 <- p_tokens_by_art_new1 %>%
pairwise_count(word, artUrl, sort = TRUE)
word_pairs_new1
## # A tibble: 4,240,106 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 外送 foodpanda 323
## 2 foodpanda 外送 323
## 3 外送員 foodpanda 311
## 4 foodpanda 外送員 311
## 5 外送員 外送 196
## 6 外送 外送員 196
## 7 公司 foodpanda 130
## 8 foodpanda 公司 130
## 9 外送平台 foodpanda 114
## 10 foodpanda 外送平台 114
## # ... with 4,240,096 more rows
# 計算兩個詞彙間的相關性
word_cors_new1<- p_tokens_by_art_new1%>%
group_by(word) %>%
filter(n() >= 20) %>%
pairwise_cor(word, artUrl, sort = TRUE)
# 顯示相關性大於0.5的組合
set.seed(2020)
word_cors_new1 %>%
filter(correlation > 0.5) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定,避免中文字顯示錯誤。
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# 設定幾個詞做爲seed words
seed_words <- c("udn", "自由","net","story","針對")
# 設定threshold爲0.5
threshold <- 0.5
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
.$item1 %>%
unique()
remove_words
## [1] "udn" "story" "聯合報" "ltn" "自由"
## [6] "net" "www" "breakingnews" "透露"
# 清除存在這些詞彙的組合
word_cors_new_new1 <- word_cors_new1 %>%
filter(!(item1 %in% remove_words|item2 %in% remove_words))
word_cors_new1 %>%
filter(correlation > 0.4) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
後期
p_tokens_by_art_new2 <- data_panda%>%
filter(artDate >= as.Date("2020-01-21"))%>%
filter(!str_detect(word, regex("[0-9]"))) %>%
count(artTitle, word,artUrl, sort = TRUE)
p_total_words_by_art_new2 <-p_tokens_by_art_new2 %>%
group_by(artTitle) %>%
summarize(total = sum(n)) %>%
arrange(desc(total))
p_tokens_by_art_new2 <- left_join(p_tokens_by_art_new2, p_total_words_by_art_new2)
## Joining, by = "artTitle"
# 以每篇文章爲單位,計算每個詞彙在的tf-idf值
p_words_tf_idf_new2 <- p_tokens_by_art_new2 %>%
bind_tf_idf(word, artTitle, n)
## Warning in bind_tf_idf.data.frame(., word, artTitle, n): A value for tf_idf is negative:
## Input should have exactly one row per document-term combination.
p_words_tf_idf_new2 %>%
filter(total > 20) %>%
arrange(desc(tf_idf))
## # A tibble: 7,027 x 8
## artTitle word artUrl n total tf idf tf_idf
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 [問卦]民雄要有foodpa~ ayi https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 2 [問卦]民雄要有foodpa~ bbs https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 3 [問卦]民雄要有foodpa~ html https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 4 [問卦]民雄要有foodpa~ ptt https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 5 [問卦]民雄要有foodpa~ 大學 https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 6 [問卦]民雄要有foodpa~ 中正 https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 7 [問卦]民雄要有foodpa~ 太神 https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 8 [問卦]民雄要有foodpa~ 民雄連 https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 9 [問卦]民雄要有foodpa~ 挖靠 https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## 10 [問卦]民雄要有foodpa~ 發抖 https://www.ptt.cc/bb~ 1 21 0.0476 5.11 0.243
## # ... with 7,017 more rows
# 計算兩個詞彙同時出現的總次數
word_pairs_new2 <- p_tokens_by_art_new2 %>%
pairwise_count(word, artUrl, sort = TRUE)
word_pairs_new2
## # A tibble: 712,868 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 外送 foodpanda 68
## 2 foodpanda 外送 68
## 3 外送員 foodpanda 62
## 4 foodpanda 外送員 62
## 5 外送員 外送 34
## 6 外送 外送員 34
## 7 訂單 foodpanda 28
## 8 foodpanda 訂單 28
## 9 店家 foodpanda 27
## 10 真的 foodpanda 27
## # ... with 712,858 more rows
# 計算兩個詞彙間的相關性
word_cors_new2<- p_tokens_by_art_new2%>%
group_by(word) %>%
filter(n() >= 20) %>%
pairwise_cor(word, artUrl, sort = TRUE)
word_cors_new2
## # A tibble: 132 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 有人 foodpanda Inf
## 2 大家 foodpanda Inf
## 3 foodpanda 有人 Inf
## 4 foodpanda 大家 Inf
## 5 餐點 外送員 0.350
## 6 外送員 餐點 0.350
## 7 外送員 公司 0.295
## 8 公司 外送員 0.295
## 9 店家 訂單 0.287
## 10 訂單 店家 0.287
## # ... with 122 more rows
# 顯示相關性大於0.5的組合
set.seed(2020)
word_cors_new2 %>%
filter(correlation > 0.1) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + #加入中文字型設定,避免中文字顯示錯誤。
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
# 設定幾個詞做爲seed words
seed_words <- c("大家", "有人","不是")
# 設定threshold爲0.5
threshold <- 0.17
# 跟seed words相關性高於threshold的詞彙會被加入移除列表中
remove_words <- word_cors %>%
filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
.$item1 %>%
unique()
remove_words
## [1] "問題" "不是" "勞工" "勞動" "PO" "真的" "承攬" "有人" "討論" "知道"
## [11] "僱傭"
# 清除存在這些詞彙的組合
word_cors_new_new2 <- word_cors_new2 %>%
filter(!(item1 %in% remove_words|item2 %in% remove_words))
word_cors_new2 %>%
filter(correlation > 0.1) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
武漢肺炎前後期情緒比較
sentiment_count %>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/01/21'))
[1]])),colour = "red")
#geom_vline畫出vertical line,xintercept告訴他要在artDate欄位的哪一個row畫線
sentiment_count %>%
filter(artDate < as.Date("2020-01-21"))%>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/14'))
[1]])),colour = "red") +
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/10/11'))
[1]])),colour = "red") +
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/01/15'))
[1]])),colour = "red")
#geom_vline畫出vertical line,xintercept告訴他要在artDate欄位的哪一個row畫線
data_panda %>%
filter(artDate == as.Date('2019/10/11')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 2 Re:[新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 3 Re:[新聞]女熊貓外送上樓!見宅男開門快被嚇死
## 4 Re:[問卦]Ubereats484輸給Foodpanda了?<U+2708>
## 5 [問卦]外送現在才發生第1起撞死的484比小黃好了
## 6 [問卦]foodpanda店內價??
## 7 [問卦]熊貓外送的八卦
## 8 Re:[問卦]foodpanda店內價??
## 9 Re:[新聞]【外送搏命1】熊貓外送員拚賺宵夜錢桃園
## 10 [問卦]foodpanda被刷到剩1星
## 11 [新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 12 Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 13 Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 14 [問卦]熊貓外送員被撞死了
## 15 Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 16 Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 17 [新聞]外送員車禍foodpanda回應了
## 18 Re:[新聞]外送員車禍foodpanda回應了
## 19 [問卦]ubereat人員素質比較好一些
## 20 Re:[新聞]外送員車禍foodpanda回應了
## 21 [新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 22 Re:[新聞]外送員車禍foodpanda回應了
## 23 Re:[新聞]外送員車禍foodpanda回應了
## 24 Re:[新聞]外送員車禍foodpanda回應了
## 25 [問卦]foodpanda外送人員的騎車技術?
## 26 [新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 27 Re:[新聞]賣命!熊貓美食外送員遭撞慘死超驚悚畫
## 28 Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 29 Re:[新聞]熊貓外送員車禍身亡時力參選人質疑平台
## 30 Re:[問卦]真的有人騎腳踏車送外送嗎?
data_panda %>%
filter(artDate == as.Date('2019/10/14')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2 Re:[問卦]其他外送員現在在想什麼
## 3 Re:[問卦]熊貓到底靠什麼賺錢
## 4 [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5 [問卦]北一女以後叫不到熊貓要吃什麼?
## 6 Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8 [問卦]農地工廠不罰Uber熊貓罰很大?
## 9 [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10 [新聞]外送平台讓生意爆紅?業者親揭殘酷真相「
## 11 [新聞]再傳外送員車禍!Foodpanda遭闖紅燈貨車
## 12 Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13 [問卦]現在風向484外送車禍=外送員錯?
## 14 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15 [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16 [問卦]大家支持熊貓法嗎
## 17 [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18 Re:[問卦]foodpanda是不是擋人財路?
## 19 [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20 [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21 [問卦]台灣外送的會車禍很正常吧?
## 22 [新聞]今日第三起!雲林Foodpanda外送員車禍
## 23 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24 [新聞]外送員屬僱傭關係健保署也加入戰場:得
## 25 [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26 [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27 [新聞]勞部研擬外送員強制納保意外險
## 28 [新聞]5天3死!外送員撞死違規過馬路老翁
## 29 [問卦]所以現在外送員要加勞健保了
## 30 [問卦]慣老闆沒有奴工用開始搞外送業的卦?
## 31 [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 33 [問卦]其實熊貓成立工會不就好惹??
## 34 Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35 Re:[新聞]勞部研擬外送員強制納保意外險
## 36 [新聞]美食外送為僱傭律師:用牛刀管理將打擊
## 37 [問卦]外送為什麼被針對
## 38 [問卦]給張麻子送foodpanda會怎樣
## 39 Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40 [新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 41 Re:[新聞]外送員列入雇傭關係後月入10萬夢碎?
## 42 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 43 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 44 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 45 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 46 [新聞]外送爭議蘇貞昌:勞檢不再憂讒畏譏
## 47 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 48 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 49 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 50 [問卦]搞熊貓最後受傷的是經濟
## 51 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 52 [新聞]外送事故5天3死!Lalamove外送員撞死違規
## 53 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 54 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 55 [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓! 勞檢驚揭業者兩點內規認證「無良
## 57 [問卦]外送三雄大家偏好哪個????
## 58 Re:[問卦]其實把外送搞「純平台」不就解決了?
## 59 [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60 [新聞]又見外送事故!外送員疑搶黃燈遭「闖紅燈
## 61 Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓
data_panda %>%
filter(artDate == as.Date('2020/01/15')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [問卦]熊貓明天罷工,大家會怕嗎?
## 2 [問卦]熊貓有必要自相殘殺嗎?
## 3 Re:[問卦]熊貓有必要自相殘殺嗎?
## 4 [問卦]可憐哪!熊貓外送師被砍獎金
## 5 [問卦]熊貓開始罷工了嗎?
## 6 Re:[問卦]欸欸欸!聽說台灣今天熊貓罷工
## 7 [問卦]熊貓外送罷工有人被影響到嗎?
## 8 [問卦]今天中午能定熊貓嗎
## 9 Re:[問卦]今天中午能定熊貓嗎
## 10 [新聞]控「年前毀約」!台中60名熊貓外送員市府
## 11 [新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 12 [新聞]foodpanda罷工實測!點餐後30分就拿到
## 13 Re:[新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 14 Re:[新聞]熊貓今罷送最多慢1小時…外送員:萬人響應卻無人棄單
## 15 [新聞]熊貓變更計薪制度內湖外送員控「變相減
## 16 [問卦]社會上為何充滿仇視外送員的八卦?
sentiment_count %>%
filter(artDate >= as.Date("2020-01-21"))%>%
ggplot()+
geom_line(aes(x=artDate,y=count,colour=sentiment))+
scale_x_date(labels = date_format("%m/%d"))+
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/02/12'))
[1]])),colour = "red") +
geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2020/04/10'))
[1]])),colour = "red")
## Warning: Removed 90 rows containing missing values (geom_vline).
## Warning: Removed 90 rows containing missing values (geom_vline).
data_panda %>%
filter(artDate == as.Date('2019/10/14')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 2 Re:[問卦]其他外送員現在在想什麼
## 3 Re:[問卦]熊貓到底靠什麼賺錢
## 4 [新聞]外送員連三起傷亡事故交通部長林佳龍說
## 5 [問卦]北一女以後叫不到熊貓要吃什麼?
## 6 Re:[新聞]綠燈才起步…熊貓外送員挨撞噴飛
## 7 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 8 [問卦]農地工廠不罰Uber熊貓罰很大?
## 9 [問卦]嬌生慣養的台女竟然在做FOODPANDA!?
## 10 [新聞]外送平台讓生意爆紅?業者親揭殘酷真相「
## 11 [新聞]再傳外送員車禍!Foodpanda遭闖紅燈貨車
## 12 Re:[爆卦]勞動部認定熊貓跟ubereats是僱傭契約
## 13 [問卦]現在風向484外送車禍=外送員錯?
## 14 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 15 [新聞]北市勞動局︰UberEats違7項職安法規、Fo
## 16 [問卦]大家支持熊貓法嗎
## 17 [新聞]外送員車禍亡北市勞檢業者涉違反7項職安
## 18 Re:[問卦]foodpanda是不是擋人財路?
## 19 [新聞]美食外送員女車禍摔傷嘉義市屢傳意外
## 20 [新聞]又一起美食外送員車禍女騎士手腳擦傷
## 21 [問卦]台灣外送的會車禍很正常吧?
## 22 [新聞]今日第三起!雲林Foodpanda外送員車禍
## 23 Re:[新聞]勞動部認定Foodpanda、UberEats與外送
## 24 [新聞]外送員屬僱傭關係健保署也加入戰場:得
## 25 [問卦]半夜還在送foodpanda不會太辛苦嗎?
## 26 [問卦]把外送搞臭是不是慣老闆的陰謀?
## 27 [新聞]勞部研擬外送員強制納保意外險
## 28 [新聞]5天3死!外送員撞死違規過馬路老翁
## 29 [問卦]所以現在外送員要加勞健保了
## 30 [問卦]慣老闆沒有奴工用開始搞外送業的卦?
## 31 [新聞]勞檢認定foodpanda、UberEats假承攬真
## 32 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 33 [問卦]其實熊貓成立工會不就好惹??
## 34 Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 35 Re:[新聞]勞部研擬外送員強制納保意外險
## 36 [新聞]美食外送為僱傭律師:用牛刀管理將打擊
## 37 [問卦]外送為什麼被針對
## 38 [問卦]給張麻子送foodpanda會怎樣
## 39 Re:[新聞]勞檢認定foodpanda、UberEats假承攬真
## 40 [新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 41 Re:[新聞]外送員列入雇傭關係後月入10萬夢碎?
## 42 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 43 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 44 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 45 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 46 [新聞]外送爭議蘇貞昌:勞檢不再憂讒畏譏
## 47 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 48 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 49 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 50 [問卦]搞熊貓最後受傷的是經濟
## 51 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 52 [新聞]外送事故5天3死!Lalamove外送員撞死違規
## 53 Re:[新聞]不甩勞動部!foodpanda:不接受僱傭關係
## 54 Re:[新聞]5天3死!外送員撞死違規過馬路老翁
## 55 [問卦]所以國外外送員不會車禍
## 56 [新聞]【血汗外送】狠打臉熊貓! 勞檢驚揭業者兩點內規認證「無良
## 57 [問卦]外送三雄大家偏好哪個????
## 58 Re:[問卦]其實把外送搞「純平台」不就解決了?
## 59 [新聞]外送員5天3死公路總局開罰foodpanda9千
## 60 [新聞]又見外送事故!外送員疑搶黃燈遭「闖紅燈
## 61 Re:[新聞]北市勞工局開罰UberEats認定罹災黃姓
data_panda %>%
filter(artDate == as.Date('2020/04/10')) %>%
distinct(artUrl, .keep_all = TRUE) %>%
select(artTitle)
## artTitle
## 1 [新聞]安心配送再升級!foodpanda發萬瓶酒精給
## 2 [新聞]5業者組外送國家隊救餐飲!UberEats、
## 3 [問卦]外送員也適用紓困方案嗎?
data_panda %>%
filter(artDate >= as.Date("2020-01-21")) %>%
distinct(artUrl, .keep_all = TRUE) %>%filter(word =="武漢"|word =="肺炎"|word =="武漢肺炎"|word =="COVID-19"|word =="covid19"|word =="covid-19")%>%
select(artTitle)
## [1] artTitle
## <0 rows> (or 0-length row.names)