現今資訊科技發達,數位化、線上化更是各行各業的發展趨勢。而消費是我們生活人人都會需要的行為,從過去的以物易物,到文明世界的貨幣交易,到現在的線上交易、第三方支付、虛擬貨幣等。人們越來越習慣將錢存在銀行帳戶、電子商務平台的虛擬錢包等地方。因此,本組想要針對近期市場上較熱門行動支付進行研究。
研究目的 • 行動支付產品聲量分析: 找尋討論度較高的行動支付,並針對該支付進行較深入的研究。 • 觀察該行動支付主要討論內容 • 競品分析 資料集描述 行動支付種類五花八門,我們希望能從社群網路上的資訊來分析近期較熱門的行動支付有哪些,這些行動支付又是因為什麼而走紅呢?而社群網路中的PTT批踢踢實業坊為較知名的網路論壇平台,20歲到45歲的大眾為該平台主要使用者,故我們採用PTT行動支付版(mobilepay)的資料為我們研究的資料集。 • 資料來源 : PTT mobilepay版 • 取得管道 : 中山大學文字分析平台 • 資料期間 : 2017/04/29 ~ 2021/04/20
##系統參數設定
Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## [1] ""
安裝需要的packages
# echo = T,results = 'hide'
= c("dplyr", "tidytext", "jiebaR", "gutenbergr", "stringr", "wordcloud2", "ggplot2", "tidyr", "scales", "widyr", "readr", "reshape2", "NLP", "ggraph", "igraph", "tm", "data.table", "quanteda", "Matrix", "slam", "wordcloud", "topicmodels", "LDAvis", "webshot", "htmlwidgets","servr","tidytext")
packages = as.character(installed.packages()[,1])
existing for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)
讀進library
require(dplyr)
require(tidytext)
require(jiebaR)
require(gutenbergr)
require(stringr)
require(wordcloud2)
require(ggplot2)
require(tidyr)
require(scales)
require(widyr)
require(readr)
require(reshape2)
require(NLP)
require(ggraph)
require(igraph)
require(tm)
require(data.table)
require(quanteda)
require(Matrix)
require(slam)
require(wordcloud)
require(topicmodels)
require(LDAvis)
require(webshot)
require(htmlwidgets)
require(servr)
require(tidytext)
資料載入 資料集的描述:載入的資料是由中山大學管理學院文字分析平台取得,在平台資料選擇下載原始資料所取得之csv檔案。抓取範圍為 2017/04/29 ~ 2021/04/20透過文字分析平台搜尋「行動支付」關鍵字,共搜尋到7787篇文章。
<- fread("./data/mobile_pay_articleMetaData.CSV", encoding = "UTF-8")
g_csv $artDate <- g_csv$artDate %>% as.Date("%Y/%m/%d")
g_csvstr(g_csv)
## Classes 'data.table' and 'data.frame': 7787 obs. of 10 variables:
## $ artTitle : chr "[情報]屏東市LINEPayMoney指定店家20%回饋" "Re:[情報]SamsungPay支援HAPPYGO卡驗證+點數查詢" "[請益]台灣pay繳稅抽汽車詢問" "[討論]請問LPM不接受申請改ID的原因?" ...
## $ artDate : Date, format: "2020-04-21" "2020-04-22" ...
## $ artTime : chr "16:17:39" "07:15:21" "11:38:36" "16:35:45" ...
## $ artUrl : chr "https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html" "https://www.ptt.cc/bbs/MobilePay/M.1587539731.A.BAD.html" "https://www.ptt.cc/bbs/MobilePay/M.1587641918.A.B38.html" "https://www.ptt.cc/bbs/MobilePay/M.1587659747.A.A4D.html" ...
## $ artPoster : chr "cokelon" "Kasamori54" "awei861023" "nijawang" ...
## $ artCat : chr "MobilePay" "MobilePay" "MobilePay" "MobilePay" ...
## $ commentNum: int 48 7 12 36 55 70 4 298 19 19 ...
## $ push : int 30 2 3 11 19 3 3 110 10 8 ...
## $ boo : int 0 0 1 0 0 0 0 28 0 0 ...
## $ sentence : chr "屏東市LINE Pay Money樂贈點,筆筆消費享20%點數回饋\nhttps://event-web.line.me/ECTW/article/Nk3ZX8?utm_source=cop"| __truncated__ "吃光原文\n\n最近換了手機後發現登入會員綁定的功能消失了\n剩下掃條碼的方式加入\n但因為我從一開始就沒有實體會員卡\"| __truncated__ "請問活動寫房屋稅、牌照稅、綜所稅\n符合參加資格\n\n那地方稅和燃料稅也能參加嗎?\n\n臉書小編是說地方稅有\n但活動"| __truncated__ "4/23晚上幫家人申請 LINE Pay Money,\n結果一開始填ID的地方失神,然後一直到都驗證完後,\n才發現ID填錯了…\n後來就"| __truncated__ ...
## - attr(*, ".internal.selfref")=<externalptr>
抓取(2020-01-01~2021-04-22)的資料,並過濾
<- g_csv %>%
g_csv filter(artDate >= as.Date("2020-01-01"))
資料處理_保留文章以及日期欄位及去重
<- g_csv %>%
data ::select(artDate, sentence) %>%
dplyrdistinct()
資料處理_日期分群,計算每天共有幾篇討論文章
<- data %>%
article_count_by_date group_by(artDate) %>%
summarise(count = n())
head(article_count_by_date, 20)
## # A tibble: 20 x 2
## artDate count
## <date> <int>
## 1 2020-01-01 6
## 2 2020-01-02 3
## 3 2020-01-03 5
## 4 2020-01-04 4
## 5 2020-01-05 2
## 6 2020-01-06 5
## 7 2020-01-07 3
## 8 2020-01-08 2
## 9 2020-01-09 3
## 10 2020-01-11 2
## 11 2020-01-12 2
## 12 2020-01-13 5
## 13 2020-01-14 8
## 14 2020-01-15 5
## 15 2020-01-16 7
## 16 2020-01-17 3
## 17 2020-01-18 6
## 18 2020-01-19 2
## 19 2020-01-20 9
## 20 2020-01-21 5
<-
plot_date %>%
article_count_by_date ggplot(aes(x = artDate, y = count)) +
geom_line(color = "#00AFBB", size = 1) +
geom_vline(xintercept = as.numeric(as.Date("2020-03-24")), col='red', size = 1) +
geom_vline(xintercept = as.numeric(as.Date("2020-07-15")), col='red', size = 1) +
geom_vline(xintercept = as.numeric(as.Date("2020-09-01")), col='red', size = 1) +
geom_vline(xintercept = as.numeric(as.Date("2021-03-31")), col='red', size = 1) +
geom_vline(xintercept = as.numeric(as.Date("2021-04-01")), col='red', size = 1) +
scale_x_date(labels = date_format("%Y/%m/%d")) +
ggtitle("ptt行動支付版 行動支付討論文章數") +
xlab("日期") +
ylab("數量") +
theme(text = element_text(family = "Heiti TC Light")) #加入中文字型設定,避免中文字顯示錯誤。
plot_date
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
<- worker(user="g_dict.txt", stop_word ="stop_words.txt")
jieba_tokenizer <- function(t) {
g_tokenizer lapply(t, function(x) {
<- segment(x, jieba_tokenizer)
tokens <- tokens[nchar(tokens)>1]
tokens return(tokens)
}) }
<- g_csv %>%
g_tokens unnest_tokens(word, sentence, token=g_tokenizer) %>%
select(-artTime, -artUrl,-artPoster, - artCat, - commentNum,-push,-boo)
<- g_tokens %>%
g_tokens_count group_by(word) %>%
summarise(sum = n()) %>%
arrange(desc(sum))
head(g_tokens_count)
## # A tibble: 6 x 2
## word sum
## <chr> <int>
## 1 活動 4308
## 2 回饋 4195
## 3 pay 3788
## 4 line 2742
## 5 支付 2222
## 6 使用 1913
#文字雲觀察_從詞頻將不相關的詞語 pay、支付、https、com 排除
<- g_tokens_count %>%
wordc_plot filter(word != "pay" & word != "支付" & word != "https" & word != "com") %>%
filter(sum > 100) %>%
wordcloud2()
wordc_plot
關於行動支付的話題,以長條圖分析大家在討論什麼
<- g_tokens %>%
g_tokens_by_date count(artDate, word, sort = TRUE)
<- g_tokens_by_date %>%
plot_merge filter(word != "pay" & word != "支付" & word != "https" & word != "com") %>%
filter(artDate == as.Date("2021-04-01")) %>%
group_by(artDate) %>%
top_n(5, n) %>%
ungroup() %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(x=word, y=n, fill = artDate)) +
geom_col(show.legend = FALSE) +
labs(x = NULL, y = NULL) +
facet_wrap(~artDate, scales="free", ncol = 2) +
coord_flip()+
theme(text = element_text(family = "Heiti TC Light"))
plot_merge
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
2021年4月1日 透過媒體新聞大幅度的宣傳、政府的推動下,此時關於悠遊付的文章字眼提高不少
##以文章區格,建立tf-idf
<- g_tokens %>%
g_tokens_by_art filter(!str_detect(word, regex("[0-9]"))) %>%
count(artTitle, word, sort = TRUE)
<- g_tokens_by_art %>%
g_total_words_by_art group_by(artTitle) %>%
summarize(total = sum(n)) %>%
arrange(desc(total))
<- left_join(g_tokens_by_art, g_total_words_by_art) g_tokens_by_art
## Joining, by = "artTitle"
過濾掉文章長度少於20個詞
<- g_tokens_by_art %>%
g_words_tf_idf bind_tf_idf(word, artTitle, n)
%>%
g_words_tf_idf filter(total > 20) %>%
arrange(desc(tf_idf))
## artTitle word n total
## 1: [請益]橘支推薦碼回饋是不是已經沒有了 推薦 7 30
## 2: [請益]悠遊卡定期票未到期加購 月票 4 32
## 3: [請益]SpayMst功能問題 mst 6 49
## 4: [討論]橘子支付請款名稱 喜樂 3 27
## 5: [請益]悠遊付刪除使用者帳號 刪掉 4 28
## ---
## 111289: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 成功 1 4241
## 111290: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 綁定 1 4241
## 111291: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 https 2 4241
## 111292: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 消費 1 4241
## 111293: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 com 1 4241
## tf idf tf_idf
## 1: 0.2333333333 4.1159486 0.9603880034
## 2: 0.1250000000 6.0969501 0.7621187569
## 3: 0.1224489796 5.8738065 0.7192416127
## 4: 0.1111111111 6.3846321 0.7094035697
## 5: 0.1428571429 4.9182951 0.7026135798
## ---
## 111289: 0.0002357934 1.7996646 0.0004243491
## 111290: 0.0002357934 1.4358722 0.0003385693
## 111291: 0.0004715869 0.5835213 0.0002751810
## 111292: 0.0002357934 1.0930037 0.0002577231
## 111293: 0.0002357934 0.6491357 0.0001530619
文章總長度大於100個詞
%>%
g_words_tf_idf filter(total > 100) %>%
arrange(desc(tf_idf))
## artTitle word n total
## 1: [情報]LINEPay滿百折30小北/康是美/小屈 門市 33 133
## 2: [新聞]日本docomo支付服務存款被盜事件頻發 docomo 10 140
## 3: [討論]PxPay10/30~12/31活動/凱基入陣 無限 12 131
## 4: Re:[請益]icashPay能設定一般的icash卡自動加值嗎? icp 18 245
## 5: [新聞]香港AppleWallet八達通正式開通 八達通 21 238
## ---
## 72671: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 成功 1 4241
## 72672: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 綁定 1 4241
## 72673: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 https 2 4241
## 72674: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 消費 1 4241
## 72675: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 com 1 4241
## tf idf tf_idf
## 1: 0.2481203008 2.6629629 0.6607351433
## 2: 0.0714285714 7.4832444 0.5345174583
## 3: 0.0916030534 5.6914849 0.5213573997
## 4: 0.0734693878 6.7900972 0.4988642867
## 5: 0.0882352941 5.5373343 0.4885883177
## ---
## 72671: 0.0002357934 1.7996646 0.0004243491
## 72672: 0.0002357934 1.4358722 0.0003385693
## 72673: 0.0004715869 0.5835213 0.0002751810
## 72674: 0.0002357934 1.0930037 0.0002577231
## 72675: 0.0002357934 0.6491357 0.0001530619
這段期間主要是街口、LINEPay、悠遊付三家最多討論話題
查看關於行動支付附近字彙
<- function(t) {
ngram_11 lapply(t, function(x) {
<- segment(x, jieba_tokenizer)
tokens <- ngrams(tokens, 11)
ngram <- lapply(ngram, paste, collapse = " ")
ngram unlist(ngram)
})
}
<- g_csv %>%
g_ngram_11 select(artUrl, sentence) %>%
unnest_tokens(ngram, sentence, token = ngram_11) %>%
filter(!str_detect(ngram, regex("[0-9a-zA-Z]")))
<- g_ngram_11 %>%
g_ngrams_11_separated separate(ngram, paste0("word", c(1:11),sep=""), sep = " ")
g_ngrams_11_separated
## artUrl word1 word2
## 1: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html 參加 活動
## 2: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html 活動 前
## 3: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html 前 請
## 4: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html 請 務必
## 5: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html 務必 先
## ---
## 71601: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 未來 支付
## 71602: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 支付 型態
## 71603: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 型態 即時
## 71604: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 即時 支付
## 71605: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 支付 行動
## word3 word4 word5 word6 word7 word8 word9 word10 word11
## 1: 前 請 務必 先 確認 是否 已 同意 行銷
## 2: 請 務必 先 確認 是否 已 同意 行銷 資訊
## 3: 務必 先 確認 是否 已 同意 行銷 資訊 提供
## 4: 先 確認 是否 已 同意 行銷 資訊 提供 政策
## 5: 確認 是否 已 同意 行銷 資訊 提供 政策 進入
## ---
## 71601: 型態 即時 支付 行動 支付 成為 受益者 人們 加速
## 71602: 即時 支付 行動 支付 成為 受益者 人們 加速 拋棄
## 71603: 支付 行動 支付 成為 受益者 人們 加速 拋棄 現金
## 71604: 行動 支付 成為 受益者 人們 加速 拋棄 現金 舊
## 71605: 支付 成為 受益者 人們 加速 拋棄 現金 舊 思維
查看關於行動支付前後五個字彙
<- g_ngrams_11_separated %>%
g_check_words filter((word6 == "支付"))
g_check_words
## artUrl word1 word2
## 1: https://www.ptt.cc/bbs/MobilePay/M.1587736114.A.760.html 說不定 高齡
## 2: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html 使用 便利性
## 3: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html 一卡通 用戶
## 4: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html 轉入 街口
## 5: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html 店 一併
## ---
## 799: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 供應商 付款
## 800: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 改變 人們
## 801: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 態 支付
## 802: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 改變 未來
## 803: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 支付 型態
## word3 word4 word5 word6 word7 word8 word9 word10 word11
## 1: 用戶 中 非現金 支付 工具 悠遊 卡 全 聯想
## 2: 金管會 啟動 電子 支付 電 支 電子 票證 電
## 3: 錢 轉入 街口 支付 帳戶 電子 支付 業者 簽下
## 4: 支付 帳戶 電子 支付 業者 簽下 特約 商 店
## 5: 使用 為政 國內 支付 工具 種類 繁多 舉例來說 電子
## ---
## 799: 已經 受到 影響 支付 速度 變慢 根本 支付 款項
## 800: 企業 新型 態 支付 了解 改變 未來 支付 型態
## 801: 了解 改變 未來 支付 型態 即時 支付 行動 支付
## 802: 支付 型態 即時 支付 行動 支付 成為 受益者 人們
## 803: 即時 支付 行動 支付 成為 受益者 人們 加速 拋棄
查看關於支付前後五個字彙_長條圖
<- g_check_words %>%
g_check_words_count melt(id.vars = "artUrl", measure.vars = paste0("word", c(1:11),sep="")) %>%
rename(word=value) %>%
filter(variable!="word6") %>%
filter(!(word %in% stop_words), nchar(word)>1) %>%
count(word, sort = TRUE)
%>%
g_check_words_count arrange(desc(abs(n))) %>%
head(20) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = n > 0)) +
geom_col(show.legend = FALSE) +
xlab("出現在「支付」附近的字") +
ylab("出現次數") +
coord_flip()+
theme(text = element_text(family = "Heiti TC Light"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
<- g_csv %>%
g_words_by_art unnest_tokens(word, sentence, token=g_tokenizer) %>%
filter(!str_detect(word, regex("[0-9]"))) %>%
count(artUrl, word, sort = TRUE)
<- g_words_by_art %>%
g_word_pairs pairwise_count(word, artUrl, sort = TRUE)
## Warning: `distinct_()` was deprecated in dplyr 0.7.0.
## Please use `distinct()` instead.
## See vignette('programming') for more help
## Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
g_word_pairs
## # A tibble: 7,478,018 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 https com 859
## 2 com https 859
## 3 imgur com 574
## 4 com imgur 574
## 5 https 回饋 571
## 6 回饋 https 571
## 7 https 活動 561
## 8 活動 https 561
## 9 https pay 537
## 10 pay https 537
## # ... with 7,478,008 more rows
<- g_words_by_art %>%
g_word_cors group_by(word) %>%
filter(n() >= 20) %>%
pairwise_cor(word, artUrl, sort = TRUE)
%>%
g_word_cors filter(item1 == "支付")
## # A tibble: 1,016 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 支付 行動 0.532
## 2 支付 使用 0.336
## 3 支付 電子 0.294
## 4 支付 現金 0.249
## 5 支付 交易 0.246
## 6 支付 進行 0.240
## 7 支付 工具 0.234
## 8 支付 用戶 0.234
## 9 支付 消費者 0.231
## 10 支付 街口 0.228
## # ... with 1,006 more rows
<- c("line")
seed_words <- 0.60
threshold <- g_word_cors %>%
remove_words filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
$item1 %>%
.unique()
set.seed(10)
<- g_word_cors %>%
g_word_cors_new filter(!(item1 %in% remove_words|item2 %in% remove_words))
%>%
g_word_cors_new filter(correlation > .4) %>%
graph_from_data_frame() %>%
ggraph(layout = "fr") +
geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
geom_node_point(color = "lightblue", size = 3) +
geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning: ggrepel: 288 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## 分群 透過詞彙平均tf-idf,去除部分不重要的字
<- g_words_tf_idf %>%
term_avg_tfidf group_by(word) %>%
summarise(tfidf_avg = mean(tf_idf))
$tfidf_avg %>% summary term_avg_tfidf
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.001765 0.025540 0.048592 0.076531 0.093541 1.247207
=term_avg_tfidf %>%
term_removefilter(tfidf_avg<0.02063) %>%
$word
.%>% head term_remove
## [1] "<U+5186>拿" "all" "announcement" "auto" "bb"
## [6] "bbb"
= g_words_tf_idf %>%
g_dtm filter(!word %in% term_remove) %>%
cast_dtm(document=artTitle,term=word,value= n)
g_dtm
## <<DocumentTermMatrix (documents: 1778, terms: 14258)>>
## Non-/sparse entries: 99164/25251560
## Sparsity : 100%
## Maximal term length: 23
## Weighting : term frequency (tf)
= g_dtm %>% as.data.frame.matrix
g_dtm_matrix 1:10,1:20] g_dtm_matrix[
## 街口 line 公司 繳費 pay
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 211 0 91 0 0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10% 0 111 1 1 69
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30) 0 94 6 84 58
## [情報]美廉社用LINEPay滿額最高享10%回饋! 0 88 2 0 52
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄 81 0 10 0 0
## [情報]美廉社LINEPay加碼滿額送抽抽樂 0 77 4 0 46
## [情報]LPM指定交通服務6%回饋10/1~10/11 0 65 3 1 40
## [情報]台中商圈指定商店LinePayMoney享6%回饋 0 60 2 1 35
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋 0 59 2 1 33
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋 0 45 0 0 33
## 投信 基金 資金 回饋 內部
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 65 56 55 0 53
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10% 0 0 0 47 0
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30) 0 0 0 24 0
## [情報]美廉社用LINEPay滿額最高享10%回饋! 0 0 0 39 0
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄 40 23 0 0 0
## [情報]美廉社LINEPay加碼滿額送抽抽樂 0 0 0 9 0
## [情報]LPM指定交通服務6%回饋10/1~10/11 0 0 0 15 0
## [情報]台中商圈指定商店LinePayMoney享6%回饋 0 0 0 8 0
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋 0 0 0 8 0
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋 0 0 0 54 0
## 活動 證券 金科 google
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 11 52 49 0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10% 44 0 0 0
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30) 31 0 0 0
## [情報]美廉社用LINEPay滿額最高享10%回饋! 44 0 0 0
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄 0 2 0 0
## [情報]美廉社LINEPay加碼滿額送抽抽樂 52 0 0 0
## [情報]LPM指定交通服務6%回饋10/1~10/11 36 0 0 0
## [情報]台中商圈指定商店LinePayMoney享6%回饋 31 0 0 0
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋 31 0 0 0
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋 26 0 0 0
## 銀行 條第 事業 控制 px
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 2 45 44 42 0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10% 7 0 0 0 0
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30) 1 0 0 0 0
## [情報]美廉社用LINEPay滿額最高享10%回饋! 3 0 0 0 0
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄 2 0 0 0 0
## [情報]美廉社LINEPay加碼滿額送抽抽樂 3 0 0 0 0
## [情報]LPM指定交通服務6%回饋10/1~10/11 2 0 0 0 0
## [情報]台中商圈指定商店LinePayMoney享6%回饋 0 0 0 0 0
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋 0 0 0 0 0
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋 5 0 0 0 0
## 點數
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10% 39
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30) 16
## [情報]美廉社用LINEPay滿額最高享10%回饋! 41
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄 0
## [情報]美廉社LINEPay加碼滿額送抽抽樂 27
## [情報]LPM指定交通服務6%回饋10/1~10/11 16
## [情報]台中商圈指定商店LinePayMoney享6%回饋 16
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋 17
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋 28
<- g_tokens %>%
g_artid filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
count(artTitle, word) %>%
rename(count=n) %>%
mutate(artId = group_indices(., artTitle))
## Warning: The `...` argument of `group_keys()` is deprecated as of dplyr 1.0.0.
## Please `group_by()` first
g_artid
## artTitle word count artId
## 1: [心得]7-11刷ApplePay常常多扣好幾筆 一聲 1 1
## 2: [心得]7-11刷ApplePay常常多扣好幾筆 土司 1 1
## 3: [心得]7-11刷ApplePay常常多扣好幾筆 已經 1 1
## 4: [心得]7-11刷ApplePay常常多扣好幾筆 支付 1 1
## 5: [心得]7-11刷ApplePay常常多扣好幾筆 方便 1 1
## ---
## 99431: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額 悠遊付 1 1777
## 99432: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額 推出 1 1777
## 99433: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額 最近 1 1777
## 99434: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額 還會 1 1777
## 99435: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額 額滿 1 1777
<- g_artid %>%
reserved_word group_by(word) %>%
count() %>%
filter(n > 5) %>%
unlist()
<- g_artid %>%
g_artid filter(word %in% reserved_word)
<- g_artid %>% cast_dtm(artId, word, count)
g_com_dtm g_com_dtm
## <<DocumentTermMatrix (documents: 1776, terms: 2414)>>
## Non-/sparse entries: 79391/4207873
## Sparsity : 98%
## Maximal term length: 4
## Weighting : term frequency (tf)
<- LDA(g_com_dtm, k = 2, control = list(seed = 1234)) g_lda
<- tidy(g_lda, matrix = "beta")
g_topics g_topics
## # A tibble: 4,828 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 已經 0.00191
## 2 2 已經 0.00172
## 3 1 支付 0.00574
## 4 2 支付 0.0286
## 5 1 方便 0.000487
## 6 2 方便 0.00110
## 7 1 如題 0.000832
## 8 2 如題 0.000190
## 9 1 早餐 0.0000354
## 10 2 早餐 0.000341
## # ... with 4,818 more rows
<- g_topics %>%
g_top_terms group_by(topic) %>%
top_n(10, beta) %>%
ungroup() %>%
arrange(topic, -beta)
<- c("支付")
remove_words <- g_topics %>%
g_top_terms filter(! term %in% remove_words) %>%
group_by(topic) %>%
top_n(10, beta) %>%
ungroup() %>%
arrange(topic, -beta)
%>%
g_top_terms mutate(term = reorder(term, beta)) %>%
ggplot(aes(term, beta, fill = factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
coord_flip() +
theme(text = element_text(family = "Heiti TC Light"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
可看出在只分為兩群的情況,發現討論到行動支付是以悠遊付一詞熱度最高
推文最高的文章為:jello好友互發紅包回饋 文章時間為2021/01/24,推文最高的不一定是討論度最高的議題,從文字雲可以看出,主要是以悠遊付、Linepay、街口較多人討論。
<-
plot_date %>%
article_count_by_date ggplot(aes(x = artDate, y = count)) +
geom_line(color = "#00AFBB", size = 1) +
geom_vline(xintercept = as.numeric(as.Date("2021-04-01")), col='red', size = 1) +
scale_x_date(labels = date_format("%Y/%m/%d")) +
ggtitle("PTT行動支付版:行動支付討論文章數") +
xlab("日期") +
ylab("數量") +
theme(text = element_text(family = "Heiti TC Light")) #加入中文字型設定,避免中文字顯示錯誤。
plot_date
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
<-g_csv %>%
g_oberve select(artTitle, commentNum, push, boo) %>%
filter(commentNum >= 100) %>%
mutate(p_ratio = push/commentNum, b_ratio = boo/commentNum) %>%
arrange(-p_ratio)
這幾月主要是悠遊付,較多鄉民在討論,因為這幾個月悠遊卡公司為了強化「 悠遊付 」電子支付優惠,陸續推出不錯的回饋活動,而且近期活動都是針對超商、超市、3C、美食等消費區塊大送儲值金,換算回饋比例高出其他行動支付許多。