動機與分析目的

現今資訊科技發達,數位化、線上化更是各行各業的發展趨勢。而消費是我們生活人人都會需要的行為,從過去的以物易物,到文明世界的貨幣交易,到現在的線上交易、第三方支付、虛擬貨幣等。人們越來越習慣將錢存在銀行帳戶、電子商務平台的虛擬錢包等地方。因此,本組想要針對近期市場上較熱門行動支付進行研究。

研究目的 • 行動支付產品聲量分析: 找尋討論度較高的行動支付,並針對該支付進行較深入的研究。 • 觀察該行動支付主要討論內容 • 競品分析 資料集描述 行動支付種類五花八門,我們希望能從社群網路上的資訊來分析近期較熱門的行動支付有哪些,這些行動支付又是因為什麼而走紅呢?而社群網路中的PTT批踢踢實業坊為較知名的網路論壇平台,20歲到45歲的大眾為該平台主要使用者,故我們採用PTT行動支付版(mobilepay)的資料為我們研究的資料集。 • 資料來源 : PTT mobilepay版 • 取得管道 : 中山大學文字分析平台 • 資料期間 : 2017/04/29 ~ 2021/04/20

##系統參數設定

Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼
## [1] ""

安裝需要的packages

# echo = T,results = 'hide'
packages = c("dplyr", "tidytext", "jiebaR", "gutenbergr", "stringr", "wordcloud2", "ggplot2", "tidyr", "scales", "widyr", "readr", "reshape2", "NLP", "ggraph", "igraph", "tm", "data.table", "quanteda", "Matrix", "slam", "wordcloud", "topicmodels", "LDAvis", "webshot", "htmlwidgets","servr","tidytext")
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)

讀進library

require(dplyr)
require(tidytext)
require(jiebaR)
require(gutenbergr)
require(stringr)
require(wordcloud2)
require(ggplot2)
require(tidyr)
require(scales)
require(widyr)
require(readr)
require(reshape2)
require(NLP)
require(ggraph)
require(igraph)
require(tm)
require(data.table)
require(quanteda)
require(Matrix)
require(slam)
require(wordcloud)
require(topicmodels)
require(LDAvis)
require(webshot)
require(htmlwidgets)
require(servr)
require(tidytext)

資料載入 資料集的描述:載入的資料是由中山大學管理學院文字分析平台取得,在平台資料選擇下載原始資料所取得之csv檔案。抓取範圍為 2017/04/29 ~ 2021/04/20透過文字分析平台搜尋「行動支付」關鍵字,共搜尋到7787篇文章。

g_csv <- fread("./data/mobile_pay_articleMetaData.CSV", encoding = "UTF-8")
g_csv$artDate <- g_csv$artDate %>% as.Date("%Y/%m/%d")
str(g_csv)
## Classes 'data.table' and 'data.frame':   7787 obs. of  10 variables:
##  $ artTitle  : chr  "[情報]屏東市LINEPayMoney指定店家20%回饋" "Re:[情報]SamsungPay支援HAPPYGO卡驗證+點數查詢" "[請益]台灣pay繳稅抽汽車詢問" "[討論]請問LPM不接受申請改ID的原因?" ...
##  $ artDate   : Date, format: "2020-04-21" "2020-04-22" ...
##  $ artTime   : chr  "16:17:39" "07:15:21" "11:38:36" "16:35:45" ...
##  $ artUrl    : chr  "https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html" "https://www.ptt.cc/bbs/MobilePay/M.1587539731.A.BAD.html" "https://www.ptt.cc/bbs/MobilePay/M.1587641918.A.B38.html" "https://www.ptt.cc/bbs/MobilePay/M.1587659747.A.A4D.html" ...
##  $ artPoster : chr  "cokelon" "Kasamori54" "awei861023" "nijawang" ...
##  $ artCat    : chr  "MobilePay" "MobilePay" "MobilePay" "MobilePay" ...
##  $ commentNum: int  48 7 12 36 55 70 4 298 19 19 ...
##  $ push      : int  30 2 3 11 19 3 3 110 10 8 ...
##  $ boo       : int  0 0 1 0 0 0 0 28 0 0 ...
##  $ sentence  : chr  "屏東市LINE Pay Money樂贈點,筆筆消費享20%點數回饋\nhttps://event-web.line.me/ECTW/article/Nk3ZX8?utm_source=cop"| __truncated__ "吃光原文\n\n最近換了手機後發現登入會員綁定的功能消失了\n剩下掃條碼的方式加入\n但因為我從一開始就沒有實體會員卡\"| __truncated__ "請問活動寫房屋稅、牌照稅、綜所稅\n符合參加資格\n\n那地方稅和燃料稅也能參加嗎?\n\n臉書小編是說地方稅有\n但活動"| __truncated__ "4/23晚上幫家人申請 LINE Pay Money,\n結果一開始填ID的地方失神,然後一直到都驗證完後,\n才發現ID填錯了…\n後來就"| __truncated__ ...
##  - attr(*, ".internal.selfref")=<externalptr>

抓取(2020-01-01~2021-04-22)的資料,並過濾

g_csv <- g_csv %>% 
  filter(artDate >= as.Date("2020-01-01"))

資料處理_保留文章以及日期欄位及去重

data <- g_csv %>% 
  dplyr::select(artDate, sentence) %>% 
  distinct()

資料處理_日期分群,計算每天共有幾篇討論文章

article_count_by_date <- data %>% 
  group_by(artDate) %>% 
  summarise(count = n())
head(article_count_by_date, 20)
## # A tibble: 20 x 2
##    artDate    count
##    <date>     <int>
##  1 2020-01-01     6
##  2 2020-01-02     3
##  3 2020-01-03     5
##  4 2020-01-04     4
##  5 2020-01-05     2
##  6 2020-01-06     5
##  7 2020-01-07     3
##  8 2020-01-08     2
##  9 2020-01-09     3
## 10 2020-01-11     2
## 11 2020-01-12     2
## 12 2020-01-13     5
## 13 2020-01-14     8
## 14 2020-01-15     5
## 15 2020-01-16     7
## 16 2020-01-17     3
## 17 2020-01-18     6
## 18 2020-01-19     2
## 19 2020-01-20     9
## 20 2020-01-21     5

日期折線圖

plot_date <- 
  article_count_by_date %>% 
  ggplot(aes(x = artDate, y = count)) +
  geom_line(color = "#00AFBB", size = 1) +
  geom_vline(xintercept = as.numeric(as.Date("2020-03-24")), col='red', size = 1) +
  geom_vline(xintercept = as.numeric(as.Date("2020-07-15")), col='red', size = 1) +
  geom_vline(xintercept = as.numeric(as.Date("2020-09-01")), col='red', size = 1) +
  geom_vline(xintercept = as.numeric(as.Date("2021-03-31")), col='red', size = 1) +
  geom_vline(xintercept = as.numeric(as.Date("2021-04-01")), col='red', size = 1) +
  scale_x_date(labels = date_format("%Y/%m/%d")) +
  ggtitle("ptt行動支付版 行動支付討論文章數") + 
  xlab("日期") + 
  ylab("數量") + 
  theme(text = element_text(family = "Heiti TC Light")) #加入中文字型設定,避免中文字顯示錯誤。

plot_date
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

斷詞、停用字

jieba_tokenizer <- worker(user="g_dict.txt", stop_word ="stop_words.txt")
g_tokenizer <- function(t) {
  lapply(t, function(x) {
    tokens <- segment(x, jieba_tokenizer)
    tokens <- tokens[nchar(tokens)>1]
    return(tokens)
  })
}

詞頻

g_tokens <- g_csv %>% 
  unnest_tokens(word, sentence, token=g_tokenizer) %>% 
  select(-artTime, -artUrl,-artPoster, - artCat, - commentNum,-push,-boo)
g_tokens_count <- g_tokens %>% 
  group_by(word) %>% 
  summarise(sum = n()) %>% 
  arrange(desc(sum))
head(g_tokens_count)
## # A tibble: 6 x 2
##   word    sum
##   <chr> <int>
## 1 活動   4308
## 2 回饋   4195
## 3 pay    3788
## 4 line   2742
## 5 支付   2222
## 6 使用   1913

#文字雲觀察_從詞頻將不相關的詞語 pay、支付、https、com 排除

wordc_plot <- g_tokens_count %>% 
  filter(word != "pay" & word != "支付" & word != "https" & word != "com") %>% 
  filter(sum > 100) %>% 
  wordcloud2()
wordc_plot

關於行動支付的話題,以長條圖分析大家在討論什麼

g_tokens_by_date <- g_tokens %>% 
  count(artDate, word, sort = TRUE)
  
plot_merge <- g_tokens_by_date %>%
  filter(word != "pay" & word != "支付" & word != "https" & word != "com") %>%
  filter(artDate == as.Date("2021-04-01"))  %>% 
  group_by(artDate) %>% 
  top_n(5, n) %>% 
  ungroup() %>% 
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x=word, y=n, fill = artDate)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  facet_wrap(~artDate, scales="free", ncol = 2) + 
  coord_flip()+
  theme(text = element_text(family = "Heiti TC Light"))
plot_merge
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

2021年4月1日 透過媒體新聞大幅度的宣傳、政府的推動下,此時關於悠遊付的文章字眼提高不少

##以文章區格,建立tf-idf

g_tokens_by_art <- g_tokens %>% 
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artTitle, word, sort = TRUE)
g_total_words_by_art <- g_tokens_by_art %>% 
  group_by(artTitle) %>% 
  summarize(total = sum(n)) %>% 
  arrange(desc(total))
g_tokens_by_art <- left_join(g_tokens_by_art, g_total_words_by_art)
## Joining, by = "artTitle"

過濾掉文章長度少於20個詞

g_words_tf_idf <- g_tokens_by_art %>%
  bind_tf_idf(word, artTitle, n) 
g_words_tf_idf %>% 
  filter(total > 20) %>% 
  arrange(desc(tf_idf))
##                                                       artTitle  word n total
##      1:                   [請益]橘支推薦碼回饋是不是已經沒有了  推薦 7    30
##      2:                           [請益]悠遊卡定期票未到期加購  月票 4    32
##      3:                                  [請益]SpayMst功能問題   mst 6    49
##      4:                                 [討論]橘子支付請款名稱  喜樂 3    27
##      5:                             [請益]悠遊付刪除使用者帳號  刪掉 4    28
##     ---                                                                     
## 111289: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年  成功 1  4241
## 111290: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年  綁定 1  4241
## 111291: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年 https 2  4241
## 111292: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年  消費 1  4241
## 111293: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年   com 1  4241
##                   tf       idf       tf_idf
##      1: 0.2333333333 4.1159486 0.9603880034
##      2: 0.1250000000 6.0969501 0.7621187569
##      3: 0.1224489796 5.8738065 0.7192416127
##      4: 0.1111111111 6.3846321 0.7094035697
##      5: 0.1428571429 4.9182951 0.7026135798
##     ---                                    
## 111289: 0.0002357934 1.7996646 0.0004243491
## 111290: 0.0002357934 1.4358722 0.0003385693
## 111291: 0.0004715869 0.5835213 0.0002751810
## 111292: 0.0002357934 1.0930037 0.0002577231
## 111293: 0.0002357934 0.6491357 0.0001530619

文章總長度大於100個詞

g_words_tf_idf %>% 
  filter(total > 100) %>% 
  arrange(desc(tf_idf))
##                                                      artTitle   word  n total
##     1:                  [情報]LINEPay滿百折30小北/康是美/小屈   門市 33   133
##     2:               [新聞]日本docomo支付服務存款被盜事件頻發 docomo 10   140
##     3:                    [討論]PxPay10/30~12/31活動/凱基入陣   無限 12   131
##     4:        Re:[請益]icashPay能設定一般的icash卡自動加值嗎?    icp 18   245
##     5:                    [新聞]香港AppleWallet八達通正式開通 八達通 21   238
##    ---                                                                       
## 72671: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年   成功  1  4241
## 72672: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年   綁定  1  4241
## 72673: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年  https  2  4241
## 72674: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年   消費  1  4241
## 72675: Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年    com  1  4241
##                  tf       idf       tf_idf
##     1: 0.2481203008 2.6629629 0.6607351433
##     2: 0.0714285714 7.4832444 0.5345174583
##     3: 0.0916030534 5.6914849 0.5213573997
##     4: 0.0734693878 6.7900972 0.4988642867
##     5: 0.0882352941 5.5373343 0.4885883177
##    ---                                    
## 72671: 0.0002357934 1.7996646 0.0004243491
## 72672: 0.0002357934 1.4358722 0.0003385693
## 72673: 0.0004715869 0.5835213 0.0002751810
## 72674: 0.0002357934 1.0930037 0.0002577231
## 72675: 0.0002357934 0.6491357 0.0001530619

這段期間主要是街口、LINEPay、悠遊付三家最多討論話題

查看關於行動支付附近字彙

ngram_11 <- function(t) {
  lapply(t, function(x) {
    tokens <- segment(x, jieba_tokenizer)
    ngram <- ngrams(tokens, 11)
    ngram <- lapply(ngram, paste, collapse = " ")
    unlist(ngram)
  })
}

g_ngram_11 <- g_csv %>%
  select(artUrl, sentence) %>%
  unnest_tokens(ngram, sentence, token = ngram_11) %>%
  filter(!str_detect(ngram, regex("[0-9a-zA-Z]")))
g_ngrams_11_separated <- g_ngram_11 %>%
  separate(ngram, paste0("word", c(1:11),sep=""), sep = " ")
g_ngrams_11_separated
##                                                          artUrl word1 word2
##     1: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html  參加  活動
##     2: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html  活動    前
##     3: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html    前    請
##     4: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html    請  務必
##     5: https://www.ptt.cc/bbs/MobilePay/M.1587485863.A.662.html  務必    先
##    ---                                                                     
## 71601: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html  未來  支付
## 71602: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html  支付  型態
## 71603: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html  型態  即時
## 71604: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html  即時  支付
## 71605: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html  支付  行動
##        word3 word4  word5  word6  word7  word8  word9 word10 word11
##     1:    前    請   務必     先   確認   是否     已   同意   行銷
##     2:    請  務必     先   確認   是否     已   同意   行銷   資訊
##     3:  務必    先   確認   是否     已   同意   行銷   資訊   提供
##     4:    先  確認   是否     已   同意   行銷   資訊   提供   政策
##     5:  確認  是否     已   同意   行銷   資訊   提供   政策   進入
##    ---                                                             
## 71601:  型態  即時   支付   行動   支付   成為 受益者   人們   加速
## 71602:  即時  支付   行動   支付   成為 受益者   人們   加速   拋棄
## 71603:  支付  行動   支付   成為 受益者   人們   加速   拋棄   現金
## 71604:  行動  支付   成為 受益者   人們   加速   拋棄   現金     舊
## 71605:  支付  成為 受益者   人們   加速   拋棄   現金     舊   思維

查看關於行動支付前後五個字彙

g_check_words <- g_ngrams_11_separated %>%
  filter((word6 == "支付"))
g_check_words
##                                                        artUrl  word1  word2
##   1: https://www.ptt.cc/bbs/MobilePay/M.1587736114.A.760.html 說不定   高齡
##   2: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html   使用 便利性
##   3: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html 一卡通   用戶
##   4: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html   轉入   街口
##   5: https://www.ptt.cc/bbs/MobilePay/M.1587900716.A.628.html     店   一併
##  ---                                                                       
## 799: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html 供應商   付款
## 800: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html   改變   人們
## 801: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html     態   支付
## 802: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html   改變   未來
## 803: https://www.ptt.cc/bbs/MobilePay/M.1587472708.A.744.html   支付   型態
##       word3 word4  word5 word6 word7  word8 word9   word10 word11
##   1:   用戶    中 非現金  支付  工具   悠遊    卡       全   聯想
##   2: 金管會  啟動   電子  支付    電     支  電子     票證     電
##   3:     錢  轉入   街口  支付  帳戶   電子  支付     業者   簽下
##   4:   支付  帳戶   電子  支付  業者   簽下  特約       商     店
##   5:   使用  為政   國內  支付  工具   種類  繁多 舉例來說   電子
##  ---                                                             
## 799:   已經  受到   影響  支付  速度   變慢  根本     支付   款項
## 800:   企業  新型     態  支付  了解   改變  未來     支付   型態
## 801:   了解  改變   未來  支付  型態   即時  支付     行動   支付
## 802:   支付  型態   即時  支付  行動   支付  成為   受益者   人們
## 803:   即時  支付   行動  支付  成為 受益者  人們     加速   拋棄

查看關於支付前後五個字彙_長條圖

g_check_words_count <- g_check_words %>%
  melt(id.vars = "artUrl", measure.vars = paste0("word", c(1:11),sep="")) %>%
  rename(word=value) %>%
  filter(variable!="word6") %>%
  filter(!(word %in% stop_words), nchar(word)>1) %>%
  count(word, sort = TRUE)
g_check_words_count %>%
  arrange(desc(abs(n))) %>%
  head(20) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = n > 0)) +
  geom_col(show.legend = FALSE) +
  xlab("出現在「支付」附近的字") +
  ylab("出現次數") +
  coord_flip()+ 
  theme(text = element_text(family = "Heiti TC Light"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Word Correlation

g_words_by_art <- g_csv %>%
  unnest_tokens(word, sentence, token=g_tokenizer) %>%
  filter(!str_detect(word, regex("[0-9]"))) %>%
  count(artUrl, word, sort = TRUE)
g_word_pairs <- g_words_by_art %>%
  pairwise_count(word, artUrl, sort = TRUE)
## Warning: `distinct_()` was deprecated in dplyr 0.7.0.
## Please use `distinct()` instead.
## See vignette('programming') for more help
## Warning: `tbl_df()` was deprecated in dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
g_word_pairs
## # A tibble: 7,478,018 x 3
##    item1 item2     n
##    <chr> <chr> <dbl>
##  1 https com     859
##  2 com   https   859
##  3 imgur com     574
##  4 com   imgur   574
##  5 https 回饋    571
##  6 回饋  https   571
##  7 https 活動    561
##  8 活動  https   561
##  9 https pay     537
## 10 pay   https   537
## # ... with 7,478,008 more rows
g_word_cors <- g_words_by_art %>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE)
g_word_cors %>%
  filter(item1 == "支付")
## # A tibble: 1,016 x 3
##    item1 item2  correlation
##    <chr> <chr>        <dbl>
##  1 支付  行動         0.532
##  2 支付  使用         0.336
##  3 支付  電子         0.294
##  4 支付  現金         0.249
##  5 支付  交易         0.246
##  6 支付  進行         0.240
##  7 支付  工具         0.234
##  8 支付  用戶         0.234
##  9 支付  消費者       0.231
## 10 支付  街口         0.228
## # ... with 1,006 more rows

詞彙之間相關性

seed_words <- c("line")
threshold <- 0.60
remove_words <- g_word_cors %>%
                filter((item1 %in% seed_words|item2 %in% seed_words), correlation>threshold) %>%
                .$item1 %>%
                unique()
set.seed(10)
g_word_cors_new <- g_word_cors %>%
                filter(!(item1 %in% remove_words|item2 %in% remove_words))
g_word_cors_new %>%
  filter(correlation > .4) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) + 
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") +
  theme_void()
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning: ggrepel: 288 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

## 分群 透過詞彙平均tf-idf,去除部分不重要的字

term_avg_tfidf <- g_words_tf_idf %>% 
  group_by(word) %>% 
  summarise(tfidf_avg = mean(tf_idf))
term_avg_tfidf$tfidf_avg %>% summary
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.001765 0.025540 0.048592 0.076531 0.093541 1.247207
term_remove=term_avg_tfidf %>%  
  filter(tfidf_avg<0.02063) %>% 
  .$word
term_remove %>% head
## [1] "<U+5186>拿"   "all"          "announcement" "auto"         "bb"          
## [6] "bbb"
g_dtm = g_words_tf_idf %>%
  filter(!word %in% term_remove) %>%
  cast_dtm(document=artTitle,term=word,value= n)
g_dtm
## <<DocumentTermMatrix (documents: 1778, terms: 14258)>>
## Non-/sparse entries: 99164/25251560
## Sparsity           : 100%
## Maximal term length: 23
## Weighting          : term frequency (tf)
g_dtm_matrix = g_dtm %>% as.data.frame.matrix 
g_dtm_matrix[1:10,1:20]
##                                                        街口 line 公司 繳費 pay
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年  211    0   91    0   0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10%                 0  111    1    1  69
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30)                0   94    6   84  58
## [情報]美廉社用LINEPay滿額最高享10%回饋!                  0   88    2    0  52
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄     81    0   10    0   0
## [情報]美廉社LINEPay加碼滿額送抽抽樂                       0   77    4    0  46
## [情報]LPM指定交通服務6%回饋10/1~10/11                     0   65    3    1  40
## [情報]台中商圈指定商店LinePayMoney享6%回饋                0   60    2    1  35
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋                    0   59    2    1  33
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋                0   45    0    0  33
##                                                        投信 基金 資金 回饋 內部
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年   65   56   55    0   53
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10%                 0    0    0   47    0
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30)                0    0    0   24    0
## [情報]美廉社用LINEPay滿額最高享10%回饋!                  0    0    0   39    0
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄     40   23    0    0    0
## [情報]美廉社LINEPay加碼滿額送抽抽樂                       0    0    0    9    0
## [情報]LPM指定交通服務6%回饋10/1~10/11                     0    0    0   15    0
## [情報]台中商圈指定商店LinePayMoney享6%回饋                0    0    0    8    0
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋                    0    0    0    8    0
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋                0    0    0   54    0
##                                                        活動 證券 金科 google
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年   11   52   49      0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10%                44    0    0      0
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30)               31    0    0      0
## [情報]美廉社用LINEPay滿額最高享10%回饋!                 44    0    0      0
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄      0    2    0      0
## [情報]美廉社LINEPay加碼滿額送抽抽樂                      52    0    0      0
## [情報]LPM指定交通服務6%回饋10/1~10/11                    36    0    0      0
## [情報]台中商圈指定商店LinePayMoney享6%回饋               31    0    0      0
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋                   31    0    0      0
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋               26    0    0      0
##                                                        銀行 條第 事業 控制 px
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年    2   45   44   42  0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10%                 7    0    0    0  0
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30)                1    0    0    0  0
## [情報]美廉社用LINEPay滿額最高享10%回饋!                  3    0    0    0  0
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄      2    0    0    0  0
## [情報]美廉社LINEPay加碼滿額送抽抽樂                       3    0    0    0  0
## [情報]LPM指定交通服務6%回饋10/1~10/11                     2    0    0    0  0
## [情報]台中商圈指定商店LinePayMoney享6%回饋                0    0    0    0  0
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋                    0    0    0    0  0
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋                5    0    0    0  0
##                                                        點數
## Re:[新聞]街口支付遭罰180萬金管會:董事長胡亦嘉停職一年    0
## [情報]12/1~12/31LINEPayx摩斯滿$150最高10%                39
## [情報]LINEPayMoney生活繳費最高2%(7/1~9/30)               16
## [情報]美廉社用LINEPay滿額最高享10%回饋!                 41
## [新聞]「託付寶」再踩金管會紅線街口投信、街口支付急澄      0
## [情報]美廉社LINEPay加碼滿額送抽抽樂                      27
## [情報]LPM指定交通服務6%回饋10/1~10/11                    16
## [情報]台中商圈指定商店LinePayMoney享6%回饋               16
## [情報]寶雅選用LPM消費滿388筆筆享7%回饋                   17
## [情報]歡慶麥當勞上線!用LINEPay最高12%回饋               28

建立LDA模型 統計每篇文章詞頻

g_artid <- g_tokens %>%
  filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>% 
  count(artTitle, word) %>% 
  rename(count=n) %>% 
  mutate(artId = group_indices(., artTitle))
## Warning: The `...` argument of `group_keys()` is deprecated as of dplyr 1.0.0.
## Please `group_by()` first
g_artid
##                                           artTitle   word count artId
##     1:          [心得]7-11刷ApplePay常常多扣好幾筆   一聲     1     1
##     2:          [心得]7-11刷ApplePay常常多扣好幾筆   土司     1     1
##     3:          [心得]7-11刷ApplePay常常多扣好幾筆   已經     1     1
##     4:          [心得]7-11刷ApplePay常常多扣好幾筆   支付     1     1
##     5:          [心得]7-11刷ApplePay常常多扣好幾筆   方便     1     1
##    ---                                                               
## 99431: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額 悠遊付     1  1777
## 99432: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額   推出     1  1777
## 99433: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額   最近     1  1777
## 99434: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額   還會     1  1777
## 99435: 歡慶全家悠遊付上線單筆消費滿百立折20元(已額   額滿     1  1777
reserved_word <- g_artid %>% 
  group_by(word) %>% 
  count() %>% 
  filter(n > 5) %>% 
  unlist()
g_artid <- g_artid %>% 
  filter(word %in% reserved_word)
g_com_dtm <- g_artid %>% cast_dtm(artId, word, count)
g_com_dtm
## <<DocumentTermMatrix (documents: 1776, terms: 2414)>>
## Non-/sparse entries: 79391/4207873
## Sparsity           : 98%
## Maximal term length: 4
## Weighting          : term frequency (tf)

轉為分成兩群的LDA

g_lda <- LDA(g_com_dtm, k = 2, control = list(seed = 1234))
g_topics <- tidy(g_lda, matrix = "beta")
g_topics
## # A tibble: 4,828 x 3
##    topic term       beta
##    <int> <chr>     <dbl>
##  1     1 已經  0.00191  
##  2     2 已經  0.00172  
##  3     1 支付  0.00574  
##  4     2 支付  0.0286   
##  5     1 方便  0.000487 
##  6     2 方便  0.00110  
##  7     1 如題  0.000832 
##  8     2 如題  0.000190 
##  9     1 早餐  0.0000354
## 10     2 早餐  0.000341 
## # ... with 4,818 more rows
g_top_terms <- g_topics %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)
remove_words <- c("支付")
g_top_terms <- g_topics %>%
  filter(! term %in% remove_words) %>% 
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)
g_top_terms %>%
  mutate(term = reorder(term, beta)) %>%
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  coord_flip() +
  theme(text = element_text(family = "Heiti TC Light"))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

可看出在只分為兩群的情況,發現討論到行動支付是以悠遊付一詞熱度最高

查看行動支付議題持續程度

推文最高的文章為:jello好友互發紅包回饋 文章時間為2021/01/24,推文最高的不一定是討論度最高的議題,從文字雲可以看出,主要是以悠遊付、Linepay、街口較多人討論。

plot_date <- 
  article_count_by_date %>% 
  ggplot(aes(x = artDate, y = count)) +
  geom_line(color = "#00AFBB", size = 1) +
  geom_vline(xintercept = as.numeric(as.Date("2021-04-01")), col='red', size = 1) +
  scale_x_date(labels = date_format("%Y/%m/%d")) +
  ggtitle("PTT行動支付版:行動支付討論文章數") + 
  xlab("日期") + 
  ylab("數量") + 
  theme(text = element_text(family = "Heiti TC Light")) #加入中文字型設定,避免中文字顯示錯誤。

plot_date
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

g_oberve <-g_csv %>% 
  select(artTitle, commentNum, push, boo) %>% 
  filter(commentNum >= 100) %>% 
  mutate(p_ratio = push/commentNum, b_ratio = boo/commentNum) %>% 
  arrange(-p_ratio)

結論

這幾月主要是悠遊付,較多鄉民在討論,因為這幾個月悠遊卡公司為了強化「 悠遊付 」電子支付優惠,陸續推出不錯的回饋活動,而且近期活動都是針對超商、超市、3C、美食等消費區塊大送儲值金,換算回饋比例高出其他行動支付許多。