一、專案簡介

1.動機

2020年全年行動支付交易金額高達4,230億元，創下有統計以來新高紀錄，電子支付使用人數亦突破千萬人，表示民眾支付習慣的改變，除了過往以現金支付、信用卡支付的付款行為，也愈來愈多的人以手機支付APP作消費，只要「嗶」一下，不用帶現金，不用找零，又可享受業者五花八門的回饋方案，多「嗶」多賺。
此外，行動支付機構百家爭鳴，可支付的商家、通路及使用場域也大增，更增加了民眾使用行動支付作交易的意願。本組想藉此了解網路社群對各行動支付的討論議題是否有差異；若有，則進一步探究造成差異的原因，並觀察哪些通路推出的行動支付內容較受關注，最後利用情緒分析來探討行動支付的優惠議題與問題。

2.分析目的

主要行動支付網路討論聲量
主要行動支付種類比較
網路社群較關注哪些通路所推動的行動支付優惠、使用等內容
透過情緒分析探討行動支付常見的問題及優惠

3.資料集

資料來源：PTT MobilePay版
資料區間：2020/01~2021/03 所有文章
資料數量：發文：1800篇、評論71990篇

系統參數設定

# Switch every locale category to Traditional Chinese (UTF-8) so CJK text is not garbled.
Sys.setlocale("LC_ALL", locale = "zh_TW.UTF-8")

## [1] ""

安裝需要的packages

# Install every CRAN package the report needs that is not available yet.
# (knitr chunk options used for this chunk: echo = T, results = 'hide')
# jiebaR and NLP are attached further down, so they are listed here as well.
# NOTE(review): limma is also attached below but presumably comes from
# Bioconductor (BiocManager::install("limma")) — confirm before adding it here.
packages <- c(
  "dplyr", "tidytext", "stringr", "wordcloud2", "ggplot2", "readr",
  "data.table", "reshape2", "wordcloud", "tidyr", "scales", "ggraph",
  "igraph", "widyr", "BiocManager", "TTR", "plotly", "showtext",
  "jiebaR", "NLP"
)
existing <- rownames(installed.packages())
missing_packages <- setdiff(packages, existing)
# One vectorized call instead of one install.packages() call per package.
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

讀進library

library(dplyr)
library(stringr)
library(tidytext)
library(wordcloud2)
library(data.table)
library(ggplot2)
library(reshape2)
library(wordcloud)
library(tidyr)
library(readr)
require(NLP)
require(jiebaR)
require(ggraph)
require(igraph)
require(scales)
require(reshape2)
require(widyr)
require(limma)
require(TTR)
require(plotly) 
# BiocManager::install("limma")

# Read the article metadata and clean the body text:
#   * runs of blank lines become a sentence break, single newlines are dropped
#   * URLs are replaced by a space
#   * body and title are lower-cased
#   * the PTT news-template boilerplate phrases are removed
# In the boilerplate pattern the longer phrase "備註請放最後面" is listed before
# "備註" so it is removed as a whole, and the parentheses around "或短網址"
# (half- or full-width) are matched literally instead of acting as a regex group.
metaData <- fread("../data/pay_articleMetaData.csv", encoding = "UTF-8") %>%
  mutate(sentence = gsub("[\n]{2,}", "。", sentence)) %>%
  mutate(sentence = gsub("\n", "", sentence)) %>%
  mutate(sentence = gsub("http(s)?[-:\\/A-Za-z0-9\\.]+", " ", sentence)) %>%
  mutate(sentence = tolower(sentence)) %>%
  mutate(artTitle = tolower(artTitle)) %>%
  mutate(sentence = gsub(
    paste0(
      "媒體來源|記者署名|完整新聞標題|完整新聞內文|完整新聞連結|",
      "[(（]或短網址[)）]|或短網址|備註請放最後面|備註|違者新聞文章刪除"
    ),
    "",
    sentence
  ))

# Remove spaces from the body and the title so that multi-word English names
# (e.g. "line pay") are segmented as a single token.
metaData$sentence <- metaData$sentence %>%
  gsub(" ", "", .) %>%
  gsub("  ", "", .)
metaData$artTitle <- metaData$artTitle %>%
  gsub(" ", "", .) %>%
  gsub("  ", "", .)

# Give every article a running serial number and create the placeholder columns
# (post category and the two wallet-mention flags) that are filled in next.
# row_number() is used instead of 1:n() so an empty table does not yield c(1, 0).
metaData <- metaData %>%
  mutate(
    seqno = row_number(),
    class = "",
    linepay = 0,
    streetpay = 0
  )

# Tag every article with its post category and flag mentions of the two wallets.
# The category is the first label, in the priority order below, that occurs in
# the title; titles containing none of them fall back to "其他".
class_labels <- c("情報", "請益", "討論", "心得", "閒聊", "優惠", "新聞", "公告")
article_class <- rep("其他", nrow(metaData))
# Apply labels from lowest to highest priority so that an earlier label in
# class_labels overwrites a later one, matching an if / else-if chain.
for (label in rev(class_labels)) {
  article_class[grepl(label, metaData$artTitle)] <- label
}
metaData$class <- article_class

# 1 when the (lower-cased, space-free) body mentions the wallet, otherwise 0.
metaData$linepay <- as.numeric(grepl("line", metaData$sentence))
metaData$streetpay <- as.numeric(grepl("街口", metaData$sentence))

# Load the comments and attach each one to its article's metadata row
# (articles are kept even when they have no comment).
reviews <- fread("../data/pay_articleReviews.csv", encoding = "UTF-8")
reviews <- metaData %>%
  left_join(select(reviews, artUrl, cmtContent), by = "artUrl")

### Distribution of posted articles

# Bar chart: number of articles per post category.
class_counts <- count(metaData, class, sort = TRUE)

ggplot(class_counts, aes(x = class, y = n)) +
  geom_col() +
  labs(x = "發文種類", y = "篇數 ") +
  theme(text = element_text(size = 14))

情報類發文最多，近800筆；請益類次之，約500筆。

二、行動支付資料總覽

(1). 文章斷詞

設定斷詞引擎

# jieba worker using the project's user dictionary and stop-word list.
jieba_tokenizer <- worker(
  user = "../dict/mp_dict.txt",
  stop_word = "../dict/mp_stop_words.txt"
)

# Tokenizer for unnest_tokens(): takes a character vector and returns a list
# with one character vector of tokens per input element.
customized_tokenizer <- function(t) {
  lapply(t, function(doc) segment(doc, jieba_tokenizer))
}

# Tokenize article bodies and comments separately ...
meta_tokens <- unnest_tokens(
  metaData, word, sentence, token = customized_tokenizer
)
review_tokens <- unnest_tokens(
  reviews, word, cmtContent, token = customized_tokenizer
)

# ... then stack the date / URL / token columns of both into one table.
all_tokens <- rbind(
  select(meta_tokens, artDate, artUrl, word),
  select(review_tokens, artDate, artUrl, word)
)

(2). 資料基本清理

# Parse the article date column.
all_tokens$artDate <- as.Date(all_tokens$artDate, "%Y/%m/%d")

# Map the numeric spellings of 7-ELEVEN to its Chinese name before the
# punctuation / digit filters below would discard them.
all_tokens$word[all_tokens$word %in% c("7-11", "711")] <- "統一超商"

# Drop tokens containing punctuation or digits and single-character tokens.
# English letters are kept on purpose (linepay, linepaymoney, ubear, ...).
all_tokens <- all_tokens %>%
  filter(
    !grepl("[[:punct:]]", word),
    !grepl("[0-9]", word),
    nchar(word) > 1
  )

# Normalise wallet / shop aliases to one canonical token.
#
# str: character vector (or factor) of tokens.
# Returns a vector of the same length and type in which every alias listed
# below is replaced by its canonical form; other values, including NA, are
# returned unchanged. "xd" is mapped to the empty string.
synonym <- function(str) {
  canonical <- c(
    "lp" = "linepay",
    "gp" = "googlepay",
    "gpay" = "googlepay",
    "sp" = "samsungpay",
    "hgpay" = "happygopay",
    "fami" = "famipay",
    "台灣行動支付" = "台灣pay",
    "pi" = "pipay",
    "pi拍錢包" = "pipay",
    "pi錢包" = "pipay",
    "拍錢包" = "pipay",
    "街口" = "街口支付",
    "xd" = "",
    "seven" = "統一超商",
    "小七" = "統一超商",
    "全家便利商店" = "全家"
  )

  # Factors are recoded through their levels so the result stays a factor.
  if (is.factor(str)) {
    levels(str) <- synonym(levels(str))
    return(str)
  }

  hit <- match(str, names(canonical))
  found <- !is.na(hit)
  str[found] <- unname(canonical[hit[found]])
  str
}

# Normalise aliases in all three token tables.
meta_tokens$word <- meta_tokens$word %>% synonym()
review_tokens$word <- review_tokens$word %>% synonym()
all_tokens$word <- all_tokens$word %>% synonym()

1.網路討論聲量

# Monthly mention counts of the five main wallets, one line per wallet.
toppay <- c("linepay", "街口支付", "台灣pay", "pipay", "applepay")

p <- all_tokens %>%
  filter(word %in% toppay) %>%
  mutate(artDate = format(artDate, "%Y-%m")) %>% # month label, e.g. "2020-07"
  count(artDate, word, name = "count") %>%
  ggplot(aes(x = artDate, y = count, col = word)) +
  geom_point() +
  # The x axis is discrete (month labels), so lines need an explicit group;
  # grouping by wallet draws one line per wallet instead of a single path
  # zig-zagging through every wallet.
  geom_line(aes(group = word)) +
  ggtitle("各支付方式討論度") +
  xlab("date (yr-mn)") +
  theme(axis.text.x = element_text(angle = 90, colour = "black"))

## `summarise()` has grouped output by 'format(artDate, "%Y-%m")'. You can override using the `.groups` argument.

# Render the monthly-mentions plot as an interactive plotly widget.
p %>% ggplotly()

以長期來看，linepay和街口支付的每月討論度比其他三者高。
2020-07因為發行振興三倍券，台灣pay、linepay和街口支付的討論度增加。

2.行動支付文字雲

# Frequent tokens for the overall word cloud: keep tokens seen more than 150
# times, most frequent first.
word_count <- all_tokens %>%
  count(word) %>%
  filter(n > 150) %>% # drop rare tokens
  arrange(desc(n)) # sort by the count column (n() would be the constant row count)

# word_count %>% wordcloud2()

整體而言行動支付社群討論文字雲為 “信用卡”，行動支付的產品很多,從文字雲初步看來以linepay,台灣pay,google pay, samsungpay,悠遊付,pipay 的詞頻為最高,
消費,優惠,點數,可消費的店家也是社群所關注的
通路部份包括了超市通路如全聯, 超商如全家
與行動支付有關的錢包如linepaymoney, 錢包

3.發文種類TF-IDF

# tf-idf of tokens per post category (each category is treated as a document);
# shown here for the "優惠" category only, highest tf-idf first.
class_word_counts <- count(meta_tokens, class, word, sort = TRUE)

class_word_counts %>%
  bind_tf_idf(word, class, n) %>%
  filter(n > 50, nchar(word) > 1, class == "優惠") %>%
  arrange(class, desc(tf_idf))

##     class     word   n          tf       idf      tf_idf
##  1:  優惠     台新  51 0.008132674 0.1335314 0.001085967
##  2:  優惠     回饋 153 0.024398023 0.0000000 0.000000000
##  3:  優惠     活動 123 0.019614097 0.0000000 0.000000000
##  4:  優惠   悠遊付  83 0.013235529 0.0000000 0.000000000
##  5:  優惠  linepay  62 0.009886780 0.0000000 0.000000000
##  6:  優惠      app  61 0.009727316 0.0000000 0.000000000
##  7:  優惠     消費  61 0.009727316 0.0000000 0.000000000
##  8:  優惠 活動期間  59 0.009408388 0.0000000 0.000000000
##  9:  優惠     帳戶  53 0.008451603 0.0000000 0.000000000
## 10:  優惠     綁定  51 0.008132674 0.0000000 0.000000000

情報類，出現“限得”, 如活動期間每人限得一次,限得8個紅包,限得999點,限得一張 “活動回饋”, “福利”,“零用金”, “回饋資格”,“優惠”
請益類，出現“感應”，表示鄉民較常請益手機感應出現問題的情況，要如何處理 “nfc”,“失敗”,“掃碼”,“hamipay”,“googlepay”,“玉山”和其它類比較不同
新聞類：“基金”,“證券”,“金管會”,“市場”,“電子支付”等金融法令用詞，另外則是“機構”,“董市長”,“業者”等新聞報導用字
其餘類的tf_idf則都偏低,與其他類沒有比較特殊的用字。

三、主要行動支付種類評比

1.各支付之社群文字雲

(1)「Line Pay」社群文字雲

# LINE Pay subset: keep articles / comments whose text contains any alias.
linepay_aliases <- c("linepay", "linepaymoney", "lp", "lpm", "lpmoney")

# TRUE where `text` contains at least one alias (element-wise OR of the
# individual str_detect() results).
mentions_linepay <- function(text) {
  Reduce(`|`, lapply(linepay_aliases, function(alias) str_detect(text, alias)))
}

metaData_linepay <- filter(metaData, mentions_linepay(sentence))
reviews_linepay <- filter(reviews, mentions_linepay(cmtContent))

meta_tokens_linepay <- unnest_tokens(
  metaData_linepay, word, sentence, token = customized_tokenizer
)
review_tokens_linepay <- unnest_tokens(
  reviews_linepay, word, cmtContent, token = customized_tokenizer
)

# Stack article and comment tokens.
all_tokens_linepay <- rbind(
  select(meta_tokens_linepay, artDate, artUrl, word),
  select(review_tokens_linepay, artDate, artUrl, word)
)

# Word-cloud input for LINE Pay: drop the wallet's own aliases, tokens with
# digits and single-character tokens, keep tokens seen more than 150 times,
# most frequent first.
line_synonym <- c("linepay", "linepaymoney", "lp", "lpm", "lpmoney")

line_tokens <- all_tokens_linepay %>%
  filter(
    !word %in% line_synonym,
    !str_detect(word, "\\d"),
    nchar(word) > 1
  ) %>%
  count(word) %>%
  filter(n > 150) %>% # drop rare tokens
  arrange(desc(n)) # sort by the count column (n() would be the constant row count)
# %>% wordcloud2()

由文字雲可看出主要是關於linepay在付款、繳費等方面會有點數回饋的活動

(2)「街口支付」社群文字雲

金管會, 投信 : 金管會對街口電支,街口投信開罰，未確實執行內控及街口託付保等新聞，停止街口電支董事長胡亦嘉執行董事職務1年。
現金回饋：街口支付綁定多家銀行信用
和linepay共提的比例高, 悠遊付
聯名卡: 台新銀行街口聯名卡，且指定“通路”有更高的回饋

(3)「Pi拍錢包」社群文字雲

pipay主要為使用p幣，但只有綁定玉山聯名卡才有p幣

(4)台灣pay社群文字雲

台灣Pay透過三倍券的活動大力推廣台灣Pay,有 23家銀行有推出三倍券 + 台灣 Pay 的活動，這 23家銀行的“金融卡”或銀行帳戶都可以綁定, 但是只有 13家的銀行，可以綁定其信用卡在台灣 Pay 之中
各家銀行APP+ 台灣pay, 或台灣行動支付 APP 中的台灣 Pay 都可以使用
使用台灣 Pay 累積三倍券額度,一定要用“掃碼”方式支付

# Read a dictionary file as a character vector, one entry per line.
read_dict_lines <- function(path) {
  scan(
    file = path, what = character(), sep = '\n',
    encoding = 'utf-8', fileEncoding = 'utf-8'
  )
}

# Stop-word list and user lexicon as plain character vectors.
stop_words <- read_dict_lines("../dict/mp_stop_words.txt")
mp_lexicon <- read_dict_lines("../dict/mp_dict.txt")

# Rebuild the jieba worker with the user dictionary only; stop words are
# filtered separately later on.
jieba_tokenizer <- worker(user = "../dict/mp_dict.txt")

# Bigram tokenizer for unnest_tokens().
# Input:  a character vector
# Output: a list of the same length; each element holds the bigrams of the
#         corresponding text as "word1 word2" strings (NULL for texts of at
#         most one character).
jieba_bigram <- function(t) {
  to_bigrams <- function(doc) {
    # ngrams(tokens, 2) is only attempted for texts longer than one character
    if (nchar(doc) <= 1) {
      return(NULL)
    }
    words <- segment(doc, jieba_tokenizer)
    pairs <- ngrams(words, 2)
    # join the two words of every bigram with a single space
    unlist(lapply(pairs, paste, collapse = " "))
  }
  lapply(t, to_bigrams)
}

## visualize_bigrams
# Draw a directed network of bigrams. `bigrams` is converted with
# graph_from_data_frame() and its `n` column drives the edge transparency.
# The random seed is fixed so the force-directed layout is reproducible.
visualize_bigrams <- function(bigrams) {
  set.seed(2016)
  edge_arrow <- grid::arrow(type = "closed", length = unit(.15, "inches"))
  bigram_graph <- graph_from_data_frame(bigrams)

  ggraph(bigram_graph, layout = "fr") +
    geom_edge_link(
      aes(edge_alpha = n),
      show.legend = FALSE,
      arrow = edge_arrow
    ) +
    geom_node_point(color = "lightblue", size = 5) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}

# Split every article body into bigrams.
mobilePay_bigram <- metaData %>%
  unnest_tokens(bigram, sentence, token = jieba_bigram)

# Drop unused metadata columns and bigrams containing digits, split each bigram
# into its two words and discard pairs in which either word is a stop word.
mobilePay_bigram_separated <- mobilePay_bigram %>%
  select(-artPoster, -artCat, -commentNum, -push, -boo) %>%
  filter(!str_detect(bigram, "[0-9]")) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!(word1 %in% stop_words), !(word2 %in% stop_words))

# Most frequent bigrams, used as a reference when extending the user dictionary.
# unite() replaces the deprecated unite_().
mobilePay_bigram_separated %>%
  unite("bigram", word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)

##               bigram   n
##     1:   linepoint s 314
##     2:   悠遊卡 公司 300
##     3:     回饋 上限 210
##     4:     自動 加值 143
##     5: 台灣 行動支付 142
##    ---                  
## 59855:       鑽 法律   1
## 59856:       鑽 金卡   1
## 59857:       鑽 保卡   1
## 59858:     鑽保 數位   1
## 59859:   鑽保卡 這張   1

2.各主要支付之字詞相關性

# Split article bodies into sentences on full-/half-width exclamation marks,
# question marks, semicolons and the full-width period.
mobilePay_sentences <- strsplit(metaData$sentence, "[。！；？!?;]")

# Pair every sentence with the URL of the article it came from and drop
# sentences that are empty or whitespace only.
# lengths() always returns an integer vector, unlike sapply(x, length), whose
# return type changes on empty input.
mobilePay_sentences <- data.frame(
  artUrl = rep(metaData$artUrl, lengths(mobilePay_sentences)),
  sentence = unlist(mobilePay_sentences)
) %>%
  filter(!str_detect(sentence, regex("^(\t|\n| )*$")))

# Make sure the sentence column is character, then strip tildes.
mobilePay_sentences$sentence <- as.character(mobilePay_sentences$sentence)
mobilePay_sentences$sentence <- gsub("~", "", mobilePay_sentences$sentence)
# mobilePay_sentences

# Load the user dictionary as a character vector, one entry per line.
# quiet = TRUE is spelled out because `T` is an ordinary, reassignable variable.
mp_dict <- scan(
  file = "../dict/mp_dict.txt", what = character(), sep = '\n',
  encoding = 'utf-8', fileEncoding = 'utf-8', quiet = TRUE
)
# mp_dict

# Fresh jieba worker; the dictionary entries are added to it as user words.
jieba_tokenizer <- worker(write = "NOFILE")

# Register the dictionary entries with the worker.
new_user_word(jieba_tokenizer, mp_dict)

## [1] TRUE

# Tokenizer for unnest_tokens(): segments each text with the current jieba
# worker, removes stop words and single-character tokens. Texts of at most one
# character yield NULL.
chi_tokenizer <- function(t) {
  tokenize_one <- function(doc) {
    if (nchar(doc) <= 1) {
      return(NULL)
    }
    words <- segment(doc, jieba_tokenizer) # uses the rebuilt tokenizer
    words <- words[!words %in% stop_words] # drop stop words
    words[nchar(words) > 1] # drop single-character tokens
  }
  lapply(t, tokenize_one)
}

# The earlier tokenization did not use the extended dictionary, so tokenize the
# sentences again and count how often each token occurs in each article.
mp_words <- mobilePay_sentences %>%
  unnest_tokens(word, sentence, token = chi_tokenizer) %>%
  # only digits are removed: many wallet names contain English letters
  filter(!str_detect(word, "[0-9]")) %>%
  count(artUrl, word, sort = TRUE)

# Normalise aliases.
mp_words$word <- synonym(mp_words$word)

# Number of articles in which two tokens appear together, ignoring the generic
# token "行動支付".
word_pairs <- mp_words %>%
  pairwise_count(word, artUrl, sort = TRUE) %>%
  filter(!(item1 %in% c("行動支付") | item2 %in% c("行動支付")))

# word_pairs

# Pairwise correlation between tokens that occur in at least 60 rows of
# mp_words; pairwise_cor() names the result columns item1 / item2.
frequent_words <- mp_words %>%
  filter(!str_detect(word, "[0-9]")) %>%
  group_by(word) %>%
  filter(n() >= 60)

word_cors <- pairwise_cor(frequent_words, word, artUrl, sort = TRUE)

# word_cors

# The 15 tokens most correlated with each main wallet.

# Main mobile-payment products.
# mp_products<-c("linepay",  "街口支付","台灣pay","pi錢包","悠遊付","applepay","famipay","samsungpay","pxpay","googlepay")
mp_products <- c("linepay", "街口支付", "台灣pay", "pipay")

word_cors %>%
  filter(item1 %in% mp_products) %>%
  group_by(item1) %>%
  # rank explicitly by correlation instead of top_n()'s implicit last column
  slice_max(correlation, n = 15) %>%
  ungroup() %>%
  mutate(item2 = reorder(item2, correlation)) %>%
  ggplot(aes(item2, correlation, fill = item1)) +
  geom_bar(stat = "identity", show.legend = F) +
  facet_wrap(~ item1, scales = "free") +
  coord_flip() + # horizontal bars
  # set a CJK-capable font so the Chinese labels render correctly
  theme(text = element_text(family = "Heiti TC Light"))

linepay網友主推回饋點數, 街口支付以現金回饋為賣點。
台灣pay綁定的是「金融卡」，以條碼付款、掃描支付，主推回饋金，2020/6振興三倍券電子票券有3倍回饋，造成網路聲量。
Pi拍錢包僅綁定特定銀行「玉山」銀行推出的聯名卡才能享有「P幣回饋」。

3.行動支付共現圖

set.seed(2020)

# Boilerplate terms from official campaign rules, to be excluded from the
# co-occurrence graph.
filter_word <- c(
  "解釋", "權利", "保留", "終止", "違反", "點選", "如有", "不可", "另行通知", "技術",
  "變更", "暫停", "活動資格", "注意事項", "辨識", "電話", "活動期間", "活動內容", "版本", "陸續",
  "取消", "狀態", "回饋資格", "規定", "上述", "如有", "最終", "決定", "標題", "時間"
)
# The darker the edge, the higher the correlation.
# word_cors %>% 
#   filter(!item1 %in% filter_word ) %>% 
#   filter(!item2 %in% filter_word) %>%
#   filter(correlation > 0.4) %>% 
#   graph_from_data_frame() %>% # convert the data frame to a graph
#   ggraph(layout = "fr") +
#   geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
#   geom_node_point(color = "lightblue", size = 3) +
#   geom_node_text(aes(label = name), repel = TRUE, family = "Heiti TC Light") + 
#   theme_void()

字詞共現多為情報類活動辦法之官方文字
情報類的發文都有各行動支付公司對優惠活動“做出最終解釋，無須另行通知” , “如有任何因電腦、網路、電話、技術或不可歸責於LINE Pay的事由” 的但書
回饋點數將“陸續發放”
本活動期間回饋所有參加者之上限總金額為“新臺幣”

發文主要為情報版上的活動情報。
line Pay, line points,line pay money的活動，一直是網友的議題
台北市發行的悠遊付電子支付，因2020初一直處封測階段，屬於假上線狀況，一直在社群上有討論，另外悠遊付採用的是掃碼支付方式，目前僅支援萊爾富、頂好等多個通路，無法如同現有的NFC SIM卡或Samsung Pay悠遊卡在所有悠遊卡通路使用。另外有關悠遊付的操作討論也不少。

4.Line Pay/街口支付競爭度比較

LINE Pay x 街口支付共提次數文氏圖：共提次數117次，佔LINE Pay提及次數446次的26%、佔街口支付提及次數269次的43%，表示對街口支付的潛在競爭影響較大。

# Venn diagram of the articles flagged as mentioning LINE Pay and / or
# 街口支付, built from the two 0/1 indicator columns.
art_count <- select(metaData, linepay, streetpay)
venn_art_count <- vennCounts(art_count)
vennDiagram(
  venn_art_count,
  names = c("Line Pay", "街口支付"),
  cex = 1,
  counts.col = "red"
)

四、行動支付通路比較

1.常用通路探討

# Load the dictionary of payment channels:
#   convenience stores such as 7-11, 全家, 萊爾富, OK
#   supermarkets / hypermarkets such as 全聯, 頂好, 家樂福
#   merchants such as 麥當勞 and 星巴克
shopName <- read_lines("../dict/shop.txt")

# Merge the different spellings of the two largest convenience-store chains.
all_tokens$word[all_tokens$word %in% c("seven", "統一超商", "小七")] <- "統一超商"
all_tokens$word[all_tokens$word %in% c("全家便利商店", "全家")] <- "全家"

# Ten most mentioned channels; ranked explicitly by the count column instead
# of relying on top_n()'s implicit choice of the last column.
top_shops <- all_tokens %>%
  filter(word %in% shopName) %>%
  count(word, sort = TRUE) %>%
  slice_max(n, n = 10)

## Selecting by n

# Horizontal bar chart of the most mentioned channels, largest at the top.
ggplot(top_shops, aes(x = n, y = reorder(word, n), fill = word)) +
  geom_col(show.legend = F) +
  labs(x = "提及次數", y = "通路名稱")

目前行動支付最常被提及的通路為實體通路，主要以「超商」-全家,7-11 、「超市」-全聯,頂好,美廉社、「量販店」-家樂福為主
其他線上通路例如:momo、蝦皮賣場等被提及次數仍較少於實體通路。

# Mention trend of the five most discussed channels.
library(TTR)

# Five most mentioned channels, ranked explicitly by the count column.
top_shops <- all_tokens %>%
  filter(word %in% shopName) %>%
  count(word, sort = TRUE) %>%
  slice_max(n, n = 5)

all_tokens %>%
  filter(word %in% top_shops$word) %>%
  mutate(word = as.factor(word)) %>%
  count(artDate, word) %>% # daily mentions per channel
  # wide -> long round trip fills days without mentions with 0 for every channel
  pivot_wider(id_cols = `artDate`, names_from = `word`, values_from = `n`, values_fill = c(n = 0)) %>%
  pivot_longer(cols = -c(`artDate`), names_to = "word", values_to = "n") %>%
  group_by(word) %>%
  mutate(ma_n = TTR::EMA(n, 15)) %>% # 15-period exponential moving average
  na.omit() %>% # the first periods of each channel have no average yet
  ungroup() %>%
  ggplot(aes(x = artDate, y = ma_n)) +
  geom_line(aes(col = word), size = 1) +
  geom_smooth(aes(col = word)) +
  geom_rect(aes(xmin = as.Date('2020-01-01'), xmax = as.Date('2021-03-31'), ymin = 0, ymax = 15), alpha = 2e-3, fill = "grey45") +
  scale_x_date(name = NULL, date_breaks = "2 months", date_labels = "%Y/%m") +
  scale_color_discrete(name = "通路名稱") +
  labs(title = "通路名稱討論聲量", x = "日期", y = "提及次數") +
  theme_minimal()

## `summarise()` has grouped output by 'artDate'. You can override using the `.groups` argument.

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

前五大被提之實體通路-全家、7-11、全聯、萊爾富、家樂福，除了與現有的行動支付配合，推出優惠吸引消費者以外，亦擁有自家的行動支付品牌，可能使其被提及的聲量能夠更進一步推升。

2.超商通路探討

# Occurrences of every token in every article.
mobile_words <- count(all_tokens, artUrl, word, sort = TRUE)
# mobile_words

# Number of articles in which two tokens appear together.
mobile_word_pairs <- pairwise_count(mobile_words, word, artUrl, sort = TRUE)
# mobile_word_pairs

# Pairwise correlation between tokens that occur in at least 10 rows of
# mobile_words.
frequent_mobile_words <- mobile_words %>%
  group_by(word) %>%
  filter(n() >= 10)

mobile_word_cor <- pairwise_cor(
  frequent_mobile_words, word, artUrl, sort = TRUE
)
# mobile_word_cor

# The 15 tokens most correlated with each of the three convenience-store
# chains; ranked explicitly by correlation instead of top_n()'s implicit
# choice of the last column.
mobile_word_cor %>%
  filter(item1 %in% c("全家", "統一超商", "萊爾富")) %>%
  group_by(item1) %>%
  slice_max(correlation, n = 15) %>%
  ungroup() %>%
  mutate(item2 = reorder(item2, correlation)) %>%
  ggplot(aes(item2, correlation, fill = item1)) +
  geom_bar(stat = "identity", show.legend = F) +
  facet_wrap(~ item1, scales = "free") +
  coord_flip() +
  theme(text = element_text(family = "heiti"))

全家比較常與“7-11”提及，“玉山wallet”在2020年10月於全家、萊爾富有推掃條碼5%現金回饋，全家超商則是推出My FamiPay與信用卡綁定的“咖啡”相關回饋，來吸引消費者使用該通路限定之行動支付。
統一超商：統一超商有自行發行的“icashpay”,“OPEN錢包”支付, 及“open points”回饋，提及率較高。另外橘子支付在統一超商有長期合作，曾推出20%的高回饋。
萊爾富：萊爾富“HiPay”綁定信用卡亦即可享“咖啡”的回饋活動。另悠遊付於萊爾富、頂好、jasons消費亦推出15%回饋活動。

五、情緒分析走勢

1.每日情緒走勢圖

# Load the sentiment dictionary.
# Each file is read as one comma-separated string. Entries are trimmed and
# empty ones dropped, so a trailing newline or stray space in the file cannot
# produce a word that never matches a token.
P <- read_file("../dict/liwc/positive.txt")
N <- read_file("../dict/liwc/negative.txt")
P <- trimws(strsplit(P, ",")[[1]])
N <- trimws(strsplit(N, ",")[[1]])
P <- P[nzchar(P)]
N <- N[nzchar(N)]

# One row per word, labelled with its polarity.
P <- data.frame(word = P, sentiment = "positive")
N <- data.frame(word = N, sentiment = "negative")
LIWC <- rbind(P, N)

# Parse the article date, then count positive / negative dictionary words in
# article bodies per day.
meta_tokens$artDate <- as.Date(meta_tokens$artDate, "%Y/%m/%d")

sentiment_tokens <- meta_tokens %>%
  select(artDate, word) %>%
  inner_join(LIWC)

senti_daily <- sentiment_tokens %>%
  group_by(artDate, sentiment) %>%
  summarise(count = n())

## Joining, by = "word"

## `summarise()` has grouped output by 'artDate'. You can override using the `.groups` argument.

# Daily sentiment-word counts over the study period, one line per polarity.
ggplot(senti_daily) +
  geom_line(aes(x = artDate, y = count, colour = sentiment)) +
  scale_x_date(
    labels = date_format("%y %m /%d"),
    limits = as.Date(c('2020-01-01', '2021-03-31'))
  ) +
  labs(title = "MobilePay版每日情緒分數")

# Daily sentiment on the MobilePay board: positive words are relatively more frequent.

# Some days show a clear spike in negative / positive words; list the ten
# largest day-polarity counts to find those dates.
senti_by_count <- arrange(senti_daily, desc(count))
head(senti_by_count, 10)

## # A tibble: 10 x 3
## # Groups:   artDate [9]
##    artDate    sentiment count
##    <date>     <chr>     <int>
##  1 2021-02-04 negative     90
##  2 2021-02-04 positive     83
##  3 2020-12-04 positive     79
##  4 2020-11-30 positive     48
##  5 2020-04-21 positive     47
##  6 2020-06-28 positive     42
##  7 2020-10-06 positive     35
##  8 2020-09-12 positive     34
##  9 2020-03-23 positive     33
## 10 2020-07-01 positive     33

發現2021-02-04出現這個時間區段內最大量的負面情緒字詞，原因是當日新聞報導街口支付遭罰180萬元，因此當天討論串多在說明金管會專案金檢報告，提及街口支付的缺失以及被裁罰之原因，因此負面情緒詞較多

2.正負情緒代表字

# Frequency of every sentiment-dictionary word across articles and comments,
# most frequent first, as a plain data frame.
matched_sentiment <- inner_join(all_tokens, LIWC)

senti <- matched_sentiment %>%
  group_by(word, sentiment) %>%
  summarise(n = n()) %>%
  arrange(desc(n)) %>%
  data.frame()

## Joining, by = "word"

## `summarise()` has grouped output by 'word'. You can override using the `.groups` argument.

# Ten most frequent words of each polarity, one facet per polarity.
top_sentiment_words <- senti %>%
  group_by(sentiment) %>%
  slice_max(n, n = 10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n))

ggplot(top_sentiment_words, aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(x = "正負情緒代表字", y = NULL)

版上提及最多的正、負面字詞分別為：優惠、問題，顯示出在此版上討論的議題多是分享行動支付推出的優惠方案，而遇到相關問題時亦會上此版發文詢問
有趣的是，其中排名第二位的正、負面字詞分別為：成功、失敗，顯示出行動支付能否成功使用，亦是大家關心的重要議題。

3.「問題」「優惠」的內容探討

# Look into what the "問題" (problem) and "優惠" (promotion) discussions are about.
# Load the two keyword dictionaries and label every word with its type.
pros <- data.frame(word = read_lines("../dict/pros.txt"), type = "優惠")
cons <- data.frame(word = read_lines("../dict/cons.txt"), type = "問題")
prosncons <- rbind(pros, cons)

# Collapse tokens with a similar meaning onto one representative token.
# Each list name is the representative; its value lists the tokens merged into it.
merge_groups <- list(
  "三倍券" = c("振興", "振興券"),
  "咖啡" = c("熱美", "熱拿", "美式", "拿鐵", "那堤"),
  "折抵" = c("現折", "折扣"),
  "首筆回饋" = c("首筆", "首筆回饋"),
  "感應" = c("不好感應", "無法感應", "感應", "感應失敗", "感應問題"),
  "扣款" = c("無法扣款", "重複扣款", "扣款失敗", "付款失敗", "多扣"),
  "盜用" = c("被盜", "遭盜", "盜刷", "盜領")
)
for (target in names(merge_groups)) {
  all_tokens$word[all_tokens$word %in% merge_groups[[target]]] <- target
}

# Count how often each promotion / problem keyword is mentioned.
matched_keywords <- all_tokens %>%
  select(word) %>%
  inner_join(prosncons)

prosncons_discuss <- matched_keywords %>%
  group_by(type, word) %>%
  summarise(n = n())

## Joining, by = "word"

## `summarise()` has grouped output by 'type'. You can override using the `.groups` argument.

# Ten most mentioned keywords of each type, one facet per type.
top_keywords <- prosncons_discuss %>%
  group_by(type) %>%
  slice_max(n, n = 10) %>%
  ungroup()

ggplot(top_keywords, aes(x = n, y = reorder(word, n), fill = type)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~type, scales = "free_y") +
  labs(x = "提及次數", y = "「問題」「優惠」的文章代表字")

MobilePay版中，提及的優惠主題有折抵、儲值金、滿額禮、點數回饋、贈送咖啡等，其中於2020年推出的三倍券亦與行動支付有配合活動，被提及次數也相當多。
另外，MobilePay版提及的行動支付問題則有感應、扣款、自動儲值、異常、盜用等關鍵字，顯示行動支付仍有許多待解決的問題被網友提及。

六、結論

1.各行動支付評比

(1).「LinePay」社群討論度，明顯高於其它行動支付，顯示「linePay」推出的行銷活動，往往能造成社群轉貼優惠情報及網友討論。
(2).「街口支付」為台灣第2大行動支付，但比較特殊的是金管會對街口電支,街口投信開罰，未確實執行內控及街口託付保等負面新聞，呈現負面情緒的高點。
(3) 「台灣Pay」為財政部發行的行動支付，除了去年6-7月配合振興三倍券發行，有較高的討論度外，其它時間討論度偏低，建議可仿效其他行動支付商，結合異業行銷資源或在自身社群管道多張貼優惠資訊、舉辦抽獎活動、直播等，以此提高社群曝光度。
(4).「悠遊卡」逾8350萬流通卡數，背後潛藏「悠遊付」使用族群，在本專案的字詞共現度高，可作為未來觀察的方向。
(5).LINE Pay x 街口支付共提次數，街口支付佔比大於Line Pay，表示街口支付須更提高危機感，以提高競爭力。

2.通路分析

(1).行動支付最常被提及的通路為實體通路，主要以「超商」、「超市」、「量販店」，建議通用型支付在推廣使用通路時，仍以最接近民眾生活的「超商」、「超市」為主。
(2).超商通路中，「咖啡」是經常出現的實體物品，而超商近幾年也開始推廣茶飲調配，或許未來也可以多推出其他飲品的相關優惠，透過行動支付提高產品能見度

3.情緒分析

(1).以情緒分析來衡量消費者關注的議題，顯示出行動支付最「正向」的情緒來自業者推出的多種「優惠」，對消費者有一定的吸引力；最「負向」的情緒則來自仍有許多「問題」待解決。
(2).「優惠主題」最吸引消費者提及或轉貼的有折抵、儲值金、滿額禮、點數回饋、贈送咖啡等；行動支付「問題」消費者最在乎的則有感應、扣款、自動儲值、異常、盜用等，可作為發行機構的參考

行動支付的戰國時代，發行機構百家爭鳴，搭配的優惠、通路更是五花八門，在進行行動支付的社群分析時，相關行動支付詞彙的斷詞需持續增加、迭代，以提供更準確的分析。

第4組期中報告：行動支付社群分析

蘇舫萱-M094020034 徐明暇-D084020002 陳姵均-M094610007

一、專案簡介

1.動機

2.分析目的

3.資料集

二、行動支付資料總覽

1.網路討論聲量

2.行動支付文字雲

3.發文種類TF-IDF

三、主要行動支付種類評比

1.各支付之社群文字雲

(1)「Line Pay」社群文字雲

(2)「街口支付」社群文字雲

(3)「Pi拍錢包」社群文字雲

(4)台灣pay社群文字雲

2.各主要支付之字詞相關性

3.行動支付共現圖

4.Line Pay/街口支付競爭度比較

四、行動支付通路比較

1.常用通路探討

2.超商通路探討

五、情緒分析走勢

1.每日情緒走勢圖

2.正負情緒代表字

3.「問題」「優惠」的內容探討

六、結論

1.各行動支付評比

2.通路分析

3.情緒分析