Discussion of Euthanasia Legalization in Taiwan
## [1] "Chinese (Traditional)_Taiwan.950"
Install the required packages
packages = c("readr", "dplyr", "stringr", "jiebaR", "tidytext", "NLP", "readr", "tidyr", "ggplot2", "ggraph", "igraph", "scales", "reshape2", "widyr","tm", "data.table","topicmodels", "LDAvis", "webshot","purrr","ramify","RColorBrewer", "htmlwidgets","servr","wordcloud2", "rmdformats")
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
## Loading required package: readr
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: stringr
## Loading required package: jiebaR
## Loading required package: jiebaRD
## Loading required package: tidytext
## Loading required package: NLP
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:igraph':
##
## crossing
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
## Loading required package: ggraph
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
## Loading required package: reshape2
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
## Loading required package: tm
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## Loading required package: udpipe
## Loading required package: topicmodels
## Loading required package: LDAvis
## Loading required package: wordcloud2
## Loading required package: webshot
## Loading required package: htmlwidgets
## Loading required package: servr
## Loading required package: purrr
##
## Attaching package: 'purrr'
## The following object is masked from 'package:data.table':
##
## transpose
## The following object is masked from 'package:scales':
##
## discard
## The following objects are masked from 'package:igraph':
##
## compose, simplify
## Loading required package: ramify
##
## Attaching package: 'ramify'
## The following object is masked from 'package:purrr':
##
## flatten
## The following object is masked from 'package:webshot':
##
## resize
## The following object is masked from 'package:tidyr':
##
## fill
## The following object is masked from 'package:graphics':
##
## clip
## Loading required package: RColorBrewer
Data description
Using the NSYSU College of Management text analytics platform, we collected posts from the PTT Gossiping board between 2017-01-03 and 2020-06-07 matching the keyword "安樂死" (euthanasia): 850 articles and 33,342 comments in total.
We originally intended to start from 2016-09-09, the date the euthanasia legalization proposal was published on the Public Policy Online Participation Platform, but there was still no discussion on PTT even when the proposal passed the endorsement threshold on 2016-11-03. Discussion only picked up from 2017-01-03, after which every month saw a steady level of activity.
Motivation
Death is inevitable, but a death reached through pain and suffering is ultimately different from a peaceful one. We often say that our lives are our own to control, so why should the final decision at the end of a life rest with someone else? Hence proposals emerged for autonomy over one's own life and a right to a peaceful death: through hospice and palliative care, patients can refuse medical interventions forced upon them and die peacefully and with dignity.
The euthanasia legalization proposal has two main points: first, people aged 80 or above with an incurable illness; second, younger people certified by a physician as terminally ill. In both cases the prerequisites are that the person volunteers, has not broken the law, and owes no taxes; such persons would enjoy the "right to a peaceful death".
We will explore PTT users' views on euthanasia and their level of support, and how the vocabulary differs between discussions of human and animal euthanasia.
Retrieving data from the PTT Gossiping board
# retrieve the articles
Euthanasia <- read_csv("./final_project_articleMetaData.csv") %>%
  mutate(sentence=gsub("[\n]{2,}", "。", sentence)) # convert runs of two or more newlines into a full stop
## Parsed with column specification:
## cols(
## artTitle = col_character(),
## artDate = col_date(format = ""),
## artTime = col_time(format = ""),
## artUrl = col_character(),
## artPoster = col_character(),
## artCat = col_character(),
## commentNum = col_double(),
## push = col_double(),
## boo = col_double(),
## sentence = col_character()
## )
## # A tibble: 850 x 10
## artTitle artDate artTime artUrl artPoster artCat commentNum push boo
## <chr> <date> <time> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 2 [爆卦]衛福部~ 2017-01-04 02:34:06 https~ blacksug~ Gossi~ 25 8 11
## 3 [問卦]安樂死~ 2017-01-11 01:04:13 https~ jacktype~ Gossi~ 14 10 0
## 4 [FB]賴清德~ 2017-01-11 03:35:44 https~ medama Gossi~ 8 2 0
## 5 [問卦]如果安~ 2017-01-11 06:07:04 https~ ss910126 Gossi~ 19 8 3
## 6 [新聞]媽媽被~ 2017-01-13 03:13:48 https~ purplvam~ Gossi~ 14 8 2
## 7 [問卦]支持安~ 2017-01-18 11:16:53 https~ J22796168 Gossi~ 30 10 3
## 8 [問卦]安樂死~ 2017-01-18 20:44:04 https~ Eangel Gossi~ 19 4 6
## 9 [問卦]張振聲~ 2017-01-19 21:07:52 https~ kiwibee Gossi~ 13 5 1
## 10 [問卦]台灣為~ 2017-01-21 04:47:28 https~ l10O Gossi~ 8 3 0
## # ... with 840 more rows, and 1 more variable: sentence <chr>
## Parsed with column specification:
## cols(
## artTitle = col_character(),
## artDate = col_date(format = ""),
## artTime = col_time(format = ""),
## artUrl = col_character(),
## artPoster = col_character(),
## artCat = col_character(),
## cmtPoster = col_character(),
## cmtStatus = col_character(),
## cmtDate = col_datetime(format = ""),
## cmtContent = col_character()
## )
## # A tibble: 33,342 x 10
## artTitle artDate artTime artUrl artPoster artCat cmtPoster cmtStatus
## <chr> <date> <time> <chr> <chr> <chr> <chr> <chr>
## 1 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ ZO20 →
## 2 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ rookiecop →
## 3 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ sammoon 噓
## 4 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ heat0204 噓
## 5 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ cp296633 →
## 6 [爆卦]衛福部~ 2017-01-04 02:34:06 https~ blacksug~ Gossi~ Ayreon 噓
## 7 [爆卦]衛福部~ 2017-01-04 02:34:06 https~ blacksug~ Gossi~ leehanhan 推
## 8 [爆卦]衛福部~ 2017-01-04 02:34:06 https~ blacksug~ Gossi~ gundam06~ 噓
## 9 [爆卦]衛福部~ 2017-01-04 02:34:06 https~ blacksug~ Gossi~ poeoe 噓
## 10 [爆卦]衛福部~ 2017-01-04 02:34:06 https~ blacksug~ Gossi~ chadhsieh →
## # ... with 33,332 more rows, and 2 more variables: cmtDate <dttm>,
## # cmtContent <chr>
A quick look at the dataset
2018-06-07 turns out to be the peak of discussion; reviewing the events of that day, it was the date 傅達仁 (Fu Da-ren) was euthanized
Euthanasia %>%
  group_by(artDate) %>%
  summarise(count = n()) %>%
  ggplot(aes(artDate, count)) +
  geom_line(color="blue", size=1)
Split all articles into sentences and store the result
# split on full- or half-width exclamation marks, question marks and semicolons, plus the full-width period
Euthanasia_sentences <- strsplit(Euthanasia$sentence, "[。!;?!?;]")
# pair each sentence with the article it belongs to, organized into a dataframe
Euthanasia_sentences <- data.frame(
artUrl = rep(Euthanasia$artUrl, sapply(Euthanasia_sentences, length)),
sentence = unlist(Euthanasia_sentences)
) %>%
filter(!str_detect(sentence, regex("^(\t|\n| )*$")))
Euthanasia_sentences$sentence <- as.character(Euthanasia_sentences$sentence)
Next, tokenization
1. Initialize the tokenizer (see the sketch below)
2. Tokenize and organize the results
Tokenize the text and count how often each word appears in each article
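The chi_tokenizer used in the next chunk is initialized in a chunk whose code isn't shown; a minimal sketch, assuming the default jiebaR dictionary and following the same pattern as the jieba_bigram helper later in this report:
# sketch (assumption): a jiebaR-based tokenizer for unnest_tokens
# Input: a character vector; Output: a list of token vectors
jieba_tokenizer <- worker()
chi_tokenizer <- function(t) {
  lapply(t, function(x) {
    if (nchar(x) > 1) {
      return(segment(x, jieba_tokenizer))
    }
  })
}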
Euthanasia_words <- Euthanasia_sentences %>%
unnest_tokens(word, sentence, token=chi_tokenizer) %>%
filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
count(artUrl, word, sort = TRUE)
Euthanasia_words
## # A tibble: 59,940 x 3
## artUrl word n
## <fct> <chr> <int>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html 朗貝爾 56
## 2 https://www.ptt.cc/bbs/Gossiping/M.1506234590.A.B5C.html 台大 41
## 3 https://www.ptt.cc/bbs/Gossiping/M.1558505376.A.23B.html 動物 40
## 4 https://www.ptt.cc/bbs/Gossiping/M.1503141535.A.302.html 台大 38
## 5 https://www.ptt.cc/bbs/Gossiping/M.1503141535.A.302.html 振聲 28
## 6 https://www.ptt.cc/bbs/Gossiping/M.1506234590.A.B5C.html 振聲 28
## 7 https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html 法國 26
## 8 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 病人 25
## 9 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 醫療 25
## 10 https://www.ptt.cc/bbs/Gossiping/M.1528372976.A.5E2.html 傅達仁 25
## # ... with 59,930 more rows
Count the number of words each article contains
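The code for this step isn't shown in the knitted output; a sketch of what presumably produced the table below:
# sketch (assumption): total word count per article
total_words <- Euthanasia_words %>%
  group_by(artUrl) %>%
  summarise(total = sum(n))
total_words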
## # A tibble: 850 x 2
## artUrl total
## <fct> <int>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1483458289.A.84B.html 43
## 2 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 546
## 3 https://www.ptt.cc/bbs/Gossiping/M.1484125815.A.B14.html 66
## 4 https://www.ptt.cc/bbs/Gossiping/M.1484134907.A.F5F.html 194
## 5 https://www.ptt.cc/bbs/Gossiping/M.1484143988.A.774.html 30
## 6 https://www.ptt.cc/bbs/Gossiping/M.1484306396.A.E41.html 190
## 7 https://www.ptt.cc/bbs/Gossiping/M.1484767375.A.124.html 34
## 8 https://www.ptt.cc/bbs/Gossiping/M.1484801407.A.807.html 43
## 9 https://www.ptt.cc/bbs/Gossiping/M.1484889238.A.D58.html 17
## 10 https://www.ptt.cc/bbs/Gossiping/M.1485003210.A.30F.html 73
## # ... with 840 more rows
Join Euthanasia_words (each word's count within each article)
with total_words (each article's word count),
adding each article's total word count as a new column
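The join itself isn't shown either; a sketch consistent with the joining message below:
# sketch (assumption): attach each article's total word count
Euthanasia_words <- left_join(Euthanasia_words, total_words)
Euthanasia_words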
## Joining, by = "artUrl"
## # A tibble: 59,940 x 4
## artUrl word n total
## <fct> <chr> <int> <int>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html 朗貝爾 56 870
## 2 https://www.ptt.cc/bbs/Gossiping/M.1506234590.A.B5C.html 台大 41 980
## 3 https://www.ptt.cc/bbs/Gossiping/M.1558505376.A.23B.html 動物 40 758
## 4 https://www.ptt.cc/bbs/Gossiping/M.1503141535.A.302.html 台大 38 2013
## 5 https://www.ptt.cc/bbs/Gossiping/M.1503141535.A.302.html 振聲 28 2013
## 6 https://www.ptt.cc/bbs/Gossiping/M.1506234590.A.B5C.html 振聲 28 980
## 7 https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html 法國 26 870
## 8 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 病人 25 546
## 9 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 醫療 25 546
## 10 https://www.ptt.cc/bbs/Gossiping/M.1528372976.A.5E2.html 傅達仁 25 795
## # ... with 59,930 more rows
Sentiment analysis with the LIWC dictionary
Load the LIWC sentiment dictionary
# positive dictionary txt file
P <- read_file("liwc/positive.txt")
# negative dictionary txt file
N <- read_file("liwc/negative.txt")
# split the strings on commas
# strsplit returns a list; we take its first element
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]
# build dataframes with two columns, word and sentiment; the word column holds the dictionary vector
P = data.frame(word = P, sentiment = "positive")
N = data.frame(word = N, sentiment = "negative")
LIWC = rbind(P, N)
Count the daily totals of positive and negative words in the articles
- Both the positive and the negative word counts peak on 2018-06-07, the same day as the article-count peak noted above
- The second-highest positive count falls on 2019-05-22 (190 words), followed by 2019-05-21 (111 words)
- The second-highest negative count falls on 2019-05-21 (167 words), followed by 2019-05-22 (126 words)
In October 2017 the Ministry of Health and Welfare responded that the key to euthanasia is "dignified dying", with no need to touch on legalization. 2018-06-07 is the date 傅達仁 was euthanized: https://disp.cc/b/163-aF0S, and 2019-05-21 is the date of 朗貝爾's (Vincent Lambert's) euthanasia: https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html
# first bring in artDate
A <- left_join(Euthanasia_words, Euthanasia) %>% select(-artTitle, -artTime, -artPoster, -artCat, -commentNum, -push, -boo, -sentence)
## Joining, by = "artUrl"
## Warning: Column `artUrl` joining factor and character vector, coercing into
## character vector
# count how many positive and negative words appear each day
sentiment_count = A %>%
  select(artDate, word, n) %>%
  inner_join(LIWC) %>%
  group_by(artDate, sentiment) %>%
  summarise(count = sum(n))
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
# the full time span is hard to read, so we look at one year at a time
# 2017
sentiment_count %>%
  ggplot() +
  geom_line(aes(x=artDate, y=count, colour=sentiment)) +
  xlim(as.Date(c("2017-01-03","2017-12-31"))) +
  ylim(c(0,100))
## Warning: Removed 446 row(s) containing missing values (geom_path).
# 2018
sentiment_count %>%
  ggplot() +
  geom_line(aes(x=artDate, y=count, colour=sentiment)) +
  xlim(as.Date(c("2018-01-01","2018-12-31"))) +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2018/06/07'))[1]])), colour = "red")
## Warning: Removed 453 row(s) containing missing values (geom_path).
# 2019
sentiment_count %>%
  ggplot() +
  geom_line(aes(x=artDate, y=count, colour=sentiment)) +
  xlim(as.Date(c("2019-01-01","2019-12-31"))) +
  ylim(c(0,200)) +
  geom_vline(aes(xintercept = as.numeric(artDate[which(sentiment_count$artDate == as.Date('2019/05/21'))[1]])), colour = "red")
## Warning: Removed 496 row(s) containing missing values (geom_path).
# 2020
sentiment_count %>%
  ggplot() +
  geom_line(aes(x=artDate, y=count, colour=sentiment)) +
  xlim(as.Date(c("2020-01-01","2020-06-07"))) +
  ylim(c(0,25))
## Warning: Removed 615 row(s) containing missing values (geom_path).
Positive and negative words in articles on the day of 傅達仁's euthanasia (2018-06-07)
A %>%
filter(artDate == as.Date('2018/06/07')) %>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(
count = n()
) %>% data.frame() %>%
top_n(30,wt = count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
Positive and negative words in articles on the day of 朗貝爾's euthanasia (2019-05-21)
A %>%
filter(artDate == as.Date('2019/05/21')) %>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(
count = n()
) %>% data.frame() %>%
top_n(30,wt = count) %>%
ungroup() %>%
mutate(word = reorder(word, count)) %>%
ggplot(aes(word, count, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()
## Joining, by = "word"
## Warning: Column `word` joining character vector and factor, coercing into
## character vector
We find that the frequently occurring positive words are much the same across the euthanasia discussions; the negative words around active euthanasia (the positively framed case) are fairly similar, whereas the negative words around passive euthanasia (the negatively framed case) are more varied.
Compute tf-idf
# compute each word's tf-idf value, treating each article as a document
Euthanasia_words_tf_idf <- Euthanasia_words %>%
  bind_tf_idf(word, artUrl, n)
Euthanasia_words_tf_idf
## # A tibble: 59,940 x 7
## artUrl word n total tf idf tf_idf
## <fct> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 https://www.ptt.cc/bbs/Gossiping/M.155~ 朗貝爾~ 56 870 0.0644 6.75 0.434
## 2 https://www.ptt.cc/bbs/Gossiping/M.150~ 台大 41 980 0.0418 5.36 0.224
## 3 https://www.ptt.cc/bbs/Gossiping/M.155~ 動物 40 758 0.0528 2.50 0.132
## 4 https://www.ptt.cc/bbs/Gossiping/M.150~ 台大 38 2013 0.0189 5.36 0.101
## 5 https://www.ptt.cc/bbs/Gossiping/M.150~ 振聲 28 2013 0.0139 6.05 0.0842
## 6 https://www.ptt.cc/bbs/Gossiping/M.150~ 振聲 28 980 0.0286 6.05 0.173
## 7 https://www.ptt.cc/bbs/Gossiping/M.155~ 法國 26 870 0.0299 4.55 0.136
## 8 https://www.ptt.cc/bbs/Gossiping/M.148~ 病人 25 546 0.0458 2.47 0.113
## 9 https://www.ptt.cc/bbs/Gossiping/M.148~ 醫療 25 546 0.0458 2.04 0.0936
## 10 https://www.ptt.cc/bbs/Gossiping/M.152~ 傅達仁~ 25 795 0.0314 2.11 0.0664
## # ... with 59,930 more rows
# pick each article's ten highest tf-idf words
Euthanasia_words_tf_idf %>%
  group_by(artUrl) %>%
  top_n(10) %>%
  arrange(desc(artUrl))
## Selecting by tf_idf
## # A tibble: 9,796 x 7
## # Groups: artUrl [850]
## artUrl word n total tf idf tf_idf
## <fct> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 https://www.ptt.cc/bbs/Gossiping/M.15~ 殺人 3 68 0.0441 3.45 0.152
## 2 https://www.ptt.cc/bbs/Gossiping/M.15~ 失調 2 68 0.0294 5.65 0.166
## 3 https://www.ptt.cc/bbs/Gossiping/M.15~ 有罪 2 68 0.0294 5.65 0.166
## 4 https://www.ptt.cc/bbs/Gossiping/M.15~ 思覺 2 68 0.0294 5.65 0.166
## 5 https://www.ptt.cc/bbs/Gossiping/M.15~ 國際 2 68 0.0294 3.53 0.104
## 6 https://www.ptt.cc/bbs/Gossiping/M.15~ 棒棒 2 68 0.0294 6.05 0.178
## 7 https://www.ptt.cc/bbs/Gossiping/M.15~ 必勝 1 68 0.0147 6.75 0.0992
## 8 https://www.ptt.cc/bbs/Gossiping/M.15~ 法律制裁~ 1 68 0.0147 6.75 0.0992
## 9 https://www.ptt.cc/bbs/Gossiping/M.15~ 運作 1 68 0.0147 6.75 0.0992
## 10 https://www.ptt.cc/bbs/Gossiping/M.15~ 警案 1 68 0.0147 6.75 0.0992
## # ... with 9,786 more rows
# pick each article's ten highest tf-idf words,
# then count how many times each word is selected
Euthanasia_words_tf_idf %>%
  group_by(artUrl) %>%
  top_n(10) %>%
  arrange(desc(artUrl)) %>%
  ungroup() %>%
  count(word, sort=TRUE)
## Selecting by tf_idf
## # A tibble: 7,169 x 2
## word n
## <chr> <int>
## 1 傅達仁 35
## 2 自殺 32
## 3 動物 24
## 4 病人 20
## 5 瑞士 20
## 6 連署 17
## 7 老人 15
## 8 流浪 15
## 9 公投 14
## 10 死刑 14
## # ... with 7,159 more rows
Because we treat each article as one document (850 documents in total),
we skip the chart from Chapter 3 of the textbook that compares high tf-idf words across documents
jiebaR and ngrams
bigram function
# initialization
jieba_tokenizer = worker()
# bigram tokenizer for unnest_tokens
# Input: a character vector
# Output: a list of character vectors of the same length
jieba_bigram <- function(t) {
  lapply(t, function(x) {
    if(nchar(x)>1){
      tokens <- segment(x, jieba_tokenizer)
      bigram <- ngrams(tokens, 2)
      bigram <- lapply(bigram, paste, collapse = " ")
      unlist(bigram)
    }
  })
}
# run the bigram tokenization
Euthanasia_bigram <- Euthanasia %>%
unnest_tokens(bigram, sentence, token = jieba_bigram)
Euthanasia_bigram
## # A tibble: 129,031 x 10
## artTitle artDate artTime artUrl artPoster artCat commentNum push boo
## <chr> <date> <time> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 2 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 3 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 4 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 5 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 6 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 7 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 8 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 9 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## 10 [問卦]不想活~ 2017-01-03 07:38:46 https~ ryan0222 Gossi~ 5 0 2
## # ... with 129,021 more rows, and 1 more variable: bigram <chr>
# remove bigrams that contain English letters or digits
# count how often each bigram occurs
Euthanasia_bigram %>%
  filter(!str_detect(bigram, regex("[0-9a-zA-Z]"))) %>%
  count(bigram, sort = TRUE)
## # A tibble: 82,543 x 2
## bigram n
## <chr> <int>
## 1 安樂死 的 318
## 2 的 人 267
## 3 的 安樂死 101
## 4 完整 新聞 100
## 5 為 什麼 99
## 6 零 安樂死 99
## 7 也 是 97
## 8 都 是 94
## 9 植物 人 94
## 10 安樂死 合法化 93
## # ... with 82,533 more rows
trigram function
# initialization
jieba_tokenizer = worker()
jieba_trigram <- function(t) {
  lapply(t, function(x) {
    if(nchar(x)>1){
      tokens <- segment(x, jieba_tokenizer)
      ngram <- ngrams(unlist(tokens), 3)
      ngram <- lapply(ngram, paste, collapse = " ")
      unlist(ngram)
    }
  })
}
# run the trigram tokenization
Euthanasia_trigram <- Euthanasia %>%
unnest_tokens(ngrams, sentence, token = jieba_trigram)
Euthanasia_trigram %>%
filter(!str_detect(ngrams, regex("[0-9a-zA-Z]"))) %>%
count(ngrams, sort = TRUE)
## # A tibble: 106,191 x 2
## ngrams n
## <chr> <int>
## 1 傅 達 仁 53
## 2 完整 新聞 連結 51
## 3 完整 新聞 內文 49
## 4 或 短 網址 49
## 5 連結 或 短 49
## 6 新聞 連結 或 49
## 7 體育 主播 傅達仁 41
## 8 安寧 緩和 醫療 33
## 9 零 安樂死 政策 32
## 10 病人 自主 權利 31
## # ... with 106,181 more rows
The results above contain many trigrams that include stop words, so we next remove the stop words and see what new combinations emerge
Remove stop words
Load the stop word dictionary
# load stop words
stop_words <- scan(file = "stop_words.txt", what=character(), sep='\n', encoding='utf-8')
# remove the stop words in bigram
Euthanasia_bigram %>%
filter(!str_detect(bigram, regex("[0-9a-zA-Z]"))) %>%
separate(bigram, c("word1", "word2"), sep = " ") %>%
filter(!(word1 %in% stop_words), !(word2 %in% stop_words)) %>%
count(word1, word2, sort = TRUE) %>%
unite_("bigram", c("word1","word2"), sep=" ")## # A tibble: 82,543 x 2
## bigram n
## <chr> <int>
## 1 安樂死 的 318
## 2 的 人 267
## 3 的 安樂死 101
## 4 完整 新聞 100
## 5 為 什麼 99
## 6 零 安樂死 99
## 7 也 是 97
## 8 都 是 94
## 9 植物 人 94
## 10 安樂死 合法化 93
## # ... with 82,533 more rows
# remove the stop words in trigram
Euthanasia_trigram %>%
filter(!str_detect(ngrams, regex("[0-9a-zA-Z]"))) %>%
separate(ngrams, c("word1", "word2", "word3"), sep = " ") %>%
filter(!(word1 %in% stop_words), !(word2 %in% stop_words), !(word3 %in% stop_words)) %>%
count(word1, word2, word3, sort = TRUE) %>%
unite_("ngrams", c("word1", "word2", "word3"), sep=" ")## # A tibble: 106,191 x 2
## ngrams n
## <chr> <int>
## 1 傅 達 仁 53
## 2 完整 新聞 連結 51
## 3 完整 新聞 內文 49
## 4 或 短 網址 49
## 5 連結 或 短 49
## 6 新聞 連結 或 49
## 7 體育 主播 傅達仁 41
## 8 安寧 緩和 醫療 33
## 9 零 安樂死 政策 32
## 10 病人 自主 權利 31
## # ... with 106,181 more rows
From the bigram and trigram results above, we can compile a better segmentation dictionary.
We save the compiled terms in euthanasia_lexicon.txt under the dict folder
Custom dictionary
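How euthanasia_lexicon is read back in isn't shown; a sketch assuming a newline-delimited file, mirroring how stop_words was loaded above:
# sketch (assumption): read the custom lexicon, one term per line
euthanasia_lexicon <- scan(file = "dict/euthanasia_lexicon.txt",
                           what = character(), sep = '\n', encoding = 'utf-8')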
jieba_tokenizer = worker()
# re-tokenize using the euthanasia-related dictionary
# negation words are also added to the tokenizer
new_user_word(jieba_tokenizer, c(euthanasia_lexicon))
## [1] TRUE
Word Correlation
# the earlier tokenization did not use the new dictionary,
# so we re-tokenize and recount each word's occurrences per article
Euthanasia_words <- Euthanasia_sentences %>%
  unnest_tokens(word, sentence, token=chi_tokenizer) %>%
  filter(!str_detect(word, regex("[0-9a-zA-Z]"))) %>%
  count(artUrl, word, sort = TRUE)
Euthanasia_words
## # A tibble: 59,940 x 3
## artUrl word n
## <fct> <chr> <int>
## 1 https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html 朗貝爾 56
## 2 https://www.ptt.cc/bbs/Gossiping/M.1506234590.A.B5C.html 台大 41
## 3 https://www.ptt.cc/bbs/Gossiping/M.1558505376.A.23B.html 動物 40
## 4 https://www.ptt.cc/bbs/Gossiping/M.1503141535.A.302.html 台大 38
## 5 https://www.ptt.cc/bbs/Gossiping/M.1503141535.A.302.html 振聲 28
## 6 https://www.ptt.cc/bbs/Gossiping/M.1506234590.A.B5C.html 振聲 28
## 7 https://www.ptt.cc/bbs/Gossiping/M.1558454127.A.21F.html 法國 26
## 8 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 病人 25
## 9 https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 醫療 25
## 10 https://www.ptt.cc/bbs/Gossiping/M.1528372976.A.5E2.html 傅達仁 25
## # ... with 59,930 more rows
# count how often each pair of words co-occurs in the same article
word_pairs <- Euthanasia_words %>%
  pairwise_count(word, artUrl, sort = TRUE)
word_pairs
## # A tibble: 7,675,024 x 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 可以 安樂死 309
## 2 安樂死 可以 309
## 3 台灣 安樂死 278
## 4 安樂死 台灣 278
## 5 自己 安樂死 258
## 6 如果 安樂死 258
## 7 安樂死 自己 258
## 8 安樂死 如果 258
## 9 沒有 安樂死 222
## 10 安樂死 沒有 222
## # ... with 7,675,014 more rows
# compute the correlation between pairs of words
word_cors <- Euthanasia_words %>%
  group_by(word) %>%
  filter(n() >= 20) %>%
  pairwise_cor(word, artUrl, sort = TRUE) %>%
  filter(correlation <= 1 & correlation >= -1)
word_cors
## # A tibble: 242,556 x 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 新聞標題 內文 0.979
## 2 內文 新聞標題 0.979
## 3 主播 體育 0.926
## 4 體育 主播 0.926
## 5 新聞標題 完整 0.906
## 6 完整 新聞標題 0.906
## 7 新聞標題 網址 0.891
## 8 網址 新聞標題 0.891
## 9 新聞標題 來源 0.890
## 10 來源 新聞標題 0.890
## # ... with 242,546 more rows
# find the 10 words most highly correlated with "傅達仁" and with "流浪狗"
word_cors %>%
  filter(item1 %in% c("傅達仁", "流浪狗")) %>%
  group_by(item1) %>%
  top_n(10) %>%
  ungroup() %>%
  mutate(item2 = reorder(item2, correlation)) %>%
  ggplot(aes(item2, correlation)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ item1, scales = "free") +
  coord_flip()
## Selecting by correlation
Co-occurrence
Euthanasia_words_cors <- Euthanasia_words %>%
  group_by(word) %>%
  filter(n() >= 10) %>%
  pairwise_cor(word, artUrl, sort = TRUE)
set.seed(2016)
Euthanasia_words_cors %>%
  filter(abs(correlation) > .5 & abs(correlation) <= 1) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(color = "lightblue", size = 5) +
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_void()
The graph shows two major themes: issues surrounding 傅達仁, and issues surrounding stray animals
DTM
Convert the data into a Document-Term Matrix (DTM)
Initialize the tokenizer and add stop words
jieba_tokenizer <- worker(user="euthanasia_lexicon.txt", stop_word = "stop_words.txt")
# drop words of length 1
Euthanasia_tokenizer <- function(t) {
  lapply(t, function(x) {
    if(nchar(x)>1){
      tokens <- segment(x, jieba_tokenizer)
      tokens <- tokens[nchar(tokens)>1]
      return(tokens)
    }
  })
}
# filter out special characters
tokens <- Euthanasia_sentences %>%
  mutate(id=c(1:nrow(Euthanasia_sentences))) %>%
  unnest_tokens(word, sentence, token=Euthanasia_tokenizer) %>%
  filter(!str_detect(word, regex("[0-9a-zA-Z]")))
tokens_dtm <- tokens %>%
  count(artUrl, word) %>%
  rename(count=n)
Euthanasia_dtm <- tokens_dtm %>%
  cast_dtm(artUrl, word, count)
Euthanasia_dtm
## <<DocumentTermMatrix (documents: 850, terms: 14766)>>
## Non-/sparse entries: 48959/12502141
## Sparsity : 100%
## Maximal term length: 7
## Weighting : term frequency (tf)
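The 10 × 10 preview below presumably comes from inspecting a slice of the matrix; a sketch of the assumed call:
# sketch (assumption): preview the first 10 documents and terms
inspect(Euthanasia_dtm[1:10, 1:10])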
## <<DocumentTermMatrix (documents: 10, terms: 10)>>
## Non-/sparse entries: 26/74
## Sparsity : 74%
## Maximal term length: 3
## Weighting : term frequency (tf)
## Sample :
## Terms
## Docs 人生 不如意 日子
## https://www.ptt.cc/bbs/Gossiping/M.1483458289.A.84B.html 1 1 1
## https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484125815.A.B14.html 1 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484134907.A.F5F.html 1 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484143988.A.774.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484306396.A.E41.html 2 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484767375.A.124.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484801407.A.807.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484889238.A.D58.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1485003210.A.30F.html 0 0 0
## Terms
## Docs 仔細 台灣 生活
## https://www.ptt.cc/bbs/Gossiping/M.1483458289.A.84B.html 1 2 2
## https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484125815.A.B14.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484134907.A.F5F.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484143988.A.774.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484306396.A.E41.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484767375.A.124.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484801407.A.807.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484889238.A.D58.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1485003210.A.30F.html 0 1 0
## Terms
## Docs 安樂死 困擾 希望
## https://www.ptt.cc/bbs/Gossiping/M.1483458289.A.84B.html 3 1 1
## https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 10 0 1
## https://www.ptt.cc/bbs/Gossiping/M.1484125815.A.B14.html 1 1 0
## https://www.ptt.cc/bbs/Gossiping/M.1484134907.A.F5F.html 4 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484143988.A.774.html 1 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484306396.A.E41.html 5 0 2
## https://www.ptt.cc/bbs/Gossiping/M.1484767375.A.124.html 1 1 0
## https://www.ptt.cc/bbs/Gossiping/M.1484801407.A.807.html 2 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1484889238.A.D58.html 0 0 0
## https://www.ptt.cc/bbs/Gossiping/M.1485003210.A.30F.html 3 0 0
## Terms
## Docs 明顯
## https://www.ptt.cc/bbs/Gossiping/M.1483458289.A.84B.html 1
## https://www.ptt.cc/bbs/Gossiping/M.1483526412.A.E3F.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484125815.A.B14.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484134907.A.F5F.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484143988.A.774.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484306396.A.E41.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484767375.A.124.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484801407.A.807.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1484889238.A.D58.html 0
## https://www.ptt.cc/bbs/Gossiping/M.1485003210.A.30F.html 0
Inspecting the DTM, we can see that it is a sparse matrix.
Build the LDA models
Try 2, 5, 10, 15, and 25 topics, save the results, then analyze them further
# ldas = c()
# topics = c(2,5,10,15,25)
# for(topic in topics){
#   start_time <- Sys.time()
#   lda <- LDA(Euthanasia_dtm, k = topic, control = list(seed = 1234))
#   ldas = c(ldas, lda)
#   print(paste(topic, paste("topic(s) and use time is ", Sys.time() - start_time)))
#   save(ldas, file = "ldas_result")
# }
Because this takes a long time to run, the fitted models have been saved to ldas_result
Load the saved LDA results
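The loading step isn't shown; given the save(ldas, file = "ldas_result") call above, it is presumably:
load("ldas_result")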
Use perplexity to find the best number of topics
topics = c(2,5,10,15,25)
tibble(k = topics,
       perplex = map_dbl(ldas, topicmodels::perplexity)) %>%
  ggplot(aes(k, perplex)) +
  geom_point() +
  geom_line() +
  labs(title = "Evaluating LDA topic models",
       subtitle = "Optimal number of topics (smaller is better)",
       x = "Number of topics",
       y = "Perplexity")
Lower perplexity is better, but if it is too low the topics become too fine-grained. We normally pick a reasonable number of topics with a comparatively low perplexity; for the subsequent analysis, our group therefore uses 10 topics.
\(\phi\) Matrix
Examine each topic's word-composition probabilities
Euthanasia_lda = ldas[[3]] ## pick the k = 10 result
topics <- tidy(Euthanasia_lda, matrix = "beta") # note: pass matrix = "beta" to tidy to extract the phi matrix
topics
## # A tibble: 147,660 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 人生 0.00189
## 2 2 人生 0.00133
## 3 3 人生 0.00201
## 4 4 人生 0.00110
## 5 5 人生 0.00150
## 6 6 人生 0.00109
## 7 7 人生 0.00138
## 8 8 人生 0.00414
## 9 9 人生 0.0000533
## 10 10 人生 0.00230
## # ... with 147,650 more rows
Each row represents one word within one topic
Find each topic's representative words
- Collect the 10 words with the highest generation probability in each topic.
# take the 10 words with the highest generation probability (highest beta) in each topic
top_terms <- topics %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)
# draw the bar charts
top_terms %>%
  mutate(topic = as.factor(topic),
         term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = topic)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free") +
  coord_flip() +
  scale_x_reordered()
Every topic is dominated by the search keyword we started with, so the differences between topics are not visible.
Remove the frequent words shared across topics, and name each topic.
remove_word = c("安樂死","生命","自殺","台灣","老人","病人")
top_terms <- topics %>%
  filter(!term %in% remove_word) %>%
  group_by(topic) %>%
  top_n(10, beta) %>%
  ungroup() %>%
  arrange(topic, -beta)
# new facet label names for the topics
topic_name <- c('病人自主權利','安樂死法案連署','安樂死民意調查','瑞士合法安樂死','法國安樂死爭議','台灣首位安樂死','主動 V.S. 被動安樂死','植物人安樂死','動物安樂死','台大張振聲')
names(topic_name) <- c(1:10)
top_terms %>%
  mutate(topic = as.factor(topic),
         term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = topic)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free", labeller = labeller(topic = topic_name)) +
  coord_flip() +
  scale_x_reordered()
Document topic distribution
# for every document we have a probability distribution of its contained topics
tmResult <- posterior(Euthanasia_lda)
doc_pro <- tmResult$topics
dim(doc_pro) # nDocs(DTM) distributions over K topics
## [1] 850 10
Each article has its own topic distribution, so in total we have 850 articles × 10 topics
cbind the document topic distribution
Examine each article's topic composition (a sketch of the step follows)
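The cbind itself isn't shown in the knitted output; a sketch, assuming the topic proportions are labeled with the topic names defined above and attached to the article metadata (yielding the columns 11:20 used below):
# sketch (assumption): one topic-proportion column per topic;
# assumes doc_pro rows are in the same order as the articles
colnames(doc_pro) <- topic_name
Euthanasia_topic <- cbind(Euthanasia, doc_pro)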
Examine the articles of a specific topic
- After sorting articles by their weight on a given topic, we can see what the articles that load heavily on that topic discuss (see the sketch below).
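A sketch of that lookup for the 動物安樂死 topic (the exact code isn't shown):
# sketch (assumption): the ten articles weighted most heavily on the topic
Euthanasia_topic %>%
  arrange(desc(動物安樂死)) %>%
  select(artTitle, artUrl, 動物安樂死) %>%
  head(10)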
We can see that the 動物安樂死 (animal euthanasia) topic mainly concerns the placement and euthanasia of stray animals.
Topic trends over time
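The palette mycolors used below is never defined in the shown code; a sketch using RColorBrewer (loaded at the top), assuming one color per topic:
# sketch (assumption): expand a Brewer palette to 10 topic colors
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(10)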
Euthanasia_topic[,c(11:20)] = sapply(Euthanasia_topic[,c(11:20)], as.numeric)
Euthanasia_topic %>%
  select(artDate, 病人自主權利:台大張振聲) %>%
  group_by(artDate = format(artDate,"%Y%m")) %>%
  summarise_if(is.double, sum, na.rm = TRUE) %>%
  melt(id.vars = "artDate") %>%
  ggplot(aes(x=artDate, y=value, fill=variable)) +
  geom_bar(stat = "identity") + ylab("value") +
  scale_fill_manual(values=mycolors) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Because the monthly discussion volume of euthanasia varies widely, we next pick out the months with the highest discussion volume for a closer look.
Topic trends over time as proportions
Euthanasia_topic %>%
  select(artDate, 病人自主權利:台大張振聲) %>%
  group_by(artDate = format(artDate,"%Y%m")) %>%
  summarise_if(is.double, sum, na.rm = TRUE) %>%
  melt(id.vars = "artDate") %>%
  group_by(artDate) %>%
  mutate(total_value = sum(value)) %>%
  filter(total_value > 15) %>%
  ggplot(aes(x=artDate, y=value/total_value, fill=variable)) +
  geom_bar(stat = "identity") + ylab("proportion") +
  scale_fill_manual(values=mycolors) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
The 安樂死法案連署 (petition) and 動物安樂死 (animal euthanasia) topics attract more discussion at particular times; the volume of the other topics varies less noticeably.
Drawing the network graph
Build the user list, marking posters and commenters (a sketch of its construction follows)
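The construction of userList isn't shown; a sketch, assuming the comment table from the second read_csv was stored in a data frame called Reviews (hypothetical name):
# sketch (assumption): one row per unique user, flagged "poster" if they
# authored at least one article, otherwise "replyer"; `Reviews` is hypothetical
userList <- data.frame(user = unique(c(Euthanasia$artPoster, Reviews$cmtPoster))) %>%
  mutate(type = ifelse(user %in% Euthanasia$artPoster, "poster", "replyer"))
nrow(userList)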
## [1] 11973
Get the theta matrix
Join the article information with the topics (a sketch of the dominant-topic step follows)
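The step that assigns each document its dominant topic isn't shown; a base-R sketch producing the test_topics table referenced in the commented merge below (the original likely used ramify::argmax, which was loaded at the top):
# sketch (assumption): dominant topic per document from the theta matrix
test_topics <- data.frame(document = rownames(doc_pro),
                          topic = apply(doc_pro, 1, which.max))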
# join the article information with the topics
#posts_Reviews <- merge(x = posts_Reviews, y = test_topics, by.x = "artUrl", by.y="document")
#posts_Reviews
## # A tibble: 10 x 2
## topic count
## <int> <int>
## 1 1 89
## 2 2 94
## 3 3 89
## 4 4 134
## 5 5 145
## 6 6 47
## 7 7 54
## 8 8 101
## 9 9 77
## 10 10 20
# keep the topics 瑞士合法安樂死 (4), 法國安樂死爭議 (5), and 植物人安樂死 (8)
link <- posts_Reviews %>%
group_by(cmtPoster, artUrl) %>%
filter(n()>3) %>%
ungroup() %>%
filter(topic == 4 | topic == 5 | topic == 8) %>%
select(cmtPoster, artPoster.x, artUrl, topic) %>%
unique()
link
## # A tibble: 256 x 4
## cmtPoster artPoster.x artUrl topic
## <chr> <chr> <chr> <int>
## 1 GalLe5566 a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 2 sooppp a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 3 mrforget a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 4 micr430927 a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 5 medama a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 6 berserk a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 7 rrr518 a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 8 aq10203040 a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 9 piyobearman a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## 10 Anemoneheart a1e https://www.ptt.cc/bbs/Gossiping/M.1486878089~ 8
## # ... with 246 more rows
Which kinds of articles users often participate in
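filtered_user is used in the plotting code below but only constructed explicitly in the next section; a sketch mirroring that later code:
# sketch: keep only the users that appear in link
filtered_user <- userList %>%
  filter(user %in% link$cmtPoster | user %in% link$artPoster.x) %>%
  arrange(desc(type))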
# build the network
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)
# color the nodes by user role
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")
# color each edge by the topic of the article the comment belongs to
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$topic == "4", "lightgreen", ifelse(E(reviewNetwork)$topic == "5", "palevioletred", "#FCCB5E"))
# draw the social network graph
set.seed(5432)
plot(reviewNetwork, vertex.size=3, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > 5, V(reviewNetwork)$label, NA), vertex.label.font=2, vertex.label.color = "black")
# 瑞士合法安樂死: Linbeage097, kuromai
# 法國安樂死爭議: kiwibee, Zcould, apriltino
# 植物人安樂死: a1e, yha, syearth, guestwhat
# add the legends
legend("bottomright", c("發文者","回文者"), pch=21,
       col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("瑞士合法安樂死","法國安樂死爭議", "植物人安樂死"),
       col=c("lightgreen","palevioletred", "#FCCB5E"), lty=1, cex=1)
Are users well received?
# PTT comments come in three kinds: push (推), boo (噓), and arrow (→)
# we only need the pushes and boos, so drop the arrows
link <- posts_Reviews %>%
  filter(cmtStatus!="→") %>%
  group_by(cmtPoster, artUrl) %>%
  filter(n()>3) %>%
  ungroup() %>%
  select(cmtPoster, artPoster.x, artUrl, cmtStatus) %>%
  unique()
# next we draw the network graph; the steps match the previous section, so we skip the details
# keep only the users that appear in link
filtered_user <- userList %>%
  filter(user%in%link$cmtPoster | user%in%link$artPoster.x) %>%
  arrange(desc(type))
# build the network
reviewNetwork <- graph_from_data_frame(d=link, v=filtered_user, directed=F)
# color the nodes by user role
labels <- degree(reviewNetwork)
V(reviewNetwork)$label <- names(labels)
V(reviewNetwork)$color <- ifelse(V(reviewNetwork)$type=="poster", "gold", "lightblue")
# color each edge by comment status (push or boo)
E(reviewNetwork)$color <- ifelse(E(reviewNetwork)$cmtStatus == "推", "lightgreen", "palevioletred")
# draw the social network graph
set.seed(5432)
plot(reviewNetwork, vertex.size=2, edge.width=3, vertex.label.dist=1,
     vertex.label=ifelse(degree(reviewNetwork) > 3, V(reviewNetwork)$label, NA), vertex.label.font=2)
# add the legends
legend("bottomright", c("發文者","回文者"), pch=21,
       col="#777777", pt.bg=c("gold","lightblue"), pt.cex=1, cex=1)
legend("topleft", c("推","噓"),
       col=c("lightgreen","palevioletred"), lty=1, cex=1)
Conclusion
Euthanasia is a serious issue; whether for terminally ill patients or stray animals, it deserves the public's earnest attention and rational discussion. The documents we collected and our analysis show that Taiwanese netizens tend to discuss it only when a celebrity or major event is involved; once the wave passes, few keep participating. As with 傅達仁's euthanasia and the French euthanasia controversy mentioned above, articles and comments surged early in each event, yet a month or two later the discussion had died out.
The issue deserves more sustained attention and discussion. Our group's view on euthanasia for the severely ill is that if the patient has signed a declaration or otherwise expressed the wish to forgo treatment, we support their choice of euthanasia. Where the patient has made no advance declaration, our group could not reach a consensus, and further discussion is needed.
As for stray-animal euthanasia, the government followed the trend and enacted a no-kill policy, but without carefully planning what comes next. The burden on shelters has risen sharply, because large numbers of strays stay in shelters, neither adopted nor eligible for euthanasia. Shelters therefore cannot take in new strays, which keep breeding on the streets in a vicious cycle.
Our view is that animal euthanasia is a last resort. Rather than simply legislating a ban, the way to solve the problem is to educate the public not to abandon pets and to adopt instead, and to subsidize the neutering of stray animals; only that addresses the problem at its root.