系統參數設定
Sys.setlocale(category = "LC_ALL", locale = "zh_TW.UTF-8") # 避免中文亂碼## [1] ""
安裝需要的packages
# echo = T,results = 'hide'
packages = c("dplyr", "tidytext", "stringr", "wordcloud2", "ggplot2",'readr','data.table','reshape2','wordcloud','tidyr','scales','openxlsx','colorspace')
existing = as.character(installed.packages()[,1])
for(pkg in packages[!(packages %in% existing)]) install.packages(pkg)讀進library
library(dplyr)
library(stringr)
library(tidytext)
library(wordcloud2)
library(data.table)
library(ggplot2)
library(reshape2)
library(wordcloud)
library(tidyr)
library(readr)
library(scales)
require(jiebaR)
library(colorspace)今天聖經仍然是世界上發行量第一的書,我們好奇在聖經上神的情感,祂對事物的情緒感受為何?神會憂傷、嫉妒和生氣? 也有憐憫、喜悅和快樂。透過聖經進行情緒分析,我們想窺知一二。本次主要針對以下方向分析:
1.聖經新約/舊約在正面和負面的情緒內容分佈? 2.聖經各卷之正負情緒比例及正負曲線為何? 3.要閱讀聖經時哪些是屬於正面篇章,哪些是負面的? 4.不同分類的聖經,情緒分佈有何不同? 5.比較LIWC, NRC情緒字典所呈現出來的情緒分佈有何不同?
# 資料前處理
metaData = fread('../data/bibleMetaData.txt',encoding = 'UTF-8',fill=TRUE) #載入的資料未完全格式化,fill=TRUE
#針對舊約,新約聖經每一卷進行編碼
#格式是=第1碼是0或1,0:舊約, 1:新約, 第2-3碼為流水號
bible_data <- metaData %>%
mutate(bookcode = cumsum(str_detect(metaData$book,regex("^=[0-1][0-9]{2}")))) %>% select (-book,-chapter) %>%
filter(!(metaData$ch_book %in% c("-"))) #去除特殊的格式
str(bible_data)## Classes 'data.table' and 'data.frame': 31172 obs. of 4 variables:
## $ ch_book : chr "創世紀" "創世紀" "創世紀" "創世紀" ...
## $ ch_chapter: chr "1:1" "1:2" "1:3" "1:4" ...
## $ text : chr "起初 神創造天地。" "地是空虛混沌.淵面黑暗. 神的靈運行在水面上。" " 神說、要有光、就有了光。" " 神看光是好的、就把光暗分開了。" ...
## $ bookcode : int 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, ".internal.selfref")=<externalptr>
bible_book_chname <- metaData %>%
mutate(bookcode = cumsum(str_detect(metaData$book,regex("^=[0-1][0-9]{2}")))) %>%
filter((metaData$ch_book %in% c("-"))) %>%
select(bookcode,ch_chapter) %>%
mutate(ch_bookname=ch_chapter) %>%
select (-ch_chapter)
#舊約
bible_data_old <- bible_data %>%
filter(.$bookcode<40)
#新約
bible_data_new <- bible_data %>%
filter(.$bookcode>=40)
#將書卷作分類
bible_level = c("律法書","紀事書","詩歌","先知書","福音書","使徒行傳","保羅書信","別的書信","啟示錄")
# 設定分類
bible_data = bible_data %>%
mutate(class = lapply(bible_data$bookcode, function(x){
if (x<6) return ("律法書")
else if (x<18) return("紀事書")
else if (x<23) return ("詩歌")
else if (x<40) return ("先知書")
else if (x<44) return ("福音書")
else if (x<45) return ("使徒行傳")
else if (x<58) return ("保羅書信")
else if (x<66) return ("別的書信")
else return("啟示錄")
}) %>% unlist(use.names=FALSE))
# 設定bible_class的levels
bible_data$class = factor(bible_data$class,levels=bible_level )(1). 文章斷詞
設定斷詞引擎
# 加入自定義的字典
jieba_tokenizer <- worker(user="../dict/bible_lexicon.tradictional.txt", stop_word = "../dict/bible_stop_words.txt")
# 設定斷詞function
customized_tokenizer <- function(t) {
lapply(t, function(x) {
tokens <- segment(x, jieba_tokenizer)
return(tokens)
})
}
bible_tokens <- bible_data %>% unnest_tokens(word, text, token=customized_tokenizer) (2). 資料基本清理
# 過濾特殊字元
bible_tokens = bible_tokens %>%
filter(!grepl('[[:punct:]]',word)) %>% # 去標點符號
filter(!grepl("['^0-9a-z']",word)) # 去英文、數字
# 進行combine token
bible_tokens_all <- rbind(
bible_tokens %>%
filter(nchar(.$word)>1) %>% #2個字以上的token
group_by(word) %>%
summarise(sum = n()) %>%
filter(sum>3),
bible_tokens %>%
filter(.$word %in% c("主","神","信","望","愛") ) %>% #一個字的頻率也高 另外加入
group_by(word) %>%
summarise(sum = n())) %>%
arrange(desc(sum))
# bible_tokens_all %>% wordcloud2() #
# 舊約token
bible_tokens_old <- rbind(
bible_tokens %>%
filter(.$bookcode<40) %>%
filter(nchar(.$word)>1) %>%
group_by(word) %>%
summarise(sum = n()) %>%
filter(sum>3),
bible_tokens %>%
filter(.$bookcode<40) %>%
filter(.$word %in% c("主","神","信","望","愛") ) %>%
group_by(word) %>%
summarise(sum = n())) %>%
arrange(desc(sum))
#bible_tokens_old %>% wordcloud2()
## 新約token
bible_tokens_new <- rbind(
bible_tokens %>%
filter(.$bookcode>=40) %>%
filter(nchar(.$word)>1) %>%
group_by(word) %>%
summarise(sum = n()) %>%
filter(sum>3),
bible_tokens %>%
filter(.$bookcode>=40) %>%
filter(.$word %in% c("主","神","信","望","愛") ) %>%
group_by(word) %>%
summarise(sum = n())) %>%
arrange(desc(sum))
#bible_tokens_new %>% wordcloud2() 全名Linguistic Inquiry and Word Counts,由心理學家Pennebaker於2001出版 分為正向情緒與負向情緒
讀檔,字詞間以“,”將字分隔
P <- read_file("../dict/liwc/positive.txt") # 正向字典txt檔
N <- read_file("../dict/liwc/negative.txt") # 負向字典txt檔
#字典txt檔讀進來是一整個字串
typeof(P)## [1] "character"
分割字詞,並將兩個情緒字典併在一起
# 將字串依,分割
# strsplit回傳list , 我們取出list中的第一個元素
P = strsplit(P, ",")[[1]]
N = strsplit(N, ",")[[1]]
# 建立dataframe 有兩個欄位word,sentiments,word欄位內容是字典向量
P = data.frame(word = P, sentiment = "positive") #664
N = data.frame(word = N, sentiment = "negative") #1047
# 把兩個字典拼在一起
LIWC = rbind(P, N)
# 檢視字典
head(LIWC)## word sentiment
## 1 一流 positive
## 2 下定決心 positive
## 3 不拘小節 positive
## 4 不費力 positive
## 5 不錯 positive
## 6 主動 positive
NRC (National Research Council) 情緒和情感詞典是由加拿大國家研究委員會(簡稱NRC)的專家建立 分為正向情緒(Positive)與負向情緒(Negative)
library(openxlsx)
library(tidyr)
NRC <- read.xlsx("../dict/NRC.xlsx")
# 檢視字典
head(NRC)## word sentiment
## 1 鬱悶 negative
## 2 鑽石 positive
## 3 讚許 positive
## 4 讚美 positive
## 5 讚美 positive
## 6 顱內 positive
聖經合和本為新舊約組成,舊約指的是猶太人的希伯來聖經,由耶和華創世開始;新約則記載耶穌基督和其宗徒的言行,以及早期基督教的事件紀錄、使徒保羅寫給教會或其他人的書信、或其他使徒寫給教會的書信。新約新約與舊約相隔400年,有分開探索的必要。
sentiment_word_count = bible_tokens_all %>%
inner_join(LIWC) %>%
arrange(desc(sum)) %>%
data.frame() #存成data frame ## Joining, by = "word"
sentiment_word_old = bible_tokens_old %>%
inner_join(LIWC) %>%
arrange(desc(sum)) %>%
data.frame() #存成data frame ## Joining, by = "word"
sentiment_word_new = bible_tokens_new %>%
inner_join(LIWC) %>%
arrange(desc(sum)) %>%
data.frame() #存成data frame ## Joining, by = "word"
sentiment_word_count %>%
top_n(30,wt = sum) %>%
mutate(word = reorder(word, sum)) %>% #重新排序word,
ggplot(aes(word, sum, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()#處理舊約
sentiment_word_old %>%
top_n(30,wt = sum) %>%
mutate(word = reorder(word, sum)) %>% #重新排序word,
ggplot(aes(word, sum, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()#處理新約
sentiment_word_new %>%
top_n(30,wt = sum) %>%
mutate(word = reorder(word, sum)) %>% #重新排序word,
ggplot(aes(word, sum, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()# sentiment_word_count %>%
# acast(word ~ sentiment, value.var = "sum", fill = 0) %>%
# comparison.cloud(
# colors = c("salmon", "#72bcd4"), # negative positive
# max.words = 50)
#舊約文字雲
sentiment_word_old %>%
acast(word ~ sentiment, value.var = "sum", fill = 0) %>%
comparison.cloud(
colors = c("salmon", "#72bcd4"), # negative positive
max.words = 50) # 新約正負情緒比較文字雲
sentiment_word_new %>%
acast(word ~ sentiment, value.var = "sum", fill = 0) %>%
comparison.cloud(
colors = c("salmon", "#72bcd4"), # negative positive
max.words = 50) # 找出聖經中的情緒token及頻率, 算出每一卷情緒總和(sentiment_count)
# 每一卷sentiment_count:bookcode,sentiment,count
# 第19篇的情緒count很高
sentiment_count = bible_tokens %>%
select(bookcode,word,ch_book) %>%
inner_join(LIWC) %>%
group_by(bookcode,sentiment) %>%
summarise(count=n()) %>%
arrange(bookcode)## Joining, by = "word"
## `summarise()` has grouped output by 'bookcode'. You can override using the `.groups` argument.
3.2.1 各卷正負情緒分數折線圖
#畫出各卷的正負情緒 ,紅線後為新約
sentiment_count %>%
ggplot()+
geom_line(aes(x=bookcode,y=count,colour=sentiment)) +
# 加上新約的界線
geom_vline(xintercept = as.numeric("40"), col='red', size = 1) +
geom_vline(xintercept = as.numeric("19"), col='purple', size = 0.3) + #詩篇
geom_vline(xintercept = as.numeric("24"), col='purple', size = 0.3)分析:
1 舊約大部份的書卷為負向情緒緒詞彙>正向詞彙,其中負向>正向情緒的書卷有23以賽亞書, 24耶利米書,25耶利米哀歌,26以西結書 ; 例外的為舊約詩篇
2 新約整體而言情緒詞彙數目較少,但呈現出正向情緒詞彙出現>負向詞彙
3.2.2 各卷正負情緒分數標準化折線圖 將情緒分數標準化
sentiment_count_ratio= sentiment_count %>%
# 標準化的部分
group_by(bookcode) %>%
mutate(ratio = count/sum(count))
sentiment_count %>%
# 標準化的部分
group_by(bookcode) %>%
mutate(ratio = count/sum(count)) %>%
# 畫圖的部分
ggplot()+
geom_line(aes(x=bookcode,y=ratio,colour=sentiment))+
# 加上標示日期的線
geom_vline(xintercept = as.numeric("40"), col='red', size = 1) +
geom_vline(xintercept = as.numeric("19"), col='purple', size = 0.3) + #詩篇
geom_vline(xintercept = as.numeric("24"), col='purple', size = 0.3) + #耶利米書
geom_vline(xintercept = as.numeric("29"), col='blue', size = 0.3) + #約珥書 (N:51 P:12)
geom_vline(xintercept = as.numeric("51"), col='blue', size = 0.3) #歌羅西書 (N:14 P:71) 分析:將情緒標準化再畫一次圖,各卷之情緒標準化後,正負情緒Gap較大的有29: 約珥書 (N:51 P:12); 51: 歌羅西書 (N:14 P:71),舊約以負向佔比均多,新約則以正向均多 。
當我們閱讀聖經時,我們是否就可以預期選讀的書卷其情緒傾向。
#找出前3名(1)正向>負向情緒的卷 (2)負向>正向情緒的卷
sentiment_book = sentiment_count %>%
group_by(bookcode) %>%
spread(sentiment,count,fill=0) %>%
mutate(s_score=positive-negative) %>%
mutate(booktype=case_when(positive>negative~"positive",TRUE~"negatvie")) %>%
mutate(diff=abs(s_score)) %>%
select(bookcode,booktype,diff) %>%
filter(diff>50) %>% #正負情緒詞頻差異超過50者再列入
left_join(bible_book_chname) %>%
arrange(desc(diff)) ## Joining, by = "bookcode"
sentiment_book %>%
top_n(10,wt = diff) %>%
mutate(ch_bookname = reorder(ch_bookname,diff)) %>% #重新排序bookcode
ggplot(aes(ch_bookname, diff, fill = booktype)) +
geom_col(show.legend = FALSE) +
facet_wrap(~booktype, scales = "free_y") +
labs(y = "正負詞彙差異",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip() 分析:
1.正負情緒詞數量, 負>正的前3卷書有耶利米書,以西結書,撒母耳記上
2.正>負的書卷有詩篇,歷代志上, 利未記
3.4.1 將聖經分為9類, >舊約: “律法書”, 1-5 “紀事書”, 6-17 “詩歌”, 18-22 “先知書”, 23-39 >新約: “福音書”,40-43 “使徒行傳”,44 “保羅書信”,45-57 “別的書信”, 58-65 “啟示錄” 66
sentiment_count_bible_class = bible_tokens %>%
select(class,word) %>%
inner_join(LIWC) %>%
group_by(class,sentiment) %>%
summarise(count=n()) ## Joining, by = "word"
## `summarise()` has grouped output by 'class'. You can override using the `.groups` argument.
sentiment_count_bible_class## # A tibble: 18 x 3
## # Groups: class [9]
## class sentiment count
## <fct> <chr> <int>
## 1 律法書 negative 837
## 2 律法書 positive 812
## 3 紀事書 negative 1240
## 4 紀事書 positive 1120
## 5 詩歌 negative 1774
## 6 詩歌 positive 1894
## 7 先知書 negative 2451
## 8 先知書 positive 1483
## 9 福音書 negative 545
## 10 福音書 positive 596
## 11 使徒行傳 negative 149
## 12 使徒行傳 positive 169
## 13 保羅書信 negative 569
## 14 保羅書信 positive 1035
## 15 別的書信 negative 253
## 16 別的書信 positive 379
## 17 啟示錄 negative 133
## 18 啟示錄 positive 86
sentiment_count_bible_class %>%
ggplot(aes(x=class,y=count,colour=sentiment,group=sentiment))+
geom_point()+geom_line()分析:
1.可以看出以"福音書"以後為新約,正面情緒大於負面情緒
2.舊約為負面情緒大於正面情緒,其中以"先知書" 差別最明顯
3.4.2 以先知書類為例-負向較強
先知書主要記載以色列人歷史中一些特殊人物,他們被神選召,替神傳達信息。 先知書雖然有些談及未來,但是大部分篇幅仍然只是談及當時的情形或責備以色列民的罪惡。
word_count_class =
bible_tokens %>%
select(word,class) %>%
group_by(class,word)%>%
summarise(count=n())## `summarise()` has grouped output by 'class'. You can override using the `.groups` argument.
prophet =
word_count_class %>%
filter(class=="先知書")%>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(
sum = sum(count)
) %>%
arrange(desc(sum)) %>%
data.frame() ## Joining, by = "word"
## `summarise()` has grouped output by 'word'. You can override using the `.groups` argument.
prophet %>%
top_n(30,wt = sum) %>%
mutate(word = reorder(word, sum)) %>%
ggplot(aes(word, sum, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()3.4.2 以保羅書信類為例-正向情緒強
各地教會就各教會內的情況或是對來信詢問教義的人的回信,書信中解釋了許多有關基督教教義的疑難問題及闡明教理原則
paul <-
word_count_class %>%
filter(class=="保羅書信")%>%
inner_join(LIWC) %>%
group_by(word,sentiment) %>%
summarise(
sum = sum(count)
) %>%
arrange(desc(sum)) %>%
data.frame() ## Joining, by = "word"
## `summarise()` has grouped output by 'word'. You can override using the `.groups` argument.
paul %>%
top_n(30,wt = sum) %>%
mutate(word = reorder(word, sum)) %>%
ggplot(aes(word, sum, fill = sentiment)) +
geom_col(show.legend = FALSE) +
facet_wrap(~sentiment, scales = "free_y") +
labs(y = "Contribution to sentiment",
x = NULL) +
theme(text=element_text(size=14))+
coord_flip()耶利米書是舊約聖經中的第二十四卷經書 當時的猶大國日趨沒落,外有強鄰如亞述、埃及和巴比倫等國,虎視眈眈隨時找機會進行侵略;國內情形,上自君王、首領,下至百姓,都離棄神拜偶像,道德低落;這是先知耶利米當時所處的背景,在這樣的情形下,耶利米盡了他話語的職事。
# 24耶利米書
# rowwise的處理
# https://stackoverflow.com/questions/43970590/mutate-two-new-columns-based-on-splitting-another
bible_tokens_yelly <- bible_tokens %>%
filter(.$bookcode==24) %>%
mutate(tempcol = as.vector(stringr::str_split(.$ch_chapter, ':'))) %>%
rowwise() %>%
mutate(chapter = as.numeric(unlist(tempcol)[1])) %>%
select(-tempcol)
sentiment_count_yelly_LIWC = bible_tokens_yelly %>%
select(chapter,word) %>%
inner_join(LIWC) %>%
group_by(chapter,sentiment) %>%
summarise(count=n()) %>%
mutate(source="LIWC") %>%
arrange(chapter) ## Joining, by = "word"
## `summarise()` has grouped output by 'chapter'. You can override using the `.groups` argument.
#正負情緒長條圖
sentiment_count_yelly_LIWC %>%
ggplot(aes(x = chapter, y = count, fill = sentiment))+
geom_bar(stat = "identity", position = "dodge") +
geom_vline(xintercept = as.numeric("51"), col='red', size = 1) +
labs(y = "count (LIWC) ")sentiment_count_yelly_LIWC_1 = sentiment_count_yelly_LIWC %>%
spread(sentiment,count,fill=0) %>%
mutate(sentiment_score=positive-negative) #正負情緒曲線圖
sentiment_count_yelly_LIWC %>%
ggplot()+
geom_line(aes(x=chapter,y=count,colour=sentiment)) +
# 加上51最negative 的線
geom_vline(xintercept = as.numeric("51"), col='red', size = 1) +
labs(y = "count (LIWC) ")sentiment_count_yelly_NRC = bible_tokens_yelly %>%
select(chapter,word) %>%
inner_join(NRC) %>%
group_by(chapter,sentiment) %>%
summarise(count=n()) %>%
mutate(source="NRC") %>%
arrange(chapter) ## Joining, by = "word"
## `summarise()` has grouped output by 'chapter'. You can override using the `.groups` argument.
sentiment_count_yelly_NRC %>%
ggplot()+
geom_line(aes(x=chapter,y=count,colour=sentiment)) +
# 加上51最negative 的線
geom_vline(xintercept = as.numeric("51"), col='red', size = 1) +
labs(y = "count (NRC) ")sentiment_count_yelly_NRC_1 = sentiment_count_yelly_NRC %>%
spread(sentiment,count,fill=0) %>%
mutate(sentiment_score=positive-negative)
bind_rows(sentiment_count_yelly_LIWC_1,
sentiment_count_yelly_NRC_1) %>%
ggplot(aes(chapter, sentiment_score, fill = source)) +
geom_col(show.legend = FALSE) +
facet_wrap(~source, ncol = 1, scales = "free_y") 分析: 基本上情緒的高低,LIWC,NRC每一章大致相同, LIWC 的負向又比NRC來得強烈
而NRC的正向比LIWC來得強烈
本文的用的作者來源,主要參考這兩篇文章: https://www.konline.org/biblestudy/JBS_A/DBC_08.doc.html,https://www.gotquestions.org/T-Chinese/T-Chinese-authors-Bible.html,另外搭配維基百科的說明
bible_author = tibble::tribble(
~ch_book, ~bookcode, ~author,
"創世紀", 1L,"摩西",
NA, 1L, "摩西",
"-", 2L, "摩西",
"出埃及記", 2L, "摩西",
"利未記", 3L, "摩西",
"民數記", 4L,"摩西",
"申命記", 5L,"摩西",
"約書亞記", 6L, "約書亞",
"士師記", 7L, "撒母耳",
"路得記", 8L,"撒母耳",
"撒母耳記上", 9L,"撒母耳",
"撒母耳記下", 10L,"拿單與迦得",
"列王記上", 11L,"耶利米",
"列王記下", 12L,"耶利米",
"歷代志上", 13L,"以斯拉",
"歷代志下", 14L,"以斯拉",
"以斯拉記", 15L,"以斯拉",
"尼希米記", 16L,"尼希米",
"以斯帖記", 17L,"末底改",
"約伯記", 18L,"以利戶",
"詩篇", 19L, "大衛為主",
"箴言", 20L,"所羅門",
"傳道書", 21L,"所羅門",
"雅歌", 22L,"所羅門",
"以賽亞書", 23L,"以賽亞",
"耶利米書", 24L,"耶利米",
"耶利米哀歌", 25L,"耶利米",
"以西結書", 26L,"以西結",
"但以理書", 27L,"但以理",
"何西阿書", 28L,"何西阿",
"約珥書", 29L,"約珥",
"阿摩司書", 30L,"阿摩司",
"俄巴底亞書", 31L,"俄巴底亞",
"約拿書", 32L,"約拿",
"彌迦書", 33L,"彌迦",
"那鴻書", 34L,"那鴻",
"哈巴谷書", 35L,"哈巴谷",
"西番雅書", 36L,"西番雅",
"哈該書", 37L,"哈該",
"撒迦利亞書", 38L,"撒迦利亞",
"瑪拉基書", 39L,"瑪拉基",
"馬太福音", 40L,"馬太",
"馬可福音", 41L,"馬可",
"路加福音", 42L,"路加",
"約翰福音", 43L,"約翰",
"使徒行傳", 44L,"路加",
"羅馬書", 45L,"保羅",
"哥林多前書", 46L,"保羅",
"哥林多後書", 47L,"保羅",
"加拉太書", 48L,"保羅",
"以弗所書", 49L,"保羅",
"腓立比書", 50L,"保羅",
"歌羅西書", 51L,"保羅",
"帖撒羅尼迦前書", 52L,"保羅",
"帖撒羅尼迦後書", 53L,"保羅",
"提摩太前書", 54L,"保羅",
"提摩太後書", 55L,"保羅",
"提多書", 56L,"保羅",
"腓利門書", 57L,"保羅",
"希伯來書", 58L,"未知",
"雅各書", 59L,"雅各",
"彼得前書", 60L,"彼得",
"彼得後書", 61L,"彼得",
"約翰一書", 62L,"約翰",
"約翰二書", 63L,"約翰",
"約翰三書", 64L,"約翰",
"猶大書", 65L,"猶大",
"啟示錄", 66L,"約翰"
)
head(bible_author)## # A tibble: 6 x 3
## ch_book bookcode author
## <chr> <int> <chr>
## 1 創世紀 1 摩西
## 2 <NA> 1 摩西
## 3 - 2 摩西
## 4 出埃及記 2 摩西
## 5 利未記 3 摩西
## 6 民數記 4 摩西
3.6.1 分析不同作者的文字情緒
比較不同作者的正負面情緒數量
bible_tokens = left_join(bible_tokens, bible_author)## Joining, by = c("ch_book", "bookcode")
#head(bible_tokens)
bible_tokens %>%
# clean data
inner_join(LIWC) %>%
group_by(author, sentiment) %>%
summarise(n = n(),
bookcode = min(bookcode)) %>%
ungroup() %>%
# plot the amount of sentiment
ggplot(aes(x = reorder(author, -bookcode), y = n, fill = sentiment)) +
geom_col(position = "dodge") +
labs(y = "count to sentiment",
x = "") +
coord_flip()## Joining, by = "word"
## `summarise()` has grouped output by 'author'. You can override using the `.groups` argument.
分析: 以作者-耶利米,摩西所撰寫的負向情緒為多,保羅則為正向最高, 路加次之。
3.6.2 比較每位作者的情緒 ratio
比較不同作者的正負面情緒數量
bible_tokens %>%
inner_join(LIWC) %>%
group_by(author, sentiment) %>%
summarise(n = n(),
bookcode = min(bookcode)) %>%
mutate(ratio = n/sum(n)) %>%
ungroup() %>%
# select(author, bookcode, sentiment, ratio) %>%
# pivot_wider(names_from = sentiment, values_from = ratio)
ggplot(aes(x = reorder(author, -bookcode),
y = ratio,
fill = sentiment,
group = sentiment
)
) +
geom_col(position = "stack") +
geom_hline(yintercept = 0.5, color = "grey7") +
# facet_wrap(~sentiment, ncol = 1) +
labs(y = "count to sentiment",
x = "") +
# theme(text=element_text(size=14))+
coord_flip()## Joining, by = "word"
## `summarise()` has grouped output by 'author'. You can override using the `.groups` argument.
分析: 以ratio 作者-約珥,阿摩斯,西番雅所撰寫的負向情緒比例為多,
正向比例末底改最高, 路加次之。
3.6.3 作者的正負面詞彙總數相減
bible_tokens %>%
inner_join(LIWC) %>%
group_by(author, sentiment) %>%
count(word) %>%
top_n(1) %>%
rmarkdown::paged_table()## Joining, by = "word"
## Selecting by n
bible_tokens %>%
inner_join(LIWC) %>%
group_by(author, sentiment) %>%
summarise(n = n(), ch_book = max(ch_book)) %>%
pivot_wider(names_from = sentiment, values_from = n) %>%
ungroup() %>%
mutate(score = positive - negative) %>%
ggplot(aes(x = reorder(author, score), y = score)) +
geom_col(aes(fill = reorder(author, score))) +
geom_text(aes(label = ch_book), check_overlap = TRUE) +
scale_fill_discrete_diverging("Blue-Red2") +
theme(legend.position = "none") +
coord_flip()## Joining, by = "word"
## `summarise()` has grouped output by 'author'. You can override using the `.groups` argument.
分析: 以正負面詞彙總數相減以保羅-羅馬書最正面 , 耶利米-耶利米書最負面。
1.聖經舊約/新約在正面和負面的情緒內容分佈? (1)舊約: N:攻擊,仇敵,惡人,懼怕,發怒 P:智慧,潔淨,平安,榮耀 …
(2)新約: N: 軟弱,憂愁,懼怕,悔改 P:榮耀,信心,真理,盼望,愛心,憐憫…
2.聖經各卷之正負情緒比例及正負曲線為何? (1)舊約大部份的書卷為負向情緒緒詞彙>正向詞彙,呈現出負向情緒曲線出現>正向曲線
(2)新約整體而言情緒詞彙數目較少,但呈現出正向情緒曲線出現>負向曲線
3.要閱讀聖經時哪些是屬於正面篇章,哪些是負面的? (1).正負情緒詞數量, 負>正的前3卷書有耶利米書,以西結書,撒母耳記上 (2).正>負的書卷有詩篇,歷代志上, 利未記
4.不同分類的聖經,情緒分佈有何不同? (1)新約“福音書”類的,正面情緒大於負面情緒 (2)舊約為負面情緒大於正面情緒,其中以“先知書” 差別最明顯
5.比較LIWC, NRC情緒字典所呈現出來的情緒分佈有何不同? 基本上情緒的高低,LIWC,NRC每一章大致相同, LIWC 的負向又比NRC來得強烈 而NRC的正向比LIWC來得強烈, 可以針對翻譯的詞彙再作整理,另外聖經專有情緒詞彙可再搜集。
6.比較不同作者所呈現出來的情緒有何不同? 以正負面詞彙總數相減以保羅-羅馬書最正面 , 耶利米-耶利米書最負面