Abstract
比較阿拉丁、灰姑娘、美女與野獸、睡美人四本童話故事之情緒分析
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
require(tidytext)
## Loading required package: tidytext
library(wordcloud2)
require(data.table)
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
##
## Attaching package: 'reshape2'
## The following objects are masked from 'package:data.table':
##
## dcast, melt
require(wordcloud)
## Loading required package: wordcloud
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'wordcloud'
require(tidyr)
## Loading required package: tidyr
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
require(readr)
## Loading required package: readr
require(scales)
## Loading required package: scales
##
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
##
## col_factor
# Turn the raw text of one book into a one-word-per-row table.
#
# raw_text:   a single string holding the whole book.
# book_title: label written into the `book` column of every row.
#
# Returns a data frame with the columns `linenumber` (1-based position of the
# sentence a word came from), `word` and `book`.
tokenize_book <- function(raw_text, book_title) {
  # A literal full stop is treated as the sentence delimiter.
  sentences <- strsplit(raw_text, "[.]")[[1]]
  # stringsAsFactors = FALSE keeps `text` as character on every R version, so
  # no conversion is needed before tokenising.
  data.frame(text = sentences, stringsAsFactors = FALSE) %>%
    mutate(linenumber = row_number()) %>%
    unnest_tokens(word, text) %>%
    mutate(book = book_title)
}

# The label "Cinderalla" is spelled exactly as the rest of the script filters
# on it; do not correct it here without updating those comparisons.
Cinderalla <- read_file("Cinderella.txt")
C <- tokenize_book(Cinderalla, "Cinderalla")

Beauty <- read_file("Beauty and the beast.txt")
B <- tokenize_book(Beauty, "Beauty and the Beast")

Sleep <- read_file("Sleeping beauty in the wood.txt")
S <- tokenize_book(Sleep, "Sleeping Beauty in the Wood")

Aladdin <- read_file("ALADDIN AND THE WONDERFUL LAMP.txt")
A <- tokenize_book(Aladdin, "Aladdin and the Wonderful Lamp")

# All four books stacked into one token table.
Fairy_Tales <- rbind(B, C, S, A)
# Words that the NRC lexicon labels with the sentiment "joy".
nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

# Most frequent joy words in "Beauty and the Beast".
# The join key is named explicitly so the join cannot silently start using
# additional shared columns.
Fairy_Tales %>%
  filter(book == "Beauty and the Beast") %>%
  inner_join(nrc_joy, by = "word") %>%
  count(word, sort = TRUE)
## # A tibble: 77 x 2
## word n
## <chr> <int>
## 1 beauty 80
## 2 found 24
## 3 good 19
## 4 happy 11
## 5 splendid 8
## 6 journey 6
## 7 love 6
## 8 promise 6
## 9 glad 5
## 10 save 5
## # ... with 67 more rows
library(tidyr)
# Net Bing sentiment (positive minus negative word count) per sentence and book.
# `linenumber %/% 1` uses a bin width of one, so every sentence is its own
# `index`; dividing by the double 1 is also why `index` is a double.
bing_Fairy_Tales <- Fairy_Tales %>%
  # Name the key explicitly instead of relying on a natural join.
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(book, index = linenumber %/% 1, sentiment) %>%
  # One column per polarity; a polarity absent from a sentence becomes 0.
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative, method = "bing")
bing_Fairy_Tales
## # A tibble: 526 x 6
## book index negative positive sentiment method
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Aladdin and the Wonderful Lamp 1 4 1 -3 bing
## 2 Aladdin and the Wonderful Lamp 2 2 0 -2 bing
## 3 Aladdin and the Wonderful Lamp 3 1 0 -1 bing
## 4 Aladdin and the Wonderful Lamp 4 1 0 -1 bing
## 5 Aladdin and the Wonderful Lamp 5 2 1 -1 bing
## 6 Aladdin and the Wonderful Lamp 8 1 0 -1 bing
## 7 Aladdin and the Wonderful Lamp 10 1 0 -1 bing
## 8 Aladdin and the Wonderful Lamp 11 1 0 -1 bing
## 9 Aladdin and the Wonderful Lamp 12 1 0 -1 bing
## 10 Aladdin and the Wonderful Lamp 13 0 3 3 bing
## # ... with 516 more rows
library(ggplot2)
# Bing sentiment per sentence, one panel per book, each with its own x range.
bing_Fairy_Tales %>%
  ggplot(mapping = aes(x = index, y = sentiment, fill = book)) +
  facet_wrap(~book, scales = "free_x", ncol = 2) +
  geom_col(show.legend = FALSE)
# AFINN gives each word a signed integer strength.
afinn_lexicon <- get_sentiments("afinn")
# NOTE(review): newer tidytext/textdata releases are believed to name the
# strength column `value` rather than `score` -- confirm against the installed
# version. Renaming here is a no-op when the column is already `score`.
names(afinn_lexicon)[names(afinn_lexicon) == "value"] <- "score"

# One row per (book, sentence, distinct score): `n` is how many words in the
# sentence carry that score, and `sentiment` is their combined contribution.
Afinn_Fairy_Tales <- Fairy_Tales %>%
  inner_join(afinn_lexicon, by = "word") %>%
  count(book, index = linenumber %/% 1, score) %>%
  mutate(sentiment = score * n, method = "Afinn")
Afinn_Fairy_Tales
## # A tibble: 913 x 6
## book index score n sentiment method
## <chr> <dbl> <int> <int> <int> <chr>
## 1 Aladdin and the Wonderful Lamp 1 -2 2 -4 Afinn
## 2 Aladdin and the Wonderful Lamp 1 2 1 2 Afinn
## 3 Aladdin and the Wonderful Lamp 2 -3 1 -3 Afinn
## 4 Aladdin and the Wonderful Lamp 2 -2 2 -4 Afinn
## 5 Aladdin and the Wonderful Lamp 4 -3 1 -3 Afinn
## 6 Aladdin and the Wonderful Lamp 8 -3 1 -3 Afinn
## 7 Aladdin and the Wonderful Lamp 9 1 1 1 Afinn
## 8 Aladdin and the Wonderful Lamp 11 -2 1 -2 Afinn
## 9 Aladdin and the Wonderful Lamp 12 -1 1 -1 Afinn
## 10 Aladdin and the Wonderful Lamp 13 2 2 4 Afinn
## # ... with 903 more rows
# `Afinn_Fairy_Tales` holds one row per distinct score within a sentence, so
# the sentiments belonging to each index must be summed first; otherwise the
# values shown in the plot are wrong.
Afinn_Fairy_Tales1 <- Afinn_Fairy_Tales %>%
  group_by(book, index) %>%
  summarise(sentiment = sum(sentiment)) %>%
  # summarise() only peels off the innermost grouping level; drop the leftover
  # `book` grouping so later steps receive an ungrouped table.
  ungroup() %>%
  mutate(method = "Afinn")
# AFINN sentiment per sentence, one panel per book, each with its own x range.
ggplot(
  data = Afinn_Fairy_Tales1,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  facet_wrap(~book, scales = "free_x", ncol = 2) +
  geom_col(show.legend = FALSE)
# Net NRC sentiment (positive minus negative word count) per sentence and book.
# Only the "positive"/"negative" labels of the lexicon are used; its other
# sentiment labels are filtered out.
NRC_Fairy_Tales <- Fairy_Tales %>%
  # Name the key explicitly instead of relying on a natural join.
  inner_join(get_sentiments("nrc"), by = "word") %>%
  filter(sentiment %in% c("positive", "negative")) %>%
  count(book, index = linenumber %/% 1, sentiment) %>%
  # One column per polarity; a polarity absent from a sentence becomes 0.
  spread(sentiment, n, fill = 0) %>%
  mutate(sentiment = positive - negative, method = "NRC")
NRC_Fairy_Tales
## # A tibble: 566 x 6
## book index negative positive sentiment method
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Aladdin and the Wonderful Lamp 1 1 0 -1 NRC
## 2 Aladdin and the Wonderful Lamp 2 1 0 -1 NRC
## 3 Aladdin and the Wonderful Lamp 3 1 1 0 NRC
## 4 Aladdin and the Wonderful Lamp 4 0 1 1 NRC
## 5 Aladdin and the Wonderful Lamp 5 2 2 0 NRC
## 6 Aladdin and the Wonderful Lamp 6 1 1 0 NRC
## 7 Aladdin and the Wonderful Lamp 7 1 2 1 NRC
## 8 Aladdin and the Wonderful Lamp 8 0 2 2 NRC
## 9 Aladdin and the Wonderful Lamp 9 1 1 0 NRC
## 10 Aladdin and the Wonderful Lamp 10 2 1 -1 NRC
## # ... with 556 more rows
library(ggplot2)
# NRC sentiment per sentence, one panel per book, each with its own x range.
NRC_Fairy_Tales %>%
  ggplot(mapping = aes(x = index, y = sentiment, fill = book)) +
  facet_wrap(~book, scales = "free_x", ncol = 2) +
  geom_col(show.legend = FALSE)
# Stack the three per-sentence sentiment tables so the lexicons can be compared.
book_all <- bind_rows(
  bing_Fairy_Tales,
  Afinn_Fairy_Tales1,
  NRC_Fairy_Tales
)

# Grid of panels: one row per lexicon (`method`), one column per book.
ggplot(
  data = book_all,
  mapping = aes(x = index, y = sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_grid(method ~ book, scales = "free_x") +
  labs(x = NULL, y = NULL)
# How often each Bing-lexicon word occurs across all four books, most frequent
# first. The join key is named explicitly instead of relying on a natural join.
bing_word_counts <- Fairy_Tales %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE)
bing_word_counts
## # A tibble: 463 x 3
## word sentiment n
## <chr> <chr> <int>
## 1 beauty positive 83
## 2 good positive 39
## 3 great positive 36
## 4 beautiful positive 22
## 5 well positive 20
## 6 fell negative 18
## 7 gold positive 17
## 8 like positive 15
## 9 dead negative 14
## 10 fine positive 14
## # ... with 453 more rows
# Words contributing most to each Bing polarity, drawn as horizontal bars.
bing_word_counts %>%
  group_by(sentiment) %>%
  # Rank by the count column explicitly rather than letting top_n() guess the
  # weight from the last column (ties at the cut-off are all kept).
  top_n(10, wt = n) %>%
  ungroup() %>%
  # Order the bars by frequency instead of alphabetically.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(word, n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(
    y = "Contribution to sentiment",
    x = NULL
  ) +
  coord_flip()
# Most frequent words of one Bing polarity within a single book.
#
# tokens:     one-word-per-row table with at least the columns `book` and `word`.
# book_title: value of `book` selecting the story.
# polarity:   Bing sentiment label to keep ("negative" or "positive").
# n_words:    number of rows to keep from the top of the frequency ranking.
#
# Returns the first `n_words` rows of the per-word counts (columns `book`,
# `word`, `n`), sorted by descending count.
top_sentiment_words <- function(tokens, book_title, polarity, n_words = 10) {
  polarity_lexicon <- get_sentiments("bing") %>%
    filter(sentiment == polarity)
  tokens %>%
    filter(book == book_title) %>%
    # Name the key explicitly instead of relying on a natural join.
    inner_join(polarity_lexicon, by = "word") %>%
    count(book, word, sort = TRUE) %>%
    head(n_words)
}

# ---- Top negative words per book ----
# Sleeping Beauty
Sleep_negative <- top_sentiment_words(
  Fairy_Tales, "Sleeping Beauty in the Wood", "negative"
)
# Aladdin
Aladdin_negative <- top_sentiment_words(
  Fairy_Tales, "Aladdin and the Wonderful Lamp", "negative"
)
# Beauty and the Beast
Beauty_negative <- top_sentiment_words(
  Fairy_Tales, "Beauty and the Beast", "negative"
)
# Cinderella (the `book` label is spelled "Cinderalla" in the data)
Cinderalla_negative <- top_sentiment_words(
  Fairy_Tales, "Cinderalla", "negative"
)

all_negative <- bind_rows(
  Sleep_negative, Beauty_negative, Aladdin_negative, Cinderalla_negative
)
all_negative %>%
  ggplot(aes(reorder(word, n), n)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, scales = "free_y", ncol = 2) +
  # hjust is a fixed offset, so it is set as a parameter, not mapped in aes().
  geom_text(aes(label = n), hjust = -0.5) +
  labs(x = "字詞", y = NULL) +
  theme(text = element_text(size = 12)) +
  coord_flip()

# ---- Top positive words per book ----
# Sleeping Beauty
Sleep_positive <- top_sentiment_words(
  Fairy_Tales, "Sleeping Beauty in the Wood", "positive"
)
# Aladdin
Aladdin_positive <- top_sentiment_words(
  Fairy_Tales, "Aladdin and the Wonderful Lamp", "positive"
)
# Beauty and the Beast
Beauty_positive <- top_sentiment_words(
  Fairy_Tales, "Beauty and the Beast", "positive"
)
# Cinderella (the `book` label is spelled "Cinderalla" in the data)
Cinderalla_positive <- top_sentiment_words(
  Fairy_Tales, "Cinderalla", "positive"
)

all_positive <- bind_rows(
  Sleep_positive, Beauty_positive, Aladdin_positive, Cinderalla_positive
)
all_positive %>%
  ggplot(aes(reorder(word, n), n)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, scales = "free_y", ncol = 2) +
  # hjust is a fixed offset, so it is set as a parameter, not mapped in aes().
  geom_text(aes(label = n), hjust = 0) +
  labs(x = "字詞", y = NULL) +
  theme(text = element_text(size = 12)) +
  coord_flip()
# Share of each book's words that are Bing-positive / Bing-negative.
bing <- get_sentiments("bing")

# Total number of word tokens per book (the denominator of the ratio).
wordcounts <- Fairy_Tales %>%
  group_by(book) %>%
  summarize(words = n())

# Count of sentiment-bearing words per book and polarity, divided by the
# book's total word count. The join key is named explicitly instead of relying
# on a natural join. The result is left grouped by `book`, as summarize() only
# drops the innermost grouping level.
plot_Fairt <- Fairy_Tales %>%
  inner_join(bing, by = "word") %>%
  group_by(book, sentiment) %>%
  summarize(count = n()) %>%
  left_join(wordcounts, by = c("book")) %>%
  mutate(ratio = count / words)
plot_Fairt
## # A tibble: 8 x 5
## # Groups: book [4]
## book sentiment count words ratio
## <chr> <chr> <int> <int> <dbl>
## 1 Aladdin and the Wonderful Lamp negative 156 5317 0.0293
## 2 Aladdin and the Wonderful Lamp positive 150 5317 0.0282
## 3 Beauty and the Beast negative 241 7196 0.0335
## 4 Beauty and the Beast positive 360 7196 0.0500
## 5 Cinderalla negative 48 2475 0.0194
## 6 Cinderalla positive 96 2475 0.0388
## 7 Sleeping Beauty in the Wood negative 85 3654 0.0233
## 8 Sleeping Beauty in the Wood positive 133 3654 0.0364
# Side-by-side bars of the positive and negative word ratio for every book;
# the book names on the x axis are tilted slightly.
ggplot(
  data = plot_Fairt,
  mapping = aes(x = book, y = ratio, fill = sentiment)
) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 15, hjust = 0.5, vjust = 0.5)
  )