Sentiment analysis of paper
# load in the libraries we'll need
library(tidyverse)
library(tidytext)
library(glue)
library(stringr)
library("corpus")
library(syuzhet)
library(sentimentr)
# Read the commentary text and split it into one-word-per-row tokens.
# NOTE(review): the absolute path is machine-specific; adjust before running
# on another machine.
files <- readLines("/Users/john/Downloads/Commentary.txt")
# Re-join the lines into a single string explicitly. Handing the text to
# glue() is avoided on purpose: glue() evaluates anything inside {...} as R
# code, which is unsafe (and error-prone) for arbitrary document text.
fileText <- paste(files, collapse = "\n")
# strip dollar signs from the text before tokenizing
fileText <- gsub("$", "", fileText, fixed = TRUE)
# tokenize: one row per word, stored in column `word`
tokens <- tibble(text = fileText) %>%
  unnest_tokens(word, text)
Calculate overall sentiment: each positive word is assigned +1 and each negative word is assigned -1. The overall tone is close to neutral, with a net score of +4.
# Overall sentiment of the text: (# positive words) - (# negative words),
# using the bing lexicon. The join matches on the shared `word` column.
tokens %>%
  inner_join(get_sentiments("bing")) %>% # keep only sentiment words
  # Fix both levels up front so a text with no positive (or no negative)
  # words still yields a zero count instead of a missing column below.
  mutate(sentiment = factor(sentiment, levels = c("negative", "positive"))) %>%
  count(sentiment, .drop = FALSE) %>% # count positive & negative words
  pivot_wider(names_from = sentiment, values_from = n) %>% # one column per polarity
  mutate(sentiment = positive - negative) # net score
Joining, by = "word"
# Sentence-level sentiment: split the text into sentences, score each one
# with the bing lexicon, and flag sentences with negative / positive scores.
s_v <- syuzhet::get_sentences(fileText)
s_v_sentiment <- data.frame(
  sentiment = syuzhet::get_sentiment(s_v, method = "bing")
)
# running sentence index as the first column
s_v_sentiment <- cbind(
  sentence = seq_len(nrow(s_v_sentiment)),
  s_v_sentiment
)
s_v_sentiment <- transform(
  s_v_sentiment,
  neg = sentiment < 0,
  pos = sentiment > 0
)

# One row per pair of consecutive sentences: a segment from (x1, y1) to
# (x2, y2), coloured by whether the segment starts at a non-negative score.
DF <- with(
  s_v_sentiment,
  data.frame(
    x1 = head(sentence, -1),
    x2 = tail(sentence, -1),
    y1 = head(sentiment, -1),
    y2 = tail(sentiment, -1)
  )
)
DF$col <- DF$y1 >= 0

ggplot(DF, aes(x = x1, y = y1, xend = x2, yend = y2, colour = col)) +
  geom_segment() +
  geom_hline(yintercept = 0) +
  # label every sentence index on the x axis
  scale_x_continuous(
    breaks = s_v_sentiment$sentence,
    labels = as.character(s_v_sentiment$sentence)
  ) +
  theme_minimal() +
  labs(x = "sentence", y = "sentiment") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Now pull out the negative sentences identified above:
# show the sentences whose sentiment score is below zero
s_v[s_v_sentiment$neg]
[1] "In this commentary, we address limitations of the conclusions of the target article that stem from the relatively superficial operationalization of bilingualism."
[2] "Instead of reporting the presence of the variable, we recorded the absence of such information."
[3] "An \"upset plot\" (Figure 1) shows that 28% of the studies reported none of the 14 variables related to bilingualism."
# Overall sentiment of the text: (# positive words) - (# negative words),
# using the bing lexicon. The join matches on the shared `word` column.
tokens %>%
  inner_join(get_sentiments("bing")) %>% # keep only sentiment words
  # Fix both levels up front so a text with no positive (or no negative)
  # words still yields a zero count instead of a missing column below.
  mutate(sentiment = factor(sentiment, levels = c("negative", "positive"))) %>%
  count(sentiment, .drop = FALSE) %>% # count positive & negative words
  pivot_wider(names_from = sentiment, values_from = n) %>% # one column per polarity
  mutate(sentiment = positive - negative) # # of positive words - # of negative words
Joining, by = "word"
# Tally each word that appears in the bing lexicon together with its
# polarity, most frequent first, then display the table.
sentiment <- count(
  inner_join(tokens, get_sentiments("bing")),
  word, sentiment,
  sort = TRUE
)
sentiment
Joining, by = "word"
Plot the 20 most influential positive and negative words:
library(ggplot2)
# Bar chart of the most frequent sentiment words, one facet per polarity.
sentiment %>%
  group_by(sentiment) %>%
  top_n(20) %>% # top 20 rows per polarity, ranked by the last column (n)
  ungroup() %>%
  # order the words by count so the bars are sorted within the plot
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = n, y = word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  xlab("Contribution to sentiment") +
  ylab(NULL)
Selecting by n

LS0tCnRpdGxlOiAiU2VudGltZW50IEFuYWx5c2lzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpTZW50aW1lbnQgYW5hbHlzaXMgb2YgcGFwZXIKCmBgYHtyfQojIGxvYWQgaW4gdGhlIGxpYnJhcmllcyB3ZSdsbCBuZWVkCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHRpZHl0ZXh0KQpsaWJyYXJ5KGdsdWUpCmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeSgiY29ycHVzIikKbGlicmFyeShzeXV6aGV0KQpsaWJyYXJ5KHNlbnRpbWVudHIpCgojIGdldCBhIGxpc3Qgb2YgdGhlIGZpbGVzIGluIHRoZSBpbnB1dCBkaXJlY3RvcnkKZmlsZXMgPC0gcmVhZExpbmVzKCIvVXNlcnMvam9obi9Eb3dubG9hZHMvQ29tbWVudGFyeS50eHQiKQpmaWxlVGV4dCA8LSBnbHVlKHJlYWRfZmlsZShmaWxlcykpCiMgcmVtb3ZlIGFueSBkb2xsYXIgc2lnbnMgKHRoZXkncmUgc3BlY2lhbCBjaGFyYWN0ZXJzIGluIFIpCmZpbGVUZXh0IDwtIGdzdWIoIlxcJCIsICIiLCBmaWxlVGV4dCkgCgojIHRva2VuaXplCnRva2VucyA8LSBkYXRhX2ZyYW1lKHRleHQgPSBmaWxlVGV4dCkgJT4lIHVubmVzdF90b2tlbnMod29yZCwgdGV4dCkKYGBgCgpDYWxjdWxhdGUgb3ZlcmFsbCBzZW50aW1lbnQ6IChwb3NpdGl2ZSB3b3JkcyBhcmUgYXNzaWduZWQgKzEsIG5lZ2F0aXZlIHdvcmRzIGFyZSBhc3NpZ25lZCAtMSwgdGhlIG92ZXJhbGwgdG9uZSBpcyBuZXV0cmFsOyBhIHNjb3JlIG9mICs0KQoKYGBge3J9CnRva2VucyAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JSAjIHB1bGwgb3V0IG9ubHkgc2VudGltZW50IHdvcmRzCiAgY291bnQoc2VudGltZW50KSAlPiUgIyBjb3VudCB0aGUgIyBvZiBwb3NpdGl2ZSAmIG5lZ2F0aXZlIHdvcmRzCiAgc3ByZWFkKHNlbnRpbWVudCwgbiwgZmlsbCA9IDApICU+JSAjIG1hZGUgZGF0YSB3aWRlIHJhdGhlciB0aGFuIG5hcnJvdwogIG11dGF0ZShzZW50aW1lbnQgPSBwb3NpdGl2ZSAtIG5lZ2F0aXZlKSAjICMgb2YgcG9zaXRpdmUgd29yZHMgLSAjIG9mIG5lZ2F0aXZlIHdvcmRzCmBgYAoKYGBge3J9CnNfdiA8LSBzeXV6aGV0OjpnZXRfc2VudGVuY2VzKGZpbGVUZXh0KQpzX3Zfc2VudGltZW50IDwtIGFzLmRhdGEuZnJhbWUoc3l1emhldDo6Z2V0X3NlbnRpbWVudChzX3YsbWV0aG9kID0gImJpbmciKSkKY29sbmFtZXMoc192X3NlbnRpbWVudCkgPC0gInNlbnRpbWVudCIKc192X3NlbnRpbWVudCA8LSB0aWJibGU6OnJvd2lkX3RvX2NvbHVtbihzX3Zfc2VudGltZW50LCAic2VudGVuY2UiKQpzX3Zfc2VudGltZW50JG5lZyA8LSBzX3Zfc2VudGltZW50JHNlbnRpbWVudDwwCnNfdl9zZW50aW1lbnQkcG9zIDwtIHNfdl9zZW50aW1lbnQkc2VudGltZW50PjAKCgpERiA8LSBkYXRhLmZyYW1lKHgxID0gaGVhZChzX3Zfc2VudGltZW50JHNlbnRlbmNlLCAtMSksIHgyID0gdGFpbChzX3Zfc2VudGltZW50JHNlbnRlbmNlLCAtMSkgLCAKICAgICAgICAgICAgICAgICB5MSA9IGhlYWQoc192X3NlbnRp
bWVudCRzZW50aW1lbnQsIC0xKSwgeTIgPSB0YWlsKHNfdl9zZW50aW1lbnQkc2VudGltZW50LCAtMSkpCkRGJGNvbCA8LSBERiR5MT49MAoKCmdncGxvdChERiwgYWVzKHg9eDEsIHk9eTEsIHhlbmQgPSB4MiwgeWVuZCA9IHkyLCBjb2xvdXI9Y29sKSkrZ2VvbV9zZWdtZW50KCkrZ2VvbV9obGluZSh5aW50ZXJjZXB0ID0gMCkrCiAgc2NhbGVfeF9jb250aW51b3VzKGxhYmVscz1hcy5jaGFyYWN0ZXIoc192X3NlbnRpbWVudCRzZW50ZW5jZSksYnJlYWtzPXNfdl9zZW50aW1lbnQkc2VudGVuY2UpKwogIHRoZW1lX21pbmltYWwoKSt4bGFiKCJzZW50ZW5jZSIpK3lsYWIoInNlbnRpbWVudCIpKyB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCB2anVzdCA9IDAuNSwgaGp1c3Q9MSkpCmBgYApub3cgdG8gcHVsbCBvdXQgdGhlIG5lZ2F0aXZlIHNlbnRlbmNlcyBpZGVudGlmaWVkIGFib3ZlLi4uCmBgYHtyfQpzX3Zbc192X3NlbnRpbWVudCRuZWc9PVRSVUVdCmBgYAoKYGBge3J9CiMgZ2V0IHRoZSBzZW50aW1lbnQgZnJvbSB0aGUgZmlyc3QgdGV4dDogCnRva2VucyAlPiUKICBpbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JSAjIHB1bGwgb3V0IG9ubHkgc2VudGltZW50IHdvcmRzCiAgY291bnQoc2VudGltZW50KSAlPiUgIyBjb3VudCB0aGUgIyBvZiBwb3NpdGl2ZSAmIG5lZ2F0aXZlIHdvcmRzCiAgc3ByZWFkKHNlbnRpbWVudCwgbiwgZmlsbCA9IDApICU+JSAjIG1hZGUgZGF0YSB3aWRlIHJhdGhlciB0aGFuIG5hcnJvdwogIG11dGF0ZShzZW50aW1lbnQgPSBwb3NpdGl2ZSAtIG5lZ2F0aXZlKSAjICMgb2YgcG9zaXRpdmUgd29yZHMgLSAjIG9mIG5lZ2F0aXZlIG93cmRzCgojIGdldCB3b3JkcyBhbHJlYWR5IGdldF9zZW50aW1lbnRzKCJiaW5nIikKCihzZW50aW1lbnQgPC0gdG9rZW5zICU+JQppbm5lcl9qb2luKGdldF9zZW50aW1lbnRzKCJiaW5nIikpICU+JQogIGNvdW50KHdvcmQsIHNlbnRpbWVudCwgc29ydCA9IFRSVUUpKQpgYGAKUGxvdCB0b3AgMjAgaW5mbHVlbnRpYWwgcG9zaXRpdmUgYW5kIG5lZ2F0aXZlIHdvcmRzIApgYGB7cn0KbGlicmFyeShnZ3Bsb3QyKQoKc2VudGltZW50ICU+JQogIGdyb3VwX2J5KHNlbnRpbWVudCkgJT4lCiAgdG9wX24oMjApICU+JQogIHVuZ3JvdXAoKSAlPiUKICBtdXRhdGUod29yZCA9IHJlb3JkZXIod29yZCwgbikpICU+JQogIGdncGxvdChhZXMobiwgd29yZCwgZmlsbCA9IHNlbnRpbWVudCkpICsKICBnZW9tX2NvbChzaG93LmxlZ2VuZCA9IEZBTFNFKSArCiAgZmFjZXRfd3JhcCh+c2VudGltZW50LCBzY2FsZXMgPSAiZnJlZV95IikgKwogIGxhYnMoeCA9ICJDb250cmlidXRpb24gdG8gc2VudGltZW50IiwKICAgICAgIHkgPSBOVUxMKQpgYGAK