library(janeaustenr)
library(tidyverse)
library(stringr)
library(tidytext)
# Source: Silge, Julia, and David Robinson. Text Mining with R: A Tidy Approach. 2017. Internet resource.
# The first part of this assignment is taken directly from their example code.
# From there, I apply what they taught to another text data set and see what results I can produce.
# Tidy the Austen corpus: one row per word, keeping for every word the line
# number within its book and a running chapter counter.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # A line starting with "chapter " followed by a digit or one of the
    # letters i/v/x/l/c (case-insensitive) counts as a chapter heading;
    # cumsum() turns those hits into a running chapter number.
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(output = word, input = text)
# Keep only the NRC lexicon entries tagged with the "joy" sentiment.
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Joy words in "Emma", most frequent first.
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy) %>%
  count(word) %>%
  arrange(desc(n))
# Net Bing sentiment (positive minus negative word counts) per book, computed
# over buckets defined by linenumber %/% 80.
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = linenumber %/% 80) %>%
  count(book, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)

# One panel per book; each panel gets its own x axis.
ggplot(
  data = jane_austen_sentiment,
  mapping = aes(x = index, y = sentiment, fill = book)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(facets = ~book, ncol = 2, scales = "free_x")
# Restrict the tidy corpus to a single novel for the lexicon comparison.
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")

# AFINN: sum the numeric word scores within each linenumber %/% 80 bucket.
afinn <- pride_prejudice %>%
  inner_join(get_sentiments("afinn")) %>%
  mutate(index = linenumber %/% 80) %>%
  group_by(index) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")
# Bing and NRC are categorical lexicons, so both are scored the same way:
# count positive and negative words per bucket and take the difference.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing")) %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      # NRC also carries emotion categories; keep only the two polarity labels.
      get_sentiments("nrc") %>%
        filter(sentiment == "positive" | sentiment == "negative")
    ) %>%
    mutate(method = "NRC")
) %>%
  mutate(index = linenumber %/% 80) %>%
  count(method, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)

# Stack the three methods and plot one panel per method, each with its own
# y axis.
bind_rows(afinn, bing_and_nrc) %>%
  ggplot(mapping = aes(x = index, y = sentiment, fill = method)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(facets = ~method, ncol = 1, scales = "free_y")
library(corpustools)

# Tokenise sotu_texts into one row per word. linenumber is the row position
# of the originating text in sotu_texts (numbered across the whole data set,
# not per president).
tidy_speech <- sotu_texts %>%
  mutate(linenumber = row_number()) %>%
  unnest_tokens(output = word, input = text)
# NRC "joy" words used in Democratic speeches, most frequent first.
tidy_speech %>%
  filter(party == "Democrats") %>%
  inner_join(nrc_joy) %>%
  count(word) %>%
  arrange(desc(n))

# The same tally for Republican speeches.
tidy_speech %>%
  filter(party == "Republicans") %>%
  inner_join(nrc_joy) %>%
  count(word) %>%
  arrange(desc(n))
# Net Bing sentiment (positive minus negative word counts) per president,
# computed over buckets defined by linenumber %/% 10.
union_sentiment <- tidy_speech %>%
  inner_join(get_sentiments("bing")) %>%
  mutate(index = linenumber %/% 10) %>%
  count(president, index, sentiment) %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)

# One panel per president; each panel gets its own x axis.
ggplot(
  data = union_sentiment,
  mapping = aes(x = index, y = sentiment, fill = president)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(facets = ~president, ncol = 2, scales = "free_x")
# Keep only the NRC lexicon entries tagged "negative".
bad_sentiments <- filter(get_sentiments("nrc"), sentiment == "negative")

# Negative words used in Democratic speeches, most frequent first.
tidy_speech %>%
  filter(party == "Democrats") %>%
  inner_join(bad_sentiments) %>%
  count(word) %>%
  arrange(desc(n))

# The same tally for Republican speeches.
tidy_speech %>%
  filter(party == "Republicans") %>%
  inner_join(bad_sentiments) %>%
  count(word) %>%
  arrange(desc(n))
library(lexicon)

# One-column lookup table of prepositions, with the column named `word` so it
# can be joined against the tokenised speeches.
# The package dataset is referenced explicitly as lexicon::pos_preposition:
# the table below is stored under the same name, so an unqualified reference
# would pick up this data frame (not the package vector) when the script is
# re-run in the same session.
pos_preposition <- tibble(word = lexicon::pos_preposition)

# Prepositions used in Democratic speeches, most frequent first.
tidy_speech %>%
  filter(party == "Democrats") %>%
  inner_join(pos_preposition, by = "word") %>%
  count(word, sort = TRUE)

# The same tally for Republican speeches.
tidy_speech %>%
  filter(party == "Republicans") %>%
  inner_join(pos_preposition, by = "word") %>%
  count(word, sort = TRUE)
# Net sentiment per president using the Hu & Liu lookup table from `lexicon`
# (columns: x = word, y = numeric score), over linenumber %/% 10 buckets.
#
# Polarity is derived from the sign of the score instead of from the raw
# score values, so the pivoted columns are always named `pos` and `neg`:
# any positive score counts as one positive word, any negative score as one
# negative word, and zero-score entries are dropped.
union_sentiment_huliu <- tidy_speech %>%
  inner_join(hash_sentiment_huliu, by = c("word" = "x")) %>%
  filter(y != 0) %>%
  mutate(polarity = if_else(y > 0, "pos", "neg")) %>%
  count(president, index = linenumber %/% 10, polarity) %>%
  pivot_wider(names_from = polarity, values_from = n, values_fill = 0) %>%
  mutate(sentiment = pos - neg)

# One panel per president; each panel gets its own x axis.
ggplot(union_sentiment_huliu, aes(index, sentiment, fill = president)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~president, ncol = 2, scales = "free_x")
# Net sentiment per president using the Jockers lookup table from `lexicon`
# (columns: x = word, y = numeric score), over linenumber %/% 10 buckets.
# Words are split by the sign of their score and weighted by its magnitude,
# so `pos` and `neg` are sums of absolute scores rather than word counts.
# A zero score falls in the "neg" branch but contributes a weight of 0.
union_sentiment_jockers <- tidy_speech %>%
  inner_join(hash_sentiment_jockers, by = c("word" = "x")) %>%
  mutate(sentiment = if_else(y > 0, "pos", "neg")) %>%
  count(president, index = linenumber %/% 10, sentiment, wt = abs(y)) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = pos - neg)

# One panel per president; each panel gets its own x axis.
ggplot(union_sentiment_jockers, aes(index, sentiment, fill = president)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~president, ncol = 2, scales = "free_x")
# Net sentiment per president using the Jockers-Rinker lookup table from
# `lexicon` (columns: x = word, y = numeric score), over linenumber %/% 10
# buckets. Words are split by the sign of their score and weighted by its
# magnitude, so `pos` and `neg` are sums of absolute scores.
# A zero score falls in the "neg" branch but contributes a weight of 0.
union_sentiment_jockers_rinker <- tidy_speech %>%
  inner_join(hash_sentiment_jockers_rinker, by = c("word" = "x")) %>%
  mutate(sentiment = if_else(y > 0, "pos", "neg")) %>%
  count(president, index = linenumber %/% 10, sentiment, wt = abs(y)) %>%
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = pos - neg)

# One panel per president; each panel gets its own x axis.
ggplot(
  union_sentiment_jockers_rinker,
  aes(index, sentiment, fill = president)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~president, ncol = 2, scales = "free_x")