Final Data Assignment

# Packages used throughout the assignment
library(tidyverse)
library(tidytext)
library(wordcloud)
library(textdata)
library(stringr)
library(topicmodels)
library(topicdoc)
library(reshape2)
# Review datasets
mcdonalds <- read_csv("mcdonalds_reviews.csv")
gamestop_reviews <- read_csv("gamestop_product_reviews.csv")
# Question 1 part a
# Tokenise the McDonald's reviews, drop stop words, and keep the 20 most frequent words
counts <- mcdonalds %>%
  unnest_tokens(word, review, token = "words") %>%
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  top_n(20, n)
# Plot the 20 most frequent words, ordered by count
ggplot(counts) +
  geom_col(mapping = aes(x = n, y = reorder(word, n))) +
  labs(y = NULL)

# Question 1 part B
# Load the Bing sentiment lexicon and preview it
sentiments <- get_sentiments("bing")
sentiments
# A tibble: 6,786 × 2
   word        sentiment
   <chr>       <chr>
 1 2-faces     negative
 2 abnormal    negative
 3 abolish     negative
 4 abominable  negative
 5 abominably  negative
 6 abominate   negative
 7 abomination negative
 8 abort       negative
 9 aborted     negative
10 aborts      negative
# ℹ 6,776 more rows
# The Bing lexicon again, under a clearer name for the joins below
bing_sentiment <- get_sentiments("bing")

# Tokenise the reviews and label each word with its Bing sentiment
tokenised_reviews <- mcdonalds %>%
  unnest_tokens(word, review)
sentiments <- tokenised_reviews %>%
  inner_join(bing_sentiment, by = "word")

# Overall counts of positive and negative words
sentiment_count <- sentiments %>%
  count(sentiment)

# Ten most frequent words for each sentiment
common_words <- sentiments %>%
  count(sentiment, word, sort = TRUE)
common_words %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  arrange(sentiment, desc(n))
# A tibble: 20 × 3
# Groups:   sentiment [2]
   sentiment word         n
   <chr>     <chr>    <int>
 1 negative  worst      215
 2 negative  bad        185
 3 negative  wrong      179
 4 negative  slow       137
 5 negative  rude       120
 6 negative  cold       113
 7 negative  horrible    81
 8 negative  dirty       71
 9 negative  hard        66
10 negative  problem     65
11 positive  like       500
12 positive  good       278
13 positive  right      239
14 positive  fast       232
15 positive  work       188
16 positive  pretty     146
17 positive  well       141
18 positive  hot        132
19 positive  nice       132
20 positive  better     130
# Bar chart of the ten most common words for each sentiment
common_words %>%
  group_by(sentiment) %>%
  top_n(10, n) %>%
  ggplot(mapping = aes(reorder(word, n), n, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free_y") +
  labs(y = "Count", x = "Word", title = "Most Common Positive and Negative Words") +
  coord_flip()
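The wordcloud and reshape2 packages are loaded at the top but not used in this part. On the assumption that they are intended for an alternative view of the same counts, a minimal sketch of a comparison cloud follows (the colours and the max.words cutoff are arbitrary choices, not from the original analysis):

# Sketch (assumption): spread the word counts into a term matrix with one
# column per sentiment, then draw a comparison cloud of negative vs positive words
common_words %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("firebrick", "forestgreen"), max.words = 50)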
# Question 1 part C
# Load the NRC emotion lexicon and preview it
nrc_sentiments <- get_sentiments("nrc")
nrc_sentiments
# A tibble: 13,872 × 2
   word        sentiment
   <chr>       <chr>
 1 abacus      trust
 2 abandon     fear
 3 abandon     negative
 4 abandon     sadness
 5 abandoned   anger
 6 abandoned   fear
 7 abandoned   negative
 8 abandoned   sadness
 9 abandonment anger
10 abandonment fear
# ℹ 13,862 more rows
# Tokenise the reviews and remove stop words, using a new name so the
# stop_words lexicon itself is not overwritten
mcdonalds_words <- mcdonalds %>%
  unnest_tokens(word, review, token = "words") %>%
  anti_join(stop_words, by = "word")

# Keep only words that appear in the NRC lexicon, with their emotion categories
nrc_sentiments <- inner_join(mcdonalds_words, nrc_sentiments, by = "word")
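As a sketch of one way the joined table could then be summarised (the tally and bar chart below are an assumption, not part of the original code):

# Sketch (assumption): count how many review words fall into each NRC
# emotion/sentiment category and plot the totals
nrc_sentiments %>%
  count(sentiment, sort = TRUE) %>%
  ggplot(mapping = aes(x = n, y = reorder(sentiment, n))) +
  geom_col() +
  labs(x = "Number of words", y = "NRC category")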