Final Data Assignment

Author

Chloe O Donovan

Quarto

Running Code

library(tidyverse) 
library(tidytext) 
library(wordcloud)
library(textdata)
library(stringr)
library(topicmodels)
library(topicdoc)
library(reshape2)

# Load the two review datasets from CSV files (paths are relative to the
# working directory). `mcdonalds` supplies the `review` text column that the
# Question 1 pipelines below tokenise.
mcdonalds <- readr::read_csv(file = "mcdonalds_reviews.csv")
gamestop_reviews <- readr::read_csv(file = "gamestop_product_reviews.csv")

#Question 1 part a

# Top 20 most frequent words across all McDonald's reviews, stop words
# excluded. Each review is split into single-word tokens, tidytext's
# stop-word lexicon is filtered out, and the remaining words are counted.
counts <- mcdonalds %>%
  unnest_tokens(word, review, token = "words") %>%
  # Explicit key avoids the "Joining with `by = ...`" message; the lexicon is
  # namespaced so a global object called `stop_words` cannot shadow it.
  anti_join(tidytext::stop_words, by = "word") %>%
  count(word, sort = TRUE) %>%
  # slice_max() replaces the superseded top_n(); ranking is on the count `n`
  # and ties at the cut-off are kept, as before.
  slice_max(n, n = 20)

# Horizontal bar chart of the word counts held in `counts`; words are ordered
# by frequency and the y-axis title is suppressed.
counts %>%
  ggplot(aes(x = n, y = reorder(word, n))) +
  geom_col() +
  labs(y = NULL)

#Question 1 part B

# Fetch the Bing lexicon (one row per word, labelled positive or negative)
# and print it as a preview.
sentiments <- get_sentiments(lexicon = "bing")
sentiments

# A tibble: 6,786 × 2
   word        sentiment
   <chr>       <chr>    
 1 2-faces     negative 
 2 abnormal    negative 
 3 abolish     negative 
 4 abominable  negative 
 5 abominably  negative 
 6 abominate   negative 
 7 abomination negative 
 8 abort       negative 
 9 aborted     negative 
10 aborts      negative 
# ℹ 6,776 more rows

# Bing lexicon: maps a word to "positive" or "negative".
bing_sentiment <- get_sentiments(lexicon = "bing")

# One row per word occurrence in the McDonald's reviews.
tokenised_reviews <- unnest_tokens(mcdonalds, output = word, input = review)

# Keep only the tokens that appear in the Bing lexicon, tagged with their
# sentiment label.
sentiments <- inner_join(tokenised_reviews, bing_sentiment, by = "word")

# Total number of positive vs. negative word occurrences.
sentiment_count <- count(sentiments, sentiment)

# Occurrences of each (sentiment, word) pair, most frequent first.
common_words <- count(sentiments, sentiment, word, sort = TRUE)

# Print the ten most frequent words within each sentiment, listed by
# sentiment and then by descending count.
common_words %>%
  group_by(sentiment) %>%
  # slice_max() replaces the superseded top_n(); ties at the cut-off are kept.
  slice_max(n, n = 10) %>%
  arrange(sentiment, desc(n))

# A tibble: 20 × 3
# Groups:   sentiment [2]
   sentiment word         n
   <chr>     <chr>    <int>
 1 negative  worst      215
 2 negative  bad        185
 3 negative  wrong      179
 4 negative  slow       137
 5 negative  rude       120
 6 negative  cold       113
 7 negative  horrible    81
 8 negative  dirty       71
 9 negative  hard        66
10 negative  problem     65
11 positive  like       500
12 positive  good       278
13 positive  right      239
14 positive  fast       232
15 positive  work       188
16 positive  pretty     146
17 positive  well       141
18 positive  hot        132
19 positive  nice       132
20 positive  better     130

# Faceted horizontal bar chart of the ten most frequent words per sentiment.
common_words %>%
  group_by(sentiment) %>%
  # slice_max() replaces the superseded top_n(); ties at the cut-off are kept.
  slice_max(n, n = 10) %>%
  ungroup() %>%
  # Words go straight on the y axis, so no coord_flip() is needed and
  # `scales = "free_y"` gives each panel its own set of words.
  ggplot(aes(x = n, y = reorder(word, n), fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free_y") +
  labs(
    x = "Count",
    y = "Word",
    title = "Most Common Positive and Negative Words"
  )

#Question 1 part C

# Fetch the NRC lexicon (words labelled with emotions such as trust, fear,
# anger, sadness, as well as negative) and print it as a preview.
nrc_sentiments <- get_sentiments(lexicon = "nrc")
nrc_sentiments

# A tibble: 13,872 × 2
   word        sentiment
   <chr>       <chr>    
 1 abacus      trust    
 2 abandon     fear     
 3 abandon     negative 
 4 abandon     sadness  
 5 abandoned   anger    
 6 abandoned   fear     
 7 abandoned   negative 
 8 abandoned   sadness  
 9 abandonment anger    
10 abandonment fear     
# ℹ 13,862 more rows

# Tokenise the McDonald's reviews and drop stop words. The result gets its own
# name: assigning it to `stop_words` would overwrite tidytext's stop-word
# lexicon for every later `anti_join(stop_words)` and make this chunk give a
# different result when re-run.
# NOTE(review): any later code that expected `stop_words` to hold these tokens
# should use `mcdonalds_tokens` instead.
mcdonalds_tokens <- mcdonalds %>%
  unnest_tokens(word, review, token = "words") %>%
  anti_join(tidytext::stop_words, by = "word")

# Label the remaining tokens with NRC emotions. A word can carry several NRC
# labels, so a token appears once per matching label. The lexicon is fetched
# directly, so re-running this chunk does not join against its own output.
# `nrc_sentiments` is rebound from the lexicon to the labelled review tokens.
nrc_sentiments <- inner_join(
  mcdonalds_tokens,
  get_sentiments("nrc"),
  by = "word"
)