#install.packages(c("tidyverse", "tidytext", "wordcloud", "topicmodels", "topicdoc", "reshape2", "textdata"))
library(tidyverse)
library(tidytext)
library(wordcloud)
library(topicmodels)
library(topicdoc)
library(textdata)
McDonald's and GameStop Assignment
1 Introduction
In this assignment we will apply text analysis, sentiment analysis and topic modelling to McDonald's and GameStop reviews. Analyzing the reviews allows the companies to understand customer needs, improve their products and services, and boost customer satisfaction.
# Read the McDonald's reviews and break each review into single-word tokens.
mc <- read_csv("mcdonalds_reviews.csv")
mc_tokens <- mc %>%
  unnest_tokens(output = word, input = review, token = "words")

# Frequency of every token, most common first (stop words still included).
mc_tokens %>%
  count(word, sort = TRUE)
# A tibble: 9,244 × 2
word n
<chr> <int>
1 the 7040
2 i 4664
3 and 4308
4 to 4053
5 a 3536
6 of 2025
7 is 1943
8 this 1848
9 in 1838
10 was 1811
# ℹ 9,234 more rows
# Remove stop words so that only content-bearing tokens remain.
# The join key is named explicitly: the match is on `word` alone, and dplyr
# does not have to infer it from the shared column names.
data(stop_words)
important_mc_tokens <- anti_join(mc_tokens, stop_words, by = "word")

# Token frequencies after stop-word removal, most common first.
count(important_mc_tokens, word, sort = TRUE)
# A tibble: 8,617 × 2
word n
<chr> <int>
1 food 866
2 mcdonald's 812
3 drive 691
4 mcdonalds 575
5 time 522
6 service 519
7 location 381
8 people 346
9 fries 307
10 minutes 259
# ℹ 8,607 more rows
2 Barchart showing top 20 most frequently occurring words
# The 20 most frequent tokens after stop-word removal.
# slice_max() replaces the superseded top_n(), which picked its weighting
# column implicitly; ties at the cut-off are kept, as top_n() did.
word_freq <- important_mc_tokens %>%
  count(word, sort = TRUE) %>%
  slice_max(n, n = 20)
ggplot(word_freq) +
geom_col(mapping = aes(x = n, y = reorder(word, n))) +
labs ( y = NULL)
3 Positive and Negative Sentiments over time
# Tag every non-stop-word token with its Bing sentiment label.
# Tokens that do not occur in the lexicon are dropped by the inner join.
# Both joins name their key so they match on `word` only.
sentiments <- get_sentiments("bing")
mc_sentiments <- mc %>%
  unnest_tokens(word, review, token = "words") %>%
  anti_join(stop_words, by = "word") %>%
  inner_join(sentiments, by = "word")

# Most frequent words labelled "positive".
mc_sentiments %>%
  filter(sentiment == "positive") %>%
  count(word, sort = TRUE)
# A tibble: 441 × 2
word n
<chr> <int>
1 fast 232
2 pretty 146
3 hot 132
4 nice 132
5 clean 110
6 friendly 99
7 sweet 86
8 love 71
9 fresh 69
10 free 64
# ℹ 431 more rows
# Most frequent words labelled "negative" among the sentiment-tagged tokens.
count(filter(mc_sentiments, sentiment == "negative"), word, sort = TRUE)
# A tibble: 813 × 2
word n
<chr> <int>
1 worst 215
2 bad 185
3 wrong 179
4 slow 137
5 rude 120
6 cold 113
7 horrible 81
8 dirty 71
9 hard 66
10 terrible 60
# ℹ 803 more rows
# Bucket the tagged tokens into blocks of 150 review ids (integer division of
# `id`) and tally each sentiment label per block.
# Counting on both columns at once returns an ungrouped tibble, so no leftover
# grouping leaks into later steps.
mc_sentiments <- mutate(mc_sentiments, block = id %/% 150)
mc_blocks <- count(mc_sentiments, block, sentiment)
ggplot(mc_blocks) +
geom_col(mapping = aes(x = block, y = n)) +
facet_wrap(~ sentiment, nrow = 1) +
ylab("# Sentiments")
We can see that both positive and negative sentiment counts remain relatively stable over time, with negative sentiments consistently higher. The only major deviation is a dramatic reduction in both sentiment types in the final block, likely caused by a drop in data rather than a real behavioral change. Customers consistently express more negative than positive sentiment. This pattern suggests that potential customers may perceive the business as unreliable or unsatisfactory.
4 Top 10 most common words associated with each sentiment.
# Tag non-stop-word tokens with their NRC sentiment categories.
# The stop-word join now names its key, matching the explicit key already used
# for the lexicon join.
sentiments_nrc <- get_sentiments("nrc")
mc_nrc <- mc %>%
  unnest_tokens(word, review) %>%
  anti_join(stop_words, by = "word")
nrc_sentiments <- mc_nrc %>%
  inner_join(sentiments_nrc, by = "word")

# Highest-count words within each category. slice_max() keeps ties at the
# cut-off, so a category can contribute more than ten rows.
top_words_by_sentiment <- nrc_sentiments %>%
  count(sentiment, word, sort = TRUE) %>%
  group_by(sentiment) %>%
  slice_max(order_by = n, n = 10)
top_words_by_sentiment %>%
  arrange(sentiment, desc(n))
# A tibble: 101 × 3
# Groups: sentiment [10]
sentiment word n
<chr> <chr> <int>
1 anger bad 185
2 anger hot 132
3 anger money 82
4 anger horrible 81
5 anger homeless 64
6 anger terrible 60
7 anger grab 54
8 anger cash 53
9 anger hate 50
10 anger disgusting 37
# ℹ 91 more rows
5 Frequency of the sentiments
# Number of tagged tokens in each NRC category, largest first.
sentiment_frequencies <- count(nrc_sentiments, sentiment, sort = TRUE)
sentiment_frequencies
# A tibble: 10 × 2
sentiment n
<chr> <int>
1 positive 5896
2 negative 4245
3 trust 3526
4 anticipation 2978
5 joy 2820
6 fear 1910
7 anger 1902
8 sadness 1760
9 disgust 1672
10 surprise 1126
6 Top 20 most frequently occurring bigrams
# Split every review into overlapping two-word sequences.
# A review with fewer than two words yields an NA bigram. Those rows are
# dropped here; otherwise they pass the stop-word filter below (NA is not a
# stop word) and unite() turns them into the literal string "NA NA".
mc_bigrams <- mc %>%
  unnest_tokens(bigram, review, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram))
bigrams_separated <- mc_bigrams %>%
  separate(bigram, into = c("word1", "word2"), sep = " ")

# Keep only bigrams in which neither word is a stop word.
bigrams_filtered <- bigrams_separated %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  )
bigrams_united <- bigrams_filtered %>%
  unite(bigram, word1, word2, sep = " ")

# The 20 most frequent bigrams (ties at the cut-off are kept).
top_20_bigrams <- bigrams_united %>%
  count(bigram, sort = TRUE) %>%
  slice_max(n, n = 20)
top_20_bigrams
# A tibble: 20 × 2
bigram n
<chr> <int>
1 fast food 153
2 customer service 116
3 ice cream 61
4 worst mcdonalds 52
5 10 minutes 49
6 parking lot 43
7 worst mcdonald's 42
8 15 minutes 39
9 chicken nuggets 38
10 french fries 34
11 mickey d's 33
12 20 minutes 32
13 5 minutes 29
14 iced coffee 29
15 dollar menu 28
16 late night 28
17 sweet tea 27
18 24 hours 25
19 chicken sandwich 23
20 quarter pounder 23
7 top 20 most frequently occurring trigrams
# Split every review into overlapping three-word sequences.
# A review with fewer than three words yields an NA trigram. Those rows are
# dropped here; otherwise they pass the stop-word filter below (NA is not a
# stop word) and unite() turns them into the literal string "NA NA NA".
mc_trigrams <- mc %>%
  unnest_tokens(trigram, review, token = "ngrams", n = 3) %>%
  filter(!is.na(trigram))
trigrams_separated <- mc_trigrams %>%
  separate(trigram, into = c("word1", "word2", "word3"), sep = " ")

# Keep only trigrams in which none of the three words is a stop word.
trigrams_filtered <- trigrams_separated %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word,
    !word3 %in% stop_words$word
  )
trigrams_united <- trigrams_filtered %>%
  unite(trigram, word1, word2, word3, sep = " ")

# The 20 most frequent trigrams; ties at the cut-off are kept, so more than
# 20 rows can be returned.
top_20_trigrams <- trigrams_united %>%
  count(trigram, sort = TRUE) %>%
  slice_max(n, n = 20)
top_20_trigrams
# A tibble: 26 × 2
trigram n
<chr> <int>
1 ice cream machine 10
2 worst customer service 10
3 24 hour drive 9
4 eat fast food 8
5 fast food restaurants 8
6 ice cream cone 8
7 10 piece chicken 7
8 fast food restaurant 7
9 sausage egg mcmuffin 7
10 terrible customer service 7
# ℹ 16 more rows
8 Reviews about waiting
# Reviews that mention "waiting" in any letter case.
# The earlier case-sensitive assignment was removed: it was overwritten
# immediately by this case-insensitive filter and never used.
waiting_reviews <- mc %>%
  filter(str_detect(review, regex("waiting", ignore_case = TRUE)))
waiting_reviews
# A tibble: 129 × 2
id review
<dbl> <chr>
1 2 "Terrible customer service. I came in at 9:30pm and stood in front of …
2 3 "First they \"lost\" my order, actually they gave it to someone one el…
3 8 "One Star and I'm beng kind. I blame management. last day of free coff…
4 9 "Never been upset about any fast food drive thru service till I came t…
5 22 "GHETTO!! went in yesterday just to get a soda and could not even park…
6 31 "It had been a while since I had stopped at this particular one. They …
7 40 "TOXIC DUMP! In food quality and employee humanity/work effortTypicall…
8 53 "Sometimes, you just need a Mickey D's fix. Usually, for me anyway, th…
9 66 "On my way to Curry Honda for my scheduled maintenance appointment, I …
10 69 "I purchased a specialty coffee in the drive through, but soon after I…
# ℹ 119 more rows
write_csv(waiting_reviews, "waiting_reviews.csv")
“Waiting” in these reviews is consistently mentioned in a negative context—as evidence of poor service, lack of training, and chaotic operations. Customers are not simply waiting; they feel ignored, inconvenienced, and disrespected, which creates frustration and leads to negative reviews.
9 Reviews about Shamrock Shake
# Reviews that mention "shamrock shake" in any letter case.
# The earlier case-sensitive assignment was removed: it was overwritten
# immediately by this case-insensitive filter and never used.
ss_reviews <- mc %>%
  filter(str_detect(review, regex("shamrock shake", ignore_case = TRUE)))
ss_reviews
# A tibble: 11 × 2
id review
<dbl> <chr>
1 359 "I left the Hilton late last night and I was really thirsty. This was …
2 414 "I stop here now and then as it's the closest to where I live. Custome…
3 479 "I have to tell you it's been 2 years since I've been at a McDonalds a…
4 776 "Worst shamrock shake ever. The new shakes are brutal. They didn't eve…
5 786 "This is probably the worst McDonald's ever. It doesn't matter what ti…
6 970 "THIS REVIEW IS FOR THE SHAMROCK SHAKE ONLYIt's brighter green than I …
7 1113 "Went here on March 10th, and there NO Shamrock Shakes. They weren't s…
8 1334 "What is a Shamrock Shake? It's a seasonal shake (milk?) by McDonald's…
9 1401 "I can't comment on the food because when I went to grab a Shamrock Sh…
10 1455 "I don't really eat fast food, let alone Yelp about it. I haven't eate…
11 1473 "This is by far, my favorite McDonald's anywhere. It's completely remo…
write_csv(ss_reviews, "ss_reviews.csv")
Reviewers describe the Shamrock Shake in ways that reflect both nostalgia and disappointment. While some enjoy it as a seasonal treat, many describe the shake as poorly mixed, overly artificial, or inferior to older versions. The experience is often worsened by long wait times, inconsistent service, and operational problems at the restaurants serving it.
10 Reviews about the Ice cream machine
# Reviews that mention "ice cream machine" in any letter case.
# The earlier case-sensitive assignment was removed: it was overwritten
# immediately by this case-insensitive filter and never used.
icecream_reviews <- mc %>%
  filter(str_detect(review, regex("ice cream machine", ignore_case = TRUE)))
icecream_reviews
# A tibble: 9 × 2
id review
<dbl> <chr>
1 36 "The ice cream machine is always \"down\" after 11 p.m. If you want a h…
2 108 "Ice cream machine is always down, staff is rude and ghetto, food is al…
3 195 "This is the worst McDonald's I have ever been to.Yes, there ARE better…
4 260 "Every time I go their ice cream machine is down. It's a hang out for a…
5 377 "This place is a joke! It's disgusting enough of a fact that the only t…
6 382 "This is probably the worst McDonald's ever.. They don't know what they…
7 385 "Couldn't get a chocolate-dipped cone because they shut off the ice cre…
8 1120 "This is the McDonald's that my friends and I always go to since it's t…
9 1456 "I have never in my life wrote a corporation to complain about the busi…
write_csv(icecream_reviews, "icecream_reviews.csv")
Reviewers consistently mention the ice-cream machine as being almost always “down,” “broken,” or deliberately shut off—especially late at night—even when the restaurant is advertised as open 24 hours. Many customers report going specifically for ice-cream items such as sundaes, cones, or McFlurries only to be told the machine is unavailable, or that toppings have run out. When the machine is working, products are often served incorrectly, in the wrong cups, or only partially filled. These repeated issues cause frustration and lead customers to question the honesty and competence of the staff, who are frequently described as rude, careless, or poorly trained. Overall, the ice-cream machine becomes a symbol of broader operational problems, reinforcing the perception that the restaurant is unreliable, poorly managed, and consistently failing to meet basic customer expectations.
11 Positive and Negative Word Clouds
# Per-word counts of Bing-tagged tokens, split by sentiment label; this is the
# input for the word clouds. Both joins name their key so they match on `word`
# only.
mc_words <- mc %>%
  unnest_tokens(word, review, token = "words") %>%
  anti_join(stop_words, by = "word")
bing_sentiments <- get_sentiments("bing")
mc_word_sentiments <- mc_words %>%
  inner_join(bing_sentiments, by = "word") %>%
  count(word, sentiment, sort = TRUE)
mc_pos_sentiments <- filter(mc_word_sentiments, sentiment == "positive")
# Word cloud of the positive words that occur at least 50 times.
wordcloud(mc_pos_sentiments$word,
          mc_pos_sentiments$n,
          min.freq = 50,
          colors = brewer.pal(8, "Dark2"))

# Negative counterpart of mc_pos_sentiments. This assignment was fused onto
# the end of the wordcloud() call above, which is a syntax error; it now
# stands as its own statement.
mc_neg_sentiments <- filter(mc_word_sentiments, sentiment == "negative")
wordcloud(mc_neg_sentiments$word,
mc_neg_sentiments$n,
min.freq = 50,
colors = brewer.pal(8, "Dark2"))
12 GameStop Review Analysis
Now on to the Game Stop Reviews.
13 Table of topics
# Load the GameStop product reviews.
game <- read_csv("gamestop_product_reviews.csv")

# Extend the standard stop-word list with contractions typed without an
# apostrophe.
data(stop_words)
custom_stopwords <- bind_rows(
  stop_words,
  tibble(word = c("im", "ive", "id", "theyve", "theyre", "dont"))
)

# Tokenise and remove stop words. The join key is named explicitly because
# custom_stopwords also carries a `lexicon` column that must not take part in
# the match.
game_tokens <- game %>%
  unnest_tokens(word, review) %>%
  anti_join(custom_stopwords, by = "word")

# Per-review word counts, cast to a document-term matrix.
game_word_counts <- game_tokens %>%
  count(id, word, sort = TRUE)
game_dtm <- cast_dtm(game_word_counts, document = id, term = word, value = n)
game_dtm
<<DocumentTermMatrix (documents: 4682, terms: 9599)>>
Non-/sparse entries: 67545/44874973
Sparsity : 100%
Maximal term length: 27
Weighting : term frequency (tf)
# Fit a 10-topic LDA model with Gibbs sampling, using a fixed seed.
set.seed(1234)
game_lda <- LDA(
  game_dtm,
  k = 10,
  method = "Gibbs",
  control = list(seed = 1234)
)

# Per-topic term probabilities (beta), keeping exactly the ten largest per
# topic and ordering them within each topic.
game_beta_final <- tidy(game_lda, matrix = "beta")
top_terms_final <- game_beta_final %>%
  group_by(topic) %>%
  slice_max(order_by = beta, n = 10, with_ties = FALSE) %>%
  arrange(topic, desc(beta))
top_terms_final
# A tibble: 100 × 3
# Groups: topic [10]
topic term beta
<int> <chr> <dbl>
1 1 tv 0.0398
2 1 quality 0.0290
3 1 sound 0.0277
4 1 picture 0.0206
5 1 headset 0.0170
6 1 set 0.0137
7 1 issues 0.0128
8 1 price 0.0115
9 1 oled 0.0109
10 1 comfortable 0.0108
# ℹ 90 more rows
14 Top 10 terms in each topic
top_terms_final %>%
mutate(term = reorder_within(term, beta, topic)) %>%
group_by(topic, term) %>%
arrange(desc(beta)) %>%
ungroup() %>%
ggplot(aes(beta, term, fill = as.factor(topic))) +
geom_col(show.legend = FALSE) +
scale_y_reordered() +
labs(title = "Top 10 terms in each LDA topic", x = expression(beta), y = NULL) +
facet_wrap(~ topic, ncol = 3, scales = "free")
Topic 1 focuses on customer discussions about televisions and similar products. Users mention picture and sound quality, headsets, pricing, and issues. It likely captures reviews and experiences with TVs or related accessories.
Topic 2 is likely about Pokémon games. It involves elements like graphics, different “versions,” and user enjoyment. It reflects enthusiasm and perhaps some comparisons between different series. It looks to be positive overall.
Topic 3 centers around purchasing controllers or Xbox-related items. People reference buying something recently, gift purchases for their “son” and satisfaction with the product.
Topic 4 focuses on batteries, battery life, and related products like flashlights. The users discuss brands, longevity, and performance. It seems to be quite neutral, leaning neither negative nor positive.
Topic 5 is about monitors and display performance for gaming and movies. Words like “amazing,” “perfect,” and “beautiful” suggest positive reviews focusing on screen clarity and visual experience. There may also be a preference for Samsung, as that brand is mentioned as well.
Topic 6 is about the general gameplay experience. It contains opinions about how fun or engaging a game is, as well as difficulty and graphics. It seems to apply broadly across different games rather than to a specific title, so it doesn’t give much insight.
Topic 7 revolves around the Zelda series or similar fantasy adventure games. The focus is on narrative, characters, and gameplay. Zelda or similar games may be quite popular for the store.
Topic 8 seems to be a general positive sentiment topic. Users describe loving a product, recommending it, and praising its ease of use. Mentions of “kids” and “switch” suggest family-friendly products or Nintendo Switch content.
Topic 9 focuses on the amount of time spent playing games and whether they offer good value. It shows discussions about gameplay hours, replayability, and overall worth. This is helpful for GameStop to get to know its customers’ habits better.
Topic 10 likely reflects older or classic games, it emphasizes controls, system performance, characters, and technical aspects of older titles.
# Diagnostics for each topic of the fitted model.
# game_lda is reused as fitted earlier in this file: the refit that used to
# sit here had the same matrix, k, method and seed, so it only repeated the
# sampling. topicdoc is already attached at the top of the file.
topic_quality <- topic_diagnostics(game_lda, game_dtm)
topic_quality
topic_num topic_size mean_token_length dist_from_corpus tf_df_dist
1 1 1023.1595 5.7 0.6512766 4.433123
2 2 979.5251 6.2 0.6104892 14.769769
3 3 925.7857 4.3 0.6440713 2.417453
4 4 839.6504 6.1 0.6518475 8.366660
5 5 992.0127 7.4 0.6527836 3.237674
6 6 908.6871 4.9 0.6177458 12.166879
7 7 1017.1848 5.5 0.6008149 12.544420
8 8 996.7261 5.2 0.6476142 2.455808
9 9 857.5360 4.9 0.6018382 12.461685
10 10 1058.7326 4.5 0.6276783 3.281093
doc_prominence topic_coherence topic_exclusivity
1 125 -174.7860 9.959100
2 115 -136.9226 9.680701
3 34 -209.6722 9.979300
4 276 -123.5221 9.953439
5 87 -185.8496 9.969981
6 47 -162.3809 9.818089
7 66 -155.5687 9.641824
8 61 -208.0815 9.931432
9 29 -148.6571 9.802699
10 39 -175.3660 9.816286
Topic Size varied across the ten topics. Topic 10 had the largest size (1058.7), indicating that it represents a widely discussed theme in the reviews, while Topic 4 had the smallest size (839.7). Topic size does not directly indicate quality, but larger topics tend to be more stable.
Mean Token Length shows a lot of variation. Topic 5 had the highest value (7.4), suggesting it contains longer, more meaningful words, whereas Topic 3 had the lowest mean token length (4.3), indicating that its vocabulary may have shorter, less informative terms.
Topic 4 achieved the best coherence (–123.5), meaning its top words frequently appear together in the same documents. Topic 2 also performed well (–136.9). In contrast, Topic 3 (–209.7) and Topic 8 (–208.1) had the worst coherence scores, indicating that these topics are less consistent and more difficult to interpret.
Topic Exclusivity was relatively similar across topics, though Topic 3 and Topic 5 scored highest (9.98 and 9.97), meaning they contain more unique vocabulary. Topic 7 and Topic 2 had the lowest exclusivity values (9.64 and 9.68), suggesting overlap with other topics.
Overall, Topics 4, 5 and 2 appear to be the highest-quality topics, based on their strong coherence, longer token lengths and relatively high exclusivity. In contrast, Topics 3 and 8 appear to be the lowest-quality topics, showing short token lengths and the weakest coherence (–209.7 and –208.1), which suggests they may be vague or not very informative.
15 Conclusion
From the analysis of GameStop’s reviews, there are a few actions GameStop could take to improve customer satisfaction. It should promote popular game franchises: the topics show strong enthusiasm for Pokémon, Zelda, Fallout and general gameplay/story keywords. GameStop should highlight these titles in marketing campaigns, bundle deals and loyalty promotions.
It could also address controller and console concerns. Words like controller, buy, xbox, money and issues imply recurring performance or value problems. GameStop could improve its vetting of controllers and offer extended warranties or easy exchanges for defective accessories.
It could promote high-rated monitors and gaming displays. Words like amazing, perfect, excellent, gaming and samsung show very positive sentiment. GameStop could use these reviews in marketing (“customer favourite”) and create promotions.
Use the highly positive sentiment toward family-friendly products. Words like love, easy, recommend, kids, happy appear. Increase marketing for kid-friendly consoles and games, and create “family gaming bundles.”
It could provide better guidance for older and classic games. The high frequency of Fallout, system, controls and characters suggests interest in retro or older games. GameStop could increase its stock of classic titles and highlight information about them.
These recommendations would help GameStop improve its business.