Assignment 8

Overwatch 2 vs Marvel Rivals Reviews

Which game gets better reviews?

Setup

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidytext)
library(lubridate)
library(ggwordcloud)
library(textdata)
library(tidytext)


# Bing lexicon: maps a word to a sentiment label. It is joined to the review
# tokens on `word` when per-review valence is computed.
bing <- tidytext::get_sentiments(lexicon = "bing")

Load the CSV

# Read the exported Steam reviews. `review` and `game` are pinned to character
# so that free text is never type-guessed as logical and then coerced back
# (which would leave NA / "TRUE" / "FALSE" instead of the original text).
# All remaining columns are still guessed by readr.
# NOTE(review): if `review` still comes back entirely NA, the export itself is
# missing the text — inspect the CSV before trusting any result below.
reviews <- read_csv(
  "https://myxavier-my.sharepoint.com/:x:/g/personal/griffithj6_xavier_edu/IQCLGxPUEFAtRYOEzOrBLpe3AXHZEHZBv2zQ_zLEjs8Ew3Q?download=1",
  col_types = cols(
    review = col_character(),
    game = col_character(),
    .default = col_guess()
  )
)
Rows: 228 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): game
dbl (1): playtime_at_review
lgl (7): review, voted_up, votes_up, votes_funny, timestamp_created, author_...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce the two text columns to character and number the rows in file order.
# NOTE(review): review_index is only "chronological" if the CSV rows are
# already sorted by time — confirm against the export.
reviews <- reviews %>%
  mutate(
    across(c(review, game), as.character),
    review_index = row_number()
  )

Tokenize Reviews and Remove Stop Words

# One row per token: split each review into words, then drop stop words.
# The join key is stated explicitly so the result does not depend on which
# column names the two tables happen to share (and no "Joining with" message
# is emitted).
tidy_reviews <- reviews %>%
  unnest_tokens(word, review) %>%
  anti_join(stop_words, by = "word")
Joining with `by = join_by(word)`

Word Cloud of Common Words

# Word cloud of the most frequent tokens, one panel per game.
# Only words occurring more than 20 times within a game are drawn.
tidy_reviews %>%
  count(game, word, name = "n") %>%
  filter(n > 20) %>%
  ggplot(aes(label = word, size = n, color = game)) +
  geom_text_wordcloud() +
  facet_wrap(~game) +
  theme_minimal()
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_text_wordcloud()`).

Chronological Sentiment Analysis (Valence)

# Per-review valence: (# positive Bing words) - (# negative Bing words).
#
# The previous version guarded against a missing sentiment column with
# `ifelse(<scalar test>, 0, positive)`. `ifelse()` returns a value shaped like
# its test, so whenever the column DID exist it returned only `positive[1]`,
# which mutate() then recycled to every row — every review got the same
# valence. Here both sentiment levels are declared up front and
# `names_expand = TRUE` makes pivot_wider() always emit both columns
# (zero-filled), so no guard is needed.
chron_valence <- tidy_reviews %>%
  # A few Bing words carry both labels, so this join can fan out by design.
  inner_join(bing, by = "word", relationship = "many-to-many") %>%
  mutate(sentiment = factor(sentiment, levels = c("negative", "positive"))) %>%
  count(game, review_index, sentiment, name = "n") %>%
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0,      # reviews lacking one polarity get 0, not NA
    names_expand = TRUE   # keep both columns even if a level never occurs
  ) %>%
  mutate(valence = positive - negative)

# Line chart of valence against review_index, one coloured line per game.
# NOTE(review): the x-axis label says "Chronological Order", but review_index
# is just the CSV row number — confirm the file is sorted by time.
ggplot(chron_valence, aes(x = review_index, y = valence, color = game)) +
  geom_line() +
  theme_minimal() +
  labs(
    x = "Review Index (Chronological Order)",
    y = "Valence",
    title = "Chronological Valence of Steam Reviews",
    subtitle = "Positive minus negative words per review"
  )

Emotive Sentiment Analysis (NRC)

# NRC lexicon: a word may be tagged with several sentiment/emotion labels,
# hence the many-to-many join below.
nrc <- tidytext::get_sentiments(lexicon = "nrc")

# Number of emotion-tagged tokens per game and emotion, most frequent emotion
# first within each game.
reviews_sentiment <- tidy_reviews %>%
  inner_join(nrc, by = "word", relationship = "many-to-many") %>%
  count(game, sentiment, name = "n") %>%
  arrange(game, desc(n))

# Side-by-side bars: emotion-word counts per NRC category, filled by game.
# The x-axis labels are tilted 45 degrees so the category names do not overlap.
ggplot(reviews_sentiment, aes(x = sentiment, y = n, fill = game)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Emotion",
    y = "Number of Words",
    title = "Emotive Sentiment Counts",
    subtitle = "Overwatch 2 vs Marvel Rivals"
  )