Overwatch 2 vs Marvel Rivals Reviews
Which Game gets better reviews?
Setup
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (tidytext)
library (lubridate)
library (ggwordcloud)
library (textdata)
library (tidytext)
# Bing lexicon, used further down to tag words as "positive" or "negative".
bing <- get_sentiments(lexicon = "bing")
Load the CSV Data
# Direct-download link to the review export hosted on SharePoint.
reviews_url <- "https://myxavier-my.sharepoint.com/:x:/g/personal/griffithj6_xavier_edu/IQCLGxPUEFAtRYOEzOrBLpe3AXHZEHZBv2zQ_zLEjs8Ew3Q?download=1"

# Pin the text columns to character instead of letting readr guess them.
# NOTE(review): the column specification captured when this report was
# rendered lists `review` as logical, which readr only guesses when a column
# holds no text at all -- verify that the export really contains review text.
reviews <- read_csv(
  reviews_url,
  col_types = cols(
    review = col_character(),
    game = col_character(),
    .default = col_guess()
  )
)

# Every later step tokenizes `review`, so flag an export without usable text
# here rather than letting the plots come out silently empty.
if (all(is.na(reviews$review))) {
  warning(
    "Column `review` contains no text; check the CSV export.",
    call. = FALSE
  )
}
Rows: 228 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): game
dbl (1): playtime_at_review
lgl (7): review, voted_up, votes_up, votes_funny, timestamp_created, author_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Coerce the text columns to character and number the rows in file order.
# NOTE(review): `review_index` follows the CSV's row order; it is only a
# chronological index if the export is already sorted by time -- confirm.
reviews <- mutate(
  reviews,
  across(c(review, game), as.character),
  review_index = seq_len(n())
)
Tokenize Reviews and Remove Stop Words
# One row per word: split each review into tokens, then drop every token that
# appears in the `stop_words` table.
# The join key is spelled out so the match cannot silently change if the two
# tables ever share another column name, and so this join reads the same way
# as the lexicon joins further down.
tidy_reviews <- reviews %>%
  unnest_tokens(output = word, input = review) %>%
  anti_join(stop_words, by = "word")
Joining with `by = join_by(word)`
Word Cloud of Common Words
# Word cloud of the words that occur more than 20 times within a game,
# drawn as one panel per game with word size mapped to frequency.
tidy_reviews %>%
  count(game, word) %>%
  filter(n > 20) %>%
  ggplot(aes(label = word, size = n, colour = game)) +
  geom_text_wordcloud() +
  facet_wrap(vars(game)) +
  theme_minimal()
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_text_wordcloud()`).
Chronological Sentiment Analysis (Valence)
# Per-review valence: number of positive Bing words minus number of negative
# Bing words, one row per (game, review_index).
chron_valence <- tidy_reviews %>%
  inner_join(bing, by = "word") %>%
  count(game, review_index, sentiment) %>%
  # Reviews that have only one polarity get 0 for the other.
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(
    # Guard against a polarity that never occurs at all, in which case
    # pivot_wider() creates no column for it. This has to be a scalar
    # `if`/`else`: `ifelse()` returns a result shaped like its length-one test,
    # i.e. only the first element of the column, which mutate() would then
    # recycle over every review.
    positive = if ("positive" %in% names(.)) positive else 0,
    negative = if ("negative" %in% names(.)) negative else 0,
    valence = positive - negative
  )
# Line chart of per-review valence against review index, one line per game.
# NOTE(review): the x-axis label calls the index chronological; that holds
# only if the rows were in time order when the index was assigned -- confirm.
ggplot(chron_valence, aes(review_index, valence, colour = game)) +
  geom_line() +
  ggtitle(
    "Chronological Valence of Steam Reviews",
    subtitle = "Positive minus negative words per review"
  ) +
  xlab("Review Index (Chronological Order)") +
  ylab("Valence") +
  theme_minimal()
Emotive Sentiment Analysis (NRC)
# NRC lexicon, used here to tag words with emotion categories.
nrc <- get_sentiments(lexicon = "nrc")

# Count of matched words per game and NRC category, largest counts first
# within each game. The join is declared many-to-many, so a word may match
# several lexicon rows and a lexicon row may match many review tokens.
reviews_sentiment <- tidy_reviews %>%
  inner_join(nrc, by = "word", relationship = "many-to-many") %>%
  count(game, sentiment) %>%
  arrange(game, desc(n))
# Side-by-side bars comparing the two games' word counts per NRC category.
ggplot(reviews_sentiment, aes(sentiment, n, fill = game)) +
  geom_col(position = "dodge") +
  ggtitle("Emotive Sentiment Counts", subtitle = "Overwatch 2 vs Marvel Rivals") +
  xlab("Emotion") +
  ylab("Number of Words") +
  # Tilt the category labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))