Beer Examination

Load Libraries and sentiment data

library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidytext) 
library(gutenbergr)
library(ggwordcloud) 
library(textdata)
# Sentiment lexicons, each fetched by name through tidytext::get_sentiments().
# NOTE(review): "afinn" and "nrc" are presumably served via the textdata
# package loaded above and may prompt for a download on first use -- confirm.
afinn <- get_sentiments(lexicon = "afinn")
bing  <- get_sentiments(lexicon = "bing")
nrc   <- get_sentiments(lexicon = "nrc")

Load brewery rating data

# Brewery reviews, read straight from the remote CSV; column types are guessed
# by readr (one numeric column, review_rating, and the rest character).
brews <- read_csv(file = "https://asayanalytics.com/brews-csv")
## Rows: 922 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): reviewer_name, reviewer_location, reviewer_city, reviewer_state, re...
## dbl (1): review_rating
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Let’s look at the distribution of star ratings (1–5) to get a general idea of what to expect.

# Bar chart of how many reviews were given each value of review_rating.
ggplot(data = brews, mapping = aes(x = review_rating)) +
  geom_bar()

Now it’s time to transform the data: clean up and prep the reviews by splitting them into individual words and removing stop words.

# One row per token: split review_content into a `word` column, then discard
# every row whose word appears in tidytext's stop_words table. The join key is
# left implicit, so dplyr matches on the shared `word` column.
tidy_brews <- anti_join(
  unnest_tokens(brews, output = word, input = review_content),
  stop_words
)
## Joining, by = "word"
# Occurrences of each word within each brewery, kept only for words present in
# the bing lexicon (which contributes the `sentiment` column).
# summarise() peels off only the last grouping level, so the result is still
# grouped by `word`.
grouped_brews <- inner_join(
  summarise(
    group_by(tidy_brews, word, brewery),
    count = n()
  ),
  bing
)
## `summarise()` has grouped output by 'word'. You can override using the
## `.groups` argument.
## Joining, by = "word"

Contribution to sentiment for words that appear more than 40 times in a brewery’s reviews

# Diverging bar chart of bing-lexicon words whose per-brewery count exceeds 40:
# counts for negative words are negated so they extend in the opposite
# direction from positive words.
grouped_brews %>%
  # grouped_brews is still grouped by `word` after summarize(); drop that
  # grouping so reorder() ranks the words against each other.
  ungroup() %>%
  # "hang" is excluded by hand.
  filter(count > 40, word != "hang") %>%
  mutate(
    count = ifelse(sentiment == "negative", -count, count),
    # Order the bars by the signed count computed on the line above.
    word = reorder(word, count)
  ) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col() +
  coord_flip() +
  labs(y = "Contribution to sentiment")