Beer Examination
Load libraries and sentiment data
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidytext)
library(gutenbergr)
library(ggwordcloud)
library(textdata)
# Fetch the three sentiment lexicons used in this analysis, one per object.
# NOTE(review): per the tidytext docs, "afinn" and "nrc" are obtained through
# the textdata package and may prompt for a download on first use -- confirm
# this runs non-interactively where needed.
afinn <- get_sentiments(lexicon = "afinn")
bing <- get_sentiments(lexicon = "bing")
nrc <- get_sentiments(lexicon = "nrc")
Load brewery rating data
# Read the brewery review data straight from the remote CSV into a tibble.
brews <- readr::read_csv(file = "https://asayanalytics.com/brews-csv")
## Rows: 922 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): reviewer_name, reviewer_location, reviewer_city, reviewer_state, re...
## dbl (1): review_rating
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Let’s look at the distribution of star ratings (1–5) to get a general
idea of what to expect
# Bar chart counting the reviews at each value of review_rating.
ggplot(data = brews, mapping = aes(x = review_rating)) +
  geom_bar()

Overall contribution to sentiment for words with frequency >
40
# Horizontal bar chart of each frequent word's contribution to sentiment.
# `grouped_brews` is built outside this view; it is used here with the columns
# `word`, `sentiment` and `count` -- TODO confirm against where it is defined.
grouped_brews %>%
  # Keep only words seen more than 40 times; "hang" is dropped explicitly.
  # NOTE(review): the reason for excluding "hang" is not visible here --
  # presumably it is mis-scored for this domain; confirm.
  filter(count > 40, word != "hang") %>%
  mutate(
    # Flip the sign for negative words so their bars point the opposite way.
    count = ifelse(sentiment == "negative", -count, count),
    # Order the words by their signed count so the bars are sorted.
    word = reorder(word, count)
  ) %>%
  ggplot(aes(word, count, fill = sentiment)) +
  geom_col() +
  # Put the words on the vertical axis so the labels stay readable.
  coord_flip() +
  labs(y = "Contribution to sentiment")
