# Read the reviews CSV from the remote URL into `brews`.
brews <- read_csv(file = "http://asayanalytics.com/brews-csv")
## Rows: 922 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): reviewer_name, reviewer_location, reviewer_city, reviewer_state, re...
## dbl (1): review_rating
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Split each review's text into one lowercase word per row, then drop
# common stop words so only the more informative words remain.
tidy_brews <- brews %>%
  unnest_tokens(output = word, input = review_content) %>%
  anti_join(stop_words)
## Joining, by = "word"
Here is a visualization of the words used at least 100 times.
# Bar chart of every word that occurs at least 100 times, with the bars
# ordered by frequency.
tidy_brews %>%
  count(word) %>%
  filter(n >= 100) %>%
  ggplot(aes(x = n, y = reorder(word, n))) +
  geom_col() +
  labs(
    title = "Word counts for The Reviews",
    subtitle = "For words appearing at least 100 times",
    x = "Number of times the word appears",
    y = "Word"
  )
Here is a list of the ten most frequently used words in the reviews.
# Tally each word, sort from most to least frequent, and keep the first ten.
tidy_brews %>%
  count(word) %>%
  arrange(desc(n)) %>%
  slice_head(n = 10)
## # A tibble: 10 × 2
## word n
## <chr> <int>
## 1 beer 1018
## 2 brewery 414
## 3 space 363
## 4 beers 334
## 5 bar 313
## 6 pizza 296
## 7 love 291
## 8 food 266
## 9 time 259
## 10 rhinegeist 255
Based on that, what are the sentiments of those words? First I have to load the sentiment lexicon.
# Bing lexicon: labels each listed word as positive or negative.
bing <- get_sentiments(lexicon = "bing")
Now I can see what those sentiments are, starting with the ten most frequent words that appear in the lexicon.
# Tally each word, keep only the words present in the Bing lexicon (which
# attaches their sentiment label), and show the ten most frequent.
tidy_brews %>%
  count(word) %>%
  inner_join(bing) %>%
  arrange(desc(n)) %>%
  head(10)
## Joining, by = "word"
## # A tibble: 10 × 3
## word n sentiment
## <chr> <int> <chr>
## 1 love 291 positive
## 2 friendly 191 positive
## 3 favorite 158 positive
## 4 nice 157 positive
## 5 fun 144 positive
## 6 awesome 141 positive
## 7 cool 139 positive
## 8 amazing 110 positive
## 9 pretty 105 positive
## 10 delicious 89 positive
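As you can see, the ten most frequently used sentiment words in these reviews are all positive, which suggests that most of the sentiments from these reviews are positive.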