Let’s build a visualization for medals at the Great American Beer Festival from this week’s Tidy Tuesday.
Which beer styles are most characteristic of each state?
library(tidyverse)
beer_awards <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-10-20/beer_awards.csv') %>%
mutate(state = str_to_upper(state))
beer_awards
## # A tibble: 4,970 x 7
## medal beer_name brewery city state category year
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Gold Volksbier Vienna Wibby Brewi… Longmo… CO American Amb… 2020
## 2 Silver Oktoberfest Founders Br… Grand … MI American Amb… 2020
## 3 Bronze Amber Lager Skipping Ro… Staunt… VA American Amb… 2020
## 4 Gold Lager at World's End Epidemic Al… Concord CA American Lag… 2020
## 5 Silver Seismic Tremor Seismic Bre… Santa … CA American Lag… 2020
## 6 Bronze Lite Thinking Pollyanna B… Lemont IL American Lag… 2020
## 7 Gold Beachscape Ventura Coa… Ventura CA American Pil… 2020
## 8 Silver Imagine a World with B… Freetail Br… San An… TX American Pil… 2020
## 9 Bronze Pilsner Old Town Br… Portla… OR American Pil… 2020
## 10 Gold Tank 7 Boulevard B… Kansas… MO American-Bel… 2020
## # … with 4,960 more rows
Let’s find the top 3 most distinctive words (using weighted log odds) for each state.
library(tidytext)
library(tidylo)
word_counts <- beer_awards %>%
unnest_tokens(word, category) %>%
anti_join(get_stopwords()) %>%
count(state, word, sort = TRUE)
word_log_odds <- word_counts %>%
bind_log_odds(state, word, n) %>%
arrange(state, word)
state_words <- word_log_odds %>%
group_by(n) %>%
filter(sum(n) > 5) %>%
ungroup() %>%
group_by(state) %>%
top_n(3) %>%
ungroup() %>%
select(state, word)
state_words
## # A tibble: 151 x 2
## state word
## <chr> <chr>
## 1 AK flavored
## 2 AK smoke
## 3 AK vegetable
## 4 AL ale
## 5 AL beer
## 6 AL style
## 7 AR ale
## 8 AR beer
## 9 AR style
## 10 AZ french
## # … with 141 more rows
Now let’s create labels for the plot from these words.
labels <- state_words %>%
group_by(state) %>%
summarise(word = paste(word, collapse = "<br/>"))
labels
## # A tibble: 50 x 2
## state word
## <chr> <chr>
## 1 AK flavored<br/>smoke<br/>vegetable
## 2 AL ale<br/>beer<br/>style
## 3 AR ale<br/>beer<br/>style
## 4 AZ french<br/>malt<br/>scottish
## 5 CA black<br/>double<br/>stouts
## 6 CO foreign<br/>light<br/>premium
## 7 CT ales<br/>old<br/>pale
## 8 DC ale<br/>american<br/>style
## 9 DE black<br/>free<br/>gluten
## 10 FL dusseldorf<br/>munich<br/>scottish
## # … with 40 more rows
We need a GeoJSON file to create a map of the US. Let’s use one from: https://eric.clst.org/tech/usgeojson/
states <- geojsonio::geojson_read("gz_2010_us_040_00_500k.json", what = "sp")
Using joins, let’s set up two quantities to be mapped with this GeoJSON:
medals <- tibble(state.name = states$NAME) %>%
left_join(
beer_awards %>%
count(state) %>%
left_join(tibble(state = state.abb, state.name))
) %>%
left_join(labels) %>%
replace_na(list(n = 0,
word = "<i>none</i>"))
states$medals <- medals$n
label <- map(medals$word, htmltools::HTML)
Now let’s create a map to display the three most distinctive beer category words for each state, along with the total medals for each state.
library(leaflet)
bins <- c(0, 50, 100, 200, 500, 1000)
pal <- colorBin(palette = "OrRd", domain = medals, bins = bins)
m <- leaflet(states) %>%
setView(-96, 37.8, 4) %>%
addTiles() %>%
addPolygons(
fillColor = ~ pal(medals),
weight = 2,
opacity = 1,
color = "white",
fillOpacity = 0.5,
highlight = highlightOptions(
weight = 5,
color = "white",
fillOpacity = 0.7,
bringToFront = TRUE),
label = label,
labelOptions = labelOptions(
style = list("font-weight" = "normal", padding = "3px 8px"),
textsize = "12px",
direction = "auto")) %>%
addLegend("bottomright", pal = pal, values = ~medals,
title = "Total medals",
opacity = 1
)
m