Let’s build a visualization for medals at the Great American Beer Festival from this week’s Tidy Tuesday.

Which beer styles are most characteristic of each state?

Read in the data

library(tidyverse)
beer_awards <- read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-10-20/beer_awards.csv') %>%
    mutate(state = str_to_upper(state))

beer_awards
## # A tibble: 4,970 x 7
##    medal  beer_name               brewery      city    state category       year
##    <chr>  <chr>                   <chr>        <chr>   <chr> <chr>         <dbl>
##  1 Gold   Volksbier Vienna        Wibby Brewi… Longmo… CO    American Amb…  2020
##  2 Silver Oktoberfest             Founders Br… Grand … MI    American Amb…  2020
##  3 Bronze Amber Lager             Skipping Ro… Staunt… VA    American Amb…  2020
##  4 Gold   Lager at World's End    Epidemic Al… Concord CA    American Lag…  2020
##  5 Silver Seismic Tremor          Seismic Bre… Santa … CA    American Lag…  2020
##  6 Bronze Lite Thinking           Pollyanna B… Lemont  IL    American Lag…  2020
##  7 Gold   Beachscape              Ventura Coa… Ventura CA    American Pil…  2020
##  8 Silver Imagine a World with B… Freetail Br… San An… TX    American Pil…  2020
##  9 Bronze Pilsner                 Old Town Br… Portla… OR    American Pil…  2020
## 10 Gold   Tank 7                  Boulevard B… Kansas… MO    American-Bel…  2020
## # … with 4,960 more rows

Find most distinctive words for each state

Let’s find the top 3 most distinctive words (using weighted log odds) for each state.

library(tidytext)
library(tidylo)

word_counts <- beer_awards %>%
    unnest_tokens(word, category) %>%
    anti_join(get_stopwords()) %>%
    count(state, word, sort = TRUE)

word_log_odds <- word_counts %>%
    bind_log_odds(state, word, n) %>%
    arrange(state, word)

state_words <- word_log_odds %>%
    group_by(n) %>%
    filter(sum(n) > 5) %>%
    ungroup() %>%
    group_by(state) %>%
    top_n(3) %>%
    ungroup() %>%
    select(state, word)

state_words
## # A tibble: 151 x 2
##    state word     
##    <chr> <chr>    
##  1 AK    flavored 
##  2 AK    smoke    
##  3 AK    vegetable
##  4 AL    ale      
##  5 AL    beer     
##  6 AL    style    
##  7 AR    ale      
##  8 AR    beer     
##  9 AR    style    
## 10 AZ    french   
## # … with 141 more rows

Now let’s create labels for the plot from these words.

labels <- state_words %>%
    group_by(state) %>%
    summarise(word = paste(word, collapse = "<br/>"))

labels
## # A tibble: 50 x 2
##    state word                              
##    <chr> <chr>                             
##  1 AK    flavored<br/>smoke<br/>vegetable  
##  2 AL    ale<br/>beer<br/>style            
##  3 AR    ale<br/>beer<br/>style            
##  4 AZ    french<br/>malt<br/>scottish      
##  5 CA    black<br/>double<br/>stouts       
##  6 CO    foreign<br/>light<br/>premium     
##  7 CT    ales<br/>old<br/>pale             
##  8 DC    ale<br/>american<br/>style        
##  9 DE    black<br/>free<br/>gluten         
## 10 FL    dusseldorf<br/>munich<br/>scottish
## # … with 40 more rows

Preparing for mapping

We need a GeoJSON file to create a map of the US. Let’s use one from: https://eric.clst.org/tech/usgeojson/

states <- geojsonio::geojson_read("gz_2010_us_040_00_500k.json", what = "sp")

Using joins, let’s set up two quantities to be mapped with this GeoJSON:

  • the total count of medals
  • the labels of distinctive words
medals <- tibble(state.name = states$NAME) %>%
    left_join(
        beer_awards %>%
            count(state) %>%
            left_join(tibble(state = state.abb, state.name))
    ) %>%
    left_join(labels) %>%
    replace_na(list(n = 0,
                    word = "<i>none</i>"))

states$medals <- medals$n

label <- map(medals$word, htmltools::HTML)

Creating a map

Now let’s create a map to display the three most distinctive beer category words for each state, along with the total medals for each state.

library(leaflet)

bins <- c(0, 50, 100, 200, 500, 1000)
pal <- colorBin(palette = "OrRd", domain = medals, bins = bins)

m <- leaflet(states) %>%
    setView(-96, 37.8, 4) %>%
    addTiles() %>%
    addPolygons(
        fillColor = ~ pal(medals),
        weight = 2,
        opacity = 1,
        color = "white",
        fillOpacity = 0.5,
        highlight = highlightOptions(
            weight = 5,
            color = "white",
            fillOpacity = 0.7,
            bringToFront = TRUE),
        label = label,
        labelOptions = labelOptions(
            style = list("font-weight" = "normal", padding = "3px 8px"),
            textsize = "12px",
            direction = "auto")) %>%
    addLegend("bottomright", pal = pal, values = ~medals,
              title = "Total medals",
              opacity = 1
    )

m