As a big fan of beer, I conducted this project in light of my new arrival in Canada, where there are hundreds of lager and ale. Moreover, it is hard to find beer in groceries or normal supermarkets. Hence, collecting the data from LCBO (Liquor Control Board of Ontario) - one of the largest beer distributors in Canada helps me overview the beer market in this country.

Data collection

library(rvest)
library(tidyverse)

# Create page indicies of each beer type
pale_lager_indicies <- c(0:42)*12
amber_lager_indicies <- c(0:1)*12
flavoured_lager_indicies <- c(0:1)*12
dark_lager_indicies <- c(0:1)*12
pale_ale_indicies <- c(0:21)*12
pale_ale2_indicies <- c(0:8)*12
sour_ale_indicies <- c(0:7)*12
stoutnporter_ale_indicies <- c(0:4)*12
belgian_ale_indicies <- c(0:3)*12
other_ale_indicies <- c(0:3)*12
lager_ale_indicies <- c(0:3)*12
flavoured_ale_indicies <- c(0:3)*12
wheat_ale_indicies <- c(0:3)*12
dark_ale_indicies <- c(0:3)*12
# Create urls of each type of beer page
pale_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/beer-cider-16/lager-16023/pale-lager-16023071",
                           "?pageView=grid&orderBy=5&beginIndex=",
                           pale_lager_indicies)

amber_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/amber-lager-16023299",
                            "?pageView=grid&orderBy=5&beginIndex=",
                            amber_lager_indicies)

flavoured_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/flavoured-lager-16023306",
                                "?pageView=grid&orderBy=5&beginIndex=",
                                flavoured_lager_indicies)

dark_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/dark-lager-16023073",
                           "?pageView=grid&orderBy=5&beginIndex=",
                           dark_lager_indicies)

pale_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/india-pale-ale-%28ipa%29-16022066",
                           "?pageView=grid&orderBy=5&beginIndex=",
                           pale_ale_indicies)

pale_ale2_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/pale-ale-16022065",
                          "?pageView=grid&orderBy=5&beginIndex=",
                          pale_ale2_indicies)

sour_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/sour-16022061",
                         "?pageView=grid&orderBy=5&beginIndex=",
                         sour_ale_indicies)

stoutnporter_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/stout-porter",
                         "?pageView=grid&orderBy=5&beginIndex=",
                         stoutnporter_ale_indicies)

belgian_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/belgian-ale-16022064",
                            "?pageView=grid&orderBy=5&beginIndex=",
                            belgian_ale_indicies)

other_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/american-pale-ale-16022287",
                            "?pageView=grid&orderBy=5&beginIndex=",
                            other_ale_indicies)

lager_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/lagered-ale",
                          "?pageView=grid&orderBy=5&beginIndex=",
                          lager_ale_indicies)

flavoured_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/flavoured-beer-16022290",
                          "?pageView=grid&orderBy=5&beginIndex=",
                          flavoured_ale_indicies)

wheat_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/wheat-16022060",
                          "?pageView=grid&orderBy=5&beginIndex=",
                          wheat_ale_indicies)

dark_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/dark-ale-16022343",
                          "?pageView=grid&orderBy=5&beginIndex=",
                          dark_ale_indicies)

# Function to extract the url of specific beer 
get_links <- function(beer_pages){
  links <- c()
  for (i in 1: length(beer_pages)){
    beer_links <- beer_pages[i] %>% 
      read_html() %>% 
      html_nodes(".product_name a") %>% 
      html_attr("href")
    links <- c(links, beer_links)
  }
  return(links)
}

# Links of specific beer types
pale_lager_links  <- get_links(pale_lager_pages)
amber_lager_links <- get_links(amber_lager_pages)
flavoured_lager_links <- get_links(flavoured_lager_pages)
dark_lager_links <- get_links(dark_lager_pages)
pale_ale_links <- get_links(pale_ale_pages)
pale_ale2_links <- get_links(pale_ale2_pages)
sour_ale_links <- get_links(sour_ale_pages)
stoutnporter_ale_links <- get_links(stoutnporter_ale_pages)
belgian_ale_links <- get_links(belgian_ale_pages)
other_ale_links <- get_links(other_ale_pages)
lager_ale_links <- get_links(lager_ale_pages)
flavoured_ale_links <- get_links(flavoured_ale_pages)
wheat_ale_links <- get_links(wheat_ale_pages)
dark_ale_links <- get_links(dark_ale_pages)
# Function to extract information of each type of beers
get_info <- function(links){
  name <- c()
  price <- c()
  volume <- c()
  container <- c()
  alcohol_vol <- c()
  country <- c()
  for (i in 1: length(links)){
    beer_name <- links[i] %>% read_html() %>% html_node(".main_header") %>% html_text()
    name <- c(name, beer_name)
    beer_price  <- links[i] %>% read_html() %>% html_node(".price") %>% 
      html_text() %>% str_replace_all("([\n\t])", "") %>% substring(2) %>% as.numeric()
    price <- c(price, beer_price)
    beer_volume <- links[i] %>% read_html() %>% html_node("dd b") %>% 
      html_text() %>% str_extract(".*mL") %>% str_replace("mL","") %>% str_trim()
    volume  <- c(volume, beer_volume)
    beer_container <- links[i] %>% read_html() %>% html_node("dd b") %>% 
      html_text() %>% str_extract("mL.*") %>% str_replace("mL","") %>% str_trim()
    container <- c(container, beer_container)
    beer_alcohol <- links[i] %>% read_html() %>% html_node("dd:nth-child(3) span") %>% html_text()
    alcohol_vol <- c(alcohol_vol, beer_alcohol)
    beer_country <- links[i] %>% read_html() %>% html_node("dd:nth-child(5) span") %>% html_text() %>% 
      str_replace_all("([\n\t])", "")
    country <- c(country, beer_country)
  }
  beer_info <- data.frame(Name = name, Price = price, Volume = volume,
                          Container_type = container, Alcohol = alcohol_vol,
                          Country = country, stringsAsFactors = FALSE)
  return(beer_info)
}

# Information of specific beer
dark_lager <- get_info(dark_lager_links)
pale_lager <- get_info(pale_lager_links)
amber_lager <- get_info(amber_lager_links)
flavoured_lager <- get_info(flavoured_lager_links)
pale_ale <- get_info(pale_ale_links)
pale_ale2 <- get_info(pale_ale2_links)
sour_ale <- get_info(sour_ale_links)
stout_porter_ale <- get_info(stoutnporter_ale_links)
belgian_ale <- get_info(belgian_ale_links)
other_ale <- get_info(other_ale_links)
lager_ale  <- get_info(lager_ale_links)
flavoured_ale  <- get_info(flavoured_ale_links)
wheat_ale  <- get_info(wheat_ale_links)
dark_ale  <- get_info(dark_ale_links)

# Gather Lager and ALe information
dark_lager <- dark_lager %>% 
  mutate(Minor_type = "Dark Lager")

pale_lager <- pale_lager %>% 
  mutate(Minor_type = "Pale Lager")

amber_lager <- amber_lager %>% 
  mutate(Minor_type = "Amber Lager")

flavoured_lager <- flavoured_lager %>% 
  mutate(Minor_type = "Flavoured Lager")

lager <- rbind(dark_lager, pale_lager, amber_lager, flavoured_lager)
lager <- lager %>% 
  mutate(Beer_type = "Lager")

pale_ale <- pale_ale %>% 
  mutate(Minor_type = "Indian Pale Ale")

pale_ale2 <- pale_ale2 %>% 
  mutate(Minor_type = "Pale Ale")

sour_ale <- sour_ale %>% 
  mutate(Minor_type = "Sour Ale")

stout_porter_ale <- stout_porter_ale %>% 
  mutate(Minor_type = "Stout and Porter Ale")

belgian_ale <- belgian_ale %>% 
  mutate(Minor_type = "Belgian Ale")

other_ale <- other_ale %>% 
  mutate(Minor_type = "Other Ale")

lager_ale <- lager_ale %>% 
  mutate(Minor_type = "Lager Ale")

flavoured_ale <- flavoured_ale %>% 
  mutate(Minor_type = "Flavoured Ale")

wheat_ale <- wheat_ale %>% 
  mutate(Minor_type = "Wheat Ale")

dark_ale <- dark_ale %>% 
  mutate(Minor_type = "Dark Ale")

ale <- rbind(pale_ale, pale_ale2, sour_ale,
             stout_porter_ale, belgian_ale,
             other_ale, lager_ale, flavoured_ale,
             wheat_ale, dark_ale)

ale <- ale %>% 
  mutate(Beer_type = "Ale")

Beer <- rbind(lager, ale)

# Save data 

Beer <- Beer %>% 
  mutate(Package = ifelse(Volume %>% str_detect("x") == TRUE, "package", "single"),
         Alcohol = Alcohol %>% str_replace("%","") %>% as.numeric(),
         Vol = ifelse(Package == "package", 
                      Volume %>% str_extract(".*x") %>% str_replace("x","") %>% str_trim() %>%  as.numeric() * 
                      Volume %>% str_extract("x.*") %>% str_replace("x","") %>% str_trim() %>%  as.numeric(),
                      Volume %>% as.numeric()),
         Price_Vol = (Price/Vol)*1000)


write.csv(Beer,"Canada_beer_info.csv")

The cheapest lager in Canada

# Read data for plotting

Beer <- read.csv("C://Users//Thinh Dao//Desktop//Canada_beer_info.csv", stringsAsFactors = FALSE)

Lager <- Beer %>% filter(Beer_type == "Lager")
Ale <- Beer %>% filter(Beer_type == "Ale")

Lager <- Lager %>% 
  group_by(Name) %>% 
  filter(Price_Vol == min(Price_Vol)) 

Ale <- Ale %>% 
  group_by(Name) %>% 
  filter(Price_Vol == min(Price_Vol))

# The cheapest lager
Lager %>% 
  as.data.frame() %>% 
  mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>% 
  arrange(Price_Vol) %>% 
  slice(c(1:20)) %>% 
  arrange(-Price_Vol) %>% 
  mutate(Name = factor(Name, levels = Name)) %>%
  ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
  geom_bar(stat="identity", width = 0.5, fill = "#ffffb2")+
  geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
  coord_flip() + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
        axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
  labs(y = "Price: CAD/liter", 
       title = "The cheapest lager beer in Canada",
       caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
  theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) + 
  theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> cheap_lager

The most expensive in Canada

# The most expensive lager
Lager %>% 
  as.data.frame() %>% 
  mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>% 
  arrange(-Price_Vol) %>% 
  slice(c(1:20)) %>% 
  arrange(Price_Vol) %>% 
  mutate(Name = factor(Name, levels = Name)) %>%
  ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
  geom_bar(stat="identity", width = 0.5, fill = "#ffffb2")+
  geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
  coord_flip() + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
        axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
  labs(y = "Price: CAD/liter", 
       title = "The most expensive lager beer in Canada",
       caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
  theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) + 
  theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> expensive_lager

The cheapest ale in Canada

# The cheapest ale
Ale %>% 
  as.data.frame() %>% 
  mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>% 
  arrange(Price_Vol) %>% 
  slice(c(1:20)) %>% 
  arrange(-Price_Vol) %>% 
  mutate(Name = factor(Name, levels = Name)) %>%
  ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
  geom_bar(stat="identity", width = 0.5, fill = "#bae4b3")+
  geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
  coord_flip() + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
        axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
  labs(y = "Price: CAD/liter", 
       title = "The cheapest ale in Canada",
       caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
  theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) + 
  theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> cheap_ale

The most expensive ale in Canada

# The most expensive ale
Ale %>% 
  as.data.frame() %>% 
  mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>% 
  arrange(-Price_Vol) %>% 
  slice(c(1:20)) %>% 
  arrange(Price_Vol) %>% 
  mutate(Name = factor(Name, levels = Name)) %>%
  ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
  geom_bar(stat="identity", width = 0.5, fill = "#bae4b3")+
  geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
  coord_flip() + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
        axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
  labs(y = "Price: CAD/liter", 
       title = "The most expensive ale in Canada",
       caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
  theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) + 
  theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> expensive_ale

Average price of lager by country

# Average price of lager by country
Lager %>% 
  mutate(Group = case_when(Country %>% str_detect("Canada") == TRUE ~ "Canada",
                           Country %>% str_detect("USA") == TRUE ~ "USA",
                           Country %>% str_detect("United Kingdom") == TRUE ~ "UK",
                           Country %>% str_detect("Greece") == TRUE ~ "Greece",
                           Country %>% str_detect("France") == TRUE ~ "France",
                           TRUE ~ Country)) %>% 
  group_by(Group) %>% 
  summarise(Mean_price = mean(Price_Vol)) %>% 
  mutate(Group = fct_reorder(Group, Mean_price)) %>% 
  ggplot( aes(x=reorder(Group, Mean_price), y= Mean_price)) + 
  geom_bar(stat = "identity", width = 0.5, fill = "#ffffb2") +
  geom_text(aes(label = round(Mean_price,2)), hjust = 1, color = "gray20", size = 5, fontface= "bold")+
  coord_flip() + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
        axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
  labs(y = "Price: CAD/liter", 
       title = "Average price of lager by country",
       caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
  theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) + 
  theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> lager_country

Average price of ale by country

# Average price of ale by country
Ale %>% 
  mutate(Group = case_when(Country %>% str_detect("Canada") == TRUE ~ "Canada",
                           Country %>% str_detect("USA") == TRUE ~ "USA",
                           Country %>% str_detect("United Kingdom") == TRUE ~ "UK",
                           Country %>% str_detect("Greece") == TRUE ~ "Greece",
                           Country %>% str_detect("France") == TRUE ~ "France",
                           TRUE ~ Country)) %>% 
  group_by(Group) %>% 
  summarise(Mean_price = mean(Price_Vol)) %>% 
  mutate(Group = fct_reorder(Group, Mean_price)) %>% 
  ggplot( aes(x=reorder(Group, Mean_price), y= Mean_price)) + 
  geom_bar(stat = "identity", width = 0.5, fill = "#bae4b3") +
  geom_text(aes(label = round(Mean_price,2)), hjust = 1, color = "gray20", size = 5, fontface= "bold")+
  coord_flip() + 
  theme_minimal() +
  theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
        axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
  labs(y = "Price: CAD/liter", 
       title = "Average price of ale by country",
       caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
  theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) + 
  theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> ale_country