As a big fan of beer, I conducted this project in light of my new arrival in Canada, where there are hundreds of lager and ale. Moreover, it is hard to find beer in groceries or normal supermarkets. Hence, collecting the data from LCBO (Liquor Control Board of Ontario) - one of the largest beer distributors in Canada helps me overview the beer market in this country.
Data collection
library(rvest)
library(tidyverse)
# Create page indicies of each beer type
pale_lager_indicies <- c(0:42)*12
amber_lager_indicies <- c(0:1)*12
flavoured_lager_indicies <- c(0:1)*12
dark_lager_indicies <- c(0:1)*12
pale_ale_indicies <- c(0:21)*12
pale_ale2_indicies <- c(0:8)*12
sour_ale_indicies <- c(0:7)*12
stoutnporter_ale_indicies <- c(0:4)*12
belgian_ale_indicies <- c(0:3)*12
other_ale_indicies <- c(0:3)*12
lager_ale_indicies <- c(0:3)*12
flavoured_ale_indicies <- c(0:3)*12
wheat_ale_indicies <- c(0:3)*12
dark_ale_indicies <- c(0:3)*12
# Create urls of each type of beer page
pale_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/beer-cider-16/lager-16023/pale-lager-16023071",
"?pageView=grid&orderBy=5&beginIndex=",
pale_lager_indicies)
amber_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/amber-lager-16023299",
"?pageView=grid&orderBy=5&beginIndex=",
amber_lager_indicies)
flavoured_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/flavoured-lager-16023306",
"?pageView=grid&orderBy=5&beginIndex=",
flavoured_lager_indicies)
dark_lager_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/dark-lager-16023073",
"?pageView=grid&orderBy=5&beginIndex=",
dark_lager_indicies)
pale_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/india-pale-ale-%28ipa%29-16022066",
"?pageView=grid&orderBy=5&beginIndex=",
pale_ale_indicies)
pale_ale2_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/pale-ale-16022065",
"?pageView=grid&orderBy=5&beginIndex=",
pale_ale2_indicies)
sour_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/sour-16022061",
"?pageView=grid&orderBy=5&beginIndex=",
sour_ale_indicies)
stoutnporter_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/stout-porter",
"?pageView=grid&orderBy=5&beginIndex=",
stoutnporter_ale_indicies)
belgian_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/belgian-ale-16022064",
"?pageView=grid&orderBy=5&beginIndex=",
belgian_ale_indicies)
other_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/american-pale-ale-16022287",
"?pageView=grid&orderBy=5&beginIndex=",
other_ale_indicies)
lager_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/lagered-ale",
"?pageView=grid&orderBy=5&beginIndex=",
lager_ale_indicies)
flavoured_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/flavoured-beer-16022290",
"?pageView=grid&orderBy=5&beginIndex=",
flavoured_ale_indicies)
wheat_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/wheat-16022060",
"?pageView=grid&orderBy=5&beginIndex=",
wheat_ale_indicies)
dark_ale_pages <- paste0("https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo/dark-ale-16022343",
"?pageView=grid&orderBy=5&beginIndex=",
dark_ale_indicies)
# Function to extract the url of specific beer
get_links <- function(beer_pages){
links <- c()
for (i in 1: length(beer_pages)){
beer_links <- beer_pages[i] %>%
read_html() %>%
html_nodes(".product_name a") %>%
html_attr("href")
links <- c(links, beer_links)
}
return(links)
}
# Links of specific beer types
pale_lager_links <- get_links(pale_lager_pages)
amber_lager_links <- get_links(amber_lager_pages)
flavoured_lager_links <- get_links(flavoured_lager_pages)
dark_lager_links <- get_links(dark_lager_pages)
pale_ale_links <- get_links(pale_ale_pages)
pale_ale2_links <- get_links(pale_ale2_pages)
sour_ale_links <- get_links(sour_ale_pages)
stoutnporter_ale_links <- get_links(stoutnporter_ale_pages)
belgian_ale_links <- get_links(belgian_ale_pages)
other_ale_links <- get_links(other_ale_pages)
lager_ale_links <- get_links(lager_ale_pages)
flavoured_ale_links <- get_links(flavoured_ale_pages)
wheat_ale_links <- get_links(wheat_ale_pages)
dark_ale_links <- get_links(dark_ale_pages)
# Function to extract information of each type of beers
get_info <- function(links){
name <- c()
price <- c()
volume <- c()
container <- c()
alcohol_vol <- c()
country <- c()
for (i in 1: length(links)){
beer_name <- links[i] %>% read_html() %>% html_node(".main_header") %>% html_text()
name <- c(name, beer_name)
beer_price <- links[i] %>% read_html() %>% html_node(".price") %>%
html_text() %>% str_replace_all("([\n\t])", "") %>% substring(2) %>% as.numeric()
price <- c(price, beer_price)
beer_volume <- links[i] %>% read_html() %>% html_node("dd b") %>%
html_text() %>% str_extract(".*mL") %>% str_replace("mL","") %>% str_trim()
volume <- c(volume, beer_volume)
beer_container <- links[i] %>% read_html() %>% html_node("dd b") %>%
html_text() %>% str_extract("mL.*") %>% str_replace("mL","") %>% str_trim()
container <- c(container, beer_container)
beer_alcohol <- links[i] %>% read_html() %>% html_node("dd:nth-child(3) span") %>% html_text()
alcohol_vol <- c(alcohol_vol, beer_alcohol)
beer_country <- links[i] %>% read_html() %>% html_node("dd:nth-child(5) span") %>% html_text() %>%
str_replace_all("([\n\t])", "")
country <- c(country, beer_country)
}
beer_info <- data.frame(Name = name, Price = price, Volume = volume,
Container_type = container, Alcohol = alcohol_vol,
Country = country, stringsAsFactors = FALSE)
return(beer_info)
}
# Information of specific beer
dark_lager <- get_info(dark_lager_links)
pale_lager <- get_info(pale_lager_links)
amber_lager <- get_info(amber_lager_links)
flavoured_lager <- get_info(flavoured_lager_links)
pale_ale <- get_info(pale_ale_links)
pale_ale2 <- get_info(pale_ale2_links)
sour_ale <- get_info(sour_ale_links)
stout_porter_ale <- get_info(stoutnporter_ale_links)
belgian_ale <- get_info(belgian_ale_links)
other_ale <- get_info(other_ale_links)
lager_ale <- get_info(lager_ale_links)
flavoured_ale <- get_info(flavoured_ale_links)
wheat_ale <- get_info(wheat_ale_links)
dark_ale <- get_info(dark_ale_links)
# Gather Lager and ALe information
dark_lager <- dark_lager %>%
mutate(Minor_type = "Dark Lager")
pale_lager <- pale_lager %>%
mutate(Minor_type = "Pale Lager")
amber_lager <- amber_lager %>%
mutate(Minor_type = "Amber Lager")
flavoured_lager <- flavoured_lager %>%
mutate(Minor_type = "Flavoured Lager")
lager <- rbind(dark_lager, pale_lager, amber_lager, flavoured_lager)
lager <- lager %>%
mutate(Beer_type = "Lager")
pale_ale <- pale_ale %>%
mutate(Minor_type = "Indian Pale Ale")
pale_ale2 <- pale_ale2 %>%
mutate(Minor_type = "Pale Ale")
sour_ale <- sour_ale %>%
mutate(Minor_type = "Sour Ale")
stout_porter_ale <- stout_porter_ale %>%
mutate(Minor_type = "Stout and Porter Ale")
belgian_ale <- belgian_ale %>%
mutate(Minor_type = "Belgian Ale")
other_ale <- other_ale %>%
mutate(Minor_type = "Other Ale")
lager_ale <- lager_ale %>%
mutate(Minor_type = "Lager Ale")
flavoured_ale <- flavoured_ale %>%
mutate(Minor_type = "Flavoured Ale")
wheat_ale <- wheat_ale %>%
mutate(Minor_type = "Wheat Ale")
dark_ale <- dark_ale %>%
mutate(Minor_type = "Dark Ale")
ale <- rbind(pale_ale, pale_ale2, sour_ale,
stout_porter_ale, belgian_ale,
other_ale, lager_ale, flavoured_ale,
wheat_ale, dark_ale)
ale <- ale %>%
mutate(Beer_type = "Ale")
Beer <- rbind(lager, ale)
# Save data
Beer <- Beer %>%
mutate(Package = ifelse(Volume %>% str_detect("x") == TRUE, "package", "single"),
Alcohol = Alcohol %>% str_replace("%","") %>% as.numeric(),
Vol = ifelse(Package == "package",
Volume %>% str_extract(".*x") %>% str_replace("x","") %>% str_trim() %>% as.numeric() *
Volume %>% str_extract("x.*") %>% str_replace("x","") %>% str_trim() %>% as.numeric(),
Volume %>% as.numeric()),
Price_Vol = (Price/Vol)*1000)
write.csv(Beer,"Canada_beer_info.csv")
The cheapest lager in Canada
# Read data for plotting
Beer <- read.csv("C://Users//Thinh Dao//Desktop//Canada_beer_info.csv", stringsAsFactors = FALSE)
Lager <- Beer %>% filter(Beer_type == "Lager")
Ale <- Beer %>% filter(Beer_type == "Ale")
Lager <- Lager %>%
group_by(Name) %>%
filter(Price_Vol == min(Price_Vol))
Ale <- Ale %>%
group_by(Name) %>%
filter(Price_Vol == min(Price_Vol))
# The cheapest lager
Lager %>%
as.data.frame() %>%
mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>%
arrange(Price_Vol) %>%
slice(c(1:20)) %>%
arrange(-Price_Vol) %>%
mutate(Name = factor(Name, levels = Name)) %>%
ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
geom_bar(stat="identity", width = 0.5, fill = "#ffffb2")+
geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
coord_flip() +
theme_minimal() +
theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
theme(panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
labs(y = "Price: CAD/liter",
title = "The cheapest lager beer in Canada",
caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) +
theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> cheap_lager
The most expensive in Canada
# The most expensive lager
Lager %>%
as.data.frame() %>%
mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>%
arrange(-Price_Vol) %>%
slice(c(1:20)) %>%
arrange(Price_Vol) %>%
mutate(Name = factor(Name, levels = Name)) %>%
ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
geom_bar(stat="identity", width = 0.5, fill = "#ffffb2")+
geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
coord_flip() +
theme_minimal() +
theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
theme(panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
labs(y = "Price: CAD/liter",
title = "The most expensive lager beer in Canada",
caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) +
theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> expensive_lager
The cheapest ale in Canada
# The cheapest ale
Ale %>%
as.data.frame() %>%
mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>%
arrange(Price_Vol) %>%
slice(c(1:20)) %>%
arrange(-Price_Vol) %>%
mutate(Name = factor(Name, levels = Name)) %>%
ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
geom_bar(stat="identity", width = 0.5, fill = "#bae4b3")+
geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
coord_flip() +
theme_minimal() +
theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
theme(panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
labs(y = "Price: CAD/liter",
title = "The cheapest ale in Canada",
caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) +
theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> cheap_ale
The most expensive ale in Canada
# The most expensive ale
Ale %>%
as.data.frame() %>%
mutate(Name = paste0(Name, " - ", Alcohol, "%")) %>%
arrange(-Price_Vol) %>%
slice(c(1:20)) %>%
arrange(Price_Vol) %>%
mutate(Name = factor(Name, levels = Name)) %>%
ggplot(aes(x = Name, y = Price_Vol, label = round(Price_Vol,2)))+
geom_bar(stat="identity", width = 0.5, fill = "#bae4b3")+
geom_text(hjust = 1, color = "gray20", size = 6, fontface= "bold")+
coord_flip() +
theme_minimal() +
theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
theme(panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
labs(y = "Price: CAD/liter",
title = "The most expensive ale in Canada",
caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) +
theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> expensive_ale
Average price of lager by country
# Average price of lager by country
Lager %>%
mutate(Group = case_when(Country %>% str_detect("Canada") == TRUE ~ "Canada",
Country %>% str_detect("USA") == TRUE ~ "USA",
Country %>% str_detect("United Kingdom") == TRUE ~ "UK",
Country %>% str_detect("Greece") == TRUE ~ "Greece",
Country %>% str_detect("France") == TRUE ~ "France",
TRUE ~ Country)) %>%
group_by(Group) %>%
summarise(Mean_price = mean(Price_Vol)) %>%
mutate(Group = fct_reorder(Group, Mean_price)) %>%
ggplot( aes(x=reorder(Group, Mean_price), y= Mean_price)) +
geom_bar(stat = "identity", width = 0.5, fill = "#ffffb2") +
geom_text(aes(label = round(Mean_price,2)), hjust = 1, color = "gray20", size = 5, fontface= "bold")+
coord_flip() +
theme_minimal() +
theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
theme(panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
labs(y = "Price: CAD/liter",
title = "Average price of lager by country",
caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) +
theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> lager_country
Average price of ale by country
# Average price of ale by country
Ale %>%
mutate(Group = case_when(Country %>% str_detect("Canada") == TRUE ~ "Canada",
Country %>% str_detect("USA") == TRUE ~ "USA",
Country %>% str_detect("United Kingdom") == TRUE ~ "UK",
Country %>% str_detect("Greece") == TRUE ~ "Greece",
Country %>% str_detect("France") == TRUE ~ "France",
TRUE ~ Country)) %>%
group_by(Group) %>%
summarise(Mean_price = mean(Price_Vol)) %>%
mutate(Group = fct_reorder(Group, Mean_price)) %>%
ggplot( aes(x=reorder(Group, Mean_price), y= Mean_price)) +
geom_bar(stat = "identity", width = 0.5, fill = "#bae4b3") +
geom_text(aes(label = round(Mean_price,2)), hjust = 1, color = "gray20", size = 5, fontface= "bold")+
coord_flip() +
theme_minimal() +
theme(plot.background = element_rect(fill = "grey20",linetype = 0))+
theme(panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(size = 15, face = "bold", color = "gray100"),
axis.title.x = element_text(size = 19, face = "bold", color = "gray100"))+
labs(y = "Price: CAD/liter",
title = "Average price of ale by country",
caption = "Data Source: https://www.lcbo.com/webapp/wcs/stores/servlet/en/lcbo")+
theme(plot.title = element_text(size = 29, color = "gray100", face = "bold")) +
theme(plot.caption = element_text(size = 13, color = "gray100", face = "italic")) -> ale_country