Topic: Alcohol consumption and mental health in the World

The topic of this report is the global landscape of alcohol consumption and its relationship with mental health, specifically focusing on suicide rates.

The research questions are: 1. Which countries have the highest and lowest alcohol consumption? 2. How does alcohol consumption evolve in time? 3. What is the prevalence of alcohol use disorder? 4. How does the prevalence of alcohol use disorder vary by gender? 5. How many former drinkers are there per country? 6. What is the prevalence of mental health issues (suicide rates) in the world by country? 7. What is the statistical relationship between alcohol consumption and mental health (suicide rates)? 8. Which countries have both the highest alcohol problem and suicide rate?

Data Acquisition and Preparation

This section handles all data downloading, cleaning, and merging. A robust “download-once” approach is used. Data is downloaded from the API only if a local copy does not already exist.

# --- 1. Helper Function to Download and Cache Data ---
# Load a dataset from the local cache, downloading it on first use.
#
# Arguments:
#   url       - API endpoint; only the `value` element of the parsed JSON
#               response is kept.
#   file_name - name of the .rds cache file stored under "data/".
# Returns the cached object when the file exists, otherwise the freshly
# downloaded one (which is also written to the cache).
get_who_data <- function(url, file_name) {
  local_path <- file.path("data", file_name)

  if (file.exists(local_path)) {
    message("Loading data from local file: ", local_path)
    return(readRDS(local_path))
  }

  message("Downloading data from API and saving to: ", local_path)
  data <- jsonlite::fromJSON(url)$value

  # A response without a `value` element would otherwise be cached as NULL and
  # then served from disk on every later run.
  if (is.null(data)) {
    stop("No `value` element in the response from: ", url, call. = FALSE)
  }

  # saveRDS() does not create missing directories, so make sure the cache
  # folder exists before the first write.
  dir.create(dirname(local_path), recursive = TRUE, showWarnings = FALSE)
  saveRDS(data, file = local_path)
  data
}

# --- 2. Download or Load All Raw Data ---
# Dimension lookups: translate the codes used in the indicator tables into
# readable country and sex labels.
locations_lookup <- get_who_data(
  url = "https://ghoapi.azureedge.net/api/DIMENSION/COUNTRY/DimensionValues",
  file_name = "locations.rds"
)
gender_lookup <- get_who_data(
  url = "https://ghoapi.azureedge.net/api/DIMENSION/SEX/DimensionValues",
  file_name = "genders.rds"
)

# Indicator tables; each object is named after how it is used further down
# (consumption, use disorders, former drinkers, suicide rates).
apc_values <- get_who_data(
  url = "https://ghoapi.azureedge.net/api/SA_0000001747",
  file_name = "apc_values.rds"
)
disorder_values <- get_who_data(
  url = "https://ghoapi.azureedge.net/api/SA_0000001462",
  file_name = "disorder_values.rds"
)
former_values <- get_who_data(
  url = "https://ghoapi.azureedge.net/api/SA_0000001414",
  file_name = "former_values.rds"
)
suicide_values <- get_who_data(
  url = "https://ghoapi.azureedge.net/api/MH_12",
  file_name = "mh_12_values.rds"
)

# Country polygons (sf object) used as the base layer of every map.
world_map <- ne_countries(returnclass = "sf", scale = "medium")

# --- 3. Helper Function to Standardize Country Names ---
# Add a `join_name` column holding a short country name suitable for joining
# WHO tables to each other and to the world map.
#
# Arguments:
#   df - a data frame (or tibble / sf object) with a `country` column.
# Returns `df` with an extra `join_name` column. Names without an entry in the
# lookup table are passed through unchanged; NA stays NA.
standardize_country_names <- function(df) {
  stopifnot("country" %in% names(df))

  # WHO official title -> short name used for joins.
  who_to_map_name <- c(
    "Russian Federation" = "Russia",
    "Türkiye" = "Turkey",
    "Iran (Islamic Republic of)" = "Iran",
    "Venezuela (Bolivarian Republic of)" = "Venezuela",
    "Bolivia (Plurinational State of)" = "Bolivia",
    "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
    "Republic of Korea" = "South Korea",
    "Viet Nam" = "Vietnam",
    "Democratic People's Republic of Korea" = "North Korea",
    "Lao People's Democratic Republic" = "Laos",
    "Congo" = "Republic of the Congo",
    "Cote d'Ivoire" = "Ivory Coast",
    # Accented spelling of the same country, so the rule also fires when the
    # source keeps its diacritics (as it does for "Türkiye").
    "C\u00f4te d'Ivoire" = "Ivory Coast",
    # Further official names whose short form differs.
    # NOTE(review): other countries may still need entries - check the
    # unmatched rows of the map joins.
    "Syrian Arab Republic" = "Syria",
    "Republic of Moldova" = "Moldova",
    "Brunei Darussalam" = "Brunei",
    "Netherlands (Kingdom of the)" = "Netherlands"
  )

  country <- as.character(df$country)

  # Treat the typographic apostrophe (U+2019) like a plain one so that either
  # spelling hits the same lookup entry.
  lookup_key <- gsub("\u2019", "'", country, fixed = TRUE)

  join_name <- unname(who_to_map_name[lookup_key])
  unmapped <- is.na(join_name)
  join_name[unmapped] <- country[unmapped]

  df$join_name <- join_name
  df
}

# --- 4. Process All Datasets ---
# Each pipeline attaches country metadata via the location lookup and then
# drops rows whose SpatialDim has no match there. Left in, those rows carry an
# NA country (and later an NA join_name); dplyr joins match NA to NA by
# default, so they could be paired up across tables, and they would all share
# a single NA row in the wide consumption table.

# Consumption: one row per country and year, then one column per year.
# NOTE(review): unlike the other indicators, no Dim1 filter is applied here -
# confirm this indicator has a single value per country and year.
alcohol_consumption_long <- apc_values %>%
  left_join(locations_lookup, by = c("SpatialDim" = "Code")) %>%
  select(
    country = Title,
    region = ParentTitle,
    year = TimeDim,
    alcohol_consumption = NumericValue
  ) %>%
  mutate(alcohol_consumption = as.numeric(alcohol_consumption)) %>%
  filter(!is.na(country))

alcohol_wide <- pivot_wider(
  alcohol_consumption_long,
  id_cols = c(country, region),
  names_from = year,
  values_from = alcohol_consumption
)

# Fill a missing 2005 value with the 2004 value of the same country.
alcohol_imputed <- mutate(alcohol_wide, `2005` = coalesce(`2005`, `2004`))

# Use disorders: keep the sex label; the prevalence is the first number found
# in the formatted Value string.
# NOTE(review): no TimeDim filter is applied here - confirm the indicator
# covers a single year.
alcohol_disorder_long <- disorder_values %>%
  left_join(locations_lookup, by = c("SpatialDim" = "Code")) %>%
  left_join(gender_lookup, by = c("Dim1" = "Code")) %>%
  select(
    country = Title.x,
    region = ParentTitle.x,
    gender = Title.y,
    alcohol_disorder = Value
  ) %>%
  mutate(
    alcohol_disorder = as.numeric(str_extract(alcohol_disorder, "\\d+\\.?\\d*"))
  ) %>%
  filter(!is.na(country))

# Former drinkers: both sexes, 2019 only.
former_drinkers_data <- former_values %>%
  left_join(locations_lookup, by = c("SpatialDim" = "Code")) %>%
  filter(Dim1 == "SEX_BTSX" & TimeDim == 2019) %>%
  select(country = Title, former_drinkers = Value) %>%
  mutate(
    former_drinkers = as.numeric(str_extract(former_drinkers, "\\d+\\.?\\d*"))
  ) %>%
  filter(!is.na(country))

# Suicide rates: both sexes, 2019 only, first row per country.
suicide_data <- suicide_values %>%
  left_join(locations_lookup, by = c("SpatialDim" = "Code")) %>%
  filter(Dim1 == "SEX_BTSX" & TimeDim == 2019) %>%
  select(country = Title, suicide_rate = NumericValue) %>%
  mutate(suicide_rate = as.numeric(suicide_rate)) %>%
  filter(!is.na(country)) %>%
  distinct(country, .keep_all = TRUE)

# --- 5. Create Final Merged Dataset for Analysis ---
# Pair each country's consumption record with its suicide rate via the
# standardized name, keeping only countries that have both figures.
master_data <- inner_join(
  standardize_country_names(alcohol_imputed),
  standardize_country_names(suicide_data),
  by = "join_name"
) %>%
  select(
    country = country.x,
    region,
    alcohol_consumption_2022 = `2022`,
    suicide_rate
  ) %>%
  filter(!(is.na(alcohol_consumption_2022) | is.na(suicide_rate)))

Answering the Research Questions

Q1: Which countries have the highest and lowest alcohol consumption?

A bar chart of the top 10 countries provides a clear answer for the highest consumption. For a global overview, the static map shows consumption levels across all countries.

# --- Bar Chart ---
# Ten countries with the largest 2022 value, largest first.
top_10_countries_2022 <- alcohol_imputed %>%
  select(country, consumption = `2022`) %>%
  filter(!is.na(consumption)) %>%
  arrange(desc(consumption)) %>%
  slice_head(n = 10)

# Horizontal bars ordered by value, each labelled with its rounded figure
# just inside the bar end.
fig1_alcohol <- ggplot(
  top_10_countries_2022,
  aes(x = reorder(country, consumption), y = consumption)
) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = round(consumption, 1)),
    color = "white",
    fontface = "bold",
    hjust = 1.2,
    size = 4,
    family = base_font
  ) +
  coord_flip() +
  labs(
    title = "Top 10 Countries by Alcohol Consumption in 2022",
    x = NULL,
    y = "Consumption (Litres per capita)",
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(
    panel.grid.major.y = element_blank(),
    plot.caption = element_text(hjust = 0)
  )

ggsave(
  "plots/Q1_Bar_Top10_Consumption.png",
  plot = fig1_alcohol,
  width = 10,
  height = 7
)
fig1_alcohol

# --- Choropleth Map ---
# Attach the consumption table to the country polygons; polygons without a
# matching name keep NA and are drawn in the na.value colour.
map_data_apc <- left_join(
  world_map,
  standardize_country_names(alcohol_imputed),
  by = c("sovereignt" = "join_name")
)

map1_alcohol <- ggplot(data = map_data_apc) +
  # Polygon borders are controlled by `linewidth` (as in the line charts of
  # this report); `size` does not set the border width on geom_sf polygons.
  geom_sf(aes(fill = `2022`), color = "white", linewidth = 0.1) +
  scale_fill_gradient(low = "lightblue", high = "steelblue", na.value = "grey90") +
  labs(
    title = "Global Alcohol Consumption (2022)",
    fill = "Litres",
    caption = footnote_text
  ) +
  theme_void(base_family = base_font) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )

ggsave("plots/Q1_Map_Consumption.png", plot = map1_alcohol, width = 12, height = 7)
map1_alcohol

Q2: How does alcohol consumption evolve in time?

This line plot tracks alcohol consumption from 2000 to 2022 for the ten countries that were members of the EEC before 1986 (the EEC-10), showing different national trends.

# Prepare data
# Short names of the ten EEC members before 1986 (EEC-10).
eu_before_1986 <- c(
  "Belgium", "France", "Germany", "Italy", "Luxembourg",
  "Netherlands", "Denmark", "Ireland", "United Kingdom", "Greece"
)

# The raw `country` column holds WHO titles, which can be longer than the
# short names above (e.g. "United Kingdom of Great Britain and Northern
# Ireland"), so countries are selected on the standardized `join_name`.
# That short name is then used as the plot label.
europe_data_long <- alcohol_imputed %>%
  standardize_country_names() %>%
  filter(join_name %in% eu_before_1986) %>%
  mutate(country = join_name) %>%
  select(-join_name) %>%
  pivot_longer(
    cols = matches("^\\d{4}$"),
    names_to = "year",
    values_to = "alcohol_consumption"
  ) %>%
  mutate(year = as.numeric(year)) %>%
  filter(year <= 2022)

# Create the static ggplot chart
# One coloured line per country across the years.
fig2_alcohol <- ggplot(
  data = europe_data_long,
  mapping = aes(
    x = year,
    y = alcohol_consumption,
    group = country,
    color = country
  )
) +
  geom_line(linewidth = 1.2) +
  labs(
    x = "Year",
    y = "Alcohol Consumption (Litres)",
    color = NULL,
    title = "Evolution of Alcohol Consumption in Early EU Countries",
    subtitle = "Per capita consumption for EEC-10 members (2000-2022)",
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 14),
    plot.caption = element_text(hjust = 0, size = 10)
  )

# Save the plot
ggsave(
  "plots/Q2_Static_EU_Trends.png",
  plot = fig2_alcohol,
  width = 12,
  height = 8
)
fig2_alcohol

Q3: What is the prevalence of alcohol use disorder?

This map shows the percentage of the adult population with an alcohol use disorder in 2019. Darker shades of green indicate higher prevalence.

# Prepare data
# Keep the combined-sex rows and attach them to the country polygons.
disorder_map_data <- filter(alcohol_disorder_long, gender == "Both sexes")
map_data_disorder <- left_join(
  world_map,
  standardize_country_names(disorder_map_data),
  by = c("sovereignt" = "join_name")
)

# Create map
map3_disorder <- ggplot(data = map_data_disorder) +
  # Polygon borders are controlled by `linewidth`; `size` does not set the
  # border width on geom_sf polygons.
  geom_sf(aes(fill = alcohol_disorder), color = "white", linewidth = 0.1) +
  scale_fill_gradient(low = "lightgreen", high = "darkgreen", na.value = "grey90") +
  labs(
    title = "Global Prevalence of Alcohol Use Disorders (2019)",
    fill = "%",
    caption = footnote_text
  ) +
  theme_void(base_family = base_font) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )

ggsave("plots/Q3_Map_Disorder.png", plot = map3_disorder, width = 12, height = 7)
map3_disorder

Q4: How does prevalence of alcohol use disorder vary by gender?

This dumbbell chart highlights the gender gap in alcohol use disorders for the 15 countries with the largest difference between male and female prevalence.

# Prepare data
# One row per country with Male and Female columns, ranked by the absolute
# difference between them; the 15 largest gaps are kept.
dumbbell_data <- alcohol_disorder_long %>%
  filter(gender == "Male" | gender == "Female") %>%
  pivot_wider(
    id_cols = country,
    names_from = gender,
    values_from = alcohol_disorder
  ) %>%
  filter(!is.na(Male), !is.na(Female)) %>%
  mutate(gap = abs(Male - Female)) %>%
  arrange(desc(gap)) %>%
  slice_head(n = 15)

# Create static plot
# A grey connector per country with one dot for each sex at its ends.
q4_dumbbell_plot <- ggplot(dumbbell_data, aes(y = fct_reorder(country, gap))) +
  geom_segment(
    aes(x = Female, xend = Male),
    color = "grey",
    linewidth = 1.5
  ) +
  geom_point(aes(x = Female, color = "Female"), size = 4) +
  geom_point(aes(x = Male, color = "Male"), size = 4) +
  scale_color_manual(values = c("Female" = "orange", "Male" = "royalblue")) +
  scale_x_continuous(
    labels = percent_format(scale = 1),
    limits = c(0, NA)
  ) +
  labs(
    title = "Gender Gap in Alcohol Use Disorders (2019)",
    subtitle = "Top 15 countries with the largest gap",
    x = "Prevalence (%)",
    y = NULL,
    color = NULL,
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "top",
    plot.caption = element_text(hjust = 0)
  )

ggsave(
  "plots/Q4_Gender_Gap.png",
  plot = q4_dumbbell_plot,
  width = 10,
  height = 8
)
q4_dumbbell_plot

Q5: How many former drinkers are there per country?

This map shows the percentage of the adult population who are former drinkers as of 2019.

# Prepare data
# Attach the former-drinker percentages to the country polygons.
map_data_former <- left_join(
  world_map,
  standardize_country_names(former_drinkers_data),
  by = c("sovereignt" = "join_name")
)

# Create map
map5_former <- ggplot(data = map_data_former) +
  # Polygon borders are controlled by `linewidth`; `size` does not set the
  # border width on geom_sf polygons.
  geom_sf(aes(fill = former_drinkers), color = "white", linewidth = 0.1) +
  scale_fill_gradient(low = "#e6f5ff", high = "#005a9e", na.value = "grey90") +
  labs(
    title = "Global Prevalence of Former Drinkers (2019)",
    fill = "%",
    caption = footnote_text
  ) +
  theme_void(base_family = base_font) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )

ggsave("plots/Q5_Map_Former_Drinkers.png", plot = map5_former, width = 12, height = 7)
map5_former

Q6: What is the prevalence of mental health issues (suicide rates) in the world?

This map visualizes the geographical distribution of suicide rates across the globe for 2019. A yellow-to-red color scale is used, where red indicates higher, more alarming rates.

# Prepare data
# Attach the suicide rates to the country polygons.
map_data_suicide <- left_join(
  world_map,
  standardize_country_names(suicide_data),
  by = c("sovereignt" = "join_name")
)

# Create map with a yellow-to-red scale (red = higher rate)
map6_suicide <- ggplot(data = map_data_suicide) +
  # Polygon borders are controlled by `linewidth`; `size` does not set the
  # border width on geom_sf polygons.
  geom_sf(aes(fill = suicide_rate), color = "white", linewidth = 0.1) +
  scale_fill_gradient(low = "yellow", high = "red", na.value = "grey90") +
  labs(
    title = "Global Suicide Rates per 100,000 Population (2019)",
    fill = "Rate",
    caption = footnote_text
  ) +
  theme_void(base_family = base_font) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  )

ggsave("plots/Q6_Map_Suicide.png", plot = map6_suicide, width = 12, height = 7)
map6_suicide

Q7: What is the statistical relationship between alcohol and mental health?

These interactive scatter plots investigate statistical relationships. Each point is a country, and a fitted linear trend line summarizes the overall association; its strength is reported on each plot as R-squared or Pearson's r.

Consumption vs. Male Alcohol Use Disorders (Interactive)

# Prepare data
# Male disorder prevalence matched to 2022 consumption by standardized name;
# countries missing either figure are dropped.
disorder_male <- alcohol_disorder_long %>%
  filter(gender == "Male") %>%
  standardize_country_names()
consumption_data <- standardize_country_names(alcohol_imputed)
alcohol_male <- disorder_male %>%
  inner_join(consumption_data, by = "join_name") %>%
  select(
    country = country.x,
    region = region.x,
    alcohol_disorder,
    consumption = `2022`
  ) %>%
  filter(!(is.na(consumption) | is.na(alcohol_disorder)))

# Calculate model
# Simple linear regression; only its R-squared is shown on the plot.
lm_model <- lm(alcohol_disorder ~ consumption, data = alcohol_male)
r_squared <- summary(lm_model)[["r.squared"]]
equation <- sprintf("R-squared = %.2f", r_squared)

# Create static plot
# `text` is not a ggplot2 aesthetic; it is carried along for the plotly tooltip.
# NOTE(review): confirm the Inf-anchored annotation survives ggplotly().
p7a <- ggplot(alcohol_male, aes(x = consumption, y = alcohol_disorder)) +
  geom_point(
    aes(color = region, text = paste("Country:", country)),
    alpha = 0.7
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "grey30") +
  annotate(
    "text",
    x = Inf,
    y = -Inf,
    label = equation,
    hjust = 1.1,
    vjust = -1.1,
    size = 4,
    family = base_font
  ) +
  scale_color_manual(
    values = c(
      "Europe" = "royalblue",
      "Eastern Mediterranean" = "burlywood",
      "Africa" = "forestgreen",
      "Americas" = "firebrick",
      "Western Pacific" = "orchid",
      "South-East Asia" = "orange"
    )
  ) +
  labs(
    title = "Consumption vs. Male Alcohol Use Disorders",
    x = "Alcohol Consumption (Litres)",
    y = "Disorder Prevalence (%)",
    color = "WHO Region",
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )

# Convert to interactive
q7a_plotly <- ggplotly(p7a, tooltip = "text")
saveWidget(q7a_plotly, "plots/Q7a_Interactive_Disorder_Scatter.html")
q7a_plotly

Consumption vs. Suicide Rates (Interactive)

# Calculate correlation
# Pearson correlation between 2022 consumption and suicide rate across the
# countries in master_data.
correlation <- cor(
  master_data[["alcohol_consumption_2022"]],
  master_data[["suicide_rate"]]
)
corr_text <- sprintf("Pearson's r = %.2f", correlation)

# Create static plot for conversion
# `text` is not a ggplot2 aesthetic; it is carried along for the plotly tooltip.
p7b <- ggplot(
  master_data,
  aes(x = alcohol_consumption_2022, y = suicide_rate)
) +
  geom_point(
    aes(color = region, text = paste("Country:", country)),
    alpha = 0.7,
    size = 3
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "grey30") +
  annotate(
    "text",
    x = Inf,
    y = -Inf,
    label = corr_text,
    hjust = 1.1,
    vjust = -1.1,
    size = 4,
    family = base_font
  ) +
  scale_color_manual(
    values = c(
      "Europe" = "royalblue",
      "Eastern Mediterranean" = "burlywood",
      "Africa" = "forestgreen",
      "Americas" = "firebrick",
      "Western Pacific" = "orchid",
      "South-East Asia" = "orange"
    )
  ) +
  labs(
    title = "Alcohol Consumption vs. Suicide Rates",
    x = "Alcohol Consumption (Litres)",
    y = "Suicide Rate (per 100,000)",
    color = "WHO Region",
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )

# Convert to interactive and save
q7b_plotly <- ggplotly(p7b, tooltip = "text")
saveWidget(q7b_plotly, "plots/Q7b_Interactive_Correlation_Scatter.html")
q7b_plotly

Alcohol Use Disorders vs. Suicide Rates (Interactive)

# Prepare data for this specific plot
# Combined-sex disorder prevalence matched to suicide rates by standardized
# name; countries missing either figure are dropped.
disorder_both_sexes <- alcohol_disorder_long %>%
  filter(gender == "Both sexes") %>%
  standardize_country_names()
suicide_std <- standardize_country_names(suicide_data)
disorder_suicide_data <- disorder_both_sexes %>%
  inner_join(suicide_std, by = "join_name") %>%
  select(country = country.x, region, alcohol_disorder, suicide_rate) %>%
  filter(!(is.na(alcohol_disorder) | is.na(suicide_rate)))

# Calculate correlation
correlation_ds <- cor(
  disorder_suicide_data[["alcohol_disorder"]],
  disorder_suicide_data[["suicide_rate"]]
)
corr_text_ds <- sprintf("Pearson's r = %.2f", correlation_ds)

# Create static plot
# `text` is not a ggplot2 aesthetic; it is carried along for the plotly tooltip.
p7c <- ggplot(
  disorder_suicide_data,
  aes(x = alcohol_disorder, y = suicide_rate)
) +
  geom_point(
    aes(color = region, text = paste("Country:", country)),
    alpha = 0.7
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "grey30") +
  annotate(
    "text",
    x = Inf,
    y = -Inf,
    label = corr_text_ds,
    hjust = 1.1,
    vjust = -1.1,
    size = 4,
    family = base_font
  ) +
  scale_color_manual(
    values = c(
      "Europe" = "royalblue",
      "Eastern Mediterranean" = "burlywood",
      "Africa" = "forestgreen",
      "Americas" = "firebrick",
      "Western Pacific" = "orchid",
      "South-East Asia" = "orange"
    )
  ) +
  labs(
    title = "Alcohol Use Disorders vs. Suicide Rates",
    x = "Disorder Prevalence (%)",
    y = "Suicide Rate (per 100,000)",
    color = "WHO Region",
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(
    legend.position = "bottom",
    plot.caption = element_text(hjust = 0),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  )

# Convert to interactive
q7c_plotly <- ggplotly(p7c, tooltip = "text")
saveWidget(q7c_plotly, "plots/Q7c_Interactive_Disorder_Suicide_Scatter.html")
q7c_plotly

Q8: Which countries have both the highest alcohol problem and suicide rate?

This quadrant chart identifies countries with above-average rates for both alcohol consumption and suicide. These are highlighted in the top-right quadrant.

# Calculate global averages
# Unweighted means over the countries in master_data; a country is flagged
# when it lies strictly above both.
avg_consumption <- mean(master_data[["alcohol_consumption_2022"]])
avg_suicide <- mean(master_data[["suicide_rate"]])
high_risk_countries <- master_data %>%
  filter(
    alcohol_consumption_2022 > avg_consumption,
    suicide_rate > avg_suicide
  )

# Create the quadrant plot
# Dashed lines mark the two means; flagged countries are redrawn in red and
# labelled on top of the grey point cloud.
q8_quadrant <- ggplot(
  master_data,
  aes(x = alcohol_consumption_2022, y = suicide_rate)
) +
  geom_hline(yintercept = avg_suicide, linetype = "dashed", color = "grey50") +
  geom_vline(xintercept = avg_consumption, linetype = "dashed", color = "grey50") +
  geom_point(color = "grey70", alpha = 0.6) +
  geom_point(data = high_risk_countries, color = "firebrick", size = 4) +
  geom_text_repel(
    data = high_risk_countries,
    aes(label = country),
    size = 3.5,
    family = base_font,
    max.overlaps = 15
  ) +
  annotate(
    "text",
    x = Inf,
    y = Inf,
    label = "High Consumption,\nHigh Suicide Rate",
    hjust = 1.1,
    vjust = 1.1,
    color = "firebrick",
    fontface = "bold",
    family = base_font
  ) +
  labs(
    title = "Identifying Countries with High Rates of Consumption and Suicide",
    x = "Alcohol Consumption (Litres)",
    y = "Suicide Rate (per 100,000)",
    caption = footnote_text
  ) +
  theme_minimal(base_family = base_font) +
  theme(plot.caption = element_text(hjust = 0))

ggsave(
  "plots/Q8_Quadrant_Chart.png",
  plot = q8_quadrant,
  width = 10,
  height = 8
)
q8_quadrant