# Installing the required packages
#install.packages(c("tidyverse", "lubridate", "scales", "plotly"))
# Importing the libraries
library(tidyverse)
library(plotly)
library(scales)
library(ggplot2)
# Importing the Data
# 1. Annual freshwater withdrawals
# Data Column - Annual freshwater withdrawals: measured in cubic meters
freshwater <- read.csv(
"https://ourworldindata.org/grapher/annual-freshwater-withdrawals.csv?v=1&csvType=full&useColumnShortNames=false"
)
# 2. Agriculture share of water withdrawals
# Data Column - Annual freshwater withdrawals: measured in cubic meters
agri_water <- read.csv(
"https://ourworldindata.org/grapher/agricultural-water-as-a-share-of-total-water-withdrawals.csv?v=1&csvType=full&useColumnShortNames=false"
)
# 3. Water stress / withdrawals as share of internal resources
# Data Column - Annual freshwater withdrawals: measured in cubic meters
water_stress <- read.csv(
"https://ourworldindata.org/grapher/freshwater-withdrawals-as-a-share-of-internal-resources.csv?v=1&csvType=full&useColumnShortNames=false"
)
# Data Preprocessing
# 1. freshwater
freshwater_clean <- freshwater %>%
rename(
country = Entity,
code = Code,
year = Year,
freshwater_withdrawals = `Annual.freshwater.withdrawals`
) %>%
mutate(
freshwater_withdrawals_billion = freshwater_withdrawals / 1e9
)
# 2. agri_water
agri_clean <- agri_water %>%
rename(
country = Entity,
code = Code,
year = Year,
agriculture_share = 4
) %>%
filter(!is.na(code), !is.na(agriculture_share))
# 3. water_stress
stress_clean <- water_stress %>%
rename(
country = Entity,
code = Code,
year = Year,
water_stress = 4
) %>%
filter(!is.na(code), !is.na(water_stress))
# Getting the Latest Available Year for Country-level Comparison
latest_freshwater <- freshwater_clean %>%
group_by(country, code) %>%
filter(year == max(year, na.rm = TRUE)) %>%
ungroup()
latest_agri <- agri_clean %>%
group_by(country, code) %>%
filter(year == max(year, na.rm = TRUE)) %>%
ungroup()
latest_stress <- stress_clean %>%
group_by(country, code) %>%
filter(year == max(year, na.rm = TRUE)) %>%
ungroup()
# Codes to Exclude
excluded_codes <- c(
"WB_ECA", "OWID_EU27", "OWID_HIC", "WB_LAC",
"OWID_LIC", "OWID_LMC", "WB_MENAP", "WB_SA",
"WB_NA", "WB_SSA", "OWID_UMC", "OWID_WRL",
"WB_EAP"
)
# CHART 1
global_freshwater <- freshwater_clean %>%
filter(country == "World") %>%
arrange(year)
p1 <- ggplot(global_freshwater, aes(x = year)) +
geom_line(
aes(
y = freshwater_withdrawals_billion,
text = paste(
"Year:", year,
"<br>Freshwater Withdrawals:",
round(freshwater_withdrawals_billion, 1),
"billion m³"
),
group = 1
),
colour = "#0072B2",
linewidth = 1.3
) +
labs(
x = "Year",
y = "Freshwater Withdrawals (billion m³)"
) +
theme_minimal(base_size = 13)
## Warning in geom_line(aes(y = freshwater_withdrawals_billion, text =
## paste("Year:", : Ignoring unknown aesthetics: text
plotly::ggplotly(p1, tooltip = "text") %>%
layout(
height = 700,
title = list(
text = paste(
"Annual Global Freshwater Withdrawals Over Time",
"<br><sup>Global freshwater withdrawals continue to rise</sup>"
)
),
margin = list(t = 90, r = 70, b = 80, l = 70),
showlegend = FALSE
)
## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()
top_water_users <- latest_freshwater %>%
filter(!code %in% excluded_codes) %>%
arrange(desc(freshwater_withdrawals_billion)) %>%
slice_head(n = 15) %>%
mutate(code = ifelse(is.na(code), country, code))
p2 <- ggplot(top_water_users, aes(
x = reorder(code, freshwater_withdrawals_billion),
y = freshwater_withdrawals_billion,
fill = freshwater_withdrawals_billion,
text = paste(
"Country:", country,
"<br>Code:", code,
"<br>Year:", year,
"<br>Withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³"
)
)) +
geom_col(width = 0.75) +
coord_flip() +
scale_fill_gradient(
low = "#BFDCE5",
high = "#0072B2"
) +
labs(
x = "Country Code",
y = "Freshwater Withdrawals (billion m³)"
) +
theme_minimal(base_size = 11) +
theme(
legend.position = "none",
panel.grid.major.y = element_blank()
)
plotly::ggplotly(p2, tooltip = "text") %>%
layout(
title = list(
text = paste(
"Top 15 Countries by Freshwater Withdrawals",
"<br><sup>Freshwater use is concentrated in a few countries</sup>"
)
),
margin = list(t = 90, r = 70, b = 80, l = 70),
showlegend = FALSE
)
chart3_data <- latest_freshwater %>%
select(country, code, freshwater_withdrawals_billion) %>%
inner_join(
latest_agri %>%
select(country, code, agriculture_share),
by = c("country", "code")
) %>%
filter(
!code %in% excluded_codes,
freshwater_withdrawals_billion > 0,
agriculture_share > 0
)
p3 <- ggplot(chart3_data, aes(
x = freshwater_withdrawals_billion,
y = agriculture_share,
text = paste(
"Country:", country,
"<br>Code:", code,
"<br>Freshwater withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³",
"<br>Agriculture share:", round(agriculture_share, 1), "%"
)
)) +
geom_point(
aes(
colour = agriculture_share,
size = freshwater_withdrawals_billion
),
alpha = 0.8
) +
scale_colour_gradient(
low = "#D6EEF5",
high = "#005F73"
) +
scale_size_continuous(range = c(3, 12)) +
scale_x_continuous(labels = comma) +
labs(
x = "Freshwater Withdrawals (billion m³)",
y = "Agricultural Share of Freshwater Withdrawals (%)"
) +
theme_minimal(base_size = 11)
plotly::ggplotly(p3, tooltip = "text") %>%
layout(
title = list(
text = paste(
"Freshwater Withdrawals Versus Agricultural Dependence",
"<br><sup>Countries vary widely in how much freshwater use goes to agriculture</sup>"
)
),
margin = list(t = 90, r = 70, b = 80, l = 70),
showlegend = FALSE
)
chart4_data <- latest_stress %>%
select(country, code, water_stress) %>%
inner_join(
latest_freshwater %>%
select(country, code, freshwater_withdrawals_billion),
by = c("country", "code")
) %>%
filter(
!code %in% excluded_codes,
freshwater_withdrawals_billion > 0,
water_stress >= 0
)
p4 <- ggplot(chart4_data, aes(
x = freshwater_withdrawals_billion,
y = water_stress,
text = paste(
"Country:", country,
"<br>Code:", code,
"<br>Freshwater Withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³",
"<br>Water Stress:", round(water_stress, 1), "%"
)
)) +
geom_point(
aes(
colour = water_stress,
size = freshwater_withdrawals_billion
),
alpha = 0.8
) +
scale_colour_gradient(
low = "#BFE8F7",
high = "#D7191C"
) +
scale_size_continuous(range = c(3, 12)) +
scale_x_continuous(labels = comma) +
labs(
x = "Freshwater Withdrawals (billion m³)",
y = "Water Stress (%)"
) +
theme_minimal(base_size = 11)
plotly::ggplotly(p4, tooltip = "text") %>%
layout(
title = list(
text = paste(
"Freshwater Withdrawals and Water Stress",
"<br><sup>High withdrawal countries can face very different levels of water stress</sup>"
)
),
margin = list(t = 90, r = 70, b = 80, l = 70),
showlegend = FALSE
)
combined_latest <- latest_freshwater %>%
select(country, code, freshwater_withdrawals_billion) %>%
inner_join(
latest_agri %>%
select(country, code, agriculture_share),
by = c("country", "code")
) %>%
inner_join(
latest_stress %>%
select(country, code, water_stress),
by = c("country", "code")
) %>%
filter(
!code %in% excluded_codes,
freshwater_withdrawals_billion > 0,
agriculture_share >= 0,
water_stress >= 0
)
p5 <- ggplot(combined_latest, aes(
x = agriculture_share,
y = water_stress,
size = freshwater_withdrawals_billion,
colour = water_stress,
text = paste(
"Country:", country,
"<br>Code:", code,
"<br>Agriculture share:", round(agriculture_share, 1), "%",
"<br>Water stress:", round(water_stress, 1), "%",
"<br>Freshwater withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³"
)
)) +
geom_point(alpha = 0.75) +
scale_colour_gradient(
low = "#2C7BB6",
high = "#D7191C"
) +
scale_size_continuous(range = c(3, 18)) +
labs(
x = "Agricultural Share of Freshwater Withdrawals (%)",
y = "Water Stress (%)"
) +
theme_minimal(base_size = 11)
plotly::ggplotly(p5, tooltip = "text") %>%
layout(
title = list(
text = paste(
"Where Agricultural Dependence and Water Stress Overlap",
"<br><sup>Bubble size shows total freshwater withdrawals</sup>"
)
),
margin = list(t = 90, r = 70, b = 80, l = 70),
showlegend = FALSE
)