# Installing the required packages
#install.packages(c("tidyverse", "lubridate", "scales", "plotly"))

# Importing the libraries
library(tidyverse)
library(plotly)
library(scales)
library(ggplot2)
# Importing the Data
# 1. Annual freshwater withdrawals
#     Data Column - Annual freshwater withdrawals: measured in cubic meters
freshwater <- read.csv(
  "https://ourworldindata.org/grapher/annual-freshwater-withdrawals.csv?v=1&csvType=full&useColumnShortNames=false"
)

# 2. Agriculture share of water withdrawals
#     Data Column - Annual freshwater withdrawals: measured in cubic meters
agri_water <- read.csv(
  "https://ourworldindata.org/grapher/agricultural-water-as-a-share-of-total-water-withdrawals.csv?v=1&csvType=full&useColumnShortNames=false"
)

# 3. Water stress / withdrawals as share of internal resources
#     Data Column - Annual freshwater withdrawals: measured in cubic meters
water_stress <- read.csv(
  "https://ourworldindata.org/grapher/freshwater-withdrawals-as-a-share-of-internal-resources.csv?v=1&csvType=full&useColumnShortNames=false"
)

# Data Preprocessing
# 1. freshwater
freshwater_clean <- freshwater %>%
  rename(
    country = Entity,
    code = Code,
    year = Year,
    freshwater_withdrawals = `Annual.freshwater.withdrawals`
  ) %>%
  mutate(
    freshwater_withdrawals_billion = freshwater_withdrawals / 1e9
  )

# 2. agri_water
agri_clean <- agri_water %>%
  rename(
    country = Entity,
    code = Code,
    year = Year,
    agriculture_share = 4
  ) %>%
  filter(!is.na(code), !is.na(agriculture_share))

# 3. water_stress
stress_clean <- water_stress %>%
  rename(
    country = Entity,
    code = Code,
    year = Year,
    water_stress = 4
  ) %>%
  filter(!is.na(code), !is.na(water_stress))

# Getting the Latest Available Year for Country-level Comparison
latest_freshwater <- freshwater_clean %>%
  group_by(country, code) %>%
  filter(year == max(year, na.rm = TRUE)) %>%
  ungroup()

latest_agri <- agri_clean %>%
  group_by(country, code) %>%
  filter(year == max(year, na.rm = TRUE)) %>%
  ungroup()

latest_stress <- stress_clean %>%
  group_by(country, code) %>%
  filter(year == max(year, na.rm = TRUE)) %>%
  ungroup()

# Codes to Exclude
excluded_codes <- c(
  "WB_ECA", "OWID_EU27", "OWID_HIC", "WB_LAC",
  "OWID_LIC", "OWID_LMC", "WB_MENAP", "WB_SA",
  "WB_NA", "WB_SSA", "OWID_UMC", "OWID_WRL",
  "WB_EAP"
)
# CHART 1
global_freshwater <- freshwater_clean %>%
  filter(country == "World") %>%
  arrange(year)

p1 <- ggplot(global_freshwater, aes(x = year)) +
  geom_line(
    aes(
      y = freshwater_withdrawals_billion,
      text = paste(
        "Year:", year,
        "<br>Freshwater Withdrawals:",
        round(freshwater_withdrawals_billion, 1),
        "billion m³"
      ),
      group = 1
    ),
    colour = "#0072B2",
    linewidth = 1.3
  ) +
  labs(
    x = "Year",
    y = "Freshwater Withdrawals (billion m³)"
  ) +
  theme_minimal(base_size = 13)
## Warning in geom_line(aes(y = freshwater_withdrawals_billion, text =
## paste("Year:", : Ignoring unknown aesthetics: text
plotly::ggplotly(p1, tooltip = "text") %>%
  layout(
    height = 700,
    title = list(
      text = paste(
        "Annual Global Freshwater Withdrawals Over Time",
        "<br><sup>Global freshwater withdrawals continue to rise</sup>"
      )
    ),
    margin = list(t = 90, r = 70, b = 80, l = 70),
    showlegend = FALSE
  )
## Warning: Specifying width/height in layout() is now deprecated.
## Please specify in ggplotly() or plot_ly()
top_water_users <- latest_freshwater %>%
  filter(!code %in% excluded_codes) %>%
  arrange(desc(freshwater_withdrawals_billion)) %>%
  slice_head(n = 15) %>%
  mutate(code = ifelse(is.na(code), country, code))

p2 <- ggplot(top_water_users, aes(
  x = reorder(code, freshwater_withdrawals_billion),
  y = freshwater_withdrawals_billion,
  fill = freshwater_withdrawals_billion,
  text = paste(
    "Country:", country,
    "<br>Code:", code,
    "<br>Year:", year,
    "<br>Withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³"
  )
)) +
  geom_col(width = 0.75) +
  coord_flip() +
  scale_fill_gradient(
    low = "#BFDCE5",
    high = "#0072B2"
  ) +
  labs(
    x = "Country Code",
    y = "Freshwater Withdrawals (billion m³)"
  ) +
  theme_minimal(base_size = 11) +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  )

plotly::ggplotly(p2, tooltip = "text") %>%
  layout(
    title = list(
      text = paste(
        "Top 15 Countries by Freshwater Withdrawals",
        "<br><sup>Freshwater use is concentrated in a few countries</sup>"
      )
    ),
    margin = list(t = 90, r = 70, b = 80, l = 70),
    showlegend = FALSE
  )
chart3_data <- latest_freshwater %>%
  select(country, code, freshwater_withdrawals_billion) %>%
  inner_join(
    latest_agri %>%
      select(country, code, agriculture_share),
    by = c("country", "code")
  ) %>%
  filter(
    !code %in% excluded_codes,
    freshwater_withdrawals_billion > 0,
    agriculture_share > 0
  )

p3 <- ggplot(chart3_data, aes(
  x = freshwater_withdrawals_billion,
  y = agriculture_share,
  text = paste(
    "Country:", country,
    "<br>Code:", code,
    "<br>Freshwater withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³",
    "<br>Agriculture share:", round(agriculture_share, 1), "%"
  )
)) +
  geom_point(
    aes(
      colour = agriculture_share,
      size = freshwater_withdrawals_billion
    ),
    alpha = 0.8
  ) +
  scale_colour_gradient(
    low = "#D6EEF5",
    high = "#005F73"
  ) +
  scale_size_continuous(range = c(3, 12)) +
  scale_x_continuous(labels = comma) +
  labs(
    x = "Freshwater Withdrawals (billion m³)",
    y = "Agricultural Share of Freshwater Withdrawals (%)"
  ) +
  theme_minimal(base_size = 11)

plotly::ggplotly(p3, tooltip = "text") %>%
  layout(
    title = list(
      text = paste(
        "Freshwater Withdrawals Versus Agricultural Dependence",
        "<br><sup>Countries vary widely in how much freshwater use goes to agriculture</sup>"
      )
    ),
    margin = list(t = 90, r = 70, b = 80, l = 70),
    showlegend = FALSE
  )
chart4_data <- latest_stress %>%
  select(country, code, water_stress) %>%
  inner_join(
    latest_freshwater %>%
      select(country, code, freshwater_withdrawals_billion),
    by = c("country", "code")
  ) %>%
  filter(
    !code %in% excluded_codes,
    freshwater_withdrawals_billion > 0,
    water_stress >= 0
  )

p4 <- ggplot(chart4_data, aes(
  x = freshwater_withdrawals_billion,
  y = water_stress,
  text = paste(
    "Country:", country,
    "<br>Code:", code,
    "<br>Freshwater Withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³",
    "<br>Water Stress:", round(water_stress, 1), "%"
  )
)) +
  geom_point(
    aes(
      colour = water_stress,
      size = freshwater_withdrawals_billion
    ),
    alpha = 0.8
  ) +
  scale_colour_gradient(
    low = "#BFE8F7",
    high = "#D7191C"
  ) +
  scale_size_continuous(range = c(3, 12)) +
  scale_x_continuous(labels = comma) +
  labs(
    x = "Freshwater Withdrawals (billion m³)",
    y = "Water Stress (%)"
  ) +
  theme_minimal(base_size = 11)

plotly::ggplotly(p4, tooltip = "text") %>%
  layout(
    title = list(
      text = paste(
        "Freshwater Withdrawals and Water Stress",
        "<br><sup>High withdrawal countries can face very different levels of water stress</sup>"
      )
    ),
    margin = list(t = 90, r = 70, b = 80, l = 70),
    showlegend = FALSE
  )
combined_latest <- latest_freshwater %>%
  select(country, code, freshwater_withdrawals_billion) %>%
  inner_join(
    latest_agri %>%
      select(country, code, agriculture_share),
    by = c("country", "code")
  ) %>%
  inner_join(
    latest_stress %>%
      select(country, code, water_stress),
    by = c("country", "code")
  ) %>%
  filter(
    !code %in% excluded_codes,
    freshwater_withdrawals_billion > 0,
    agriculture_share >= 0,
    water_stress >= 0
  )

p5 <- ggplot(combined_latest, aes(
  x = agriculture_share,
  y = water_stress,
  size = freshwater_withdrawals_billion,
  colour = water_stress,
  text = paste(
    "Country:", country,
    "<br>Code:", code,
    "<br>Agriculture share:", round(agriculture_share, 1), "%",
    "<br>Water stress:", round(water_stress, 1), "%",
    "<br>Freshwater withdrawals:", round(freshwater_withdrawals_billion, 1), "billion m³"
  )
)) +
  geom_point(alpha = 0.75) +
  scale_colour_gradient(
    low = "#2C7BB6",
    high = "#D7191C"
  ) +
  scale_size_continuous(range = c(3, 18)) +
  labs(
    x = "Agricultural Share of Freshwater Withdrawals (%)",
    y = "Water Stress (%)"
  ) +
  theme_minimal(base_size = 11)

plotly::ggplotly(p5, tooltip = "text") %>%
  layout(
    title = list(
      text = paste(
        "Where Agricultural Dependence and Water Stress Overlap",
        "<br><sup>Bubble size shows total freshwater withdrawals</sup>"
      )
    ),
    margin = list(t = 90, r = 70, b = 80, l = 70),
    showlegend = FALSE
  )