library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
library(here)
library(slider)
library(extRemes)
Determining whether 21st century (2000-2024) droughts are unprecedented and quantifying their “extremeness” depends on the reference. Here, we determine these aspects based on alternative choices of the reference:
The magnitude of 21st century droughts are estimated based on ERA5 climate reanalysis data for years 2000-2024.
Interpreting the objects into nice and tidy data frames.
list_era5 <- read_rds(here("data/regional_results_ERA5Land.rds"))
nregions <- length(list_era5)
make_df_era5 <- function(vec){
tibble(
year = 1970:2024,
pcwd = vec
)
}
df_era5 <- purrr::map(
list_era5,
~make_df_era5(.)
) |>
bind_rows(.id = "region")
extract_ensemble_number <- function(char){
char |>
str_remove("_tidy") |>
str_remove("m0") |>
as.integer()
}
make_df_modesim <- function(df){
as_tibble(df) |>
# not sure this is correct. Better to take years forward as part of the workflow.
mutate(year = 1420:2009) |>
pivot_longer(
cols = ends_with("_tidy"),
names_to = "member",
values_to = "pcwd",
names_transform = extract_ensemble_number
)
}
list_modesim <- read_rds(here("data/regional_results_ModESim_bc.rds"))
df_modesim <- purrr::map(
list_modesim,
~make_df_modesim(.)
) |>
bind_rows(.id = "region")
Create objects that contain records for different references in different datasets.
df_record_era5 <- df_era5 |>
filter(year %in% 1975:1999) |>
group_by(region) |>
summarise(pcwd_max0 = max(pcwd)) |>
left_join(
df_era5 |>
filter(year %in% 2000:2024) |>
group_by(region) |>
summarise(pcwd_max1 = max(pcwd)),
by = join_by(region)
)
Plot.
tmp <- df_record_era5 |>
mutate(is_greater = pcwd_max0 < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg1 <- tmp |>
ggplot(aes(pcwd_max0, pcwd_max1, color = is_greater)) +
geom_point(size = 0.75, show.legend = FALSE) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ERA5 1975-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = "Record annual PCWD"
)
gg1
tmp |>
ggplot(aes(pcwd_max1 / pcwd_max0)) +
geom_histogram(color = "black", fill = "grey", bins = 10) +
geom_vline(xintercept = 1.0, linetype = "dotted") +
theme_classic() +
labs(
x = "Ratio (unitless)",
title = "Amplification"
)
Record PCWD by region in ERA5, years 2000-2024 vs. in years 1975-1999 and all ensemble members in ModE-Sim.
df_record_era5_modesim_recent <- df_modesim |>
filter(year %in% 1975:1999) |>
group_by(region, member) |>
summarise(pcwd_max0 = max(pcwd)) |>
ungroup() |>
group_by(region) |>
summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |>
left_join(
df_era5 |>
filter(year %in% 2000:2024) |>
group_by(region) |>
summarise(pcwd_max1 = max(pcwd)),
by = join_by(region)
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and member.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, member))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
Plot.
tmp <- df_record_era5_modesim_recent |>
mutate(is_greater = pcwd_max0_right < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg2 <- tmp |>
ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1975-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg2
Record PCWD by region in ERA5, years 2000-2024 vs. in years 1850-1999 and all ensemble members in ModE-Sim.
df_record_era5_modesim_historical <- df_modesim |>
filter(year %in% 1850:1999) |>
group_by(region, member) |>
summarise(pcwd_max0 = max(pcwd)) |>
ungroup() |>
group_by(region) |>
summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |>
left_join(
df_era5 |>
filter(year %in% 2000:2024) |>
group_by(region) |>
summarise(pcwd_max1 = max(pcwd)),
by = join_by(region)
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and member.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, member))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
Plot.
tmp <- df_record_era5_modesim_historical |>
mutate(is_greater = pcwd_max0_right < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg3 <- tmp |>
ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1850-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg3
Record PCWD by region in ERA5, years 2000-2024 vs. in years 1850-1999 and all ensemble members in ModE-Sim.
df_record_era5_modesim_multicent <- df_modesim |>
filter(year %in% 1420:1999) |>
group_by(region, member) |>
summarise(pcwd_max0 = max(pcwd)) |>
ungroup() |>
group_by(region) |>
summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |>
left_join(
df_era5 |>
filter(year %in% 2000:2024) |>
group_by(region) |>
summarise(pcwd_max1 = max(pcwd)),
by = join_by(region)
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and member.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, member))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
Plot.
tmp <- df_record_era5_modesim_multicent |>
mutate(is_greater = pcwd_max0_right < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg4 <- tmp |>
ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1420-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg4
row1 <- cowplot::plot_grid(
gg1, gg2, gg3, gg4,
nrow = 1,
align = "v",
labels = letters[1:4]
)
ggsave(here("fig/annual_records_references.pdf"), plot = row1, width = 15, height = 4)
Calculate rolling means of all data, separated by region (ModE-Sim also separated by ensemble member).
df_era5_25 <- df_era5 |>
pivot_wider(values_from = "pcwd", names_from = "region") |>
mutate(across(-year, ~slide_dbl(.x, mean, .before = 24, .complete = TRUE)))
df_modesim_25 <- df_modesim |>
group_by(region, member) |>
nest() |>
mutate(
data = map(data, ~ .x |>
mutate(
roll_mean = slide_dbl(
.x$pcwd,
mean,
.before = 24, # 2 before = 3-point window (includes current)
.complete = TRUE # require full window, so first 2 will be NA
)
)
)
)
df_record25_era5 <- df_era5_25 |>
filter(year %in% c(1999, 2024)) |>
pivot_longer(cols = -year, values_to = "pcwd", names_to = "region") |>
pivot_wider(values_from = "pcwd", names_from = "year") |>
rename(pcwd_max0 = "1999", pcwd_max1 = "2024")
Plot.
tmp <- df_record25_era5 |>
mutate(is_greater = pcwd_max0 < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg5 <- tmp |>
ggplot(aes(pcwd_max0, pcwd_max1, color = is_greater)) +
geom_point(size = 0.75, show.legend = FALSE) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ERA5 1975-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = "Record 25-year mean PCWD"
)
gg5
get_record <- function(df){
df |>
filter(year %in% 1975:1999) |>
filter(roll_mean == max(roll_mean)) |>
pull(roll_mean)
}
df_record25_era5_modesim_recent <- df_modesim_25 |>
mutate(pcwd_max0 = purrr::map_dbl(data, ~get_record(.))) |>
select(-data) |>
ungroup() |>
group_by(region) |>
summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |>
left_join(
df_record25_era5 |>
select(region, pcwd_max1),
by = join_by(region)
)
Plot.
tmp <- df_record25_era5_modesim_recent |>
mutate(is_greater = pcwd_max0_right < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg6 <- tmp |>
ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1975-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg6
get_record <- function(df){
df |>
filter(year %in% 1850:1999) |>
filter(roll_mean == max(roll_mean)) |>
pull(roll_mean)
}
df_record25_era5_modesim_historical <- df_modesim_25 |>
mutate(pcwd_max0 = purrr::map_dbl(data, ~get_record(.))) |>
select(-data) |>
ungroup() |>
group_by(region) |>
summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |>
left_join(
df_record25_era5 |>
select(region, pcwd_max1),
by = join_by(region)
)
Plot.
tmp <- df_record25_era5_modesim_historical |>
mutate(is_greater = pcwd_max0_right < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg7 <- tmp |>
ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1850-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg7
get_record <- function(df){
df |>
filter(year %in% 1420:1999) |>
filter(roll_mean == max(roll_mean, na.rm = TRUE)) |>
pull(roll_mean)
}
df_record25_era5_modesim_multicent <- df_modesim_25 |>
mutate(pcwd_max0 = purrr::map_dbl(data, ~get_record(.))) |>
select(-data) |>
ungroup() |>
group_by(region) |>
summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |>
left_join(
df_record25_era5 |>
select(region, pcwd_max1),
by = join_by(region)
)
Plot.
tmp <- df_record25_era5_modesim_multicent |>
mutate(is_greater = pcwd_max0_right < pcwd_max1)
n_greater <- tmp |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg8 <- tmp |>
ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1420-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg8
row2 <- cowplot::plot_grid(
gg5, gg6, gg7, gg8,
nrow = 1,
align = "v",
labels = letters[5:8]
)
ggsave(here("fig/30yrmeans_records_references.pdf"), plot = row2, width = 15, height = 4)
Calculate rolling 30-year extremes of all data, separated by region (ModE-Sim also separated by ensemble member).
# Function: fit Gumbel with extRemes and estimate given year return level
fit_gumbel_return <- function(vec, return_period = 30) {
# Fit Gumbel (GEV with shape = 0)
fit <- extRemes::fevd(vec, type = "Gumbel")
# Estimate T-year return level
rl <- unname(c(return.level(fit, return.period = return_period)))
return(rl)
}
calc_30x <- function(df){
fit_gumbel_return(df$pcwd)
}
calc_30x_across_members <- function(df){
# Collapse selected columns into vector
vec <- unlist(df %>% select(-year))
vec <- vec[is.finite(vec)]
fit_gumbel_return(vec, return_period = 30)
}
apply_sliding <- function(df){
df %>%
mutate(return_level = slide_index_dbl(
seq_len(nrow(.)),
.i = year,
.before = 24,
.complete = TRUE,
.f = ~ calc_30x_across_members(df[.x, ])
))
}
apply_pooled <- function(df, years){
df |>
filter(year %in% years) |>
calc_30x_across_members()
}
# fit two extreme value distributions on the two ERA5 periods and extract the
# 30-year extreme value.
df_era5_30x <- df_era5 |>
mutate(period = ifelse(year %in% 1975:1999, "pcwd_x30_ref0", ifelse(year %in% 2000:2024, "pcwd_x30_ref1", NA))) |>
group_by(region, period) |>
nest() |>
drop_na(period) |>
mutate(pcwd_30x = purrr::map_dbl(data, ~calc_30x(.))) |>
pivot_wider(id_cols = -data, values_from = "pcwd_30x", names_from = "period")
# this takes long to compute
nmembers <- length(unique(df_modesim$member))
# fit extreme value distributions on sliding windows of 25 years, gathering values from all ensemble members
# warning: this takes a few minutes
df_modesim_30x_sliding <- df_modesim |>
mutate(member = paste0("m", as.integer(member))) |>
pivot_wider(names_from = "member", values_from = "pcwd") |>
group_by(region) |>
nest() |>
mutate(data = map(data, apply_sliding)) |>
mutate(data = map(data, ~select(., year, pcwd_30x = return_level))) |>
unnest(data)
# fit extreme value distributions on data pooled from all years in different periods
df_modesim_30x_pooled <- df_modesim |>
mutate(member = paste0("m", as.integer(member))) |>
pivot_wider(names_from = "member", values_from = "pcwd") |>
group_by(region) |>
nest()
df_modesim_30x_pooled_multicent <- df_modesim_30x_pooled |>
mutate(pcwd_30x = map_dbl(data, ~apply_pooled(., years = 1420:1999)))
df_modesim_30x_pooled_historical <- df_modesim_30x_pooled |>
mutate(pcwd_30x = map_dbl(data, ~apply_pooled(., years = 1850:1999)))
df_modesim_30x_pooled_recent <- df_modesim_30x_pooled |>
mutate(pcwd_30x = map_dbl(data, ~apply_pooled(., years = 1975:1999)))
tmp <- df_era5_30x |>
mutate(is_greater = pcwd_x30_ref0 < pcwd_x30_ref1)
n_greater <- tmp |>
ungroup() |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg9 <- tmp |>
ggplot(aes(pcwd_x30_ref0, pcwd_x30_ref1, color = is_greater)) +
geom_point(size = 0.75, show.legend = FALSE) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ERA5 1975-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = "30-year extreme PCWD"
)
gg9
get_minmax <- function(df){
df |>
filter(year %in% 1975:1999) |>
ungroup() |>
summarise(pcwd_30x_ref0_max = max(pcwd_30x, na.rm = TRUE), pcwd_30x_ref0_min = min(pcwd_30x, na.rm = TRUE))
}
df_30x_sliding_recent <- df_modesim_30x_sliding |>
group_by(region) |>
nest() |>
mutate(data = purrr::map(data, ~get_minmax(.))) |>
unnest(data) |>
left_join(
df_era5_30x |>
select(region, pcwd_x30_ref1),
by = join_by(region)
)
tmp <- df_30x_sliding_recent |>
mutate(is_greater = pcwd_x30_ref1 > pcwd_30x_ref0_max)
n_greater <- tmp |>
ungroup() |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg10 <- tmp |>
ggplot(aes(x = pcwd_30x_ref0_max, xend = pcwd_30x_ref0_min, y = pcwd_x30_ref1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1975-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg10
get_minmax <- function(df){
df |>
filter(year %in% 1850:1999) |>
ungroup() |>
summarise(pcwd_30x_ref0_max = max(pcwd_30x, na.rm = TRUE), pcwd_30x_ref0_min = min(pcwd_30x, na.rm = TRUE))
}
df_30x_sliding_historical <- df_modesim_30x_sliding |>
group_by(region) |>
nest() |>
mutate(data = purrr::map(data, ~get_minmax(.))) |>
unnest(data) |>
left_join(
df_era5_30x |>
select(region, pcwd_x30_ref1),
by = join_by(region)
)
tmp <- df_30x_sliding_historical |>
mutate(is_greater = pcwd_x30_ref1 > pcwd_30x_ref0_max)
n_greater <- tmp |>
ungroup() |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg11 <- tmp |>
ggplot(aes(x = pcwd_30x_ref0_max, xend = pcwd_30x_ref0_min, y = pcwd_x30_ref1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1850-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg11
get_minmax <- function(df){
df |>
filter(year %in% 1420:1999) |>
ungroup() |>
summarise(pcwd_30x_ref0_max = max(pcwd_30x, na.rm = TRUE), pcwd_30x_ref0_min = min(pcwd_30x, na.rm = TRUE))
}
df_30x_sliding_multicent <- df_modesim_30x_sliding |>
group_by(region) |>
nest() |>
mutate(data = purrr::map(data, ~get_minmax(.))) |>
unnest(data) |>
left_join(
df_era5_30x |>
select(region, pcwd_x30_ref1),
by = join_by(region)
)
tmp <- df_30x_sliding_multicent |>
mutate(is_greater = pcwd_x30_ref1 > pcwd_30x_ref0_max)
n_greater <- tmp |>
ungroup() |>
summarise(n_greater = sum(is_greater)) |>
pull(n_greater)
gg12 <- tmp |>
ggplot(aes(x = pcwd_30x_ref0_max, xend = pcwd_30x_ref0_min, y = pcwd_x30_ref1, color = is_greater)) +
geom_segment(show.legend = FALSE, linewidth = 0.75) +
geom_point(show.legend = FALSE, size = 0.75) +
geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
coord_equal() +
scale_color_manual(values = c("grey30", "tomato")) +
theme_classic() +
scale_x_log10() +
scale_y_log10() +
labs(
x = "ModE-Sim 1420-1999 (mm)",
y = "ERA5 2000-2024 (mm)",
subtitle = bquote(italic(N) == .(n_greater)),
title = " "
)
gg12
row3 <- cowplot::plot_grid(
gg9, gg10, gg11, gg12,
nrow = 1,
align = "v",
labels = letters[9:12]
)
ggsave(here("fig/30yextremes_records_references.pdf"), plot = row3, width = 15, height = 4)
gg_combined <- cowplot::plot_grid(
row1, row2, row3,
ncol = 1
)
ggsave(here("fig/combined_references.pdf"), plot = gg_combined, width = 15, height = 12)