library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'dplyr' was built under R version 4.5.2
library(here)
library(slider)
library(extRemes)

Approach

Determining whether 21st century (2000-2024) droughts are unprecedented and quantifying their “extremeness” depends on the reference. Here, we determine these aspects based on alternative choices of the reference:

  1. Recent past climate reanalysis (ERA5) data (1975-1999)
  2. Recent past climate model simulation, across all ensemble members (1975-1999)
  3. Historical climate model simulation, across all ensemble members (1850-1999)
  4. Multi-centennial preindustrial climate model simulation, across all ensemble members (1420-1999)

The magnitude of 21st century droughts are estimated based on ERA5 climate reanalysis data for years 2000-2024.

Read data

ERA5

Interpreting the objects into nice and tidy data frames.

list_era5 <- read_rds(here("data/regional_results_ERA5Land.rds"))

nregions <- length(list_era5)

make_df_era5 <- function(vec){
  tibble(
    year = 1970:2024,
    pcwd = vec
  )
}

df_era5 <- purrr::map(
  list_era5, 
  ~make_df_era5(.)
) |> 
  bind_rows(.id = "region")

ModE-Sim

extract_ensemble_number <- function(char){
  char |> 
    str_remove("_tidy") |> 
    str_remove("m0") |> 
    as.integer()
}

make_df_modesim <- function(df){
  as_tibble(df) |> 
    # not sure this is correct. Better to take years forward as part of the workflow. 
    mutate(year = 1420:2009) |> 
    pivot_longer(
      cols = ends_with("_tidy"), 
      names_to = "member", 
      values_to = "pcwd", 
      names_transform = extract_ensemble_number
      )
}

list_modesim <- read_rds(here("data/regional_results_ModESim_bc.rds"))

df_modesim <- purrr::map(
  list_modesim,
  ~make_df_modesim(.)
) |> 
  bind_rows(.id = "region")

Record annual values

Create objects that contain records for different references in different datasets.

ERA5

  1. ERA5: Record PCWD by region in years 1975-1999 and in years 2000-2024.
df_record_era5 <- df_era5 |> 
  filter(year %in% 1975:1999) |> 
  group_by(region) |> 
  summarise(pcwd_max0 = max(pcwd)) |> 
  left_join(
    df_era5 |> 
      filter(year %in% 2000:2024) |> 
      group_by(region) |> 
      summarise(pcwd_max1 = max(pcwd)),
    by = join_by(region)
  )

Plot.

tmp <- df_record_era5 |> 
  mutate(is_greater = pcwd_max0 < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg1 <- tmp |> 
  ggplot(aes(pcwd_max0, pcwd_max1, color = is_greater)) +
  geom_point(size = 0.75, show.legend = FALSE) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ERA5 1975-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "Record annual PCWD"
  )

gg1

tmp |> 
  ggplot(aes(pcwd_max1 / pcwd_max0)) +
  geom_histogram(color = "black", fill = "grey", bins = 10) +
  geom_vline(xintercept = 1.0, linetype = "dotted") +
  theme_classic() +
  labs(
    x = "Ratio (unitless)",
    title = "Amplification"
  )

ModE-Sim recent (1975-1999)

Record PCWD by region in ERA5, years 2000-2024 vs. in years 1975-1999 and all ensemble members in ModE-Sim.

df_record_era5_modesim_recent <- df_modesim |> 
  filter(year %in% 1975:1999) |> 
  group_by(region, member) |> 
  summarise(pcwd_max0 = max(pcwd)) |> 
  ungroup() |> 
  group_by(region) |> 
  summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |> 
  left_join(
    df_era5 |> 
      filter(year %in% 2000:2024) |> 
      group_by(region) |> 
      summarise(pcwd_max1 = max(pcwd)),
    by = join_by(region)
  )
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and member.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, member))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.

Plot.

tmp <- df_record_era5_modesim_recent |> 
  mutate(is_greater = pcwd_max0_right < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg2 <- tmp |> 
  ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1975-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg2

ModE-Sim historical (1850-1999)

Record PCWD by region in ERA5, years 2000-2024 vs. in years 1850-1999 and all ensemble members in ModE-Sim.

df_record_era5_modesim_historical <- df_modesim |> 
  filter(year %in% 1850:1999) |> 
  group_by(region, member) |> 
  summarise(pcwd_max0 = max(pcwd)) |> 
  ungroup() |> 
  group_by(region) |> 
  summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |> 
  left_join(
    df_era5 |> 
      filter(year %in% 2000:2024) |> 
      group_by(region) |> 
      summarise(pcwd_max1 = max(pcwd)),
    by = join_by(region)
  )
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and member.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, member))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.

Plot.

tmp <- df_record_era5_modesim_historical |> 
  mutate(is_greater = pcwd_max0_right < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg3 <- tmp |> 
  ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1850-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg3

ModE-Sim multi-century (1420-1999)

Record PCWD by region in ERA5, years 2000-2024 vs. in years 1850-1999 and all ensemble members in ModE-Sim.

df_record_era5_modesim_multicent <- df_modesim |> 
  filter(year %in% 1420:1999) |> 
  group_by(region, member) |> 
  summarise(pcwd_max0 = max(pcwd)) |> 
  ungroup() |> 
  group_by(region) |> 
  summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |> 
  left_join(
    df_era5 |> 
      filter(year %in% 2000:2024) |> 
      group_by(region) |> 
      summarise(pcwd_max1 = max(pcwd)),
    by = join_by(region)
  )
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by region and member.
## ℹ Output is grouped by region.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(region, member))` for per-operation grouping
##   (`?dplyr::dplyr_by`) instead.

Plot.

tmp <- df_record_era5_modesim_multicent |> 
  mutate(is_greater = pcwd_max0_right < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg4 <- tmp |> 
  ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1420-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg4

Combined plot

row1 <- cowplot::plot_grid(
  gg1, gg2, gg3, gg4, 
  nrow = 1, 
  align = "v",
  labels = letters[1:4]
  )
ggsave(here("fig/annual_records_references.pdf"), plot = row1, width = 15, height = 4)

Record 25-year means

Calculate rolling means of all data, separated by region (ModE-Sim also separated by ensemble member).

df_era5_25 <- df_era5 |> 
  pivot_wider(values_from = "pcwd", names_from = "region") |> 
  mutate(across(-year, ~slide_dbl(.x, mean, .before = 24, .complete = TRUE)))

df_modesim_25 <- df_modesim |> 
  group_by(region, member) |> 
  nest() |> 
  mutate(
    data = map(data, ~ .x  |> 
      mutate(
        roll_mean = slide_dbl(
          .x$pcwd,
          mean,
          .before = 24,     # 2 before = 3-point window (includes current)
          .complete = TRUE  # require full window, so first 2 will be NA
        )
      )
    )
  )

ERA5

df_record25_era5 <- df_era5_25 |> 
  filter(year %in% c(1999, 2024)) |> 
  pivot_longer(cols = -year, values_to = "pcwd", names_to = "region") |> 
  pivot_wider(values_from = "pcwd", names_from = "year") |> 
  rename(pcwd_max0 = "1999", pcwd_max1 = "2024")

Plot.

tmp <- df_record25_era5 |> 
  mutate(is_greater = pcwd_max0 < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg5 <- tmp |> 
  ggplot(aes(pcwd_max0, pcwd_max1, color = is_greater)) +
  geom_point(size = 0.75, show.legend = FALSE) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ERA5 1975-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "Record 25-year mean PCWD"
  )

gg5

ModE-Sim recent (1975-1999)

get_record <- function(df){
  df |> 
    filter(year %in% 1975:1999) |> 
    filter(roll_mean == max(roll_mean)) |> 
    pull(roll_mean)
}

df_record25_era5_modesim_recent <- df_modesim_25 |> 
  mutate(pcwd_max0 = purrr::map_dbl(data, ~get_record(.))) |> 
  select(-data) |> 
  ungroup() |> 
  group_by(region) |> 
  summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |> 
  left_join(
    df_record25_era5 |> 
      select(region, pcwd_max1),
    by = join_by(region)
  )

Plot.

tmp <- df_record25_era5_modesim_recent |> 
  mutate(is_greater = pcwd_max0_right < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg6 <- tmp |> 
  ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1975-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg6

ModE-Sim historical (1850-1999)

get_record <- function(df){
  df |> 
    filter(year %in% 1850:1999) |> 
    filter(roll_mean == max(roll_mean)) |> 
    pull(roll_mean)
}

df_record25_era5_modesim_historical <- df_modesim_25 |> 
  mutate(pcwd_max0 = purrr::map_dbl(data, ~get_record(.))) |> 
  select(-data) |> 
  ungroup() |> 
  group_by(region) |> 
  summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |> 
  left_join(
    df_record25_era5 |> 
      select(region, pcwd_max1),
    by = join_by(region)
  )

Plot.

tmp <- df_record25_era5_modesim_historical |> 
  mutate(is_greater = pcwd_max0_right < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg7 <- tmp |> 
  ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1850-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg7

ModE-Sim multi-century (1420-1999)

get_record <- function(df){
  df |> 
    filter(year %in% 1420:1999) |> 
    filter(roll_mean == max(roll_mean, na.rm = TRUE)) |> 
    pull(roll_mean)
}

df_record25_era5_modesim_multicent <- df_modesim_25 |> 
  mutate(pcwd_max0 = purrr::map_dbl(data, ~get_record(.))) |> 
  select(-data) |> 
  ungroup() |> 
  group_by(region) |> 
  summarise(pcwd_max0_left = min(pcwd_max0), pcwd_max0_right = max(pcwd_max0)) |> 
  left_join(
    df_record25_era5 |> 
      select(region, pcwd_max1),
    by = join_by(region)
  )

Plot.

tmp <- df_record25_era5_modesim_multicent |> 
  mutate(is_greater = pcwd_max0_right < pcwd_max1)

n_greater <- tmp |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg8 <- tmp |> 
  ggplot(aes(x = pcwd_max0_right, xend = pcwd_max0_left, y = pcwd_max1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1420-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg8

Combined plot

row2 <- cowplot::plot_grid(
  gg5, gg6, gg7, gg8, 
  nrow = 1, 
  align = "v",
  labels = letters[5:8]
  )
ggsave(here("fig/30yrmeans_records_references.pdf"), plot = row2, width = 15, height = 4)

30-year extremes (sliding)

Calculate rolling 30-year extremes of all data, separated by region (ModE-Sim also separated by ensemble member).

# Function: fit Gumbel with extRemes and estimate given year return level
fit_gumbel_return <- function(vec, return_period = 30) {
  # Fit Gumbel (GEV with shape = 0)
  fit <- extRemes::fevd(vec, type = "Gumbel")
  
  # Estimate T-year return level
  rl <- unname(c(return.level(fit, return.period = return_period)))

  return(rl)
}

calc_30x <- function(df){
  fit_gumbel_return(df$pcwd)
}

calc_30x_across_members <- function(df){
  # Collapse selected columns into vector
  vec <- unlist(df %>% select(-year))
  vec <- vec[is.finite(vec)]
  fit_gumbel_return(vec, return_period = 30)
}

apply_sliding <- function(df){
  df %>%
    mutate(return_level = slide_index_dbl(
      seq_len(nrow(.)),
      .i = year,
      .before = 24,
      .complete = TRUE,
      .f = ~ calc_30x_across_members(df[.x, ])
    ))
}

apply_pooled <- function(df, years){
  df |> 
    filter(year %in% years) |> 
    calc_30x_across_members()
}

# fit two extreme value distributions on the two ERA5 periods and extract the 
# 30-year extreme value.
df_era5_30x <- df_era5 |> 
  mutate(period = ifelse(year %in% 1975:1999, "pcwd_x30_ref0", ifelse(year %in% 2000:2024, "pcwd_x30_ref1", NA))) |> 
  group_by(region, period) |> 
  nest() |> 
  drop_na(period) |> 
  mutate(pcwd_30x = purrr::map_dbl(data, ~calc_30x(.))) |> 
  pivot_wider(id_cols = -data, values_from = "pcwd_30x", names_from = "period")

# this takes long to compute
nmembers <- length(unique(df_modesim$member))

# fit extreme value distributions on sliding windows of 25 years, gathering values from all ensemble members
# warning: this takes a few minutes
df_modesim_30x_sliding <- df_modesim |> 
  mutate(member = paste0("m", as.integer(member))) |> 
  pivot_wider(names_from = "member", values_from = "pcwd") |> 
  group_by(region) |> 
  nest() |> 
  mutate(data = map(data, apply_sliding)) |> 
  mutate(data = map(data, ~select(., year, pcwd_30x = return_level))) |> 
  unnest(data)

# fit extreme value distributions on data pooled from all years in different periods
df_modesim_30x_pooled <- df_modesim |> 
  mutate(member = paste0("m", as.integer(member))) |> 
  pivot_wider(names_from = "member", values_from = "pcwd") |> 
  group_by(region) |> 
  nest()
  
df_modesim_30x_pooled_multicent <- df_modesim_30x_pooled |> 
  mutate(pcwd_30x = map_dbl(data, ~apply_pooled(., years = 1420:1999)))

df_modesim_30x_pooled_historical <- df_modesim_30x_pooled |> 
  mutate(pcwd_30x = map_dbl(data, ~apply_pooled(., years = 1850:1999)))

df_modesim_30x_pooled_recent <- df_modesim_30x_pooled |> 
  mutate(pcwd_30x = map_dbl(data, ~apply_pooled(., years = 1975:1999)))

ERA5

tmp <- df_era5_30x |> 
  mutate(is_greater = pcwd_x30_ref0 < pcwd_x30_ref1)

n_greater <- tmp |> 
  ungroup() |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg9 <- tmp |> 
  ggplot(aes(pcwd_x30_ref0, pcwd_x30_ref1, color = is_greater)) +
  geom_point(size = 0.75, show.legend = FALSE) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ERA5 1975-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "30-year extreme PCWD"
  )

gg9

ModE-Sim recent (1975-1999)

get_minmax <- function(df){
  df |> 
    filter(year %in% 1975:1999) |> 
    ungroup() |> 
    summarise(pcwd_30x_ref0_max = max(pcwd_30x, na.rm = TRUE), pcwd_30x_ref0_min = min(pcwd_30x, na.rm = TRUE))
}

df_30x_sliding_recent <- df_modesim_30x_sliding |> 
  group_by(region) |> 
  nest() |> 
  mutate(data = purrr::map(data, ~get_minmax(.))) |> 
  unnest(data) |> 
  left_join(
    df_era5_30x |> 
      select(region, pcwd_x30_ref1),
    by = join_by(region)
  )
tmp <- df_30x_sliding_recent |> 
  mutate(is_greater = pcwd_x30_ref1 > pcwd_30x_ref0_max)

n_greater <- tmp |> 
  ungroup() |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg10 <- tmp |> 
  ggplot(aes(x = pcwd_30x_ref0_max, xend = pcwd_30x_ref0_min, y = pcwd_x30_ref1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1975-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg10

ModE-Sim historical (1850-1999)

get_minmax <- function(df){
  df |> 
    filter(year %in% 1850:1999) |> 
    ungroup() |> 
    summarise(pcwd_30x_ref0_max = max(pcwd_30x, na.rm = TRUE), pcwd_30x_ref0_min = min(pcwd_30x, na.rm = TRUE))
}

df_30x_sliding_historical <- df_modesim_30x_sliding |> 
  group_by(region) |> 
  nest() |> 
  mutate(data = purrr::map(data, ~get_minmax(.))) |> 
  unnest(data) |> 
  left_join(
    df_era5_30x |> 
      select(region, pcwd_x30_ref1),
    by = join_by(region)
  )
tmp <- df_30x_sliding_historical |> 
  mutate(is_greater = pcwd_x30_ref1 > pcwd_30x_ref0_max)

n_greater <- tmp |> 
  ungroup() |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg11 <- tmp |> 
  ggplot(aes(x = pcwd_30x_ref0_max, xend = pcwd_30x_ref0_min, y = pcwd_x30_ref1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1850-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg11

ModE-Sim multi-century (1420-1999)

get_minmax <- function(df){
  df |> 
    filter(year %in% 1420:1999) |> 
    ungroup() |> 
    summarise(pcwd_30x_ref0_max = max(pcwd_30x, na.rm = TRUE), pcwd_30x_ref0_min = min(pcwd_30x, na.rm = TRUE))
}

df_30x_sliding_multicent <- df_modesim_30x_sliding |> 
  group_by(region) |> 
  nest() |> 
  mutate(data = purrr::map(data, ~get_minmax(.))) |> 
  unnest(data) |> 
  left_join(
    df_era5_30x |> 
      select(region, pcwd_x30_ref1),
    by = join_by(region)
  )
tmp <- df_30x_sliding_multicent |> 
  mutate(is_greater = pcwd_x30_ref1 > pcwd_30x_ref0_max)

n_greater <- tmp |> 
  ungroup() |> 
  summarise(n_greater = sum(is_greater)) |> 
  pull(n_greater)

gg12 <- tmp |> 
  ggplot(aes(x = pcwd_30x_ref0_max, xend = pcwd_30x_ref0_min, y = pcwd_x30_ref1, color = is_greater)) +
  geom_segment(show.legend = FALSE, linewidth = 0.75) +
  geom_point(show.legend = FALSE, size = 0.75) +
  geom_abline(slope = 1, intercept = 0, linetype = "dotted") +
  coord_equal() +
  scale_color_manual(values = c("grey30", "tomato")) +
  theme_classic() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ModE-Sim 1420-1999 (mm)",
    y = "ERA5 2000-2024 (mm)",
    subtitle = bquote(italic(N) == .(n_greater)),
    title = "   "
  )

gg12

Combined plot

row3 <- cowplot::plot_grid(
  gg9, gg10, gg11, gg12,
  nrow = 1, 
  align = "v",
  labels = letters[9:12]
  )
ggsave(here("fig/30yextremes_records_references.pdf"), plot = row3, width = 15, height = 4)

All combined plot

gg_combined <- cowplot::plot_grid(
  row1, row2, row3,
  ncol = 1
  )
ggsave(here("fig/combined_references.pdf"), plot = gg_combined, width = 15, height = 12)