library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/benjaminstocker/lastmill_droughts-stineb
library(slider)
library(extRemes)
## Loading required package: Lmoments
## Loading required package: distillery
## 
## Attaching package: 'extRemes'
## 
## The following objects are masked from 'package:stats':
## 
##     qqnorm, qqplot

Read data

Interpreting the objects into nice and tidy data frames. Select only for one region (‘MED’ here).

df_era5 <- tibble(
  pcwd = read_rds(here("data/regional_results_ERA5Land.rds"))$MED
  ) |> 
  # not sure this is correct. Better to take years forward as part of the workflow. 
  mutate(year = 1970:2024)

extract_ensemble_number <- function(char){
  char |> 
    str_remove("_tidy") |> 
    str_remove("m0") |> 
    as.integer()
}

df_modesim <- read_rds(here("data/regional_results_ModESim_bc.rds"))$MED |> 
  as_tibble() |> 
  # not sure this is correct. Better to take years forward as part of the workflow. 
  mutate(year = 1420:2009)

df_modesim_long <- df_modesim |> 
  pivot_longer(cols = ends_with("_tidy"), names_to = "member", values_to = "pcwd", names_transform = extract_ensemble_number)

Annual values

Determine single maximum in reference

max_modesim <- df_modesim_long |> 
  filter(pcwd == max(pcwd))

max_era5 <- df_era5 |> 
  filter(pcwd == max(pcwd))

level_recordshattering <- max_modesim$pcwd + 2 * sd(df_modesim_long$pcwd, na.rm = TRUE)

Visualise

ggplot() +
  geom_line(aes(year, pcwd, group = member), alpha = 0.1, data = df_modesim_long) +
  geom_line(aes(year, pcwd), color = "tomato", data = df_era5) +
  geom_point(aes(year, pcwd), alpha = 0.5, size = 2, data = max_modesim) +
  geom_point(aes(year, pcwd), color = "tomato", size = 2, data = max_era5) +
  geom_hline(yintercept = max_modesim$pcwd, linetype = "dotted", alpha = 0.3) +
  geom_hline(yintercept = level_recordshattering, linetype = "dotted", alpha = 0.3) +
  theme_classic()

30-year means

df_modesim_30 <- df_modesim |> 
  mutate(across(ends_with("_tidy"), ~slide_dbl(.x, mean, .before = 29, .complete = TRUE, .names = "roll30"))) |> 
  pivot_longer(cols = ends_with("_tidy"), names_to = "member", values_to = "pcwd30", names_transform = extract_ensemble_number)

df_era5_30 <- df_era5 |> 
  mutate(across(pcwd, ~slide_dbl(.x, mean, .before = 29, .complete = TRUE, .names = "roll30"))) 

Determine single maximum in reference

max_modesim_30 <- df_modesim_30 |> 
  filter(pcwd30 == max(pcwd30, na.rm = TRUE))

max_era5_30 <- df_era5_30 |> 
  filter(pcwd == max(pcwd, na.rm = TRUE))

level_recordshattering <- max_modesim_30$pcwd30[1] + 2 * sd(df_modesim_30$pcwd30, na.rm = TRUE)

Visualise

ggplot() +
  geom_line(aes(year, pcwd30, group = member), alpha = 0.3, data = df_modesim_30) +
  geom_line(aes(year, pcwd), color = "tomato", data = df_era5_30) +
  geom_point(aes(year, pcwd30), alpha = 0.5, size = 2, data = max_modesim_30) +
  geom_point(aes(year, pcwd), color = "tomato", size = 2, data = max_era5_30) +
  geom_hline(yintercept = max_modesim_30$pcwd30, linetype = "dotted", alpha = 0.3) +
  geom_hline(yintercept = level_recordshattering, linetype = "dotted", alpha = 0.3) +
  theme_classic()
## Warning: Removed 580 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_line()`).

30-year extreme value

Could also use a 5-year extreme value.

# Function: fit Gumbel with extRemes and estimate T-year return level
fit_gumbel_return <- function(x, T = 30) {
  # Fit Gumbel (GEV with shape = 0)
  fit <- fevd(x, type = "Gumbel")
  
  # Estimate T-year return level
  rl <- unname(c(return.level(fit, return.period = T)))

  return(rl)
}
df_modesim_x30 <- df_modesim |> 
  mutate(across(ends_with("_tidy"), ~slide_dbl(.x, fit_gumbel_return, .before = 29, .complete = TRUE))) |> 
  pivot_longer(cols = ends_with("_tidy"), names_to = "member", values_to = "pcwdx30", names_transform = extract_ensemble_number)

df_era5_x30 <- df_era5 |> 
  mutate(across(pcwd, ~slide_dbl(.x, fit_gumbel_return, .before = 29, .complete = TRUE))) 

Determine single maximum in reference

max_modesim_x30 <- df_modesim_x30 |> 
  filter(pcwdx30 == max(pcwdx30, na.rm = TRUE))

max_era5_x30 <- df_era5_x30 |> 
  filter(pcwd == max(pcwd, na.rm = TRUE))

level_recordshattering <- max_modesim_x30$pcwdx30 + 2 * sd(df_modesim_x30$pcwdx30, na.rm = TRUE)

Visualise

ggplot() +
  geom_line(aes(year, pcwdx30, group = member), alpha = 0.3, data = df_modesim_x30) +
  geom_line(aes(year, pcwd), color = "tomato", data = df_era5_x30) +
  geom_point(aes(year, pcwdx30), alpha = 0.5, size = 2, data = max_modesim_x30) +
  geom_point(aes(year, pcwd), color = "tomato", size = 2, data = max_era5_x30) +
  geom_hline(yintercept = max_modesim_x30$pcwdx30, linetype = "dotted", alpha = 0.3) +
  geom_hline(yintercept = level_recordshattering, linetype = "dotted", alpha = 0.3) +
  theme_classic()
## Warning: Removed 580 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_line()`).

Conclusion