library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
set.seed(123)
create_ar_series <- function(idx, len, amplitude = 2){
# generate a auto-regressive time series from white noise plus a sine wave
dfa <- tibble(
idx = idx,
year = seq(len),
ar_series = stats::filter(rnorm(len), filter = 0.7, method = "recursive"),
sine_wave = amplitude * sin(seq(len)*2*pi/len)
) |>
# synthetic PCWD time series as the sum of the AR t series plus sine wave
# plus 10 to have all values positive
mutate(
pcwd = ar_series + sine_wave + 10
)
}
ztransform <- function(df, mymean, mysd){
# z-transform pcwd values
df |>
mutate(pcwd_zscore = (pcwd - mymean)/mysd)
}
df_list <- purrr::map(
seq(1:22),
~create_ar_series(., 1000)
)
# get mean and sd across all ensemble members for z-transformation
mysumm <- df_list |>
bind_rows() |> # make flat
dplyr::filter(year %in% 1:30) |> # reference period
ungroup() |>
summarise(
pcwd_mean = mean(pcwd, na.rm = TRUE),
pcwd_sd = sd(pcwd, na.rm = TRUE)
)
df_list <- df_list |>
purrr::map(~ztransform(., mysumm$pcwd_mean, mysumm$pcwd_sd))
Visualise.
gg1 <- ggplot(
aes(year, pcwd, group = idx),
data = bind_rows(df_list)
) +
geom_line(alpha = 0.2)
gg2 <- ggplot(
aes(year, pcwd_zscore, group = idx),
data = bind_rows(df_list)
) +
geom_line(alpha = 0.2) +
# mark reference period
geom_vline(xintercept = 30, linetype = "dotted") +
# mark the interval of +/- 1.96 SD - where ~95% of observations are expected to fall into
# based on distribution of annual values during reference period
geom_hline(yintercept = c(-1.960, 1.960), linetype = "dotted")
gg1 / gg2
# Define the 30-year moving average filter
get_rollmean <- function(df, window_size){
weights <- rep(1/window_size, window_size) # Equal weights for simple moving average
df |>
dplyr::select(idx, year, pcwd) |>
mutate(pcwd = stats::filter(pcwd, filter = weights, sides = 1))
}
df30_list <- df_list |>
purrr::map(~get_rollmean(., 30))
Add z-score of 30-year means.
# get mean and sd across all ensemble members for z-transformation
mysumm <- df30_list |>
bind_rows() |> # make flat
dplyr::filter(year %in% 1:30) |> # reference period
tidyr::drop_na(pcwd) |> # this yields 22 values
ungroup() |>
summarise(
pcwd_mean = mean(pcwd, na.rm = TRUE),
pcwd_sd = sd(pcwd, na.rm = TRUE)
)
df30_list <- df30_list |>
purrr::map(~ztransform(., mysumm$pcwd_mean, mysumm$pcwd_sd))
Visualise for one ensemble member (red is 30-year rolling mean as trailing mean).
ggplot() +
geom_line(aes(year, pcwd), data = df_list[[1]], color = "grey") +
geom_line(aes(year, pcwd), data = df30_list[[1]], color = "red")
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Warning: Removed 29 rows containing missing values or values outside the scale range
## (`geom_line()`).
Visualise.
gg1_30 <- ggplot(
aes(year, pcwd, group = idx),
data = bind_rows(df30_list)
) +
geom_line(alpha = 0.2)
gg2_30 <- ggplot(
aes(year, pcwd_zscore, group = idx),
data = bind_rows(df30_list)
) +
geom_line(alpha = 0.2) +
# mark reference period
geom_vline(xintercept = 30, linetype = "dotted") +
# mark the interval of +/- 1.96 SD - where ~95% of observations are expected to fall into
# based on distribution of annual values during reference period
geom_hline(yintercept = c(-1.960, 1.960), linetype = "dotted")
gg1_30 / gg2_30
## Warning: Removed 638 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 638 rows containing missing values or values outside the scale range
## (`geom_line()`).
For direct comparison again: z-transformed values
gg2 <- gg2 + ylim(-10, 10) + labs(title = "Z-scores from annual values")
gg2_30 <- gg2_30 + ylim(-10, 10) + labs(title = "Z-scores from 30-year means")
gg2 / gg2_30
## Warning: Removed 638 rows containing missing values or values outside the scale range
## (`geom_line()`).
This demonstrates that the long-term variability is more clearly detected from 30-year means.