synchrony

options(dplyr.summarise.inform = FALSE)

library(tidyverse)
library(stringdist)
library(ggplot2)
library(gridExtra)
na_plots <- read.csv("C:/Users/mh471/Downloads/radiation/na_plotData.csv")
eu_plots <- read.csv("C:/Users/mh471/Downloads/radiation/eu_plotData.csv")
delta_yr_wide <- read.csv("C:/Users/mh471/Downloads/continent comparison/east_europe_west_1105/randYrEst.csv")

eu_plots <- eu_plots %>% rename(ecoRegWWF = ecoReg)

delta_yr <- delta_yr_wide %>%
  pivot_longer(cols = starts_with("X"), names_to = "year", values_to = "delta_yr") %>%
  mutate(year = as.numeric(str_remove(year, "^X")))
# 模糊匹配函数
find_best_match <- function(delta_name, plot_names, threshold = 0.25) {
  delta_clean <- tolower(gsub(" ", "", delta_name))
  distances <- stringdist(delta_clean, tolower(gsub(" ", "", plot_names)), method = "jw")
  best_idx <- which.min(distances)
  best_distance <- distances[best_idx]
  best_match <- plot_names[best_idx]
  if (best_distance < threshold) list(match = best_match, distance = best_distance)
  else list(match = NA_character_, distance = best_distance)
}

delta_yr_ecos <- unique(delta_yr$ecoReg)
plot_ecos_na  <- unique(na_plots$ecoRegWWF)
plot_ecos_eu  <- unique(eu_plots$ecoRegWWF)
plot_ecos_all <- unique(c(plot_ecos_na, plot_ecos_eu))

name_mapping <- tibble(
  delta_yr_name = delta_yr_ecos,
  plot_name = NA_character_,
  distance = NA_real_,
  in_na = FALSE,
  in_eu = FALSE
)

for (i in seq_len(nrow(name_mapping))) {
  res <- find_best_match(name_mapping$delta_yr_name[i], plot_ecos_all)
  name_mapping$plot_name[i] <- res$match
  name_mapping$distance[i]  <- res$distance
  if (!is.na(res$match)) {
    name_mapping$in_na[i] <- res$match %in% plot_ecos_na
    name_mapping$in_eu[i] <- res$match %in% plot_ecos_eu
  }
}

# 仅保留在 delta_yr 中有变异的生态区
useful_ecos <- delta_yr %>%
  group_by(ecoReg) %>%
  summarise(var = var(delta_yr, na.rm = TRUE), .groups = "drop") %>%
  filter(var > 0, !is.na(var)) %>%
  pull(ecoReg)

name_mapping_useful <- name_mapping %>%
  filter(delta_yr_name %in% useful_ecos, !is.na(plot_name))

# 提取质心
extract_centroids <- function(plots_data, eco_lookup, region_label) {
  plots_data %>%
    filter(ecoRegWWF %in% eco_lookup$plot_name) %>%
    group_by(ecoRegWWF) %>%
    summarise(lon = mean(lon, na.rm = TRUE),
              lat = mean(lat, na.rm = TRUE),
              n_plots = n(), .groups = "drop") %>%
    left_join(eco_lookup %>% select(delta_yr_name, plot_name),
              by = c("ecoRegWWF" = "plot_name")) %>%
    transmute(ecoReg = delta_yr_name, lon, lat, n_plots, continent = region_label) %>%
    drop_na(ecoReg)
}

na_ecos_clean <- extract_centroids(na_plots, name_mapping_useful %>% filter(in_na), "North America")
eu_ecos_clean <- extract_centroids(eu_plots, name_mapping_useful %>% filter(in_eu), "Europe")

ecoregion_coords_clean <- bind_rows(na_ecos_clean, eu_ecos_clean) %>%
  group_by(ecoReg) %>%
  arrange(desc(n_plots)) %>%
  slice(1) %>%
  ungroup() %>%
  arrange(ecoReg)

# 时间序列数据
temporal_data_clean <- delta_yr %>%
  filter(ecoReg %in% ecoregion_coords_clean$ecoReg) %>%
  left_join(ecoregion_coords_clean %>% select(ecoReg, continent), by = "ecoReg") %>%
  filter(!is.na(continent), delta_yr != 0)

# 快速浏览
head(name_mapping_useful)
# A tibble: 6 x 5
  delta_yr_name                                   plot_name distance in_na in_eu
  <chr>                                           <chr>        <dbl> <lgl> <lgl>
1 Alpsconiferandmixedforests                      Alps con~        0 TRUE  TRUE 
2 Atlanticmixedforests                            Atlantic~        0 FALSE TRUE 
3 DinaricMountainsmixedforests                    Dinaric ~        0 FALSE TRUE 
4 NortheasternSpainandSouthernFranceMediterranea~ Northeas~        0 FALSE TRUE 
5 Pannonianmixedforests                           Pannonia~        0 FALSE TRUE 
6 Pyreneesconiferandmixedforests                  Pyrenees~        0 FALSE TRUE 
head(ecoregion_coords_clean)
# A tibble: 6 x 5
  ecoReg                                             lon   lat n_plots continent
  <chr>                                            <dbl> <dbl>   <int> <chr>    
1 AegeanandWesternTurkeysclerophyllousandmixedfo~  22.9   39.0      37 Europe   
2 Alpsconiferandmixedforests                        8.87  45.9    3930 Europe   
3 Appalachian-BlueRidgeforests                    -81.2   37.2    7181 North Am~
4 Appalachianmixedmesophyticforests               -82.4   37.8    9398 North Am~
5 Appeninedeciduousmontaneforests                  12.4   43.0     826 Europe   
6 Atlanticcoastalpinebarrens                      -74.2   39.9     563 North Am~
head(temporal_data_clean)
# A tibble: 6 x 6
  species   ecoReg                     log               year delta_yr continent
  <chr>     <chr>                      <chr>            <dbl>    <dbl> <chr>    
1 abiesAlba Alpsconiferandmixedforests abies_europe_ou~  2003     1.75 Europe   
2 abiesAlba Alpsconiferandmixedforests abies_europe_ou~  2004    -1.18 Europe   
3 abiesAlba Alpsconiferandmixedforests abies_europe_ou~  2005    -1.14 Europe   
4 abiesAlba Alpsconiferandmixedforests abies_europe_ou~  2006     1.49 Europe   
5 abiesAlba Alpsconiferandmixedforests abies_europe_ou~  2007    -1.67 Europe   
6 abiesAlba Alpsconiferandmixedforests abies_europe_ou~  2008    -1.6  Europe   
# 年度聚合
temporal_sync <- temporal_data_clean %>%
  group_by(year, continent) %>%
  summarise(mean_delta = mean(delta_yr, na.rm = TRUE),
            sd_delta   = sd(delta_yr,   na.rm = TRUE),
            n_regions  = n_distinct(ecoReg),
            .groups = "drop") %>%
  arrange(year)

# 大陆间时间相关
ts_wide <- temporal_sync %>%
  select(year, continent, mean_delta) %>%
  pivot_wider(names_from = continent, values_from = mean_delta) %>%
  drop_na()

cor_temporal <- NULL
if (nrow(ts_wide) > 3 &&
    sd(ts_wide$`North America`, na.rm = TRUE) > 0 &&
    sd(ts_wide$Europe, na.rm = TRUE) > 0) {
  cor_temporal <- cor.test(ts_wide$`North America`, ts_wide$Europe, method = "spearman")
}

# Moran's I(基于距离阈值的行标准化权重)
calculate_morans_i <- function(year_val, continent_label, data, eco_coords) {
  subset_data <- data %>%
    filter(year == year_val, continent == continent_label) %>%
    left_join(eco_coords %>% select(ecoReg, lon, lat), by = "ecoReg") %>%
    drop_na(lon, lat, delta_yr)
  if (nrow(subset_data) < 4) return(NA_real_)
  eco_data <- subset_data %>%
    group_by(ecoReg, lon, lat) %>%
    summarise(delta_mean = mean(delta_yr, na.rm = TRUE), .groups = "drop")
  if (nrow(eco_data) < 4) return(NA_real_)
  coords <- eco_data %>% select(lon, lat) %>% as.matrix()
  W <- as.matrix(dist(coords))
  threshold <- if (nrow(eco_data) < 10) 20 else 15
  W[W > threshold] <- 0
  W[W > 0] <- 1 / (W[W > 0])
  diag(W) <- 0
  row_sums <- rowSums(W)
  if (sum(row_sums) == 0) return(NA_real_)
  W <- W / pmax(row_sums, 1e-10)
  y <- eco_data$delta_mean
  yc <- y - mean(y)
  n <- length(y)
  num <- as.numeric(t(yc) %*% W %*% yc)
  den <- as.numeric(t(yc) %*% yc)
  if (den <= 0) return(NA_real_)
  (n / sum(W)) * num / den
}

years <- sort(unique(temporal_data_clean$year))
morans_results <- expand_grid(year = years, continent = c("North America", "Europe")) %>%
  mutate(morans_i = map2_dbl(year, continent,
                             ~calculate_morans_i(.x, .y, temporal_data_clean, ecoregion_coords_clean)))

morans_summary <- morans_results %>%
  drop_na(morans_i) %>%
  group_by(continent) %>%
  summarise(n_valid_years = n(),
            mean_I = mean(morans_i), sd_I = sd(morans_i),
            median_I = median(morans_i), min_I = min(morans_i),
            max_I = max(morans_i),
            pct_positive = 100 * mean(morans_i > 0),
            .groups = "drop")

temporal_sync %>% head()
# A tibble: 6 x 5
   year continent     mean_delta sd_delta n_regions
  <dbl> <chr>              <dbl>    <dbl>     <int>
1  1959 North America      1.39    NA             1
2  1960 North America     -1.16    NA             1
3  1961 Europe             0.301    0.926         1
4  1961 North America      0.169    1.08          3
5  1962 Europe            -0.862    0.873         1
6  1962 North America      1.58     2.00          4
morans_summary
# A tibble: 2 x 8
  continent     n_valid_years mean_I  sd_I median_I  min_I max_I pct_positive
  <chr>                 <int>  <dbl> <dbl>    <dbl>  <dbl> <dbl>        <dbl>
1 Europe                   36 -0.123 0.297  -0.106  -1.18  0.339         33.3
2 North America            64  0.110 0.349   0.0817 -0.541 0.984         65.6
cor_temporal

    Spearman's rank correlation rho

data:  ts_wide$`North America` and ts_wide$Europe
S = 36574, p-value = 0.1088
alternative hypothesis: true rho is not equal to 0
sample estimates:
     rho 
0.200743 
fig1 <- ggplot(temporal_sync, aes(year, mean_delta, color = continent)) +
  geom_line(linewidth = 1) + geom_point(size = 2) +
  theme_minimal() +
  labs(title = "Temporal Synchrony", x = "Year", y = "Mean Annual Effect")

fig2 <- ggplot(morans_results %>% drop_na(), aes(year, morans_i, color = continent)) +
  geom_line(linewidth = 1) + geom_point(size = 2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme_minimal() +
  labs(title = "Spatial Synchrony (Moran's I)", x = "Year", y = "I")

grid.arrange(fig1, fig2, nrow = 2)

distance_decay_timeseries <- function(continent_label, data, eco_coords) {
  ecos <- eco_coords %>% filter(continent == continent_label) %>% pull(ecoReg)
  if (length(ecos) < 4) return(NULL)
  eco_ts <- data %>%
    filter(ecoReg %in% ecos) %>%
    group_by(ecoReg, year) %>%
    summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop") %>%
    pivot_wider(id_cols = ecoReg, names_from = year, values_from = mean_effect) %>%
    arrange(ecoReg)
  if (nrow(eco_ts) < 4) return(NULL)
  ts_matrix <- eco_ts %>% select(-ecoReg) %>% as.matrix()
  if (all(is.na(ts_matrix))) return(NULL)
  coords_matrix <- eco_coords %>%
    filter(ecoReg %in% eco_ts$ecoReg) %>%
    arrange(ecoReg) %>% select(lon, lat) %>% as.matrix()
  dist_mat <- dist(coords_matrix)
  cor_mat  <- cor(t(ts_matrix), use = "pairwise.complete")
  if (all(is.na(cor_mat))) return(NULL)
  tibble(
    distance = as.numeric(dist_mat)[upper.tri(as.matrix(dist_mat))],
    correlation = as.numeric(cor_mat)[upper.tri(cor_mat)],
    continent = continent_label
  ) %>% drop_na()
}

dd_combined <- bind_rows(
  distance_decay_timeseries("North America", temporal_data_clean, ecoregion_coords_clean),
  distance_decay_timeseries("Europe",        temporal_data_clean, ecoregion_coords_clean)
)

if (!is.null(dd_combined) && nrow(dd_combined) > 10) {
  ggplot(dd_combined, aes(distance, correlation, color = continent)) +
    geom_point(alpha = 0.4) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.15) +
    facet_wrap(~continent, scales = "free") +
    theme_minimal() +
    labs(title = "Distance–Decay (Time Series)", x = "Distance (degrees)", y = "Correlation")
} else {
  tibble(note = "Insufficient pairs for distance–decay") 
}

# 物种概览
species_summary <- temporal_data_clean %>%
  group_by(species) %>%
  summarise(
    n_records    = n(),
    n_ecoregions = n_distinct(ecoReg),
    n_years      = n_distinct(year),
    mean_effect  = mean(delta_yr, na.rm = TRUE),
    sd_effect    = sd(delta_yr,   na.rm = TRUE),
    max_effect   = max(delta_yr,  na.rm = TRUE),
    min_effect   = min(delta_yr,  na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(n_records))

head(species_summary, 20)
# A tibble: 20 x 8
   species       n_records n_ecoregions n_years mean_effect sd_effect max_effect
   <chr>             <int>        <int>   <int>       <dbl>     <dbl>      <dbl>
 1 abiesProcera        225            4      59  0.00000978     2.23        2.86
 2 abiesAmabilis       176            3      65 -0.0000812      2.14        3.06
 3 quercusRubra        159           15      35 -0.000137       0.996       2.81
 4 acerRubrum          148            7      35  0.0000141      1.09        2.91
 5 fraxinusAmer~       143            7      36 -0.0000154      0.992       2.51
 6 piceaAbies          137           16      65  0.0000293      1.16        2.89
 7 quercusVelut~       128            7      35  0.000353       0.901       2.86
 8 acerSaccharu        127            8      35  0.000213       1.06        2.59
 9 prunusSeroti~       126           12      28  0.0000121      0.678       2.24
10 quercusAlba         121            9      35  0.000159       0.981       2.7 
11 fagusSylvatic       119            8      46 -0.000257       1.73        2.38
12 abiesMagnific       117            3      60  0.000130       2.14        3.21
13 fagusGrandifo       114            7      35  0.000186       1.02        2.1 
14 abiesConcolor       113            7      45  0.000169       2.03        3.49
15 tsugaCanadens       111           10      35 -0.000205       1.13        2.76
16 caryaOvalis-g        95            5      35  0.000233       0.897       2.54
17 pinusStrobus         94           12      26 -0.00000815     0.914       1.82
18 tiliaAmerican        94            9      35  0.000268       0.860       2.36
19 pinusSylvestr        93           13      51 -0.000128       0.973       2.13
20 cornusFlorida        92            6      35 -0.00271        0.971       2.27
# i 1 more variable: min_effect <dbl>
species_summary %>% arrange(desc(sd_effect)) %>% head(30)
# A tibble: 30 x 8
   species       n_records n_ecoregions n_years mean_effect sd_effect max_effect
   <chr>             <int>        <int>   <int>       <dbl>     <dbl>      <dbl>
 1 betulaNeoala~        49            1      49 -0.000673        2.33       2.27
 2 abiesGrandis         57            2      57  0.000754        2.25       2.98
 3 abiesProcera        225            4      59  0.00000978      2.23       2.86
 4 abiesMagnific       117            3      60  0.000130        2.14       3.21
 5 abiesAmabilis       176            3      65 -0.0000812       2.14       3.06
 6 abiesConcolor       113            7      45  0.000169        2.03       3.49
 7 abiesLasiocar        75            3      67  0.000248        2.01       3.27
 8 thujaOccident        33            6      18 -0.000636        1.81       2.86
 9 fagusSylvatic       119            8      46 -0.000257        1.73       2.38
10 abiesCephalon        25            4      16 -0.000547        1.66       2.8 
# i 20 more rows
# i 1 more variable: min_effect <dbl>
# 按物种计算 Moran's I(逐年平均后)
calculate_morans_by_species <- function(species_name, data, eco_coords) {
  species_data <- data %>%
    filter(species == species_name) %>%
    group_by(ecoReg, year) %>%
    summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")
  if (n_distinct(species_data$ecoReg) < 4) return(NULL)
  years_with_data <- unique(species_data$year)
  results <- c()
  for (yr in years_with_data) {
    year_subset <- species_data %>%
      filter(year == yr) %>%
      left_join(eco_coords %>% select(ecoReg, lon, lat, continent), by = "ecoReg") %>%
      drop_na(lon, lat)
    if (nrow(year_subset) < 4) next
    coords <- year_subset %>% select(lon, lat) %>% as.matrix()
    W <- as.matrix(dist(coords))
    W[W > 15] <- 0
    W[W > 0] <- 1 / (W[W > 0])
    diag(W) <- 0
    row_sums <- rowSums(W)
    if (sum(row_sums) == 0) next
    W <- W / pmax(row_sums, 1e-10)
    y  <- year_subset$mean_effect
    yc <- y - mean(y)
    n  <- length(y)
    den <- as.numeric(t(yc) %*% yc)
    if (den <= 0) next
    I <- (n / sum(W)) * (as.numeric(t(yc) %*% W %*% yc)) / den
    results <- c(results, I)
  }
  if (length(results) == 0) return(NULL)
  tibble(
    species = species_name,
    n_years_analyzed = length(results),
    mean_morans_i = mean(results, na.rm = TRUE),
    sd_morans_i   = sd(results,   na.rm = TRUE),
    pct_positive  = 100 * mean(results > 0)
  )
}

top_species <- species_summary %>% filter(n_records >= 10) %>% pull(species) %>% head(15)
species_morans <- map_df(top_species, ~calculate_morans_by_species(.x, temporal_data_clean, ecoregion_coords_clean))

species_morans %>% arrange(desc(abs(mean_morans_i)))
# A tibble: 13 x 5
   species          n_years_analyzed mean_morans_i sd_morans_i pct_positive
   <chr>                       <int>         <dbl>       <dbl>        <dbl>
 1 fagusGrandifo                  17        -0.324       0.296        11.8 
 2 abiesProcera                   55        -0.312       0.181         5.45
 3 acerRubrum                     25        -0.289       0.177         4   
 4 acerSaccharu                   17        -0.235       0.209         5.88
 5 quercusVelutina                17        -0.228       0.132         5.88
 6 tsugaCanadens                   8        -0.226       0.205        25   
 7 quercusAlba                    17        -0.220       0.180        11.8 
 8 prunusSerotina                 18        -0.211       0.134         5.56
 9 fraxinusAmerican               25        -0.207       0.197         8   
10 fagusSylvatic                   7        -0.190       0.264        28.6 
11 quercusRubra                   18        -0.173       0.269        16.7 
12 abiesConcolor                   2        -0.169       0.978        50   
13 piceaAbies                      6        -0.133       0.287        16.7 
temporal_by_spp <- temporal_data_clean %>%
  group_by(species, year, continent) %>%
  summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")

species_corr_results <- temporal_by_spp %>%
  filter(continent %in% c("North America", "Europe")) %>%
  group_by(species) %>%
  group_modify(~{
    na_df <- .x %>% filter(continent == "North America") %>% select(year, na = mean_effect)
    eu_df <- .x %>% filter(continent == "Europe") %>% select(year, eu = mean_effect)
    both <- inner_join(na_df, eu_df, by = "year")
    if (nrow(both) < 3) return(tibble())
    if (sd(both$na, na.rm = TRUE) == 0 || sd(both$eu, na.rm = TRUE) == 0) return(tibble())
    ct <- suppressWarnings(cor.test(both$na, both$eu, method = "spearman"))
    tibble(n_years = nrow(both), rho = unname(ct$estimate), p_value = ct$p.value, significant = ct$p.value < 0.05)
  }) %>%
  ungroup() %>%
  arrange(desc(abs(rho)))

head(species_corr_results, 20)
# A tibble: 20 x 5
   species          n_years    rho p_value significant
   <chr>              <int>  <dbl>   <dbl> <lgl>      
 1 asiminaTriloba         3 -1      0.333  FALSE      
 2 diospyroVirginia       4 -1      0.0833 FALSE      
 3 calocedrDecurren       6 -0.829  0.0583 FALSE      
 4 liquidamStyracif       4  0.8    0.333  FALSE      
 5 quercusMacrocar        4  0.8    0.333  FALSE      
 6 taxodiumDistichu       6 -0.657  0.175  FALSE      
 7 prunusAvium            9  0.65   0.0666 FALSE      
 8 piceaAbies             4 -0.6    0.417  FALSE      
 9 gleditsiTriacant       6  0.543  0.297  FALSE      
10 acerNegundo            3  0.5    1      FALSE      
11 pinusBanksian          3  0.5    1      FALSE      
12 tiliaCordata           3  0.5    1      FALSE      
13 abiesConcolor          7  0.429  0.354  FALSE      
14 acerPlatanoi          13  0.401  0.176  FALSE      
15 liriodenTulipife       4  0.4    0.75   FALSE      
16 quercusCoccinea        4  0.4    0.75   FALSE      
17 tsugaCanadens          7  0.393  0.396  FALSE      
18 juglansNigra           7 -0.321  0.498  FALSE      
19 quercusRobur           5 -0.3    0.683  FALSE      
20 ilexAquifoli           9  0.233  0.552  FALSE      
all_species_ts <- temporal_data_clean %>%
  group_by(year, continent) %>%
  summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")

all_species_wide <- all_species_ts %>%
  pivot_wider(id_cols = year, names_from = continent, values_from = mean_effect) %>%
  drop_na()

if (nrow(all_species_wide) >= 3) {
  all_corr <- cor.test(all_species_wide$`North America`, all_species_wide$Europe, method = "spearman")
  all_corr
} else {
  tibble(note = "Insufficient years for aggregate correlation")
}

    Spearman's rank correlation rho

data:  all_species_wide$`North America` and all_species_wide$Europe
S = 36574, p-value = 0.1088
alternative hypothesis: true rho is not equal to 0
sample estimates:
     rho 
0.200743 
plot_species <- species_summary %>%
  filter(n_records >= 30) %>%
  slice(1:6) %>%
  pull(species)

plot_data <- temporal_data_clean %>%
  filter(species %in% plot_species) %>%
  group_by(species, year, continent) %>%
  summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")

ggplot(plot_data, aes(year, mean_effect, color = continent, linetype = species)) +
  geom_line(alpha = 0.7, linewidth = 0.8) +
  facet_wrap(~species) +
  theme_minimal() +
  labs(title = "不同物种的大陆间 masting 模式",
       x = "Year", y = "Mean Annual Effect",
       color = "Continent", linetype = "Species")

# 1) 滞后相关(±3 年)
lag_correlations <- function(all_species_wide, max_lag = 3) {
  if (nrow(all_species_wide) < 3) return(tibble())
  na_vec <- all_species_wide$`North America`
  eu_vec <- all_species_wide$Europe
  out <- map_df(-max_lag:max_lag, ~{
    l <- .x
    if (l < 0)   { x <- na_vec[1:(length(na_vec)+l)]; y <- eu_vec[(-l+1):length(eu_vec)] }
    else if(l>0){ x <- na_vec[(l+1):length(na_vec)]; y <- eu_vec[1:(length(eu_vec)-l)] }
    else         { x <- na_vec; y <- eu_vec }
    if (length(x) >= 3 && sd(x, na.rm = TRUE) > 0 && sd(y, na.rm = TRUE) > 0) {
      ct <- suppressWarnings(cor.test(x, y, method = "spearman"))
      tibble(lag = l, rho = unname(ct$estimate), p_value = ct$p.value)
    } else tibble()
  })
  out
}

lag_results <- lag_correlations(all_species_wide, 3)
lag_results
# A tibble: 7 x 3
    lag      rho p_value
  <int>    <dbl>   <dbl>
1    -3  0.0969   0.453 
2    -2 -0.0204   0.874 
3    -1 -0.117    0.357 
4     0  0.201    0.109 
5     1 -0.164    0.196 
6     2 -0.00461  0.971 
7     3  0.278    0.0289
# 2) 极端年份(±2 SD)
if (nrow(all_species_wide) >= 3) {
  extreme_years <- all_species_wide %>%
    mutate(
      na_extreme = abs(`North America` - mean(`North America`, na.rm = TRUE)) > 2*sd(`North America`, na.rm = TRUE),
      eu_extreme = abs(Europe - mean(Europe, na.rm = TRUE)) > 2*sd(Europe, na.rm = TRUE),
      both_extreme = na_extreme & eu_extreme
    )
  extreme_years
} else {
  tibble(note = "Insufficient years for extreme-year detection")
}
# A tibble: 65 x 6
    year `North America`  Europe na_extreme eu_extreme both_extreme
   <dbl>           <dbl>   <dbl> <lgl>      <lgl>      <lgl>       
 1  1961           0.169  0.301  FALSE      FALSE      FALSE       
 2  1962           1.58  -0.862  FALSE      FALSE      FALSE       
 3  1963          -2.20  -1.54   TRUE       TRUE       TRUE        
 4  1964          -0.308 -1.58   FALSE      TRUE       FALSE       
 5  1965           0.570 -0.698  FALSE      FALSE      FALSE       
 6  1966          -0.964  0.26   FALSE      FALSE      FALSE       
 7  1967          -0.865 -1.30   FALSE      FALSE      FALSE       
 8  1968           2.28  -0.0600 TRUE       FALSE      FALSE       
 9  1969          -2.34  -0.557  TRUE       FALSE      FALSE       
10  1970          -0.533 -0.374  FALSE      FALSE      FALSE       
# i 55 more rows
# 3) 变异分解(物种贡献)
variance_decomposition <- temporal_data_clean %>%
  group_by(species, continent) %>%
  summarise(variance = var(delta_yr, na.rm = TRUE), n = n(), .groups = "drop") %>%
  group_by(continent) %>%
  mutate(pct_variance = 100 * variance / sum(variance, na.rm = TRUE)) %>%
  arrange(continent, desc(pct_variance))
variance_decomposition %>% group_by(continent) %>% slice_head(n = 10)
# A tibble: 20 x 5
# Groups:   continent [2]
   species          continent     variance     n pct_variance
   <chr>            <chr>            <dbl> <int>        <dbl>
 1 fagusSylvatic    Europe            3.01   119         4.33
 2 abiesCephalon    Europe            2.75    25         3.95
 3 prunusSerotina   Europe            2.69     2         3.87
 4 carpinusBetulus  Europe            2.69    52         3.87
 5 sorbusAucupari   Europe            2.58    60         3.70
 6 juniperuCommunis Europe            2.46     2         3.54
 7 quercusRotundif  Europe            2.14    32         3.08
 8 quercusCanarien  Europe            2.07    14         2.97
 9 abiesAlba        Europe            1.92    87         2.76
10 abiesPinsapo     Europe            1.72    22         2.47
11 betulaNeoalask   North America     5.44    49         3.31
12 abiesGrandis     North America     5.05    57         3.07
13 abiesProcera     North America     4.97   225         3.03
14 abiesMagnific    North America     4.59   117         2.79
15 abiesAmabilis    North America     4.57   176         2.79
16 abiesConcolor    North America     4.41   104         2.69
17 abiesLasiocar    North America     4.06    75         2.47
18 piceaOmorika     North America     3.65     2         2.22
19 thujaOccident    North America     3.49    31         2.13
20 ilexDecidua      North America     2.75    28         1.67
# 4) 时间趋势(线性)
if (nrow(all_species_wide) >= 3) {
  trend_df <- all_species_wide %>% arrange(year) %>% mutate(time_index = row_number())
  lm_na <- lm(`North America` ~ time_index, data = trend_df)
  lm_eu <- lm(Europe ~ time_index, data = trend_df)
  list(
    NA_trend = broom::tidy(lm_na),
    EU_trend = broom::tidy(lm_eu)
  )
} else {
  tibble(note = "Insufficient years for trend test")
}
$NA_trend
# A tibble: 2 x 5
  term        estimate std.error statistic p.value
  <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 (Intercept) -0.175     0.246      -0.712   0.479
2 time_index   0.00266   0.00647     0.412   0.682

$EU_trend
# A tibble: 2 x 5
  term        estimate std.error statistic p.value
  <chr>          <dbl>     <dbl>     <dbl>   <dbl>
1 (Intercept) -0.314     0.151       -2.07  0.0421
2 time_index   0.00693   0.00398      1.74  0.0868
# 5) 生态区聚类(按欧氏距离,Ward.D2)
ecoregion_clustering <- function(continent_label, data, eco_coords) {
  ecos <- eco_coords %>% filter(continent == continent_label) %>% pull(ecoReg)
  eco_ts <- data %>%
    filter(ecoReg %in% ecos) %>%
    group_by(ecoReg, year) %>%
    summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop") %>%
    pivot_wider(id_cols = ecoReg, names_from = year, values_from = mean_effect) %>%
    arrange(ecoReg)
  if (nrow(eco_ts) < 4) return(NULL)
  X <- eco_ts %>% select(-ecoReg) %>% as.matrix()
  rownames(X) <- eco_ts$ecoReg
  valid <- rowSums(!is.na(X)) > 0
  X <- X[valid, , drop = FALSE]
  if (nrow(X) < 4) return(NULL)
  for (i in 1:nrow(X)) {
    rmean <- mean(X[i, ], na.rm = TRUE)
    X[i, is.na(X[i, ])] <- rmean
  }
  X[!is.finite(X)] <- 0
  hc <- hclust(dist(X, method = "euclidean"), method = "ward.D2")
  plot(hc, main = paste("ecoregion -", continent_label))
  invisible(hc)
}

par(mfrow = c(1,2))
hc_na <- ecoregion_clustering("North America", temporal_data_clean, ecoregion_coords_clean)
hc_eu <- ecoregion_clustering("Europe",        temporal_data_clean, ecoregion_coords_clean)

par(mfrow = c(1,1))
# 6) 物种组成
species_composition <- temporal_data_clean %>%
  group_by(year, continent, species) %>%
  summarise(count = n(), .groups = "drop") %>%
  pivot_wider(id_cols = c(year, continent), names_from = species, values_from = count, values_fill = 0) %>%
  rowwise() %>% mutate(total = sum(c_across(-c(year, continent)))) %>% ungroup()

comp_pct <- species_composition %>%
  pivot_longer(cols = -c(year, continent, total), names_to = "species", values_to = "count") %>%
  mutate(pct = if_else(total > 0, 100 * count / total, NA_real_)) %>%
  group_by(continent, species) %>%
  summarise(mean_pct = mean(pct, na.rm = TRUE), .groups = "drop") %>%
  arrange(continent, desc(mean_pct))

head(comp_pct, 15)
# A tibble: 15 x 3
   continent species          mean_pct
   <chr>     <chr>               <dbl>
 1 Europe    piceaAbies          26.8 
 2 Europe    pinusSylvestr       21.1 
 3 Europe    fagusSylvatic        9.76
 4 Europe    abiesAlba            3.08
 5 Europe    aesculusTurbinat     2.85
 6 Europe    sorbusAucupari       2.74
 7 Europe    pinusCembra          2.42
 8 Europe    castaneaSativa       2.24
 9 Europe    quercusPetraea       1.95
10 Europe    quercusCerris        1.76
11 Europe    pinusHalepens        1.74
12 Europe    larixDecidua         1.72
13 Europe    pinusPinea           1.62
14 Europe    quercusPubescen      1.38
15 Europe    quercusRotundif      1.27
# 年度均值 + 标准差带(按大陆)
ts_ribbon <- temporal_data_clean %>%
  group_by(year, continent) %>%
  summarise(mean_delta = mean(delta_yr, na.rm = TRUE),
            sd_delta   = sd(delta_yr,   na.rm = TRUE),
            .groups = "drop")

ggplot(ts_ribbon, aes(year, mean_delta, color = continent, fill = continent)) +
  geom_ribbon(aes(ymin = mean_delta - sd_delta, ymax = mean_delta + sd_delta),
              alpha = 0.15, color = NA) +
  geom_line(linewidth = 1) +
  theme_minimal() +
  labs(title = "Mean Annual Effect",
       x = "Year", y = "Mean Annual Effect (±1 SD)",
       color = "Continent", fill = "Continent")

p_hist <- ggplot(morans_results %>% drop_na(morans_i),
                 aes(morans_i, fill = continent, color = continent)) +
  geom_histogram(alpha = 0.25, position = "identity", bins = 30) +
  theme_minimal() +
  labs(title = "Moran's I distribution", x = "Moran's I", y = "Count")

p_line <- ggplot(morans_results %>% drop_na(morans_i),
                 aes(year, morans_i, color = continent)) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_line(linewidth = 1) + geom_point(size = 1.8) +
  theme_minimal() +
  labs(title = "Moran's I year trend", x = "Year", y = "Moran's I")

gridExtra::grid.arrange(p_hist, p_line, ncol = 2)

if (exists("species_morans") && nrow(species_morans) > 0) {
  species_morans %>%
    arrange(desc(abs(mean_morans_i))) %>%
    slice(1:min(15, n())) %>%
    mutate(species = factor(species, levels = rev(species))) %>%
    ggplot(aes(x = species, y = mean_morans_i)) +
    geom_segment(aes(xend = species, y = 0, yend = mean_morans_i), linewidth = 1, color = "gray60") +
    geom_point(size = 3, aes(color = mean_morans_i > 0)) +
    coord_flip() +
    theme_minimal() +
    labs(title = "species average  Moran's I (top 15[definite value])",
         x = "Species", y = "Mean Moran's I", color = "I > 0?")
} else {
  tibble(note = "无可用的物种 Moran's I 结果")
}

if (exists("species_corr_results") && nrow(species_corr_results) > 0) {
  species_corr_results %>%
    mutate(sig = if_else(significant, "p < 0.05", "ns")) %>%
    arrange(desc(abs(rho))) %>%
    mutate(lbl = if_else(row_number() <= 12, species, NA_character_)) %>%
    ggplot(aes(x = n_years, y = rho, color = sig)) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point(size = 2.5, alpha = 0.8) +
    ggrepel::geom_text_repel(aes(label = lbl), size = 3, na.rm = TRUE, max.overlaps = 30) +
    theme_minimal() +
    labs(title = "Species Spearman ρ",
         x = "Years with data (both continents)", y = "Spearman ρ",
         color = "Significance")
} else {
  tibble(note = "无可用的物种 NA–EU 相关结果")
}

# 选择数据较完整的一个年份(两洲均≥8个生态区)
year_choice <- temporal_data_clean %>%
  count(year, continent, name = "n") %>%
  group_by(year) %>%
  summarise(min_n = min(n), .groups = "drop") %>%
  arrange(desc(min_n), desc(year)) %>%
  slice(1) %>%
  pull(year)

eco_year <- temporal_data_clean %>%
  filter(year == year_choice) %>%
  group_by(ecoReg, continent) %>%
  summarise(delta_mean = mean(delta_yr, na.rm = TRUE), .groups = "drop") %>%
  left_join(ecoregion_coords_clean, by = c("ecoReg", "continent"))

ggplot(eco_year, aes(lon, lat, color = delta_mean)) +
  geom_point(size = 3, alpha = 0.9) +
  facet_wrap(~continent) +
  scale_color_gradient2(low = "#4575b4", mid = "white", high = "#d73027", midpoint = 0) +
  theme_minimal() +
  labs(title = paste0("space distribution and annual signal(", year_choice, ")"),
       x = "Longitude", y = "Latitude", color = "Mean effect")

var_top10 <- temporal_data_clean %>%
  group_by(species, continent) %>%
  summarise(variance = var(delta_yr, na.rm = TRUE), .groups = "drop") %>%
  group_by(continent) %>%
  mutate(pct = 100 * variance / sum(variance, na.rm = TRUE)) %>%
  arrange(continent, desc(pct)) %>%
  group_by(continent) %>% slice_head(n = 10) %>%
  ungroup() %>%
  mutate(species = fct_reorder(species, pct))

ggplot(var_top10, aes(pct, species, fill = continent)) +
  geom_col(alpha = 0.85) +
  facet_wrap(~continent, scales = "free_y") +
  theme_minimal() +
  labs(title = "top 10 variance distributed species",
       x = "Percent variance (%)", y = "Species", fill = "Continent")

p_na_ts <- all_species_ts %>%
  filter(continent == "North America") %>%
  ggplot(aes(year, mean_effect)) +
  geom_line(color = "#3b8bc2", linewidth = 1) +
  theme_minimal() + labs(title = "North America", x = "Year", y = "Mean effect")

p_eu_ts <- all_species_ts %>%
  filter(continent == "Europe") %>%
  ggplot(aes(year, mean_effect)) +
  geom_line(color = "#c43c39", linewidth = 1) +
  theme_minimal() + labs(title = "Europe", x = "Year", y = "Mean effect")

gridExtra::grid.arrange(p_na_ts, p_eu_ts, ncol = 2)

# 1. 按物种和大陆分组,计算各自的 sd_effect (标准差)
species_volatility <- temporal_data_clean %>%
  group_by(species, continent) %>%
  summarise(
    sd_effect = sd(delta_yr, na.rm = TRUE),
    n_records = n(),
    .groups = "drop"
  ) %>%
  filter(n_records >= 30) %>% # 仅保留有足够数据的物种
  drop_na(sd_effect)

# 2. 提取每个大陆波动性最强的前 15 个物种
top_volatility <- species_volatility %>%
  group_by(continent) %>%
  slice_max(sd_effect, n = 15) %>%
  ungroup()

# 3. 绘制条形图
ggplot(top_volatility, 
       aes(x = sd_effect, 
           y = reorder(species, sd_effect), 
           fill = continent)) +
  geom_col() +
  facet_wrap(~continent, scales = "free_y") +
  theme_minimal() +
  guides(fill = "none") +
  labs(
    title = "species sd_effect Top 15",
    x = "delta_yr sd effect)",
    y = "Species"
  )

top_drivers_eu <- variance_decomposition %>% 
  filter(continent == "Europe") %>% 
  slice_head(n = 5) %>% 
  pull(species)

top_drivers_na <- variance_decomposition %>% 
  filter(continent == "North America") %>% 
  slice_head(n = 5) %>% 
  pull(species)

top_drivers_all <- c(top_drivers_eu, top_drivers_na)

# 2. 筛选出这些 "主力物种" 的数据
plot_data_drivers <- temporal_data_clean %>%
  filter(species %in% top_drivers_all) %>%
  group_by(species, year, continent) %>%
  summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")

# 3. 绘图
ggplot(plot_data_drivers, 
       aes(x = year, y = mean_effect, color = continent)) +
  geom_line(linewidth = 0.8, alpha = 0.9) +
  geom_point(size = 0.5) +
  facet_wrap(~species, scales = "free_y") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
  labs(
    title = "variance Top 5 delta_yr",
    x = "Year",
    y = "Mean Annual Effect (delta_yr)",
    color = "Continent"
  )