options(dplyr.summarise.inform = FALSE)
library(tidyverse)
library(stringdist)
library(ggplot2)
library(gridExtra)synchrony
na_plots <- read.csv("C:/Users/mh471/Downloads/radiation/na_plotData.csv")
eu_plots <- read.csv("C:/Users/mh471/Downloads/radiation/eu_plotData.csv")
delta_yr_wide <- read.csv("C:/Users/mh471/Downloads/continent comparison/east_europe_west_1105/randYrEst.csv")
eu_plots <- eu_plots %>% rename(ecoRegWWF = ecoReg)
delta_yr <- delta_yr_wide %>%
pivot_longer(cols = starts_with("X"), names_to = "year", values_to = "delta_yr") %>%
mutate(year = as.numeric(str_remove(year, "^X")))# 模糊匹配函数
find_best_match <- function(delta_name, plot_names, threshold = 0.25) {
delta_clean <- tolower(gsub(" ", "", delta_name))
distances <- stringdist(delta_clean, tolower(gsub(" ", "", plot_names)), method = "jw")
best_idx <- which.min(distances)
best_distance <- distances[best_idx]
best_match <- plot_names[best_idx]
if (best_distance < threshold) list(match = best_match, distance = best_distance)
else list(match = NA_character_, distance = best_distance)
}
delta_yr_ecos <- unique(delta_yr$ecoReg)
plot_ecos_na <- unique(na_plots$ecoRegWWF)
plot_ecos_eu <- unique(eu_plots$ecoRegWWF)
plot_ecos_all <- unique(c(plot_ecos_na, plot_ecos_eu))
name_mapping <- tibble(
delta_yr_name = delta_yr_ecos,
plot_name = NA_character_,
distance = NA_real_,
in_na = FALSE,
in_eu = FALSE
)
for (i in seq_len(nrow(name_mapping))) {
res <- find_best_match(name_mapping$delta_yr_name[i], plot_ecos_all)
name_mapping$plot_name[i] <- res$match
name_mapping$distance[i] <- res$distance
if (!is.na(res$match)) {
name_mapping$in_na[i] <- res$match %in% plot_ecos_na
name_mapping$in_eu[i] <- res$match %in% plot_ecos_eu
}
}
# 仅保留在 delta_yr 中有变异的生态区
useful_ecos <- delta_yr %>%
group_by(ecoReg) %>%
summarise(var = var(delta_yr, na.rm = TRUE), .groups = "drop") %>%
filter(var > 0, !is.na(var)) %>%
pull(ecoReg)
name_mapping_useful <- name_mapping %>%
filter(delta_yr_name %in% useful_ecos, !is.na(plot_name))
# 提取质心
extract_centroids <- function(plots_data, eco_lookup, region_label) {
plots_data %>%
filter(ecoRegWWF %in% eco_lookup$plot_name) %>%
group_by(ecoRegWWF) %>%
summarise(lon = mean(lon, na.rm = TRUE),
lat = mean(lat, na.rm = TRUE),
n_plots = n(), .groups = "drop") %>%
left_join(eco_lookup %>% select(delta_yr_name, plot_name),
by = c("ecoRegWWF" = "plot_name")) %>%
transmute(ecoReg = delta_yr_name, lon, lat, n_plots, continent = region_label) %>%
drop_na(ecoReg)
}
na_ecos_clean <- extract_centroids(na_plots, name_mapping_useful %>% filter(in_na), "North America")
eu_ecos_clean <- extract_centroids(eu_plots, name_mapping_useful %>% filter(in_eu), "Europe")
ecoregion_coords_clean <- bind_rows(na_ecos_clean, eu_ecos_clean) %>%
group_by(ecoReg) %>%
arrange(desc(n_plots)) %>%
slice(1) %>%
ungroup() %>%
arrange(ecoReg)
# 时间序列数据
temporal_data_clean <- delta_yr %>%
filter(ecoReg %in% ecoregion_coords_clean$ecoReg) %>%
left_join(ecoregion_coords_clean %>% select(ecoReg, continent), by = "ecoReg") %>%
filter(!is.na(continent), delta_yr != 0)
# 快速浏览
head(name_mapping_useful)# A tibble: 6 x 5
delta_yr_name plot_name distance in_na in_eu
<chr> <chr> <dbl> <lgl> <lgl>
1 Alpsconiferandmixedforests Alps con~ 0 TRUE TRUE
2 Atlanticmixedforests Atlantic~ 0 FALSE TRUE
3 DinaricMountainsmixedforests Dinaric ~ 0 FALSE TRUE
4 NortheasternSpainandSouthernFranceMediterranea~ Northeas~ 0 FALSE TRUE
5 Pannonianmixedforests Pannonia~ 0 FALSE TRUE
6 Pyreneesconiferandmixedforests Pyrenees~ 0 FALSE TRUE
head(ecoregion_coords_clean)# A tibble: 6 x 5
ecoReg lon lat n_plots continent
<chr> <dbl> <dbl> <int> <chr>
1 AegeanandWesternTurkeysclerophyllousandmixedfo~ 22.9 39.0 37 Europe
2 Alpsconiferandmixedforests 8.87 45.9 3930 Europe
3 Appalachian-BlueRidgeforests -81.2 37.2 7181 North Am~
4 Appalachianmixedmesophyticforests -82.4 37.8 9398 North Am~
5 Appeninedeciduousmontaneforests 12.4 43.0 826 Europe
6 Atlanticcoastalpinebarrens -74.2 39.9 563 North Am~
head(temporal_data_clean)# A tibble: 6 x 6
species ecoReg log year delta_yr continent
<chr> <chr> <chr> <dbl> <dbl> <chr>
1 abiesAlba Alpsconiferandmixedforests abies_europe_ou~ 2003 1.75 Europe
2 abiesAlba Alpsconiferandmixedforests abies_europe_ou~ 2004 -1.18 Europe
3 abiesAlba Alpsconiferandmixedforests abies_europe_ou~ 2005 -1.14 Europe
4 abiesAlba Alpsconiferandmixedforests abies_europe_ou~ 2006 1.49 Europe
5 abiesAlba Alpsconiferandmixedforests abies_europe_ou~ 2007 -1.67 Europe
6 abiesAlba Alpsconiferandmixedforests abies_europe_ou~ 2008 -1.6 Europe
# 年度聚合
temporal_sync <- temporal_data_clean %>%
group_by(year, continent) %>%
summarise(mean_delta = mean(delta_yr, na.rm = TRUE),
sd_delta = sd(delta_yr, na.rm = TRUE),
n_regions = n_distinct(ecoReg),
.groups = "drop") %>%
arrange(year)
# 大陆间时间相关
ts_wide <- temporal_sync %>%
select(year, continent, mean_delta) %>%
pivot_wider(names_from = continent, values_from = mean_delta) %>%
drop_na()
cor_temporal <- NULL
if (nrow(ts_wide) > 3 &&
sd(ts_wide$`North America`, na.rm = TRUE) > 0 &&
sd(ts_wide$Europe, na.rm = TRUE) > 0) {
cor_temporal <- cor.test(ts_wide$`North America`, ts_wide$Europe, method = "spearman")
}
# Moran's I(基于距离阈值的行标准化权重)
calculate_morans_i <- function(year_val, continent_label, data, eco_coords) {
subset_data <- data %>%
filter(year == year_val, continent == continent_label) %>%
left_join(eco_coords %>% select(ecoReg, lon, lat), by = "ecoReg") %>%
drop_na(lon, lat, delta_yr)
if (nrow(subset_data) < 4) return(NA_real_)
eco_data <- subset_data %>%
group_by(ecoReg, lon, lat) %>%
summarise(delta_mean = mean(delta_yr, na.rm = TRUE), .groups = "drop")
if (nrow(eco_data) < 4) return(NA_real_)
coords <- eco_data %>% select(lon, lat) %>% as.matrix()
W <- as.matrix(dist(coords))
threshold <- if (nrow(eco_data) < 10) 20 else 15
W[W > threshold] <- 0
W[W > 0] <- 1 / (W[W > 0])
diag(W) <- 0
row_sums <- rowSums(W)
if (sum(row_sums) == 0) return(NA_real_)
W <- W / pmax(row_sums, 1e-10)
y <- eco_data$delta_mean
yc <- y - mean(y)
n <- length(y)
num <- as.numeric(t(yc) %*% W %*% yc)
den <- as.numeric(t(yc) %*% yc)
if (den <= 0) return(NA_real_)
(n / sum(W)) * num / den
}
years <- sort(unique(temporal_data_clean$year))
morans_results <- expand_grid(year = years, continent = c("North America", "Europe")) %>%
mutate(morans_i = map2_dbl(year, continent,
~calculate_morans_i(.x, .y, temporal_data_clean, ecoregion_coords_clean)))
morans_summary <- morans_results %>%
drop_na(morans_i) %>%
group_by(continent) %>%
summarise(n_valid_years = n(),
mean_I = mean(morans_i), sd_I = sd(morans_i),
median_I = median(morans_i), min_I = min(morans_i),
max_I = max(morans_i),
pct_positive = 100 * mean(morans_i > 0),
.groups = "drop")
temporal_sync %>% head()# A tibble: 6 x 5
year continent mean_delta sd_delta n_regions
<dbl> <chr> <dbl> <dbl> <int>
1 1959 North America 1.39 NA 1
2 1960 North America -1.16 NA 1
3 1961 Europe 0.301 0.926 1
4 1961 North America 0.169 1.08 3
5 1962 Europe -0.862 0.873 1
6 1962 North America 1.58 2.00 4
morans_summary# A tibble: 2 x 8
continent n_valid_years mean_I sd_I median_I min_I max_I pct_positive
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Europe 36 -0.123 0.297 -0.106 -1.18 0.339 33.3
2 North America 64 0.110 0.349 0.0817 -0.541 0.984 65.6
cor_temporal
Spearman's rank correlation rho
data: ts_wide$`North America` and ts_wide$Europe
S = 36574, p-value = 0.1088
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.200743
fig1 <- ggplot(temporal_sync, aes(year, mean_delta, color = continent)) +
geom_line(linewidth = 1) + geom_point(size = 2) +
theme_minimal() +
labs(title = "Temporal Synchrony", x = "Year", y = "Mean Annual Effect")
fig2 <- ggplot(morans_results %>% drop_na(), aes(year, morans_i, color = continent)) +
geom_line(linewidth = 1) + geom_point(size = 2) +
geom_hline(yintercept = 0, linetype = "dashed") +
theme_minimal() +
labs(title = "Spatial Synchrony (Moran's I)", x = "Year", y = "I")
grid.arrange(fig1, fig2, nrow = 2)distance_decay_timeseries <- function(continent_label, data, eco_coords) {
ecos <- eco_coords %>% filter(continent == continent_label) %>% pull(ecoReg)
if (length(ecos) < 4) return(NULL)
eco_ts <- data %>%
filter(ecoReg %in% ecos) %>%
group_by(ecoReg, year) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(id_cols = ecoReg, names_from = year, values_from = mean_effect) %>%
arrange(ecoReg)
if (nrow(eco_ts) < 4) return(NULL)
ts_matrix <- eco_ts %>% select(-ecoReg) %>% as.matrix()
if (all(is.na(ts_matrix))) return(NULL)
coords_matrix <- eco_coords %>%
filter(ecoReg %in% eco_ts$ecoReg) %>%
arrange(ecoReg) %>% select(lon, lat) %>% as.matrix()
dist_mat <- dist(coords_matrix)
cor_mat <- cor(t(ts_matrix), use = "pairwise.complete")
if (all(is.na(cor_mat))) return(NULL)
tibble(
distance = as.numeric(dist_mat)[upper.tri(as.matrix(dist_mat))],
correlation = as.numeric(cor_mat)[upper.tri(cor_mat)],
continent = continent_label
) %>% drop_na()
}
dd_combined <- bind_rows(
distance_decay_timeseries("North America", temporal_data_clean, ecoregion_coords_clean),
distance_decay_timeseries("Europe", temporal_data_clean, ecoregion_coords_clean)
)
if (!is.null(dd_combined) && nrow(dd_combined) > 10) {
ggplot(dd_combined, aes(distance, correlation, color = continent)) +
geom_point(alpha = 0.4) +
geom_smooth(method = "lm", se = TRUE, alpha = 0.15) +
facet_wrap(~continent, scales = "free") +
theme_minimal() +
labs(title = "Distance–Decay (Time Series)", x = "Distance (degrees)", y = "Correlation")
} else {
tibble(note = "Insufficient pairs for distance–decay")
}# 物种概览
species_summary <- temporal_data_clean %>%
group_by(species) %>%
summarise(
n_records = n(),
n_ecoregions = n_distinct(ecoReg),
n_years = n_distinct(year),
mean_effect = mean(delta_yr, na.rm = TRUE),
sd_effect = sd(delta_yr, na.rm = TRUE),
max_effect = max(delta_yr, na.rm = TRUE),
min_effect = min(delta_yr, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(n_records))
head(species_summary, 20)# A tibble: 20 x 8
species n_records n_ecoregions n_years mean_effect sd_effect max_effect
<chr> <int> <int> <int> <dbl> <dbl> <dbl>
1 abiesProcera 225 4 59 0.00000978 2.23 2.86
2 abiesAmabilis 176 3 65 -0.0000812 2.14 3.06
3 quercusRubra 159 15 35 -0.000137 0.996 2.81
4 acerRubrum 148 7 35 0.0000141 1.09 2.91
5 fraxinusAmer~ 143 7 36 -0.0000154 0.992 2.51
6 piceaAbies 137 16 65 0.0000293 1.16 2.89
7 quercusVelut~ 128 7 35 0.000353 0.901 2.86
8 acerSaccharu 127 8 35 0.000213 1.06 2.59
9 prunusSeroti~ 126 12 28 0.0000121 0.678 2.24
10 quercusAlba 121 9 35 0.000159 0.981 2.7
11 fagusSylvatic 119 8 46 -0.000257 1.73 2.38
12 abiesMagnific 117 3 60 0.000130 2.14 3.21
13 fagusGrandifo 114 7 35 0.000186 1.02 2.1
14 abiesConcolor 113 7 45 0.000169 2.03 3.49
15 tsugaCanadens 111 10 35 -0.000205 1.13 2.76
16 caryaOvalis-g 95 5 35 0.000233 0.897 2.54
17 pinusStrobus 94 12 26 -0.00000815 0.914 1.82
18 tiliaAmerican 94 9 35 0.000268 0.860 2.36
19 pinusSylvestr 93 13 51 -0.000128 0.973 2.13
20 cornusFlorida 92 6 35 -0.00271 0.971 2.27
# i 1 more variable: min_effect <dbl>
species_summary %>% arrange(desc(sd_effect)) %>% head(30)# A tibble: 30 x 8
species n_records n_ecoregions n_years mean_effect sd_effect max_effect
<chr> <int> <int> <int> <dbl> <dbl> <dbl>
1 betulaNeoala~ 49 1 49 -0.000673 2.33 2.27
2 abiesGrandis 57 2 57 0.000754 2.25 2.98
3 abiesProcera 225 4 59 0.00000978 2.23 2.86
4 abiesMagnific 117 3 60 0.000130 2.14 3.21
5 abiesAmabilis 176 3 65 -0.0000812 2.14 3.06
6 abiesConcolor 113 7 45 0.000169 2.03 3.49
7 abiesLasiocar 75 3 67 0.000248 2.01 3.27
8 thujaOccident 33 6 18 -0.000636 1.81 2.86
9 fagusSylvatic 119 8 46 -0.000257 1.73 2.38
10 abiesCephalon 25 4 16 -0.000547 1.66 2.8
# i 20 more rows
# i 1 more variable: min_effect <dbl>
# 按物种计算 Moran's I(逐年平均后)
calculate_morans_by_species <- function(species_name, data, eco_coords) {
species_data <- data %>%
filter(species == species_name) %>%
group_by(ecoReg, year) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")
if (n_distinct(species_data$ecoReg) < 4) return(NULL)
years_with_data <- unique(species_data$year)
results <- c()
for (yr in years_with_data) {
year_subset <- species_data %>%
filter(year == yr) %>%
left_join(eco_coords %>% select(ecoReg, lon, lat, continent), by = "ecoReg") %>%
drop_na(lon, lat)
if (nrow(year_subset) < 4) next
coords <- year_subset %>% select(lon, lat) %>% as.matrix()
W <- as.matrix(dist(coords))
W[W > 15] <- 0
W[W > 0] <- 1 / (W[W > 0])
diag(W) <- 0
row_sums <- rowSums(W)
if (sum(row_sums) == 0) next
W <- W / pmax(row_sums, 1e-10)
y <- year_subset$mean_effect
yc <- y - mean(y)
n <- length(y)
den <- as.numeric(t(yc) %*% yc)
if (den <= 0) next
I <- (n / sum(W)) * (as.numeric(t(yc) %*% W %*% yc)) / den
results <- c(results, I)
}
if (length(results) == 0) return(NULL)
tibble(
species = species_name,
n_years_analyzed = length(results),
mean_morans_i = mean(results, na.rm = TRUE),
sd_morans_i = sd(results, na.rm = TRUE),
pct_positive = 100 * mean(results > 0)
)
}
top_species <- species_summary %>% filter(n_records >= 10) %>% pull(species) %>% head(15)
species_morans <- map_df(top_species, ~calculate_morans_by_species(.x, temporal_data_clean, ecoregion_coords_clean))
species_morans %>% arrange(desc(abs(mean_morans_i)))# A tibble: 13 x 5
species n_years_analyzed mean_morans_i sd_morans_i pct_positive
<chr> <int> <dbl> <dbl> <dbl>
1 fagusGrandifo 17 -0.324 0.296 11.8
2 abiesProcera 55 -0.312 0.181 5.45
3 acerRubrum 25 -0.289 0.177 4
4 acerSaccharu 17 -0.235 0.209 5.88
5 quercusVelutina 17 -0.228 0.132 5.88
6 tsugaCanadens 8 -0.226 0.205 25
7 quercusAlba 17 -0.220 0.180 11.8
8 prunusSerotina 18 -0.211 0.134 5.56
9 fraxinusAmerican 25 -0.207 0.197 8
10 fagusSylvatic 7 -0.190 0.264 28.6
11 quercusRubra 18 -0.173 0.269 16.7
12 abiesConcolor 2 -0.169 0.978 50
13 piceaAbies 6 -0.133 0.287 16.7
temporal_by_spp <- temporal_data_clean %>%
group_by(species, year, continent) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")
species_corr_results <- temporal_by_spp %>%
filter(continent %in% c("North America", "Europe")) %>%
group_by(species) %>%
group_modify(~{
na_df <- .x %>% filter(continent == "North America") %>% select(year, na = mean_effect)
eu_df <- .x %>% filter(continent == "Europe") %>% select(year, eu = mean_effect)
both <- inner_join(na_df, eu_df, by = "year")
if (nrow(both) < 3) return(tibble())
if (sd(both$na, na.rm = TRUE) == 0 || sd(both$eu, na.rm = TRUE) == 0) return(tibble())
ct <- suppressWarnings(cor.test(both$na, both$eu, method = "spearman"))
tibble(n_years = nrow(both), rho = unname(ct$estimate), p_value = ct$p.value, significant = ct$p.value < 0.05)
}) %>%
ungroup() %>%
arrange(desc(abs(rho)))
head(species_corr_results, 20)# A tibble: 20 x 5
species n_years rho p_value significant
<chr> <int> <dbl> <dbl> <lgl>
1 asiminaTriloba 3 -1 0.333 FALSE
2 diospyroVirginia 4 -1 0.0833 FALSE
3 calocedrDecurren 6 -0.829 0.0583 FALSE
4 liquidamStyracif 4 0.8 0.333 FALSE
5 quercusMacrocar 4 0.8 0.333 FALSE
6 taxodiumDistichu 6 -0.657 0.175 FALSE
7 prunusAvium 9 0.65 0.0666 FALSE
8 piceaAbies 4 -0.6 0.417 FALSE
9 gleditsiTriacant 6 0.543 0.297 FALSE
10 acerNegundo 3 0.5 1 FALSE
11 pinusBanksian 3 0.5 1 FALSE
12 tiliaCordata 3 0.5 1 FALSE
13 abiesConcolor 7 0.429 0.354 FALSE
14 acerPlatanoi 13 0.401 0.176 FALSE
15 liriodenTulipife 4 0.4 0.75 FALSE
16 quercusCoccinea 4 0.4 0.75 FALSE
17 tsugaCanadens 7 0.393 0.396 FALSE
18 juglansNigra 7 -0.321 0.498 FALSE
19 quercusRobur 5 -0.3 0.683 FALSE
20 ilexAquifoli 9 0.233 0.552 FALSE
all_species_ts <- temporal_data_clean %>%
group_by(year, continent) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")
all_species_wide <- all_species_ts %>%
pivot_wider(id_cols = year, names_from = continent, values_from = mean_effect) %>%
drop_na()
if (nrow(all_species_wide) >= 3) {
all_corr <- cor.test(all_species_wide$`North America`, all_species_wide$Europe, method = "spearman")
all_corr
} else {
tibble(note = "Insufficient years for aggregate correlation")
}
Spearman's rank correlation rho
data: all_species_wide$`North America` and all_species_wide$Europe
S = 36574, p-value = 0.1088
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.200743
plot_species <- species_summary %>%
filter(n_records >= 30) %>%
slice(1:6) %>%
pull(species)
plot_data <- temporal_data_clean %>%
filter(species %in% plot_species) %>%
group_by(species, year, continent) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")
ggplot(plot_data, aes(year, mean_effect, color = continent, linetype = species)) +
geom_line(alpha = 0.7, linewidth = 0.8) +
facet_wrap(~species) +
theme_minimal() +
labs(title = "不同物种的大陆间 masting 模式",
x = "Year", y = "Mean Annual Effect",
color = "Continent", linetype = "Species")# 1) 滞后相关(±3 年)
lag_correlations <- function(all_species_wide, max_lag = 3) {
if (nrow(all_species_wide) < 3) return(tibble())
na_vec <- all_species_wide$`North America`
eu_vec <- all_species_wide$Europe
out <- map_df(-max_lag:max_lag, ~{
l <- .x
if (l < 0) { x <- na_vec[1:(length(na_vec)+l)]; y <- eu_vec[(-l+1):length(eu_vec)] }
else if(l>0){ x <- na_vec[(l+1):length(na_vec)]; y <- eu_vec[1:(length(eu_vec)-l)] }
else { x <- na_vec; y <- eu_vec }
if (length(x) >= 3 && sd(x, na.rm = TRUE) > 0 && sd(y, na.rm = TRUE) > 0) {
ct <- suppressWarnings(cor.test(x, y, method = "spearman"))
tibble(lag = l, rho = unname(ct$estimate), p_value = ct$p.value)
} else tibble()
})
out
}
lag_results <- lag_correlations(all_species_wide, 3)
lag_results# A tibble: 7 x 3
lag rho p_value
<int> <dbl> <dbl>
1 -3 0.0969 0.453
2 -2 -0.0204 0.874
3 -1 -0.117 0.357
4 0 0.201 0.109
5 1 -0.164 0.196
6 2 -0.00461 0.971
7 3 0.278 0.0289
# 2) 极端年份(±2 SD)
if (nrow(all_species_wide) >= 3) {
extreme_years <- all_species_wide %>%
mutate(
na_extreme = abs(`North America` - mean(`North America`, na.rm = TRUE)) > 2*sd(`North America`, na.rm = TRUE),
eu_extreme = abs(Europe - mean(Europe, na.rm = TRUE)) > 2*sd(Europe, na.rm = TRUE),
both_extreme = na_extreme & eu_extreme
)
extreme_years
} else {
tibble(note = "Insufficient years for extreme-year detection")
}# A tibble: 65 x 6
year `North America` Europe na_extreme eu_extreme both_extreme
<dbl> <dbl> <dbl> <lgl> <lgl> <lgl>
1 1961 0.169 0.301 FALSE FALSE FALSE
2 1962 1.58 -0.862 FALSE FALSE FALSE
3 1963 -2.20 -1.54 TRUE TRUE TRUE
4 1964 -0.308 -1.58 FALSE TRUE FALSE
5 1965 0.570 -0.698 FALSE FALSE FALSE
6 1966 -0.964 0.26 FALSE FALSE FALSE
7 1967 -0.865 -1.30 FALSE FALSE FALSE
8 1968 2.28 -0.0600 TRUE FALSE FALSE
9 1969 -2.34 -0.557 TRUE FALSE FALSE
10 1970 -0.533 -0.374 FALSE FALSE FALSE
# i 55 more rows
# 3) 变异分解(物种贡献)
variance_decomposition <- temporal_data_clean %>%
group_by(species, continent) %>%
summarise(variance = var(delta_yr, na.rm = TRUE), n = n(), .groups = "drop") %>%
group_by(continent) %>%
mutate(pct_variance = 100 * variance / sum(variance, na.rm = TRUE)) %>%
arrange(continent, desc(pct_variance))
variance_decomposition %>% group_by(continent) %>% slice_head(n = 10)# A tibble: 20 x 5
# Groups: continent [2]
species continent variance n pct_variance
<chr> <chr> <dbl> <int> <dbl>
1 fagusSylvatic Europe 3.01 119 4.33
2 abiesCephalon Europe 2.75 25 3.95
3 prunusSerotina Europe 2.69 2 3.87
4 carpinusBetulus Europe 2.69 52 3.87
5 sorbusAucupari Europe 2.58 60 3.70
6 juniperuCommunis Europe 2.46 2 3.54
7 quercusRotundif Europe 2.14 32 3.08
8 quercusCanarien Europe 2.07 14 2.97
9 abiesAlba Europe 1.92 87 2.76
10 abiesPinsapo Europe 1.72 22 2.47
11 betulaNeoalask North America 5.44 49 3.31
12 abiesGrandis North America 5.05 57 3.07
13 abiesProcera North America 4.97 225 3.03
14 abiesMagnific North America 4.59 117 2.79
15 abiesAmabilis North America 4.57 176 2.79
16 abiesConcolor North America 4.41 104 2.69
17 abiesLasiocar North America 4.06 75 2.47
18 piceaOmorika North America 3.65 2 2.22
19 thujaOccident North America 3.49 31 2.13
20 ilexDecidua North America 2.75 28 1.67
# 4) 时间趋势(线性)
if (nrow(all_species_wide) >= 3) {
trend_df <- all_species_wide %>% arrange(year) %>% mutate(time_index = row_number())
lm_na <- lm(`North America` ~ time_index, data = trend_df)
lm_eu <- lm(Europe ~ time_index, data = trend_df)
list(
NA_trend = broom::tidy(lm_na),
EU_trend = broom::tidy(lm_eu)
)
} else {
tibble(note = "Insufficient years for trend test")
}$NA_trend
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -0.175 0.246 -0.712 0.479
2 time_index 0.00266 0.00647 0.412 0.682
$EU_trend
# A tibble: 2 x 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -0.314 0.151 -2.07 0.0421
2 time_index 0.00693 0.00398 1.74 0.0868
# 5) 生态区聚类(按欧氏距离,Ward.D2)
ecoregion_clustering <- function(continent_label, data, eco_coords) {
ecos <- eco_coords %>% filter(continent == continent_label) %>% pull(ecoReg)
eco_ts <- data %>%
filter(ecoReg %in% ecos) %>%
group_by(ecoReg, year) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(id_cols = ecoReg, names_from = year, values_from = mean_effect) %>%
arrange(ecoReg)
if (nrow(eco_ts) < 4) return(NULL)
X <- eco_ts %>% select(-ecoReg) %>% as.matrix()
rownames(X) <- eco_ts$ecoReg
valid <- rowSums(!is.na(X)) > 0
X <- X[valid, , drop = FALSE]
if (nrow(X) < 4) return(NULL)
for (i in 1:nrow(X)) {
rmean <- mean(X[i, ], na.rm = TRUE)
X[i, is.na(X[i, ])] <- rmean
}
X[!is.finite(X)] <- 0
hc <- hclust(dist(X, method = "euclidean"), method = "ward.D2")
plot(hc, main = paste("ecoregion -", continent_label))
invisible(hc)
}
par(mfrow = c(1,2))
hc_na <- ecoregion_clustering("North America", temporal_data_clean, ecoregion_coords_clean)
hc_eu <- ecoregion_clustering("Europe", temporal_data_clean, ecoregion_coords_clean)par(mfrow = c(1,1))# 6) 物种组成
species_composition <- temporal_data_clean %>%
group_by(year, continent, species) %>%
summarise(count = n(), .groups = "drop") %>%
pivot_wider(id_cols = c(year, continent), names_from = species, values_from = count, values_fill = 0) %>%
rowwise() %>% mutate(total = sum(c_across(-c(year, continent)))) %>% ungroup()
comp_pct <- species_composition %>%
pivot_longer(cols = -c(year, continent, total), names_to = "species", values_to = "count") %>%
mutate(pct = if_else(total > 0, 100 * count / total, NA_real_)) %>%
group_by(continent, species) %>%
summarise(mean_pct = mean(pct, na.rm = TRUE), .groups = "drop") %>%
arrange(continent, desc(mean_pct))
head(comp_pct, 15)# A tibble: 15 x 3
continent species mean_pct
<chr> <chr> <dbl>
1 Europe piceaAbies 26.8
2 Europe pinusSylvestr 21.1
3 Europe fagusSylvatic 9.76
4 Europe abiesAlba 3.08
5 Europe aesculusTurbinat 2.85
6 Europe sorbusAucupari 2.74
7 Europe pinusCembra 2.42
8 Europe castaneaSativa 2.24
9 Europe quercusPetraea 1.95
10 Europe quercusCerris 1.76
11 Europe pinusHalepens 1.74
12 Europe larixDecidua 1.72
13 Europe pinusPinea 1.62
14 Europe quercusPubescen 1.38
15 Europe quercusRotundif 1.27
# 年度均值 + 标准差带(按大陆)
ts_ribbon <- temporal_data_clean %>%
group_by(year, continent) %>%
summarise(mean_delta = mean(delta_yr, na.rm = TRUE),
sd_delta = sd(delta_yr, na.rm = TRUE),
.groups = "drop")
ggplot(ts_ribbon, aes(year, mean_delta, color = continent, fill = continent)) +
geom_ribbon(aes(ymin = mean_delta - sd_delta, ymax = mean_delta + sd_delta),
alpha = 0.15, color = NA) +
geom_line(linewidth = 1) +
theme_minimal() +
labs(title = "Mean Annual Effect",
x = "Year", y = "Mean Annual Effect (±1 SD)",
color = "Continent", fill = "Continent")p_hist <- ggplot(morans_results %>% drop_na(morans_i),
aes(morans_i, fill = continent, color = continent)) +
geom_histogram(alpha = 0.25, position = "identity", bins = 30) +
theme_minimal() +
labs(title = "Moran's I distribution", x = "Moran's I", y = "Count")
p_line <- ggplot(morans_results %>% drop_na(morans_i),
aes(year, morans_i, color = continent)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_line(linewidth = 1) + geom_point(size = 1.8) +
theme_minimal() +
labs(title = "Moran's I year trend", x = "Year", y = "Moran's I")
gridExtra::grid.arrange(p_hist, p_line, ncol = 2)if (exists("species_morans") && nrow(species_morans) > 0) {
species_morans %>%
arrange(desc(abs(mean_morans_i))) %>%
slice(1:min(15, n())) %>%
mutate(species = factor(species, levels = rev(species))) %>%
ggplot(aes(x = species, y = mean_morans_i)) +
geom_segment(aes(xend = species, y = 0, yend = mean_morans_i), linewidth = 1, color = "gray60") +
geom_point(size = 3, aes(color = mean_morans_i > 0)) +
coord_flip() +
theme_minimal() +
labs(title = "species average Moran's I (top 15[definite value])",
x = "Species", y = "Mean Moran's I", color = "I > 0?")
} else {
tibble(note = "无可用的物种 Moran's I 结果")
}if (exists("species_corr_results") && nrow(species_corr_results) > 0) {
species_corr_results %>%
mutate(sig = if_else(significant, "p < 0.05", "ns")) %>%
arrange(desc(abs(rho))) %>%
mutate(lbl = if_else(row_number() <= 12, species, NA_character_)) %>%
ggplot(aes(x = n_years, y = rho, color = sig)) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_point(size = 2.5, alpha = 0.8) +
ggrepel::geom_text_repel(aes(label = lbl), size = 3, na.rm = TRUE, max.overlaps = 30) +
theme_minimal() +
labs(title = "Species Spearman ρ",
x = "Years with data (both continents)", y = "Spearman ρ",
color = "Significance")
} else {
tibble(note = "无可用的物种 NA–EU 相关结果")
}# 选择数据较完整的一个年份(两洲均≥8个生态区)
year_choice <- temporal_data_clean %>%
count(year, continent, name = "n") %>%
group_by(year) %>%
summarise(min_n = min(n), .groups = "drop") %>%
arrange(desc(min_n), desc(year)) %>%
slice(1) %>%
pull(year)
eco_year <- temporal_data_clean %>%
filter(year == year_choice) %>%
group_by(ecoReg, continent) %>%
summarise(delta_mean = mean(delta_yr, na.rm = TRUE), .groups = "drop") %>%
left_join(ecoregion_coords_clean, by = c("ecoReg", "continent"))
ggplot(eco_year, aes(lon, lat, color = delta_mean)) +
geom_point(size = 3, alpha = 0.9) +
facet_wrap(~continent) +
scale_color_gradient2(low = "#4575b4", mid = "white", high = "#d73027", midpoint = 0) +
theme_minimal() +
labs(title = paste0("space distribution and annual signal(", year_choice, ")"),
x = "Longitude", y = "Latitude", color = "Mean effect")var_top10 <- temporal_data_clean %>%
group_by(species, continent) %>%
summarise(variance = var(delta_yr, na.rm = TRUE), .groups = "drop") %>%
group_by(continent) %>%
mutate(pct = 100 * variance / sum(variance, na.rm = TRUE)) %>%
arrange(continent, desc(pct)) %>%
group_by(continent) %>% slice_head(n = 10) %>%
ungroup() %>%
mutate(species = fct_reorder(species, pct))
ggplot(var_top10, aes(pct, species, fill = continent)) +
geom_col(alpha = 0.85) +
facet_wrap(~continent, scales = "free_y") +
theme_minimal() +
labs(title = "top 10 variance distributed species",
x = "Percent variance (%)", y = "Species", fill = "Continent")p_na_ts <- all_species_ts %>%
filter(continent == "North America") %>%
ggplot(aes(year, mean_effect)) +
geom_line(color = "#3b8bc2", linewidth = 1) +
theme_minimal() + labs(title = "North America", x = "Year", y = "Mean effect")
p_eu_ts <- all_species_ts %>%
filter(continent == "Europe") %>%
ggplot(aes(year, mean_effect)) +
geom_line(color = "#c43c39", linewidth = 1) +
theme_minimal() + labs(title = "Europe", x = "Year", y = "Mean effect")
gridExtra::grid.arrange(p_na_ts, p_eu_ts, ncol = 2)# 1. 按物种和大陆分组,计算各自的 sd_effect (标准差)
species_volatility <- temporal_data_clean %>%
group_by(species, continent) %>%
summarise(
sd_effect = sd(delta_yr, na.rm = TRUE),
n_records = n(),
.groups = "drop"
) %>%
filter(n_records >= 30) %>% # 仅保留有足够数据的物种
drop_na(sd_effect)
# 2. 提取每个大陆波动性最强的前 15 个物种
top_volatility <- species_volatility %>%
group_by(continent) %>%
slice_max(sd_effect, n = 15) %>%
ungroup()
# 3. 绘制条形图
ggplot(top_volatility,
aes(x = sd_effect,
y = reorder(species, sd_effect),
fill = continent)) +
geom_col() +
facet_wrap(~continent, scales = "free_y") +
theme_minimal() +
guides(fill = "none") +
labs(
title = "species sd_effect Top 15",
x = "delta_yr sd effect)",
y = "Species"
)top_drivers_eu <- variance_decomposition %>%
filter(continent == "Europe") %>%
slice_head(n = 5) %>%
pull(species)
top_drivers_na <- variance_decomposition %>%
filter(continent == "North America") %>%
slice_head(n = 5) %>%
pull(species)
top_drivers_all <- c(top_drivers_eu, top_drivers_na)
# 2. 筛选出这些 "主力物种" 的数据
plot_data_drivers <- temporal_data_clean %>%
filter(species %in% top_drivers_all) %>%
group_by(species, year, continent) %>%
summarise(mean_effect = mean(delta_yr, na.rm = TRUE), .groups = "drop")
# 3. 绘图
ggplot(plot_data_drivers,
aes(x = year, y = mean_effect, color = continent)) +
geom_line(linewidth = 0.8, alpha = 0.9) +
geom_point(size = 0.5) +
facet_wrap(~species, scales = "free_y") +
theme_minimal() +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
labs(
title = "variance Top 5 delta_yr",
x = "Year",
y = "Mean Annual Effect (delta_yr)",
color = "Continent"
)