1211paper_draft

MASTIF Continental Comparison: Light Limitation and Tree Fecundity

EU vs NA Forest Reproductive Sensitivity Analysis

Research Questions:

Q1: How do light environments differ between EU and NA forests?

Q2: Do EU species show greater reproductive sensitivity to light limitation?

Q3: Is shade sensitivity phylogenetically constrained or environmentally labile?

Q4: Which species/traits are most vulnerable? (Oak case study)

Results Structure:

3.1 Light Environment Differs Between Continents

3.2 European Trees Show Greater Sensitivity to Light Limitation

3.3 Shade Sensitivity Is Environmentally Labile, Not Phylogenetically Conserved

3.4 Oak Case Study: Seed Size Amplifies Light Limitation

# Set working directory and paths (Windows)
setwd("R:/clark/clark.unix/makeMast/maps")

library(RANN)
library(ape)
library(phytools)
Loading required package: maps
library(ggplot2)
library(dplyr)

Attaching package: 'dplyr'
The following object is masked from 'package:ape':

    where
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
library(patchwork)
library(nlme)

Attaching package: 'nlme'
The following object is masked from 'package:dplyr':

    collapse
# Define paths
path <- "R:/clark/clark.unix/"
dataPlotPath <- '../'
sumPlotPath  <- '../inventories/continentPred/annual/'
traitPath    <- '../../'
invPath      <- '../inventories/'
path2Traits  <- paste0(traitPath, 'traitsByGroup/')
phyloPath    <- "R:/clark/clark.unix/phylogram/"

# Source required functions
source('../Rfunctions/mastifFunctions.r')
source('R:/clark/clark.unix/makeMast/moreFunctionsOld.r')
Rcpp::sourceCpp('../RcppFunctions/cppFns.cpp')
source('R:/clark/clark.unix/makeMast/predictionMastFunctions.R')

# Load phylo functions
setwd("R:/clark/clark.unix/phylogram")
source('phyloFunctions.R')
setwd("R:/clark/clark.unix/makeMast/maps")
parEst_path <- 'R:/clark/clark.unix/makeMast/fitLogs/output3/east_europe_west/parEst.csv'
parEst <- read.csv(parEst_path, check.names = FALSE)
head(parEst[,1])
[1] "abiesAlba"     "abiesAmabilis" "abiesBalsamea" "abiesCephalon"
[5] "abiesCilicica" "abiesConcolor"
# Check for duplicates
sum(duplicated(parEst[,1]))
[1] 7
# 查看parEst的结构
cat("=== parEst structure ===\n")
=== parEst structure ===
cat("Dimensions:", dim(parEst), "\n")
Dimensions: 321 35 
cat("\nFirst 5 row names:\n")

First 5 row names:
print(head(rownames(parEst), 5))
[1] "1" "2" "3" "4" "5"
cat("\nFirst 5 column names:\n")

First 5 column names:
print(head(colnames(parEst), 5))
[1] "species"                    "log"                       
[3] "bfec.aspect1"               "bfec.aspect1.defSite.slope"
[5] "bfec.aspect1.slope"        
cat("\nFirst 5x5 corner:\n")

First 5x5 corner:
print(parEst[1:5, 1:5])
        species                                             log bfec.aspect1
1     abiesAlba abies_europe_output3_LFI50SqrtUpdate_2025:11:05            0
2 abiesAmabilis         abies_west_output3_LFI30Sqrt_2025:11:08            0
3 abiesBalsamea         abies_east_output3_LFI30Sqrt_2025:11:02            0
4 abiesCephalon abies_europe_output3_LFI50SqrtUpdate_2025:11:05            0
5 abiesCilicica abies_europe_output3_LFI50SqrtUpdate_2025:11:05            0
  bfec.aspect1.defSite.slope bfec.aspect1.slope
1                    -0.0378              -3.13
2                     0.1110              -0.22
3                     0.0476               5.47
4                     0.0687               1.61
5                    -0.2110              -6.26
# 如果有species列
cat("\n=== Check for species column ===\n")

=== Check for species column ===
if("species" %in% colnames(parEst)) {
  cat("'species' column exists\n")
  print(head(parEst$species, 10))
}
'species' column exists
 [1] "abiesAlba"     "abiesAmabilis" "abiesBalsamea" "abiesCephalon"
 [5] "abiesCilicica" "abiesConcolor" "abiesFraseri"  "abiesGrandis" 
 [9] "abiesLasiocar" "abiesMagnific"
# 查看所有列名
cat("\n=== All column names ===\n")

=== All column names ===
print(colnames(parEst))
 [1] "species"                    "log"                       
 [3] "bfec.aspect1"               "bfec.aspect1.defSite.slope"
 [5] "bfec.aspect1.slope"         "bfec.aspect2"              
 [7] "bfec.aspect2.defSite.slope" "bfec.aspect2.slope"        
 [9] "bfec.defAnom"               "bfec.defSite"              
[11] "bfec.diam"                  "bfec.I.defSite.2."         
[13] "bfec.I.diam.2."             "bfec.I.tempSite.2."        
[15] "bfec.intercept"             "bfec.isolate"              
[17] "bfec.lfi10SqrtAnom"         "bfec.lfi10SqrtSite"        
[19] "bfec.lfi30SqrtAnom"         "bfec.lfi30SqrtSite"        
[21] "bfec.lfi50SqrtAnom"         "bfec.lfi50SqrtSite"        
[23] "bfec.ph"                    "bfec.shade"                
[25] "bfec.slope"                 "bfec.tempSite"             
[27] "bfec.tpi"                   "brep.diam"                 
[29] "brep.intercept"             "plots"                     
[31] "randEffVar"                 "sigma"                     
[33] "trees"                      "treeYr"                    
[35] "upar"                      
setwd("R:/clark/clark.unix/makeMast/maps")

library(RANN)
library(ape)
library(phytools)
library(ggplot2)
library(dplyr)
library(tidyr)
library(patchwork)
library(nlme)

# Define paths
path <- "R:/clark/clark.unix/"
dataPlotPath <- '../'
sumPlotPath  <- '../inventories/continentPred/annual/'
traitPath    <- '../../'
invPath      <- '../inventories/'
path2Traits  <- paste0(traitPath, 'traitsByGroup/')
phyloPath    <- "R:/clark/clark.unix/phylogram/"

# Source required functions
source('../Rfunctions/mastifFunctions.r')
source('R:/clark/clark.unix/makeMast/moreFunctionsOld.r')
Rcpp::sourceCpp('../RcppFunctions/cppFns.cpp')
source('R:/clark/clark.unix/makeMast/predictionMastFunctions.R')

# Load phylo functions
setwd("R:/clark/clark.unix/phylogram")
source('phyloFunctions.R')
setwd("R:/clark/clark.unix/makeMast/maps")

load_mastif_data <- function() {
  parEst_path <- 'R:/clark/clark.unix/makeMast/fitLogs/output3/east_europe_west/parEst.csv'
  parSE_path  <- 'R:/clark/clark.unix/makeMast/fitLogs/output3/east_europe_west/parSE.csv'
  
  parEst <- read.csv(parEst_path, stringsAsFactors = FALSE, check.names = FALSE)
  parSE  <- read.csv(parSE_path, stringsAsFactors = FALSE, check.names = FALSE)
  
  # Load traits
  traits <- read.csv(paste0(path2Traits, 'plantTraits.csv'), stringsAsFactors = FALSE)
  
  # Load phylogeny
  tree <- read.tree("R:/clark/clark.unix/phylogram/Vascular_Plants_rooted.dated.tre")
  
  cat(sprintf("  Loaded %d species from parEst\n", nrow(parEst)))
  cat(sprintf(" parameters: %d\n", ncol(parEst)))
  cat(sprintf("  Loaded %d species in traits\n", nrow(traits)))
  cat(sprintf("  Phylogeny has %d tips\n", length(tree$tip.label)))
  
  if("species" %in% colnames(parEst)){
    cat(sprintf("unique species: %d\n",length(parEst$species)))
  }
  
  return(list(
    parEst = parEst,
    parSE = parSE,
    traits = traits,
    tree = tree
  ))
}

data <- load_mastif_data()
  Loaded 321 species from parEst
 parameters: 35
  Loaded 12053 species in traits
  Phylogeny has 31749 tips
unique species: 321
parEst <- data$parEst
parSE <- data$parSE
traits <- data$traits
tree <- data$tree
extract_region <- function(log_string) {
  if(grepl("_europe_", log_string)) {
    return("europe")
  } else if(grepl("_east_", log_string)) {
    return("east")
  } else if(grepl("_west_", log_string)) {
    return("west")
  } else {
    return(NA)
  }
}

extract_genus <- function(species_name) {
  genus <- gsub("^([a-z]+).*", "\\1", species_name)
  return(genus)
}

RESULT 3.2: EUROPEAN TREES SHOW GREATER SENSITIVITY TO LIGHT LIMITATION

Q2: Do EU species show greater reproductive sensitivity to light limitation?

Q3: Is shade sensitivity phylogenetically constrained or environmentally labile?

Q4: Which species/traits are most vulnerable? Validation with Quercus

oak_sections <- list(
  # Subgenus Quercus
  sect_quercus = c(
    # White oaks 
    "quercusAlba", "quercusMacrocar", "quercusStellata", 
    "quercusMontana", "quercusBicolor", "quercusDouglasi",
    "quercusGeminata", "quercusVirginia",
    "quercusRobur", "quercusPetraea", "quercusPubescen", 
    "quercusFaginea", "quercusIncana", "quercusIthabure",
    "quercusCanarien"
  ),
  sect_lobatae = c(
    # Red oaks (New World)
    "quercusRubra", "quercusVelutina", "quercusFalcata",
    "quercusCoccinea", "quercusPalustri", "quercusNigra",
    "quercusPhellos", "quercusKelloggi", "quercusMariland",
    "quercusLaevis", "quercusHemispha", "quercusTexana",
    "quercusMuehlenb", "quercusAgrifoli", "quercusChrysole",
    "quercusMargaret"
  ),
  sect_cerris = c(
    "quercusCerris", "quercusSuber"
  ),
  sect_ilex = c(
    "quercusIlex", "quercusCoccifer", "quercusRotundif"
  )
)
prepare_shade_data <- function(parEst, parSE) {
  
  parEst <- parEst %>%
    mutate(
      region    = sapply(log, extract_region),
      genus     = sapply(species, extract_genus),
      continent = ifelse(region == "europe", "EU", "NA"),
      species_region = paste0(species, "_", region)
    )
  
  shade_se <- if ("bfec.shade" %in% colnames(parSE)) {
    parSE$bfec.shade
  } else {
    rep(NA_real_, nrow(parEst))
  }
  
  shade_data_raw <- tibble(
    species           = parEst$species,
    species_region    = parEst$species_region,
    region            = parEst$region,
    continent         = parEst$continent,
    genus             = parEst$genus,
    shade_sensitivity = parEst$bfec.shade,
    shade_se          = shade_se,
    n_trees           = parEst$trees,
    n_treeYr          = parEst$treeYr
  ) %>%
    filter(!is.na(region))
  
  return(shade_data_raw)
}

filter_shade_data <- function(shade_data_raw,
                              min_trees = 50,
                              min_treeYr = 100) {
  
  shade_data_filtered <- shade_data_raw %>%
    filter(
      n_trees  >= min_trees,
      n_treeYr >= min_treeYr
    )
  
  genera_before <- shade_data_raw %>%
    count(genus, continent) %>%
    pivot_wider(names_from = continent,
                values_from = n,
                values_fill = 0)
  
  genera_after <- shade_data_filtered %>%
    count(genus, continent) %>%
    pivot_wider(names_from = continent,
                values_from = n,
                values_fill = 0)
  
  lost_genera <- setdiff(
    genera_before %>% filter(EU > 0 & NA > 0) %>% pull(genus),
    genera_after  %>% filter(EU > 0 & NA > 0) %>% pull(genus)
  )
  
  return(list(
    shade_data_filtered = shade_data_filtered,
    lost_genera = lost_genera
  ))
}
compare_shade_by_genus <- function(shade_data_filtered,
                                   exclude_genera = c("castanea"),
                                   min_trees_per_continent = NULL) {
  
  df <- shade_data_filtered %>%
    filter(!genus %in% exclude_genera)
  
  shade_by_genus_region <- df %>%
    group_by(genus, continent) %>%
    summarise(
      shade_mean  = mean(shade_sensitivity, na.rm = TRUE),
      shade_se    = ifelse(n() > 1,
                           sd(shade_sensitivity, na.rm = TRUE) / sqrt(n()),
                           mean(shade_se, na.rm = TRUE)),
      n_species   = n(),
      total_trees = sum(n_trees),
      total_treeYr = sum(n_treeYr),
      .groups = "drop"
    )
  
  if (!is.null(min_trees_per_continent)) {
    shade_by_genus_region <- shade_by_genus_region %>%
      filter(total_trees >= min_trees_per_continent) %>%
      group_by(genus) %>%
      filter(n() == 2) %>%
      ungroup()
  }
  
  shade_wide <- shade_by_genus_region %>%
    pivot_wider(
      names_from = continent,
      values_from = c(shade_mean, shade_se,
                      n_species, total_trees, total_treeYr),
      names_sep = "_"
    ) %>%
    filter(!is.na(shade_mean_EU),
           !is.na(shade_mean_NA)) %>%
    mutate(
      diff = shade_mean_EU - shade_mean_NA,
      eu_more_sensitive = diff < 0
    )
  
  n_total   <- nrow(shade_wide)
  n_eu_more <- sum(shade_wide$eu_more_sensitive)
  
  binom_result <- if (n_total >= 5) {
    binom.test(n_eu_more, n_total, p = 0.5, alternative = "greater")
  } else NULL
  
  return(list(
    shade_wide = shade_wide,
    n_genera = n_total,
    n_eu_more_sensitive = n_eu_more,
    binom_test = binom_result
  ))
}
plot_shade_sensitivity <- function(shade_comparison) {
  
  df <- shade_comparison$shade_wide
  if (nrow(df) == 0) return(NULL)
  
  lims <- range(c(df$shade_mean_EU,
                  df$shade_mean_NA),
                na.rm = TRUE) + c(-0.3, 0.3)
  
  ggplot(df, aes(x = shade_mean_NA,
                 y = shade_mean_EU)) +
    geom_abline(slope = 1, intercept = 0,
                linetype = "dashed",
                color = "gray50") +
    geom_point(aes(color = eu_more_sensitive),
               size = 3.5) +
    coord_fixed(xlim = lims,
                ylim = lims) +
    scale_color_manual(
      values = c("TRUE" = "#E69F00",
                 "FALSE" = "#56B4E9"),
      labels = c("EU more sensitive",
                 "NA more sensitive"),
      name = ""
    ) +
    theme_bw(base_size = 12)
}
prepare_phylo_trait_data <- function(shade_data_filtered,
                                     traits,
                                     tree) {
  
  shade_by_species <- shade_data_filtered %>%
    group_by(species) %>%
    summarise(
      shade_sensitivity = mean(shade_sensitivity, na.rm = TRUE),
      .groups = "drop"
    )
  
  phylo_data <- shade_by_species %>%
    left_join(
      traits %>%
        select(code8, gmPerSeed, maxHt) %>%
        rename(species = code8),
      by = "species"
    ) %>%
    mutate(
      genus   = sub("^([a-z]+).*", "\\1", species),
      epithet = tolower(sub("^[a-z]+", "", species)),
      tree_name = paste0(
        toupper(substr(genus, 1, 1)),
        substr(genus, 2, nchar(genus)),
        "_",
        epithet
      )
    )
  
  return(phylo_data)
}
analyze_oak_shade <- function(shade_data_filtered) {
  
  oak_data <- shade_data_filtered %>%
    filter(genus == "quercus")
  
  if (nrow(oak_data) == 0) return(NULL)
  
  oak_summary <- oak_data %>%
    group_by(continent) %>%
    summarise(
      mean_shade  = mean(shade_sensitivity, na.rm = TRUE),
      se_shade    = sd(shade_sensitivity, na.rm = TRUE) / sqrt(n()),
      n           = n(),
      total_trees = sum(n_trees),
      .groups = "drop"
    )
  
  oak_eu <- oak_data$shade_sensitivity[oak_data$continent == "EU"]
  oak_na <- oak_data$shade_sensitivity[oak_data$continent == "NA"]
  
  t_result <- if (length(oak_eu) >= 2 && length(oak_na) >= 2)
    t.test(oak_eu, oak_na, alternative = "less") else NULL
  
  wilcox_result <- if (length(oak_eu) >= 2 && length(oak_na) >= 2)
    wilcox.test(oak_eu, oak_na, alternative = "less") else NULL
  
  return(list(
    oak_data = oak_data,
    oak_summary = oak_summary,
    t_test = t_result,
    wilcox_test = wilcox_result
  ))
}

analyze_oak_seedmass_categorical <- function(shade_data_filtered, traits) {
  
  oak_data <- shade_data_filtered %>%
    filter(genus == "quercus") %>%
    left_join(
      traits %>%
        select(code8, gmPerSeed) %>%
        rename(species = code8),
      by = "species"
    ) %>%
    filter(!is.na(gmPerSeed))
  
  if (nrow(oak_data) < 6) return(NULL)
  
  median_seed <- median(oak_data$gmPerSeed, na.rm = TRUE)
  
  oak_data <- oak_data %>%
    mutate(seed_category =
             ifelse(gmPerSeed > median_seed,
                    "Large seed", "Small seed"))
  
  seed_summary <- oak_data %>%
    group_by(seed_category, continent) %>%
    summarise(
      n = n(),
      mean_shade = mean(shade_sensitivity, na.rm = TRUE),
      se_shade = sd(shade_sensitivity, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    )
  
  return(list(
    data = oak_data,
    summary = seed_summary,
    median_seed = median_seed
  ))
}
library(dplyr)

new_traits <- read.csv(
  "C:/Users/mh471/Downloads/radiation/plant_traits_species_matrix_wide.csv",
  stringsAsFactors = FALSE
)

cat(sprintf(
  "新 trait 数据: %d 物种, %d 列\n",
  nrow(new_traits), ncol(new_traits)
))
<U+65B0> trait <U+6570><U+636E>: 11972 <U+7269><U+79CD>, 109 <U+5217>
## ---- continent ----
new_traits <- new_traits %>%
  mutate(
    continent = case_when(
      region %in% c("east", "west", "east_west", "west_east", "east_west_europe") ~ "NA",
      region %in% c("europe", "europe_asia", "europe_africa") ~ "EU",
      TRUE ~ NA_character_
    )
  )

print(table(new_traits$continent, useNA = "ifany"))

   EU    NA  <NA> 
  190   505 11277 
## ---- shade sensitivity ----
new_traits <- new_traits %>%
  mutate(
    shade_sensitivity    = bfec.shade.y,
    shade_sensitivity_se = bfec.shade.x
  )

## ---- genus ----
new_traits$genus <- sapply(strsplit(new_traits$species_clean, " "), `[`, 1)

## ---- categorical → numeric traits ----

# Shade tolerance (1–5)
new_traits$shade_tolerance_numeric <- case_when(
  new_traits$Species.tolerance.to.shade == "intolerant"        ~ 1,
  new_traits$Species.tolerance.to.shade == "intermediate_low" ~ 2,
  new_traits$Species.tolerance.to.shade == "intermediate"     ~ 3,
  new_traits$Species.tolerance.to.shade == "tolerant"         ~ 4,
  new_traits$Species.tolerance.to.shade == "very_tolerant"    ~ 5,
  TRUE ~ NA_real_
)

# Growth rate (1–3)
new_traits$growth_rate_numeric <- case_when(
  new_traits$Plant.growth.rate == "slow"     ~ 1,
  new_traits$Plant.growth.rate == "moderate" ~ 2,
  new_traits$Plant.growth.rate == "rapid"    ~ 3,
  TRUE ~ NA_real_
)

# Fire tolerance (0–3)
new_traits$fire_tolerance_numeric <- case_when(
  new_traits$Fire.tolerance == "none"   ~ 0,
  new_traits$Fire.tolerance == "low"    ~ 1,
  new_traits$Fire.tolerance == "medium" ~ 2,
  new_traits$Fire.tolerance == "high"   ~ 3,
  TRUE ~ NA_real_
)

# Leaf thickness
new_traits$leaf_thickness_numeric <-
  round(as.numeric(new_traits$Leaf.thickness), 2)
Warning: NAs introduced by coercion
## ---- sanity checks ----
print(table(new_traits$shade_tolerance_numeric, useNA = "ifany"))

    1     2     3     4     5  <NA> 
  430     3   220   173     1 11145 
print(table(new_traits$growth_rate_numeric, useNA = "ifany"))

    1     2     3  <NA> 
  167   262   262 11281 
print(table(new_traits$fire_tolerance_numeric, useNA = "ifany"))

    0     1     2     3  <NA> 
    6   192   179   198 11397 
df <- new_traits

stopifnot(
  is.data.frame(df),
  "shade_sensitivity" %in% colnames(df),
  "continent" %in% colnames(df)
)



trait_info <- list(
  # Seed
  "Seed mass (g)"        = list(col = "gmPerSeed", log = TRUE,  category = "Seed"),
  "Seed coat ratio"     = list(col = "seedCoatRatio", log = FALSE, category = "Seed"),
  "Seeds per fruit"     = list(col = "seedsPerFruit", log = FALSE, category = "Seed"),
  "Seed moisture"       = list(col = "seedMoistureFraction", log = FALSE, category = "Seed"),
  "Cone mass (g)"       = list(col = "gmPerCone", log = TRUE,  category = "Seed"),
  
  # Size
  "Max height (m)"      = list(col = "maxHt", log = TRUE,  category = "Size"),
  "Max DBH (cm)"        = list(col = "maxDbh", log = TRUE,  category = "Size"),
  "Plant height"        = list(col = "Plant.height", log = FALSE, category = "Size"),
  
  # Leaf
  "SLA"                 = list(col = "SLA", log = FALSE, category = "Leaf"),
  "Leaf N"              = list(col = "leafN", log = FALSE, category = "Leaf"),
  "Leaf P"              = list(col = "leafP", log = FALSE, category = "Leaf"),
  "Leaf N:P"            = list(col = "leafNP", log = FALSE, category = "Leaf"),
  "Leaf C:N ratio"      = list(col = "Leaf.carbon.nitrogen..C.N..ratio", log = FALSE, category = "Leaf"),
  "LDMC"                = list(col = "Leaf.dry.matter.content.per.leaf.water.saturated.mass..LDMC.",
                               log = FALSE, category = "Leaf"),
  "Leaf N per area"     = list(col = "Leaf.nitrogen..N..content.per.leaf.area", log = FALSE, category = "Leaf"),
  "Leaf thickness"      = list(col = "leaf_thickness_numeric", log = FALSE, category = "Leaf"),
  
  # Wood
  "Wood density"        = list(col = "woodSG", log = FALSE, category = "Wood"),
  "Wood density (g/cm)" = list(col = "gmPerCm", log = FALSE, category = "Wood"),
  
  # Photosynthesis
  "Asat"                = list(col = "Leaf.photosynthesis.at.saturating.light.and.ambient.CO2.per.leaf.area..Asat.",
                               log = FALSE, category = "Photosynthesis"),
  "Amax"                = list(col = "Leaf.photosynthesis.at.saturating.light.and.saturating.CO2.per.leaf.area..Amax.",
                               log = FALSE, category = "Photosynthesis"),
  "Vcmax (25C)"         = list(col = "Photosynthetic.carboxylation.capacity..Vcmax..Vmax..per.leaf.area.at.25.C",
                               log = FALSE, category = "Photosynthesis"),
  "Jmax"                = list(col = "Jmax.per.leaf.area.at.ambient.or.leaf.temperature",
                               log = FALSE, category = "Photosynthesis"),
  
  # Growth
  "Growth rate"         = list(col = "growth_rate_numeric", log = FALSE, category = "Growth"),
  "Relative growth rate"= list(col = "Plant.relative.growth.rate", log = FALSE, category = "Growth"),
  
  # Tolerance
  "Shade tolerance"     = list(col = "shade_tolerance_numeric", log = FALSE, category = "Tolerance"),
  "Drought tolerance"   = list(col = "Species.tolerance.to.drought..numeric.", log = FALSE, category = "Tolerance"),
  "Fire tolerance"      = list(col = "fire_tolerance_numeric", log = FALSE, category = "Tolerance")
)

## =========================================================
## 4. Overall correlations
## =========================================================

overall_results <- list()

for (label in names(trait_info)) {
  
  info <- trait_info[[label]]
  col  <- info$col
  
  if (!col %in% colnames(df)) next
  
  x <- df[[col]]
  if (info$log) x <- log10(x + 1e-3)
  y <- df$shade_sensitivity
  
  complete <- complete.cases(x, y)
  if (sum(complete) < 15) next
  
  ct <- suppressWarnings(
    cor.test(x[complete], y[complete], method = "spearman")
  )
  
  overall_results[[label]] <- data.frame(
    trait    = label,
    category = info$category,
    n        = sum(complete),
    rho      = unname(ct$estimate),
    p_value  = ct$p.value,
    stringsAsFactors = FALSE
  )
}

overall_df <- if (length(overall_results) == 0) {
  data.frame()
} else {
  bind_rows(overall_results) %>%
    arrange(p_value) %>%
    mutate(
      sig = case_when(
        p_value < 0.001 ~ "***",
        p_value < 0.01  ~ "**",
        p_value < 0.05  ~ "*",
        p_value < 0.1   ~ ".",
        TRUE ~ ""
      )
    )
}

print(overall_df)
                  trait       category   n          rho     p_value sig
1                  LDMC           Leaf 107  0.252595537 0.008667706  **
2           Vcmax (25C) Photosynthesis  56 -0.296803965 0.026330711   *
3          Plant height           Size  27  0.364330470 0.061715237   .
4        Leaf C:N ratio           Leaf 101  0.179555131 0.072386843   .
5         Cone mass (g)           Seed  22 -0.363841866 0.095998399   .
6       Leaf N per area           Leaf 148 -0.125980419 0.127082529    
7     Drought tolerance      Tolerance 178 -0.107013609 0.155093936    
8        Leaf thickness           Leaf 137  0.107024501 0.213208803    
9       Shade tolerance      Tolerance 162  0.098119788 0.214166423    
10         Wood density           Wood 106  0.116484879 0.234393124    
11                  SLA           Leaf 106  0.115483530 0.238467117    
12  Wood density (g/cm)           Wood 107  0.113954581 0.242515741    
13             Leaf N:P           Leaf 100  0.114759167 0.255569914    
14                 Jmax Photosynthesis  50  0.161523499 0.262443976    
15      Seeds per fruit           Seed 131  0.078616971 0.372086991    
16        Seed moisture           Seed  26 -0.142588479 0.487138587    
17         Max DBH (cm)           Size 133 -0.037749251 0.666184799    
18       Max height (m)           Size 164 -0.027681034 0.724953607    
19               Leaf N           Leaf 181  0.026165096 0.726611562    
20      Seed coat ratio           Seed  41 -0.050185146 0.755344130    
21       Fire tolerance      Tolerance 126  0.014701239 0.870215167    
22          Growth rate         Growth 143  0.004740046 0.955194005    
23               Leaf P           Leaf 165 -0.001312555 0.986650497    
24 Relative growth rate         Growth  75 -0.001629047 0.988932892    
25        Seed mass (g)           Seed 203  0.000338193 0.996179143    
## =========================================================
## 5. Continent-specific correlations
## =========================================================

continent_results <- list()

for (cont in c("EU", "NA")) {
  
  df_c <- df[df$continent == cont, , drop = FALSE]
  if (nrow(df_c) == 0) next
  
  for (label in names(trait_info)) {
    
    info <- trait_info[[label]]
    col  <- info$col
    
    if (!col %in% colnames(df_c)) next
    
    x <- df_c[[col]]
    if (info$log) x <- log10(x + 1e-3)
    y <- df_c$shade_sensitivity
    
    complete <- complete.cases(x, y)
    if (sum(complete) < 8) next
    
    ct <- suppressWarnings(
      cor.test(x[complete], y[complete], method = "spearman")
    )
    
    continent_results[[paste0(cont, "_", label)]] <- data.frame(
      continent = cont,
      trait     = label,
      category  = info$category,
      n         = sum(complete),
      rho       = unname(ct$estimate),
      p_value   = ct$p.value,
      stringsAsFactors = FALSE
    )
  }
}

continent_df <- if (length(continent_results) == 0) {
  data.frame()
} else {
  do.call(rbind, continent_results) %>%
    mutate(
      sig = ifelse(
        p_value < 0.001, "***",
        ifelse(p_value < 0.01, "**",
               ifelse(p_value < 0.05, "*",
                      ifelse(p_value < 0.1, ".", "")))
      )
    )
}

rownames(continent_df) <- NULL

if (nrow(continent_df) > 0) {
  print(
    continent_df[
      continent_df$continent == "EU" & continent_df$p_value < 0.1,
    ][order(continent_df$p_value[
      continent_df$continent == "EU" & continent_df$p_value < 0.1
    ]), ]
  )
  
  print(
    continent_df[
      continent_df$continent == "NA" & continent_df$p_value < 0.1,
    ][order(continent_df$p_value[
      continent_df$continent == "NA" & continent_df$p_value < 0.1
    ]), ]
  )
} else {
  print("No continent-specific correlations passed thresholds.")
}
   continent           trait category  n       rho    p_value sig
3         EU Seeds per fruit     Seed 28 0.4744193 0.01075032   *
1         EU   Seed mass (g)     Seed 56 0.3141363 0.01838416   *
10        EU            LDMC     Leaf 44 0.3278007 0.02984059   *
5         EU    Max DBH (cm)     Size 24 0.4348772 0.03369424   *
   continent        trait category   n        rho     p_value sig
30        NA     Leaf N:P     Leaf  57  0.3616473 0.005707664  **
28        NA       Leaf N     Leaf 125  0.1947830 0.029498786   *
25        NA Max DBH (cm)     Size 103 -0.1724040 0.081609850   .
26        NA Plant height     Size  26  0.3447922 0.084529904   .
new_traits <- read.csv("C:/Users/mh471/Downloads/radiation/plant_traits_species_matrix_wide.csv",
                       stringsAsFactors = FALSE)

# 查看 region 分布
table(new_traits$region, useNA = "ifany")

                                                         africa 
                           8125                             143 
          africa_australia_east                   africa_europe 
                              1                               1 
                   africa_south                            asia 
                              1                             374 
                    asia_africa            asia_australia_south 
                              1                               1 
                      asia_east                asia_east_europe 
                             14                               1 
                 asia_east_west                     asia_europe 
                              2                               5 
               asia_europe_east    asia_europe_east_west_europe 
                              3                               1 
                    asia_oceana                 asia_south_east 
                              1                               1 
    asia_south_east_europe_west                        atlantic 
                              1                               1 
                atlantic_europe                       australia 
                              2                               1 
                 australia_west                            east 
                              1                             286 
                      east_asia                     east_europe 
                              1                               2 
        east_europe_africa_asia                      east_south 
                              2                               6 
                      east_west                east_west_europe 
                             11                               5 
          east_west_europe_asia east_west_europe_asia_australia 
                              6                               1 
                east_west_south                          europe 
                              2                             110 
                  europe_africa              europe_africa_asia 
                              8                               6 
        europe_africa_asia_east                     europe_asia 
                              1                              72 
             europe_asia_africa    europe_asia_africa_east_west 
                              3                               2 
               europe_asia_east           europe_asia_east_west 
                              1                               1 
               europe_asia_west                     europe_east 
                              1                               5 
          europe_east_australia                europe_east_west 
                              1                               1 
          europe_east_west_asia                     europe_west 
                              1                               3 
                         oceana                         oceania 
                              1                              46 
                 oceania_europe                         pacific 
                              1                              21 
                          south                    south_africa 
                           2458                               2 
           south_africa_oceania                      south_east 
                              1                               7 
                     south_west                            west 
                              3                             198 
                      west_east           west_east_europe_asia 
                              5                               2 
                     west_south 
                              8 
# 创建 continent 列 - 更全面的分类
new_traits <- new_traits %>%
  mutate(
    continent = case_when(
      # 纯北美 (east, west, south 在 MASTIF 中都是 NA 地区)
      region %in% c("east", "west", "south", "east_west", "west_east", 
                    "east_south", "south_east", "west_south", "south_west",
                    "east_west_south") ~ "NA",
      
      # 纯欧洲
      region %in% c("europe") ~ "EU",
      
      # 欧洲为主的混合(包含 europe 但不包含 NA 地区)
      region %in% c("europe_asia", "europe_africa", "europe_africa_asia",
                    "europe_asia_africa", "asia_europe") ~ "EU",
      
      # 跨大陆的混合 - 设为 NA(后续可能需要排除)
      grepl("europe", region) & grepl("east|west|south", region) ~ "mixed",
      
      # 其他地区(亚洲、非洲、大洋洲等)- 不属于 EU 或 NA
      TRUE ~ "other"
    )
  )

# 检查结果
cat("=== continent 分布 ===\n")
=== continent <U+5206><U+5E03> ===
print(table(new_traits$continent, useNA = "ifany"))

   EU mixed    NA other 
  204    41  2984  8743 
# 查看每个 continent 类别包含哪些 region
cat("\n=== NA 类别的 region ===\n")

=== NA <U+7C7B><U+522B><U+7684> region ===
print(table(new_traits$region[new_traits$continent == "NA"]))

           east      east_south       east_west east_west_south           south 
            286               6              11               2            2458 
     south_east      south_west            west       west_east      west_south 
              7               3             198               5               8 
cat("\n=== EU 类别的 region ===\n")

=== EU <U+7C7B><U+522B><U+7684> region ===
print(table(new_traits$region[new_traits$continent == "EU"]))

       asia_europe             europe      europe_africa europe_africa_asia 
                 5                110                  8                  6 
       europe_asia europe_asia_africa 
                72                  3 
cat("\n=== mixed 类别的 region ===\n")

=== mixed <U+7C7B><U+522B><U+7684> region ===
print(table(new_traits$region[new_traits$continent == "mixed"]))

               asia_east_europe                asia_europe_east 
                              1                               3 
   asia_europe_east_west_europe     asia_south_east_europe_west 
                              1                               1 
                    east_europe         east_europe_africa_asia 
                              2                               2 
               east_west_europe           east_west_europe_asia 
                              5                               6 
east_west_europe_asia_australia         europe_africa_asia_east 
                              1                               1 
   europe_asia_africa_east_west                europe_asia_east 
                              2                               1 
          europe_asia_east_west                europe_asia_west 
                              1                               1 
                    europe_east           europe_east_australia 
                              5                               1 
               europe_east_west           europe_east_west_asia 
                              1                               1 
                    europe_west           west_east_europe_asia 
                              3                               2 
cat("\n=== other 类别的 region ===\n")

=== other <U+7C7B><U+522B><U+7684> region ===
print(table(new_traits$region[new_traits$continent == "other"]))

                                     africa africa_australia_east 
                 8125                   143                     1 
        africa_europe          africa_south                  asia 
                    1                     1                   374 
          asia_africa  asia_australia_south             asia_east 
                    1                     1                    14 
       asia_east_west           asia_oceana       asia_south_east 
                    2                     1                     1 
             atlantic       atlantic_europe             australia 
                    1                     2                     1 
       australia_west             east_asia                oceana 
                    1                     1                     1 
              oceania        oceania_europe               pacific 
                   46                     1                    21 
         south_africa  south_africa_oceania 
                    2                     1