Phylogenies

knitr::opts_knit$set(root.dir = getwd())

library(RColorBrewer)
library(dplyr)
library(ggplot2)
library(RANN)
library(ape)
library(stringr)
library(castor)
library(corrplot)
library(ggplot2)
library(officer)
library(rvg)

setwd("R:/clark/clark.unix/makeMast/maps")
library(RANN)
path <-"/Volumes/research/clark/clark.unix/"
dataPlotPath <- '../'
sumPlotPath  <- '../inventories/continentPred/annual/'
traitPath    <- '../../'
invPath      <- '../inventories/'
path2Traits  <- paste(traitPath, 'traitsByGroup/', sep='')
source( '../Rfunctions/mastifFunctions.r' )
source( 'R:/clark/clark.unix/makeMast/moreFunctionsOld.r' )
Rcpp::sourceCpp( '../RcppFunctions/cppFns.cpp' )

source( 'R:/clark/clark.unix/makeMast/predictionMastFunctions.R' )

setwd("R:/clark/clark.unix/phylogram")
source('phyloFunctions.R')

setwd("R:/clark/clark.unix/phylogram")
# Load phylogenetic tree
trAll <- read.tree("Vascular_Plants_rooted.dated.tre")
trAll <- fixPhyloNames(trAll)

# Get species and traits
spp <- getFittedSpecies(
    dirs = 'output2/east_europe_west',
    keep = c('east', 'west', 'europe')
)

# Get trait data
tmp <- phyloTraits(
    tree = trAll, 
    species = spp,
    traitNames = c("gmPerSeed", "dispersal", "fruit", 
                   "pollination","gmPerCm", "leafN", "leafNP", "leafP", 
                      "maxHt", "maxDbh", "SLA", "woodSG")
)


specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

specCode fixed in function nameFix:
[1] "caryaGlabra"   "caryaOvalis-g"

traitTab <- tmp$traitTab
trq <- tmp$tree
tree     <- tmp$tree

library( RANN )
library( ape )
library( phytools )
library( plotrix )
library( stringr )
library( castor )
library( corrplot )

setwd("R:/clark/clark.unix/phylogram")
paramPath   <- "../makeMast/fitLogs/output2/east_europe_west/"
path2Traits <- "../traitsByGroup/"
traitFile   <- paste( path2Traits, 'plantTraits.csv', sep = '' )

source('../makeMast/predictionMastFunctions.r')
source('../makeMast/RFunctions/mastifFunctions.r')
source('../makeMast/moreFunctionsOld.r')
source('phyloFunctions.R')

vars <- getFittedVariables( continent = 'northAmerica', 
                            ppath = '../makeMast/fitLogs/', dirs = 'output2')
print( vars )

 [1] "aspect1.defSite.slope" "aspect1.slope"         "aspect2.defSite.slope"
 [4] "aspect2.slope"         "defAnom"               "defSite"              
 [7] "diam"                  "I.diam.2."             "I.tempSite.2."        
[10] "intercept"             "isolate"               "lfi10SqrtAnom"        
[13] "lfi10SqrtSite"         "lfi30SqrtAnom"         "lfi30SqrtSite"        
[16] "lfi50SqrtAnom"         "lfi50SqrtSite"         "ph"                   
[19] "shade"                 "slope"                 "tempSite"             
[22] "tpi"

tmp <- phyloSensitivity( tree, species = tree$tip.label, 
                         betaNames = c( "ph", "tpi", "slope", "shade", "defAnom" ),
                         paramPath = paramPath, path2Traits = path2Traits )
parEst   <- tmp$parEst
traitSig <- tmp$parSig
tree     <- tmp$tree

include  <- c( 'west', 'east', 'europe' )
dfColumn <- 'region'
#trAng <- read.tree("Angiosperms_100trees.tre")
trAll <- read.tree("Vascular_Plants_rooted.dated.tre")   # all vascular 
trAll <- fixPhyloNames( trAll )

#trq <- fixPhyloNames( trAng )
tmp <- phyloTraits( trq, species = spp[ grep( 'Quercus', spp ) ], 
                    traitNames = c( "gmPerSeed", "dispersal", "fruit", "leaves", 
                                    "pollination", "sex", "thorns" ) )
traitTab <- tmp$traitTab
trq      <- tmp$tree

phyloSubset( tree = trq, df = traitTab, dfColumn = 'genus', 
             include = 'Quercus', colRamp = regionCols[1:3],
             colColumn = 'region', edge.color = 'region',
             taxon = 'taxon', cex = .6, legendCorner = 'topleft' )
axisPhylo( side = 1 )
mtext( 'Mya', side = 1, line = 2.5 )

x <- read.csv( 'speciesByResponse.csv', row.names = 1 ) # file created in regionAnalysis.Rmd
rownames(x)[ rownames(x) == 'caryaOvalis-g' ] <- 'caryaOvalis'
spp   <- code2species( rownames(x) )

tmp <- phyloTraits( tree = trAll, species = spp, 
                    traitNames = c( "gmPerSeed", "dispersal", "fruit", "leaves", 
                                    "pollination", "sex", "thorns" ) )
traitTab <- tmp$traitTab
trRsp    <- tmp$tree
species  <- trRsp$tip.label
traitTab <- cbind( traitTab, x[ rownames(traitTab), ] )
rownames( traitTab ) <- species

# here is the phylogenetic distance matrix
pdist <- cophenetic( trRsp )  

# here is the trait distance matrix (columns with 'Anom')
dcols <- colnames(traitTab)[ grep('Anom', colnames( traitTab )) ]  # response to anomalies
tdist <- as.matrix( dist( traitTab[, dcols ] ) )  # trait distance
rownames( tdist ) <- colnames( tdist ) <- paste( traitTab$genus, traitTab$specEpith )

wp    <- which( rownames( pdist ) %in% rownames( tdist ) )
pdist <- pdist[ wp, wp ]
wp    <- which( rownames( tdist ) %in% rownames( pdist ) )
tdist <- tdist[ wp, wp ]
pdist <- pdist[ rownames(tdist), colnames( tdist ) ]
traitTab <- traitTab[ wp, ]

# here is a comparison of distances by family
diag( pdist ) <- diag( tdist ) <- NA

ftab     <- table( traitTab$genus )
genus <- names( ftab )[ ftab > 3 ]

par( mfrow = c(3,4), mar = c(1, .5, .8, .5 ), omi = c(1,1,.3,.3), bty = 'n' )
for( k in 1:length( genus ) ){
  wk <- which( traitTab$genus == genus[k] )
  plot( jitter(pdist[wk,wk]), tdist[wk,wk], xlab = '', ylab = '', cex = .6, pch = 16,
        xaxt = 'n', yaxt = 'n', col = .getColor( 'darkblue', .4 ) )
  axis( 1, labels = F )
  axis( 2, labels = F )
  mk <- cbind( pdist[wk,wk][lower.tri( pdist[wk,wk] ) ], 
               tdist[wk,wk][lower.tri( tdist[wk,wk] ) ] ) 
  mk <- Hmisc::rcorr( mk ) 
  lab <- paste( genus[k], round( mk$r[2], 2 ), sep = ', ' )
  if( mk$P[2] < .05 )lab <- paste( lab, '*', sep = '' )
           
  .plotLabel( lab, 'topleft', cex = 1.2, above = T )
}

mtext( 'Phylogenetic distance', 1, outer = T, line = 2 )
mtext( 'Anomaly response distance', 2, outer = T, line = 2 )

library(phylosignal)
library(phylobase)

vars <- c("shade")

tnames <- c( "gmPerSeed", 
             "ph","shade","capsule","fleshy","nut", "pod","samara","syncarpium",
             "anemochory","hydrochory","zoochory", "animal","wind", "broad_evergreen",
             "needle_deciduous","needle_evergreen","scalelike_evergreen",
             "monoecious","non.monoecious"  )

tdata <- as.matrix( traitTab[ , tnames ] )

pt <- phylo4d(trRsp, tdata) 


# color code tips by family
fams    <- sort( unique( traitTab$genus ) )
famCols <- colorRampPalette( colContrast )( length(fams) )
names( famCols ) <- fams
tip.col <- famCols[ traitTab$genus ]
names( tip.col ) <- trRsp$tip.label


# 使用 asp 参数强制1:1宽高比
plot.new()
plot.window(xlim = c(-1, 1), ylim = c(-1, 1), asp = 1)

# 然后绘制
barplot.phylo4d(pt, tree.type = "fan", 
                tip.cex = 0.6, 
                trait = vars, 
                tree.ladderize = TRUE,
                tip.col = tip.col, 
                bar.col = tip.col, 
                bar.lwd = 4,
                trait.bg.col = c( rep( '#c7eae5', 3), '#dfc27d' ),
                show.trait = FALSE,
                add = TRUE)  # 如果支持add参数

par(pty = "s")  # 's' 表示 square（正方形）
par(mar = c(1, 1, 1, 1))  # 四边留一点边距
par(oma = c(0, 0, 0, 0))

barplot.phylo4d(pt, tree.type = "fan", 
                tip.cex = 0.6, 
                trait = vars, 
                tree.ladderize = TRUE,
                tip.col = tip.col, 
                bar.col = tip.col, 
                bar.lwd = 4,center = FALSE, scale = FALSE,
                trait.bg.col = c( rep( '#c7eae5', 3), '#dfc27d' ),
                show.trait = FALSE)


psig   <- phyloSignal(p4d = pt, method = "all")
pstats <- signif( psig$stat, 3 )
pvalue <- signif( psig$pvalue, 3 )

# 为 vars 里的每个trait提取值
trait_of_interest <- vars[1]  # 你的 vars = c("shade")

k_val   <- psig$stat[trait_of_interest, "K"]
k_p     <- psig$pvalue[trait_of_interest, "K"]
lambda_val <- psig$stat[trait_of_interest, "Lambda"]
lambda_p   <- psig$pvalue[trait_of_interest, "Lambda"]

# 然后你的 mtext 代码就能用了
mtext(side = 1, line = 2, 
      text = paste("Phylogenetic Signal: K =", round(k_val, 3), 
                   ifelse(k_p < 0.05, "(significant)", "(ns)"),
                   " | λ =", round(lambda_val, 3),
                   ifelse(lambda_p < 0.05, "(significant)", "(ns)")),
      cex = 1, font = 2)

pdist <- cophenetic( tree )

setwd("R:/clark/clark.unix/phylogram")
mu <- read.csv('C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/parEst.csv')
se <- read.csv('C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/parSe.csv')
trAll <- read.tree("Vascular_Plants_rooted.dated.tre") 
paramPath   <- "../makeMast/fitLogs/output2/east_europe_west/"
#path2Traits <- "../traitsByGroup/"
#traitFile   <- paste( path2Traits, 'plantTraits.csv', sep = '' )
 path2Traits= "C:/Users/mh471/Downloads/radiation/"
traitFile   <- read.csv("C:/Users/mh471/Downloads/radiation/plant_traits_transformed.csv")

source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')

library(tidyverse)
library(ggplot2)
library(ggrepel)
library(gridExtra)

traitFile <- read.csv("C:/Users/mh471/Downloads/radiation/plant_traits_transformed.csv")

mu_processed <- mu

if(!"genus" %in% names(mu_processed)) mu_processed$genus <- NA
if(!"region" %in% names(mu_processed)) mu_processed$region <- NA

for(i in 1:nrow(mu_processed)) {
  species_code <- mu_processed$species[i]
  match_idx <- which(traitFile$code8 == species_code)
  
  if(length(match_idx) > 0) {
    idx <- match_idx[1]
    mu_processed$N_area[i] <- traitFile$`Leaf.nitrogen..N..content.per.leaf.area..g.m.2.`[idx]
    mu_processed$Vcmax[i] <- traitFile$`Photosynthetic.carboxylation.capacity..Vcmax..Vmax..per.leaf.area.at.25.C..Âµmol.mâ..Â².sâ..Â¹._x`[idx]
    mu_processed$Jmax[i] <- traitFile$`Jmax.per.leaf.area.at.ambient.or.leaf.temperature..Âµmol.mâ..Â².sâ..Â¹._x`[idx]
    mu_processed$Amax[i] <- traitFile$`Leaf.photosynthesis.at.saturating.light.and.saturating.CO2.per.leaf.area..Amax...Âµmol.mâ..Â².sâ..Â¹._x`[idx]
    mu_processed$SLA_val[i] <- traitFile$SLA[idx]
    mu_processed$genus[i] <- traitFile$genus[idx]
    mu_processed$region[i] <- traitFile$region[idx]
  }
}

# Fix region classification
mu_processed <- mu_processed %>%
  mutate(continent = case_when(
    grepl("^west", region) | grepl("^east", region) ~ "NA",
    grepl("^europe", region) ~ "EU",
    TRUE ~ "OTHER"
  ))

se_processed <- se %>%
  left_join(mu_processed, by = c("species" = "species"))

# Generic function to process any trait
process_genus_trait_continental <- function(mu_data, se_data, genus, trait_name) {
  subset_data <- mu_data[mu_data$genus == genus, ]
  
  if(nrow(subset_data) == 0) return(NULL)
  
  # Get trait data for both regions
  eu_data <- subset_data[subset_data$continent == "EU" & !is.na(subset_data[[trait_name]]), ]
  na_data <- subset_data[subset_data$continent == "NA" & !is.na(subset_data[[trait_name]]), ]
  
  if(nrow(eu_data) == 0 || nrow(na_data) == 0) return(NULL)
  
  result <- data.frame(
    genus = genus,
    na_value = mean(na_data[[trait_name]], na.rm = TRUE),
    eu_value = mean(eu_data[[trait_name]], na.rm = TRUE),
    na_error = sd(na_data[[trait_name]], na.rm = TRUE) / sqrt(nrow(na_data)),
    eu_error = sd(eu_data[[trait_name]], na.rm = TRUE) / sqrt(nrow(eu_data)),
    n_eu = nrow(eu_data),
    n_na = nrow(na_data)
  )
  
  # Handle NA errors
  result$na_error[is.na(result$na_error)] <- 0.05 * abs(result$na_value[is.na(result$na_error)])
  result$eu_error[is.na(result$eu_error)] <- 0.05 * abs(result$eu_value[is.na(result$eu_error)])
  
  return(result)
}

# Create plot function
create_continental_plot <- function(results_df, trait_name, unit) {
  if(is.null(results_df) || nrow(results_df) == 0) return(NULL)
  
  results_df <- results_df %>%
    mutate(diff = eu_value - na_value)
  
  # Auto-scale axes
  x_range <- range(results_df$na_value, na.rm = TRUE)
  y_range <- range(results_df$eu_value, na.rm = TRUE)
  plot_range <- range(c(x_range, y_range))
  plot_range <- plot_range + c(-0.1, 0.1) * diff(plot_range)
  
  plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = diff)) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
    geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                  width = diff(plot_range) * 0.01, linewidth = 0.5) +
    geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                   height = diff(plot_range) * 0.01, linewidth = 0.5) +
    geom_point(size = 4) +
    geom_text_repel(aes(label = genus),
                    size = 3.5,
                    show.legend = FALSE, 
                    box.padding = 0.3,   
                    point.padding = 0.2,
                    min.segment.length = 0,
                    segment.color = "grey50",
                    segment.size = 0.5,
                    max.overlaps = 30) +
    scale_color_gradient2(
      low = "blue4",    
      mid = "grey50",    
      high = "darkred",    
      midpoint = 0,
      name = "EU - NA"
    ) +
    scale_x_continuous(limits = plot_range) +
    scale_y_continuous(limits = plot_range) +
    coord_fixed(ratio = 1) +
    theme_minimal() +
    theme(
      aspect.ratio = 1,
      panel.grid.minor = element_blank(),
      panel.grid.major = element_line(color = "grey90"),
      axis.title = element_text(size = 11),
      axis.text = element_text(size = 9),
      legend.position = "right",
      plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
      plot.margin = margin(10, 10, 10, 10)
    ) +
    labs(
      x = paste0("North America, ", trait_name, " (", unit, ")"),
      y = paste0("Europe, ", trait_name, " (", unit, ")"),
      title = paste0(trait_name, " comparison"))
  
  return(plot)
}

# Process only the traits that have data
traits_to_analyze <- list(
  N_area = list(name = "Leaf N", unit = "g/m²"),
  Vcmax = list(name = "Vcmax", unit = "μmol/m²/s"),
  Jmax = list(name = "Jmax", unit = "μmol/m²/s")
)

all_plots <- list()

for(trait in names(traits_to_analyze)) {
  print(paste("\n\nAnalyzing", trait, ":"))
  
  # Check data availability - fixed to handle missing columns
  continental_data <- mu_processed %>%
    filter(!is.na(.[[trait]]), !is.na(genus), continent %in% c("EU", "NA")) %>%
    group_by(genus, continent) %>%
    summarise(n = n(), mean_val = mean(.[[trait]], na.rm = TRUE), .groups = 'drop') %>%
    pivot_wider(names_from = continent, values_from = c(n, mean_val), values_fill = list(n = 0, mean_val = NA))
  
  # Check which columns exist and filter appropriately
  if("n_EU" %in% names(continental_data) && "n_NA" %in% names(continental_data)) {
    both_continents <- continental_data %>% 
      filter(n_EU > 0 & n_NA > 0)
    
    print(paste("Genera with", trait, "data from both continents:"))
    print(both_continents)
    
    if(nrow(both_continents) > 0) {
      genera_with_both <- unique(both_continents$genus)
      results <- lapply(genera_with_both, function(g) 
        process_genus_trait_continental(mu_processed, se_processed, g, trait))
      results_df <- do.call(rbind, results[!sapply(results, is.null)])
      
      if(!is.null(results_df) && nrow(results_df) > 0) {
        plot <- create_continental_plot(results_df, 
                                       traits_to_analyze[[trait]]$name, 
                                       traits_to_analyze[[trait]]$unit)
        all_plots[[trait]] <- plot
        print(plot)
        
        # Print statistics
        print(paste("\nResults for", trait, ":"))
        print(results_df)
        
        # Paired t-test if enough data
        if(nrow(results_df) >= 3) {
          t_test <- t.test(results_df$eu_value, results_df$na_value, paired = TRUE)
          print(paste("Paired t-test: p =", round(t_test$p.value, 4)))
        }
      }
    }
  } else {
    print(paste("No genera have", trait, "data from both continents"))
  }
}

[1] "\n\nAnalyzing N_area :"
[1] "Genera with N_area data from both continents:"
# A tibble: 12 x 5
   genus     n_EU  n_NA mean_val_EU mean_val_NA
   <chr>    <int> <int>       <dbl>       <dbl>
 1 Abies        1     1        2.19        2.19
 2 Acer         4     2        2.19        2.19
 3 Alnus        1     1        2.19        2.19
 4 Betula       1     2        2.19        2.19
 5 Fraxinus     3     2        2.19        2.19
 6 Juglans      1     1        2.19        2.19
 7 Larix        1     1        2.19        2.19
 8 Picea        1     3        2.19        2.19
 9 Pinus        5    12        2.19        2.19
10 Prunus       1     1        2.19        2.19
11 Quercus      8     7        2.19        2.19
12 Tilia        2     1        2.19        2.19

[1] "\nResults for N_area :"
      genus  na_value eu_value   na_error   eu_error n_eu n_na
1     Abies 2.6208651 4.002096 0.13104326 0.20010479    1    1
2      Acer 0.6938030 1.597970 0.23212296 0.35972783    4    2
3     Alnus 1.7964072 2.424583 0.08982036 0.12122916    1    1
4    Betula 2.1179585 0.760942 0.38204150 0.03804710    1    2
5  Fraxinus 1.4417187 2.655459 0.13722868 0.33921588    3    2
6   Juglans 0.9704918 1.501604 0.04852459 0.07508018    1    1
7     Larix 1.7160000 2.459099 0.08580000 0.12295496    1    1
8     Picea 3.0375359 6.225000 0.51332510 0.31125000    1    3
9     Pinus 3.1629821 2.732445 0.44521937 0.45392105    5   12
10   Prunus 1.0742000 2.108458 0.05371000 0.10542289    1    1
11  Quercus 1.9682340 2.688094 0.13562578 0.17006554    8    7
12    Tilia 1.6483516 1.351567 0.08241758 0.45485319    2    1
[1] "Paired t-test: p = 0.0553"
[1] "\n\nAnalyzing Vcmax :"
[1] "Genera with Vcmax data from both continents:"
# A tibble: 7 x 5
  genus     n_EU  n_NA mean_val_EU mean_val_NA
  <chr>    <int> <int>       <dbl>       <dbl>
1 Acer         2     1        45.8        45.8
2 Betula       1     1        45.8        45.8
3 Fraxinus     1     2        45.8        45.8
4 Juglans      1     1        45.8        45.8
5 Picea        1     2        45.8        45.8
6 Pinus        2     5        45.8        45.8
7 Quercus      3     3        45.8        45.8

[1] "\nResults for Vcmax :"
     genus   na_value eu_value   na_error  eu_error n_eu n_na
1     Acer   9.595942 19.57134  0.4797971 0.6566609    2    1
2   Betula 100.555556  1.00000  5.0277778 0.0500000    1    1
3 Fraxinus  19.111661 21.99600  1.5271897 1.0998000    1    2
4  Juglans  14.763158 26.85714  0.7381579 1.3428571    1    1
5    Picea  29.077500 77.79541  0.7775000 3.8897705    1    2
6    Pinus  65.273786 40.60216 17.3780500 5.8921600    2    5
7  Quercus  58.948687 60.04116 23.1511123 8.7614009    3    3
[1] "Paired t-test: p = 0.6997"
[1] "\n\nAnalyzing Jmax :"
[1] "Genera with Jmax data from both continents:"
# A tibble: 7 x 5
  genus     n_EU  n_NA mean_val_EU mean_val_NA
  <chr>    <int> <int>       <dbl>       <dbl>
1 Acer         1     1        108.        108.
2 Betula       1     1        108.        108.
3 Fraxinus     1     1        108.        108.
4 Juglans      1     1        108.        108.
5 Picea        1     3        108.        108.
6 Pinus        4     5        108.        108.
7 Quercus      3     2        108.        108.

[1] "\nResults for Jmax :"
     genus  na_value eu_value  na_error  eu_error n_eu n_na
1     Acer 102.00000  45.1800  5.100000  2.259000    1    1
2   Betula 165.00000  93.3043  8.250000  4.665215    1    1
3 Fraxinus 211.00000  31.6000 10.550000  1.580000    1    1
4  Juglans 102.64481  46.9154  5.132241  2.345770    1    1
5    Picea  93.67134 130.6008 13.591975  6.530040    1    3
6    Pinus 102.76130 102.0039 15.304755 61.176629    4    5
7  Quercus 113.66825 136.8199 12.431749 23.416720    3    2
[1] "Paired t-test: p = 0.1682"

# Create combined plot
if(length(all_plots) >= 2) {
  print("\n\nCreating combined plot...")
  # Arrange plots in a grid
  if(length(all_plots) == 2) {
    combined <- grid.arrange(grobs = all_plots, ncol = 2)
  } else if(length(all_plots) == 3) {
    combined <- grid.arrange(grobs = all_plots, ncol = 2, nrow = 2)
  }
}

[1] "\n\nCreating combined plot..."

# Final summary
print("\n\nFINAL SUMMARY:")

[1] "\n\nFINAL SUMMARY:"

print(paste("Successfully created", length(all_plots), "comparison plots"))

[1] "Successfully created 3 comparison plots"

for(trait in names(all_plots)) {
  print(paste("-", traits_to_analyze[[trait]]$name))
}

[1] "- Leaf N"
[1] "- Vcmax"
[1] "- Jmax"

phyloTraits <- function( tree, species = tree$tip.label, fill = 'genus',
                         genera = NULL, 
                         traitNames = c( "gmPerSeed", "dispersal", "fruit" ),
                         path2Traits = "C:/Users/mh471/Downloads/radiation/", 
                         traitFile = "plant_traits_transformed.csv",
                         taxonGroup = "plants" ){  # Added taxonGroup parameter with default
  
  require( stringr )
  require( castor )
  
  tree$tip.label <- .replaceString( tree$tip.label , '_', ' ' )
  if( !is.null( species ) )species <- .replaceString( species , '_', ' ' )
  
  if( is.null( genera ) )GENERA <- F
  
  genSpec   <- columnSplit( species, ' ' )
  specNames <- paste( tolower( substr( genSpec[,1], 1, 8 ) ),
                      upperFirstLetter( substr( genSpec[,2], 1, 8 ) ), sep = '' )
  
  FAMILY <- T
  if( fill == 'genus' )FAMILY <- F
  
  tnew <- numeric(0)
  
  # Fixed: changed tnames to traitNames
  for( k in 1:length( traitNames )){
    tk <- getTraits( specNames = specNames, genera = genera, traitName = traitNames[k], FAMILY = FAMILY,
                     path2Traits = path2Traits, traitFile = traitFile )
    tnew <- data.frame( cbind(tnew, tk) )
  }
  names( tnew ) <- traitNames
  tnew$code8 <- specNames
  
  traitTab <- tnew
  # rm( tnew )
  
  
  if( is.null( genera ) ){
    
    species   <- species[ specNames %in% rownames( traitTab ) ]
    specNames <- specNames[ specNames %in% rownames( traitTab ) ]
    
    GENERA <- F
    #  genSpec <- columnSplit( species, ' ' )
    #  specNames <- paste( tolower( substr( genSpec[,1], 1, 8 ) ),
    #                      upperFirstLetter( substr( genSpec[,2], 1, 8 ) ), sep = '' )
    species <- .replaceString( species, '_', ' ' )
    tree$tip.label <- .replaceString( tree$tip.label , '_', ' ' )
    
    ws <- which( species %in% tree$tip.label )
    # species  <- species[ species %in% tree$tip.label ]
    trSpec   <- tree$tip.label
    wf       <- which( trSpec %in% species )
    wn       <- which( !trSpec %in% species )
    tree     <- drop.tip(tree, tip = tree$tip.label[wn] )
    species  <- tree$tip.label
    gg       <- columnSplit( species, ' ' )
    specNames <- species2code( gg[,1], gg[,2] )
    traitTab <- traitTab[ specNames, ]
    
  }else{
    GENERA <- T
    specNames <- NULL
    ts   <- tree$tip.label
    tgen <- columnSplit( ts, ' ' )[,1]
    ww   <- which( tgen %in% genera )
    wd   <- which( duplicated( tgen ) | !tgen %in% genera )
    tree <- drop.tip( tree, wd )
    genera <- columnSplit( tree$tip.label, ' ' )[,1]
    tree$tip.label <- genera
    
    tnew <- numeric(0)
    # Fixed: changed tnames to traitNames
    for( k in 1:length( traitNames )){
      tk <- getTraits( specNames = NULL, genera = genera, traitName = traitNames[k], FAMILY = FAMILY,
                       path2Traits = path2Traits, traitFile = traitFile )
      tnew <- data.frame( cbind(tnew, tk) )
    }
    names( tnew ) <- traitNames
    traitTab <- tnew
  }
  
  regName <- 'region'
  if( taxonGroup == 'mammals' ){
    regName <- 'biogeographical_realm'
    rownames(traitTab) <- traitTab$scientificName 
  }
  
  # get first region
  if( !GENERA ){
    region <- getTraits( specNames = traitTab$code8, traitName = regName, 
                         path2Traits = path2Traits, traitFile = traitFile )
    #    rownames(traitTab) <- paste( traitTab$genus, traitTab$specEpith )
  }else{
    region <- getTraits( genera = traitTab$genus, traitName = regName, 
                         path2Traits = path2Traits, traitFile = traitFile )
    if( taxonGroup == 'mammals' )region <- .replaceString( region, ', ', '_' )
    rownames(traitTab) <- traitTab$genus 
  }
  
  if( taxonGroup == 'plants'){
    nd <- stringi::stri_count(region, fixed = '_' )
    for( k in 1:5){
      wk <- which(nd == k)
      if( length(wk) == 0 )next
      region[ wk ] <- columnSplit( region[ wk ] )[,1]
    }
    region[ is.na(region) ] <- 'UNKN'
    region[ region == 'asia' ] <- 'EEA'
    region[ region == 'europe' ] <- 'WEA'
    region[ region == 'africa' ] <- 'AFR'
    region[ region == 'east' ] <- 'ENA'
    region[ region == 'west' ] <- 'WNA'
  }
  if( taxonGroup == 'mammals' ){
    gg <- grep( 'Afro', region )
    region[ gg ] <- 'AFR'
    gg <- grep( 'Austral', region )
    region[ gg ] <- 'OCEAN'
    gg <- grep( 'Oceanian', region )
    region[ gg ] <- 'OCEAN'
    gg <- grep( 'Nearctic', region )
    region[ gg ] <- 'NA'
    gg <- grep( 'Neotropical', region )
    region[ gg ] <- 'SA'
    gg <- grep( 'Indomalayan', region )
    region[ gg ] <- 'ASIA'
    gg <- grep( 'Palearctic', region )
    region[ gg ] <- 'ASIA'
  }
  
  traitTab$region <- region
  traitTab$taxon  <- tree$tip.label
  
  list( tree = tree, traitTab = traitTab )
}

original_names <- names(traitFile)

# 清理编码问题（那些 Â 之类的字符）
clean_names <- original_names
clean_names <- gsub("Â", "", clean_names)                     # 移除 Â
clean_names <- gsub("µ", "u", clean_names)                     # µ 改为 u  
clean_names <- gsub("â..", "", clean_names)                   # 移除其他编码问题
clean_names <- gsub("\\.{2,}", ".", clean_names)              # 多个点改为单个点
clean_names <- gsub("\\.$", "", clean_names)                  # 移除末尾的点
clean_names <- gsub("^\\.", "", clean_names)                  # 移除开头的点

# 对于那些特别长的列名，可以选择性地简化
# 但保留 gmPerSeed, dispersal, fruit 这类简短的原始名称不变
for(i in 1:length(clean_names)) {
  # 只处理特别长的列名（比如超过50个字符的）
  if(nchar(clean_names[i]) > 50) {
    # 简化光合作用相关的列名
    clean_names[i] <- gsub("per.leaf.area.at.ambient.or.leaf.temperature", "", clean_names[i])
    clean_names[i] <- gsub("per.leaf.dry.mass..specific.leaf.area..SLA.or.1.LMA...undefined.if.petiole.is.in..or.excluded", "", clean_names[i])
    clean_names[i] <- gsub("measured.by.light.absoption..e.g..SPAD.chlorophyll.meter", "SPAD", clean_names[i])
    clean_names[i] <- gsub("at.saturating.light.and.ambient.CO2.per.leaf.area", "", clean_names[i])
    clean_names[i] <- gsub("at.saturating.light.and.saturating.CO2.per.leaf.area", "", clean_names[i])
    clean_names[i] <- gsub("per.area.at.saturating.light.at.PAR.1200.micromol.m.2.s.1", "PAR1200", clean_names[i])
    clean_names[i] <- gsub("per.area.in.shade.at.PAR.50.micromol.m.2.s.1", "PAR50", clean_names[i])
    clean_names[i] <- gsub("..umol.m..2.s..1.", "", clean_names[i])
    clean_names[i] <- gsub("photosynthesis.transpiration", "", clean_names[i])
    clean_names[i] <- gsub("..dimensionless.", "", clean_names[i])
    clean_names[i] <- gsub("..micromol.millimol.", "", clean_names[i])
  }
}

# 再次清理多余的点
clean_names <- gsub("\\.{2,}", ".", clean_names)
clean_names <- gsub("\\.$", "", clean_names)

# 应用新的列名
names(traitFile) <- clean_names

# 查看部分改变的列名对比
changes <- data.frame(
  old = original_names[c(14, 22, 25, 51:55, 91:95)],
  new = clean_names[c(14, 22, 25, 51:55, 91:95)]
)
print(changes)

                                                                                                                  old
1                                                                                                           gmPerSeed
2                                                                                                           dispersal
3                                                                                                               fruit
4                                           Jmax.per.leaf.area.at.ambient.or.leaf.temperature..Âµmol.mâ..Â².sâ..Â¹._x
5  Leaf.area.per.leaf.dry.mass..specific.leaf.area..SLA.or.1.LMA...undefined.if.petiole.is.in..or.excluded..mm2.mg.1.
6                                                                    Leaf.carbon..C..content.per.leaf.dry.mass..mg.g.
7                                                                Leaf.carbon.isotope.signature..delta.13C...per.mill.
8                                                                          Leaf.carbon.nitrogen..C.N..ratio..g.g.1._x
9                                           Jmax.per.leaf.area.at.ambient.or.leaf.temperature..Âµmol.mâ..Â².sâ..Â¹._y
10                                                                            Leaf.carbon.content.per.dry.mass..mg.g.
11                                                                         Leaf.carbon.nitrogen..C.N..ratio..g.g.1._y
12                                                                    Leaf.chlorophyll.a.b.content.per.area..g.m.2._y
13    Leaf.chlorophyll.per.area..measured.by.light.absoption..e.g..SPAD.chlorophyll.meter...SPAD.Chlorophyll.Index._y
                                                                                                           new
1                                                                                                    gmPerSeed
2                                                                                                    dispersal
3                                                                                                        fruit
4                                                                                           Jmax.umol.m².s¹._x
5  Leaf.area.per.leaf.dry.mass.specific.leaf.area.SLA.or.1.LMA.undefined.if.petiole.is.in.or.excluded.mm2.mg.1
6                                                                 Leaf.carbon.C.content.per.leaf.dry.mass.mg.g
7                                                             Leaf.carbon.isotope.signature.delta.13C.per.mill
8                                                                      Leaf.carbon.nitrogen.C.N.ratio.g.g.1._x
9                                                                                           Jmax.umol.m².s¹._y
10                                                                       Leaf.carbon.content.per.dry.mass.mg.g
11                                                                     Leaf.carbon.nitrogen.C.N.ratio.g.g.1._y
12                                                              Leaf.chlorophyll.a.b.content.per.area.g.m.2._y
13  Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._y

# 如果你想查看哪些列有 _x 和 _y 版本（可能是重复数据）
x_cols <- grep("_x$", names(traitFile), value = TRUE)
y_cols <- grep("_y$", names(traitFile), value = TRUE)

if(length(x_cols) > 0) {
  cat("\n带 _x 后缀的列（可能来自第一个数据源）:\n")
  print(x_cols[1:min(5, length(x_cols))])  # 显示前5个
}


<U+5E26> _x <U+540E><U+7F00><U+7684><U+5217>(<U+53EF><U+80FD><U+6765><U+81EA><U+7B2C><U+4E00><U+4E2A><U+6570><U+636E><U+6E90>):
[1] "Jmax.umol.m².s¹._x"                                                                                        
[2] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._x"                                                                   
[3] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._x"                                                            
[4] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._x"
[5] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._x"

if(length(y_cols) > 0) {
  cat("\n带 _y 后缀的列（可能来自第二个数据源）:\n")
  print(y_cols[1:min(5, length(y_cols))])  # 显示前5个
}


<U+5E26> _y <U+540E><U+7F00><U+7684><U+5217>(<U+53EF><U+80FD><U+6765><U+81EA><U+7B2C><U+4E8C><U+4E2A><U+6570><U+636E><U+6E90>):
[1] "Jmax.umol.m².s¹._y"                                                                                        
[2] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._y"                                                                   
[3] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._y"                                                            
[4] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._y"
[5] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._y"

# 查看最终的列名
colnames(traitFile)

  [1] "SpeciesName"                                                                                                
  [2] "AccSpeciesID"                                                                                               
  [3] "AccSpeciesName"                                                                                             
  [4] "TraitID"                                                                                                    
  [5] "DataID"                                                                                                     
  [6] "species_clean"                                                                                              
  [7] "code8"                                                                                                      
  [8] "genus"                                                                                                      
  [9] "specEpith"                                                                                                  
 [10] "subSpec"                                                                                                    
 [11] "class"                                                                                                      
 [12] "family"                                                                                                     
 [13] "section"                                                                                                    
 [14] "gmPerSeed"                                                                                                  
 [15] "gmPerSeed_Sd"                                                                                               
 [16] "seedCoatRatio"                                                                                              
 [17] "seedCoatRatio_Sd"                                                                                           
 [18] "seedMoistureFraction"                                                                                       
 [19] "seedMoistureFraction_Sd"                                                                                    
 [20] "kCalPerGm"                                                                                                  
 [21] "degDaySeed"                                                                                                 
 [22] "dispersal"                                                                                                  
 [23] "drought"                                                                                                    
 [24] "flood"                                                                                                      
 [25] "fruit"                                                                                                      
 [26] "gmPerCm"                                                                                                    
 [27] "leafN"                                                                                                      
 [28] "leafNP"                                                                                                     
 [29] "leafP"                                                                                                      
 [30] "leaves"                                                                                                     
 [31] "maxDbh"                                                                                                     
 [32] "maxHt"                                                                                                      
 [33] "pollination"                                                                                                
 [34] "region"                                                                                                     
 [35] "seedsPerFruit"                                                                                              
 [36] "seedsPerFruit_Sd"                                                                                           
 [37] "sex"                                                                                                        
 [38] "shade"                                                                                                      
 [39] "SLA"                                                                                                        
 [40] "woodSG"                                                                                                     
 [41] "xylem"                                                                                                      
 [42] "years2fruitMature"                                                                                          
 [43] "gmPerCone"                                                                                                  
 [44] "gmPerCone_Sd"                                                                                               
 [45] "coneRatio"                                                                                                  
 [46] "thorns"                                                                                                     
 [47] "spinoseLeaf"                                                                                                
 [48] "pubescence"                                                                                                 
 [49] "bfec.shade.x"                                                                                               
 [50] "bfec.shade.y"                                                                                               
 [51] "Jmax.umol.m².s¹._x"                                                                                         
 [52] "Leaf.area.per.leaf.dry.mass.specific.leaf.area.SLA.or.1.LMA.undefined.if.petiole.is.in.or.excluded.mm2.mg.1"
 [53] "Leaf.carbon.C.content.per.leaf.dry.mass.mg.g"                                                               
 [54] "Leaf.carbon.isotope.signature.delta.13C.per.mill"                                                           
 [55] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._x"                                                                    
 [56] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._x"                                                             
 [57] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._x" 
 [58] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._x"                                          
 [59] "Leaf.lamina.dry.matter.content.LDMC.petiole.excluded.g.g._x"                                                
 [60] "Leaf.nitrogen.N.content.per.leaf.area.g.m.2"                                                                
 [61] "Leaf.nitrogen.N.content.per.leaf.dry.mass.mg.g"                                                             
 [62] "Leaf.nitrogen.N.isotope.signature.delta.15N.per.mill"                                                       
 [63] "Leaf.nitrogen.phosphorus.N.P.ratio.g.g._x"                                                                  
 [64] "Leaf.phosphorus.P.content.per.leaf.area.g.m.2"                                                              
 [65] "Leaf.photosynthesis.Asat.umol.m².s¹._x"                                                                     
 [66] "Leaf.photosynthesis.Amax.umol.m².s¹._x"                                                                     
 [67] "Leaf.photosynthesis.PAR1200.Asat.umol.m².s¹._x"                                                             
 [68] "Leaf.photosynthesis.PAR50.umol.m².s¹._x"                                                                    
 [69] "Leaf.thickness.mm._x"                                                                                       
 [70] "Leaf.water.content.per.leaf.water.saturated.mass.LWC.1.LDMC.g.g._x"                                         
 [71] "Net.photosynthesis.at.field.capacity.umol.m².s¹._x"                                                         
 [72] "Net.photosynthetic.rate.at.100.PPFD.umol.m².s¹._x"                                                          
 [73] "Net.photosynthetic.rate.at.20.PPFD.umol.m².s¹._x"                                                           
 [74] "Net.photosynthetic.rate.at.200.PPFD.umol.m².s¹._x"                                                          
 [75] "Net.photosynthetic.rate.at.300.PPFD.umol.m².s¹._x"                                                          
 [76] "Net.photosynthetic.rate.at.50.PPFD.umol.m².s¹._x"                                                           
 [77] "Net.photosynthetic.rate.at.500.PPFD.umol.m².s¹._x"                                                          
 [78] "Net.photosynthetic.rate.at.800.PPFD.umol.m².s¹._x"                                                          
 [79] "Photosynthesis.per.leaf.area.at.leaf.temperature.Aarea.umol.m².s¹._x"                                       
 [80] "Photosynthesis.rate.per.leaf.dry.mass.micro.mol.g.1.s.1"                                                    
 [81] "Photosynthesis.rate.per.leaf.transpiration.photosynthetic.water.use.effinciency.WUE.micromol.millimol"      
 [82] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.25.C.umol.m².s¹._x"                       
 [83] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.leaf.temperature.umol.m².s¹._x"           
 [84] "Photosynthetic.electron.transport.capacity.per.leaf.area.at.25C.Jmax25area.umol.m².s¹._x"                   
 [85] "Plant.height.m"                                                                                             
 [86] "Plant.height.relative.growth.rate.cm.cm.1.d.1"                                                              
 [87] "Plant.relative.growth.rate.g.g.1.d.1"                                                                       
 [88] "Species.environmental.indicator.value.according.to.Ellenberg.pH.dimensionless"                              
 [89] "Ellenberg.indicator.value.pH.reaction.dimensionless"                                                        
 [90] "Instantaneous.water.use.effinciency.WUE.micromol.millimol"                                                  
 [91] "Jmax.umol.m².s¹._y"                                                                                         
 [92] "Leaf.carbon.content.per.dry.mass.mg.g"                                                                      
 [93] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._y"                                                                    
 [94] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._y"                                                             
 [95] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._y" 
 [96] "Leaf.delta.13C.Leaf.carbon.isotope.ratio.per.mill"                                                          
 [97] "Leaf.delta.15nitrogen.per.mill"                                                                             
 [98] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._y"                                          
 [99] "Leaf.lamina.dry.matter.content.LDMC.petiole.excluded.g.g._y"                                                
[100] "Leaf.nitrogen.content.per.area.Narea.g.m.2"                                                                 
[101] "Leaf.nitrogen.content.per.dry.mass.Nmass.mg.g"                                                              
[102] "Leaf.nitrogen.phosphorus.N.P.ratio.g.g._y"                                                                  
[103] "Leaf.phosphorus.content.per.area.Parea.g.m.2"                                                               
[104] "Leaf.photosynthesis.Asat.umol.m².s¹._y"                                                                     
[105] "Leaf.photosynthesis.Amax.umol.m².s¹._y"                                                                     
[106] "Leaf.photosynthesis.PAR1200.Asat.umol.m².s¹._y"                                                             
[107] "Leaf.photosynthesis.PAR50.umol.m².s¹._y"                                                                    
[108] "Leaf.thickness.mm._y"                                                                                       
[109] "Leaf.water.content.per.leaf.water.saturated.mass.LWC.1.LDMC.g.g._y"                                         
[110] "Net.photosynthesis.at.field.capacity.umol.m².s¹._y"                                                         
[111] "Net.photosynthetic.rate.at.100.PPFD.umol.m².s¹._y"                                                          
[112] "Net.photosynthetic.rate.at.20.PPFD.umol.m².s¹._y"                                                           
[113] "Net.photosynthetic.rate.at.200.PPFD.umol.m².s¹._y"                                                          
[114] "Net.photosynthetic.rate.at.300.PPFD.umol.m².s¹._y"                                                          
[115] "Net.photosynthetic.rate.at.50.PPFD.umol.m².s¹._y"                                                           
[116] "Net.photosynthetic.rate.at.500.PPFD.umol.m².s¹._y"                                                          
[117] "Net.photosynthetic.rate.at.800.PPFD.umol.m².s¹._y"                                                          
[118] "Photosynthesis.per.leaf.area.at.leaf.temperature.Aarea.umol.m².s¹._y"                                       
[119] "Photosynthesis.per.leaf.dry.mass.Amass.micro.mol.g.1.s.1"                                                   
[120] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.25.C.umol.m².s¹._y"                       
[121] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.leaf.temperature.umol.m².s¹._y"           
[122] "Photosynthetic.electron.transport.capacity.per.leaf.area.at.25C.Jmax25area.umol.m².s¹._y"                   
[123] "Plant.height.at.age.of.20.years.m"                                                                          
[124] "Plant.height.relative.growth.rate.RGR_height.cm.cm.1.d.1"                                                   
[125] "Plant.relative.growth.rate.RGR.g.g.1.d.1"                                                                   
[126] "SLA.undefined.if.petiole.in.or.excluded.mm2.mg.1"                                                           
[127] "Ellenberg.indicator.value.pH.reaction._std"                                                                 
[128] "Instantaneous.water.use.effinciency.WUE._std"                                                               
[129] "Jmax._std"                                                                                                  
[130] "Leaf.carbon.content.per.dry.mass_std"                                                                       
[131] "Leaf.carbon.nitrogen.C.N.ratio_std"                                                                         
[132] "Leaf.chlorophyll.a.b.content.per.area_std"                                                                  
[133] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter._std"                      
[134] "Leaf.delta.13C.Leaf.carbon.isotope.ratio._std"                                                              
[135] "Leaf.delta.15nitrogen_std"                                                                                  
[136] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC._std"                                            
[137] "Leaf.lamina.dry.matter.content.LDMC.petiole.excluded._std"                                                  
[138] "Leaf.nitrogen.content.per.area.Narea._std"                                                                  
[139] "Leaf.nitrogen.content.per.dry.mass.Nmass._std"                                                              
[140] "Leaf.nitrogen.phosphorus.N.P.ratio_std"                                                                     
[141] "Leaf.phosphorus.content.per.area.Parea._std"                                                                
[142] "Leaf.photosynthesis.Asat._std"                                                                              
[143] "Leaf.photosynthesis.Amax._std"                                                                              
[144] "Leaf.photosynthesis.PAR1200.Asat._std"                                                                      
[145] "Leaf.photosynthesis.PAR50_std"                                                                              
[146] "Leaf.thickness_std"                                                                                         
[147] "Leaf.water.content.per.leaf.water.saturated.mass.LWC.1.LDMC._std"                                           
[148] "Net.photosynthetic.rate.at.100.PPFD_std"                                                                    
[149] "Net.photosynthetic.rate.at.20.PPFD_std"                                                                     
[150] "Net.photosynthetic.rate.at.200.PPFD_std"                                                                    
[151] "Net.photosynthetic.rate.at.300.PPFD_std"                                                                    
[152] "Net.photosynthetic.rate.at.50.PPFD_std"                                                                     
[153] "Net.photosynthetic.rate.at.500.PPFD_std"                                                                    
[154] "Net.photosynthetic.rate.at.800.PPFD_std"                                                                    
[155] "Photosynthesis.per.leaf.area.at.leaf.temperature.Aarea._std"                                                
[156] "Photosynthesis.per.leaf.dry.mass.Amass._std"                                                                
[157] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.25.C_std"                                 
[158] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.leaf.temperature_std"                     
[159] "Photosynthetic.electron.transport.capacity.per.leaf.area.at.25C.Jmax25area._std"                            
[160] "Plant.height.relative.growth.rate.RGR_height._std"                                                          
[161] "Plant.relative.growth.rate.RGR._std"                                                                        
[162] "SLA.undefined.if.petiole.in.or.excluded_std"

trAll <- fixPhyloNames( trAll )

tmp <- phyloTraits(tree = trAll, species = spp)
traitTab <- tmp$traitTab

mu_processed <- mu %>%
  left_join(traitTab, by = c("species" = "code8"))

se_processed<-se%>%left_join(mu_processed, by = c("species" = "species"))

write.csv(se_processed,"C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/mu_trait_0801.csv")

process_genus_data <- function(mu_data, se_data, genus, min_se = 1e-6) { 
  subset_data <- mu_data[mu_data$genus == genus, ]
  
  if(nrow(subset_data) == 0) return(NULL)
  
  is_europe <- subset_data$region == "WEA"
  is_na <- subset_data$region %in% c("ENA", "WNA")
  
  species_data <- data.frame(
    species = subset_data$species,
    mu_shade = subset_data$bfec.shade,
    se_shade = se_data$bfec.shade[match(subset_data$species, se_data$species)],
    region = ifelse(is_europe, "EU",
                    ifelse(is_na, "NA", "OTHER"))
  )
  # Remove NA values, filter for EU and NA regions, and apply SE threshold
  species_data <- species_data %>%
    filter(!is.na(mu_shade), 
           !is.na(se_shade), 
           region %in% c("EU", "NA"),
           se_shade >= min_se)  # Filter out too small SE
  
  if(nrow(species_data) == 0) return(NULL)
  
  # Calculate weights
  species_data$weight <- 1 / (species_data$se_shade^2)
  
  regional_stats <- species_data %>%
    group_by(region) %>%
    summarise(
      mean_shade = sum(mu_shade * weight) / sum(weight),
      se_shade = sqrt(abs(sum(mu_shade^2 * weight) / sum(weight) - mean_shade^2)),
      n = n(),
      .groups = 'drop'
    )
  
  if(nrow(regional_stats) < 2) return(NULL)
  
  eu_species <- sort(unique(subset_data$species[is_europe]))
  na_species <- sort(unique(subset_data$species[is_na]))
  n_eu <- length(eu_species)
  n_na <- length(na_species)
  
  result <- data.frame(
    genus = genus,
    na_value = regional_stats$mean_shade[regional_stats$region == "NA"],
    eu_value = regional_stats$mean_shade[regional_stats$region == "EU"],
    na_error = regional_stats$se_shade[regional_stats$region == "NA"],
    eu_error = regional_stats$se_shade[regional_stats$region == "EU"],
    n_eu = n_eu,
    n_na = n_na
  )
  
  return(result)
}

genera_list <- c("Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
                 "Fraxinus", "Alnus", "Larix", "Carpinus", "Acer", "Prunus", "Tilia",
                 "Juniperus", "Ulmus", "Ostrya", "Arbutus")


results <- lapply(genera_list, function(g) process_genus_data(mu_processed, se, g, min_se=1e-6))
results_df <- do.call(rbind, results[!sapply(results, is.null)])
mu_processed$genus <- sapply(strsplit(mu_processed$taxon, " "), `[`, 1)
se$genus <- mu_processed$genus[match(se$species, mu_processed$species)]
results <- lapply(genera_list, function(g) process_genus_data(mu_processed, se, g, min_se=1e-6))
results_df <- do.call(rbind, results[!sapply(results, is.null)])

genera_list <- c(
  "Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
  "Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
  "Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
)

results <- lapply(genera_list, function(g) process_genus_data(mu_processed, se, g, min_se=1e-6))
results_df <- do.call(rbind, results[!sapply(results, is.null)])

results_df <- results_df %>%
  mutate(diff = eu_value - na_value)

plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = diff)) +
  
  # 1:1 虚线参考线
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
  
  geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                width = 0.02) +
  geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                 height = 0.02) +
  
  geom_point(size = 3) +
  
  geom_text_repel(aes(label = genus),
                  size = 4,            
                  show.legend = FALSE, 
                  box.padding = 0.3,   
                  point.padding = 0.3,
                  min.segment.length = 0,
                  segment.color="grey80") +
  
  scale_color_gradient2(
    low = "blue4",    
    mid = "grey70",    
    high = "red4",    
    midpoint = 0      
  ) +
  
  scale_x_continuous(
    limits = c(-3, 0.5), 
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)  
  ) +
  scale_y_continuous(
    limits = c(-3, 0.5),
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)  
  ) +
  
  # 1:1 纵横比
  coord_fixed(ratio = 1) +
  
  theme_bw() +
  theme(
    aspect.ratio = 1,
    panel.grid.minor = element_blank(),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position="right"
  ) +
  
  labs(
    x = "North America, shade sensitivity",
    y = "Europe, shade sensitivity",
    title = "Shade sensitivity comparison of genus")

plot

results_df <- results_df %>%
  mutate(diff = eu_value - na_value)

plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = diff)) +
  
  # 1:1 reference line
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
  
  # Error bars
  geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                width = 0.02, linewidth = 0.5) +
  geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                 height = 0.02, linewidth = 0.5) +
  
  # Points with only color aesthetic (no fill)
  geom_point(size = 4) +
  
  # Improved text labels
  geom_text_repel(aes(label = genus),
                  size = 4.5,
                  fontface = "bold",
                  show.legend = FALSE, 
                  box.padding = 0.5,   
                  point.padding = 0.3,
                  min.segment.length = 0,
                  segment.color = "grey50",
                  segment.size = 0.5,
                  max.overlaps = 30) +
  
  # Deeper color gradient (only one scale)
  scale_color_gradient2(
    low = "blue4",    
    mid = "grey50",    
    high = "darkred",    
    midpoint = 0,
    name = "EU - NA difference"
  ) +
  
  scale_x_continuous(
    limits = c(-3, 0.5), 
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)  
  ) +
  scale_y_continuous(
    limits = c(-3, 0.5),
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)  
  ) +
  
  # 1:1 aspect ratio
  coord_fixed(ratio = 1) +
  
  # Cleaner theme with no border
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey90"),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    legend.position = "right",
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  
  labs(
    x = "North America, shade sensitivity",
    y = "Europe, shade sensitivity",
    title = "Shade sensitivity comparison by genus")

plot

diet diversity

mu_processed <- mu %>%
  left_join(traitTab, by = c("species" = "code8"))
fruit_df <- mu_processed %>%
  select(species, region, fruit, bfec.shade) %>%
  left_join(
    se %>% select(species, bfec.shade),
    by = "species",
    suffix = c(".mu", ".se")
  ) %>%
  mutate(region = case_when(
    region == "WEA"             ~ "EU",  # West Europe
    region %in% c("ENA", "WNA") ~ "NA",  # East/West North America
    TRUE                        ~ "OTHER"
  )) %>%
  filter(region %in% c("EU", "NA")) %>%
  filter(!fruit %in% c("achenosum"))%>%
  filter(
    !is.na(bfec.shade.mu),
    !is.na(bfec.shade.se),
    bfec.shade.se >= 1e-6
  ) %>%
  mutate(weight = 1 / (bfec.shade.se^2))

# 对 (fruit, region) 两列进行分组汇总
fruit_results_df <- fruit_df %>%
  group_by(fruit, region) %>%
  summarise(
    mean_shade = sum(bfec.shade.mu * weight) / sum(weight),
    se_shade = sqrt(abs(
      sum((bfec.shade.mu^2) * weight) / sum(weight) - mean_shade^2
    )),
    n = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = region,
    values_from = c(mean_shade, se_shade, n)
  ) %>%
  rename(
    eu_value  = mean_shade_EU,
    na_value  = mean_shade_NA,
    eu_error  = se_shade_EU,
    na_error  = se_shade_NA,
    n_eu      = n_EU,
    n_na      = n_NA
  ) %>%
  filter(!is.na(eu_value), !is.na(na_value)) %>%
  mutate(diff = eu_value - na_value)

fruit_results_df

# A tibble: 10 x 8
   fruit        eu_value   na_value eu_error na_error  n_eu  n_na    diff
   <chr>           <dbl>      <dbl>    <dbl>    <dbl> <int> <int>   <dbl>
 1 ""           -1.87    -0.256      0.470    0.324       2     8 -1.61  
 2 "berry"      -0.630   -0.225      0.00482  0.0920      3     3 -0.405 
 3 "berry-cone" -1.37    -0.314      0.270    0.180       2     4 -1.05  
 4 "capsule"    -0.00409 -2.73       0.00698  0           2     1  2.73  
 5 "drupe"      -0.454   -0.113      0.369    0.0807      8    10 -0.341 
 6 "nut"        -0.182   -0.0160     0.657    0.0816     23    42 -0.166 
 7 "nutlet"     -2.59    -2.49       0.714    0           2     1 -0.0991
 8 "pod"        -0.731   -0.0182     0        0.0192      1     2 -0.713 
 9 "pome"       -1.52    -0.431      0.137    0           2     1 -1.09  
10 "samara"     -0.0132  -0.0000393  0.0882   0.00598    28    56 -0.0132

 plot_fruit <- ggplot(fruit_results_df, aes(x = na_value, y = eu_value, color = diff)) +
  # 1:1 虚线参考线
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
  
  geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                width = 0.02) +
  geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                 height = 0.02) +
  geom_point(size = 3) +
  
  geom_text_repel(aes(label = fruit),
                  size = 4,
                  show.legend = FALSE,
                  box.padding = 0.3,
                  point.padding = 0.3,
                  min.segment.length = 0,
                  segment.color = "grey80") +
  
  scale_color_gradient2(
    low = "blue4",
    mid = "grey70",
    high = "red4",
    midpoint = 0
  ) +
  
  scale_x_continuous(
    limits = c(-3, 0.5), 
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)
  ) +
  scale_y_continuous(
    limits = c(-3, 0.5),
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)
  ) +
  
  coord_fixed(ratio = 1) +
  theme_bw() +
  theme(
    aspect.ratio = 1,
    panel.grid.minor = element_blank(),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "right"
  ) +
  labs(
    x = "North America",
    y = "Europe",
    title = "Shade sensitivity comparison of diet diverisity"
  )

plot_fruit

results_df <- results_df %>%
  mutate(diff = eu_value - na_value,
         comparison = ifelse(eu_value < na_value, "Europe < North America", "Europe > North America"),
         label_text = paste0(genus, "\n(n=", n_eu + n_na, ")")) 

plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = comparison)) +
  
  # 添加背景色分区 - 使用多边形
  annotate("polygon", 
           x = c(0.3, -3.2, -3.2, 0.3), 
           y = c(0.3, -3.2, 0.3, 0.3),
           fill = "#E8F4F8", alpha = 0.5) +  # 上方区域（Europe > North America）
  
  annotate("polygon", 
           x = c(0.3, -3.2, -3.2, 0.3), 
           y = c(0.3, -3.2, -3.2, -3.2),
           fill = "#F0F8E8", alpha = 0.5) +  # 下方区域（Europe < North America）
  
  # 1:1 虚线参考线
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30", size = 0.8) +
  
  geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                width = 0.02,
                size = 0.7,
                alpha = 0.6) +
  geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                 height = 0.02,
                 size = 0.7,
                 alpha = 0.6) +
  
  # 数据点 - 根据分组着色
  geom_point(size = 4, 
             alpha = 0.9) +
  
  # 标签 - 放大字体，允许伸出边界
  geom_text_repel(aes(label = label_text ),  # 如果要用species，改为label = species
                  size = 6,  # 增大字体
                  show.legend = FALSE, 
                  box.padding = 0.3,
                  point.padding = 0.3,
                  min.segment.length = 0.05,
                  segment.color = "grey60",
                  segment.size = 0.4,
                  color = "grey30",
                  fontface = "italic",  # 斜体（物种名常用）
                  max.overlaps = Inf,  # 允许所有标签显示
                  force = 3,
                  force_pull = 0.8,
                  max.time = 3,
                  max.iter = 20000,
                  direction = "both",
                  xlim = c(-Inf, Inf),  # 允许标签超出x轴范围
                  ylim = c(-Inf, Inf),  # 允许标签超出y轴范围
                  seed = 42) +
  
  # 手动设置颜色
  scale_color_manual(values = c("Europe < North America" = "#2E7D32",  # 深绿色
                                "Europe > North America" = "#1565C0"),  # 深蓝色
                     name = "Comparison") +
  
  # 反转X轴：只显示首尾刻度
  scale_x_reverse(
    limits = c(0.3, -3.2), 
    breaks = c(0, -3),  # 只显示首尾
    labels = c("0", "-3"),
    expand = expansion(mult = 0.02)  
  ) +
  
  # 反转Y轴：只显示首尾刻度
  scale_y_reverse(
    limits = c(0.3, -3.2),
    breaks = c(0, -3),  # 只显示首尾
    labels = c("0", "-3"),
    expand = expansion(mult = 0.02)  
  ) +
  
  # 1:1 纵横比
  coord_fixed(ratio = 1, clip = "off") +  # clip = "off" 允许标签超出图表区域
  
  theme_classic() +
  theme(
    aspect.ratio = 1,
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    panel.background = element_rect(fill = "white", color = NA),
    axis.title = element_text(size = 16, face = "bold"),  # 把 13 改为 16 或更大
    axis.text = element_text(size = 16),
    axis.line = element_line(size = 0.8),
    plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
    plot.margin = margin(20, 30, 20, 20),
    legend.position = "none"
  )+
  
  labs(
    x = "North America",
    y = "Europe")

plot

plot_fruit <- ggplot(fruit_results_df, aes(x = na_value, y = eu_value, color = diff)) +
  # 1:1 reference line
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
  
  # Error bars with thicker lines
  geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                width = 0.02, linewidth = 0.5) +
  geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                 height = 0.02, linewidth = 0.5) +
  
  # Larger points with better visibility
  geom_point(size = 4) +
  
  # Improved text labels
  geom_text_repel(aes(label = fruit),
                  size = 4.5,
                  fontface = "bold",
                  show.legend = FALSE, 
                  box.padding = 0.5,   
                  point.padding = 0.3,
                  min.segment.length = 0,
                  segment.color = "grey50",
                  segment.size = 0.5,
                  max.overlaps = 30) +
  
  # Deeper color gradient
  scale_color_gradient2(
    low = "blue4",    
    mid = "grey50",    
    high = "darkred",    
    midpoint = 0,
    name = "EU - NA difference"
  ) +
  
  scale_x_continuous(
    limits = c(-3, 0.5), 
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)  
  ) +
  scale_y_continuous(
    limits = c(-3, 0.5),
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)  
  ) +
  
  # 1:1 aspect ratio
  coord_fixed(ratio = 1) +
  
  # Cleaner theme with no border
  theme_minimal() +
  theme(
    aspect.ratio = 1,
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey90"),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12),
    legend.position = "right",
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.margin = margin(20, 20, 20, 20)
  ) +
  
  labs(
    x = "North America, shade sensitivity",
    y = "Europe, shade sensitivity",
    title = "Shade sensitivity comparison by fruit type")

plot_fruit

dispersal

mu_processed <- mu %>%
  left_join(traitTab, by = c("species" = "code8"))
dispersal_df <- mu_processed %>%
  select(species, region, dispersal, bfec.shade) %>%
  left_join(
    se %>% select(species, bfec.shade),
    by = "species",
    suffix = c(".mu", ".se")  # 区分合并后重名列
  ) %>%
  # region 转化为 EU/NA, 其他的就标记为 OTHER
  mutate(region = case_when(
    region == "WEA"             ~ "EU",  # West Europe
    region %in% c("ENA", "WNA") ~ "NA",  # East/West North America
    TRUE                        ~ "OTHER"
  )) %>%
  # 只保留 EU 和 NA
  filter(region %in% c("EU", "NA")) %>%
  filter(!dispersal %in% c("hydrochory"))%>%
  # 去除 NA 并做最小 SE 的过滤
  filter(
    !is.na(bfec.shade.mu),
    !is.na(bfec.shade.se),
    bfec.shade.se >= 1e-6
  ) %>%
  # 计算权重 (1/SE^2)
  mutate(weight = 1 / (bfec.shade.se^2))

# 按 (dispersal, region) 两列分组，求加权平均值和标准误
dispersal_results_df <- dispersal_df %>%
  group_by(dispersal, region) %>%
  summarise(
    mean_shade = sum(bfec.shade.mu * weight) / sum(weight),
    se_shade   = sqrt(abs(
      sum((bfec.shade.mu^2) * weight) / sum(weight) - mean_shade^2
    )),
    n = n(),
    .groups = "drop"
  ) %>%
  # 宽表展开，以便得到 EU 和 NA 两列
  pivot_wider(
    names_from = region,
    values_from = c(mean_shade, se_shade, n)
  ) %>%
  # 重命名，方便可视化
  rename(
    eu_value  = mean_shade_EU,
    na_value  = mean_shade_NA,
    eu_error  = se_shade_EU,
    na_error  = se_shade_NA,
    n_eu      = n_EU,
    n_na      = n_NA
  ) %>%
  # 只保留同时有 EU 与 NA 信息的 dispersal
  filter(!is.na(eu_value), !is.na(na_value)) %>%
  # （可选）计算差值
  mutate(diff = eu_value - na_value)

# 查看结果
dispersal_results_df

# A tibble: 4 x 8
  dispersal    eu_value   na_value eu_error na_error  n_eu  n_na     diff
  <chr>           <dbl>      <dbl>    <dbl>    <dbl> <int> <int>    <dbl>
1 ""           -0.00486 -0.00635     0.0299  0.316      10    11  0.00149
2 "anemochory" -0.0126  -0.0000375   0.0873  0.00624    24    47 -0.0126 
3 "barochory"  -0.598   -0.295       0.307   0.258      11     4 -0.303  
4 "zoochory"   -0.207   -0.00296     0.674   0.0292     28    71 -0.204

library(ggrepel)

plot_dispersal <- ggplot(dispersal_results_df, aes(x = na_value, y = eu_value, color = diff)) +
  # 1:1 虚线参考线
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
  
  geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
                width = 0.02) +
  geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
                 height = 0.02) +
  geom_point(size = 3) +
  
  # 用 dispersal 作为文本标签
  geom_text_repel(aes(label = dispersal),
                  size = 4,
                  show.legend = FALSE,
                  box.padding = 0.3,
                  point.padding = 0.3,
                  min.segment.length = 0,
                  segment.color = "grey80") +
  
  scale_color_gradient2(
    low = "blue4",
    mid = "grey70",
    high = "red4",
    midpoint = 0
  ) +
  
  # 这里演示与原先类似的坐标范围，你可酌情调整
  scale_x_continuous(
    limits = c(-3, 0.5), 
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)
  ) +
  scale_y_continuous(
    limits = c(-3, 0.5),
    breaks = seq(-3, 0.5, 0.5),
    expand = expansion(mult = 0.05)
  ) +
  
  # 1:1
  coord_fixed(ratio = 1) +
  theme_bw() +
  theme(
    aspect.ratio = 1,
    panel.grid.minor = element_blank(),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "right"
  ) +
  labs(
    x = "North America",
    y = "Europe",
     title = "Shade sensitivity comparison of dispersal type"
  )

plot_dispersal

setwd("R:/clark/clark.unix/makeMast/maps") 
get_genus_isp <- function(continent = c("europe", "northAmerica"), 
                          genus) {
  # continent: "europe" or "northAmerica"
  # genus: character, e.g. "Abies"
  
  tdata <- getFittedIndividual(continent, genus = genus)
  
  if(nrow(tdata) == 0) {
    return( list(mean = NA, sd = NA) )
  }
  
  # 2) 计算 ISP
  ISP <- tdata$fecGmMu / ( (tdata$diam / 2)^2 * pi )
  
  # 3) 加权累加
  i1 <- ISP * tdata$wtPerHa
  i2 <- (ISP^2) * tdata$wtPerHa
  
  # 4) 加权平均
  sum_wt <- sum(tdata$wtPerHa, na.rm = TRUE)
  if(sum_wt == 0) {
    # 避免除以 0
    return( list(mean = NA, sd = NA) )
  }
  
  MU <- sum(i1, na.rm = TRUE) / sum_wt
  SD <- sqrt( (sum(i2, na.rm = TRUE) / sum_wt) - MU^2 )
  
  # 以列表形式返回
  return( list(mean = MU, sd = SD) )
}

setwd("R:/clark/clark.unix/makeMast/maps")

invPath      <- '../inventories'
genera_list <- c(
  "Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula","Castanea",
  "Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
  "Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
)

genera_list <- tolower(genera_list)
genera_list

 [1] "pinus"     "picea"     "quercus"   "fagus"     "abies"     "betula"   
 [7] "castanea"  "fraxinus"  "alnus"     "larix"     "carpinus"  "acer"     
[13] "prunus"    "tilia"     "juniperus" "ulmus"     "ostrya"    "arbutus"

# 遍历每个 genus，分别取 "europe", "northAmerica" 的 ISP
results_isp <- lapply(genera_list, function(g) {
  eu_res <- get_genus_isp("europe", g)
  na_res <- get_genus_isp("northAmerica", g)
  
  data.frame(
    genus    = g,
    eu_value = eu_res$mean,
    eu_sd    = eu_res$sd,
    na_value = na_res$mean,
    na_sd    = na_res$sd
  )
})

# 把列表组合成 data.frame
results_isp_df <- do.call(rbind, results_isp)
results_isp_df

       genus   eu_value      eu_sd     na_value        na_sd
1      pinus 0.04425046 0.13947398  0.045085542  0.311828218
2      picea 0.01000835 0.02563520  0.030352770  0.094867611
3    quercus 1.30462745 3.02823910  1.723802997 10.762136778
4      fagus 0.04362936 0.11001884  0.101029352  0.263519792
5      abies 0.02639642 0.06519988  0.030244262  0.079348122
6     betula 0.02992234 0.03382724  0.022416140  0.079033977
7   castanea 8.94515864 5.21243526 34.683751169 97.696610288
8   fraxinus 0.32642941 0.39375472  0.209692801  0.698789285
9      alnus 0.01800777 0.08891566  0.004502417  0.002379182
10     larix 0.83748591 1.02968040  0.339876710  1.167653518
11  carpinus 0.30568051 0.28451363  1.101144459  0.774381414
12      acer 0.44759677 0.59375091  0.555037358  1.103731557
13    prunus 0.26633540 0.42573410  0.135853083  0.598744166
14     tilia 0.22495711 0.27789523  0.039440871  0.065918767
15 juniperus 1.06111389 2.48942540  0.090142002  0.220846257
16     ulmus 0.07950197 0.15224935  0.014576553  0.034995716
17    ostrya 0.41018058 0.17899184  0.169710244  0.168627560
18   arbutus 2.46200024 2.35968589  2.681926391  5.712747273

results_isp_df <- results_isp_df %>%
  mutate(
    diff = eu_value - na_value,
    comparison = ifelse(eu_value < na_value, "Europe < North America", "Europe > North America")
  )

df_filtered <- results_isp_df %>%
  filter(!genus %in% c("quercus", "castanea"))

plot_isp <- ggplot(df_filtered, aes(x = na_value, y = eu_value, color = comparison)) +
  
  # 1:1 参考线
  geom_abline(slope = 1, intercept = 0, 
              linetype = "dashed", 
              color = "gray30",
              linewidth = 0.8) +
  
  # Error bars - 根据分组着色
  geom_errorbar(aes(ymin = eu_value - eu_sd, ymax = eu_value + eu_sd),
                width = 0.05,
                linewidth = 0.7,
                alpha = 0.6) +
  
  geom_errorbarh(aes(xmin = na_value - na_sd, xmax = na_value + na_sd),
                 height = 0.05,
                 linewidth = 0.7,
                 alpha = 0.6) +
  
  # 数据点
  geom_point(size = 3.5, alpha = 0.8) +
  
  # 标签
  geom_text_repel(
    aes(label = genus),
    size = 3.5,
    show.legend = FALSE,
    box.padding = 0.5,
    point.padding = 0.4,
    min.segment.length = 0.1,
    segment.color = "grey70",
    segment.size = 0.4,
    color = "black",
    max.overlaps = 20,
    force = 2,
    seed = 42
  ) +
  
  # 手动设置颜色 - 与第一个图一致
  scale_color_manual(values = c("Europe < North America" = "darkgreen",
                                "Europe > North America" = "darkblue"),
                     name = "Comparison") +
  
  scale_x_log10(limits = c(0.005, 20),
                breaks = c(10^-2, 10^-1, 10^0, 10^1)) +
  scale_y_log10(limits = c(0.005, 20),
                breaks = c(10^-2, 10^-1, 10^0, 10^1)) +
  
  annotation_logticks(sides = "bl", 
                     size = 0.3,
                     short = unit(0.07, "cm"),
                     mid = unit(0.14, "cm"),
                     long = unit(0.21, "cm")) +
  
  coord_fixed(ratio = 1) +
  
  theme_classic() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    plot.margin = margin(10, 15, 10, 10),
    legend.position = "bottom",
    legend.title = element_text(size = 11, face = "bold"),
    legend.text = element_text(size = 10)
  ) +
  
  labs(
    x = "North America fecundity(g seed/cm² basal area)",
    y = "Europe fecundity(g seed/cm² basal area)",
    title = "ISP comparison of genus"
  )

plot_isp

write.csv(results_isp_df,"C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_isp_df.csv")

setwd("R:/clark/clark.unix/makeMast/maps")

# 定义函数来计算单个genus在单个continent的species-level ISP
get_species_isp <- function(continent, genus) {
  tdata <- getFittedIndividual(continent, genus = genus)
  
  # 如果没有数据，返回空
  if(nrow(tdata) == 0) {
    return(NULL)
  }
  
  # 计算ISP
  ISP <- tdata$fecGmMu / ((tdata$diam/2)^2 * pi)
  i1 <- ISP * tdata$wtPerHa
  i2 <- ISP^2 * tdata$wtPerHa
  
  # 按species分组计算
  MU <- tapply(i1, tdata$species, sum, na.rm = T) /
        tapply(tdata$wtPerHa, tdata$species, sum, na.rm = T)
  SD <- sqrt(tapply(i2, tdata$species, sum, na.rm = T) /
             tapply(tdata$wtPerHa, tdata$species, sum, na.rm = T) - MU^2)
  n <- table(tdata$species)[names(MU)]
  
  # 组合结果
  result <- data.frame(
    species = names(MU),
    mean = signif(MU, 4),
    sd = signif(SD, 4),
    n = as.numeric(n),
    row.names = NULL
  )
  
  return(result)
}

# genus列表
genera_list <- tolower(c(
  "Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
  "Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
  "Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
))

# 创建一个列表来存储所有结果
all_results <- list()

# 遍历每个genus
for(genus in genera_list) {
  cat("Processing genus:", genus, "\n")
  
  # Europe数据
  eu_result <- get_species_isp("europe", genus)
  if(!is.null(eu_result)) {
    eu_result$continent <- "europe"
    eu_result$genus <- genus
  }
  
  # North America数据
  na_result <- get_species_isp("northAmerica", genus)
  if(!is.null(na_result)) {
    na_result$continent <- "northAmerica"
    na_result$genus <- genus
  }
  
  # 存储结果
  all_results[[paste0(genus, "_europe")]] <- eu_result
  all_results[[paste0(genus, "_northAmerica")]] <- na_result
}

Processing genus: pinus 
Processing genus: picea 
Processing genus: quercus 
Processing genus: fagus 
Processing genus: abies 
Processing genus: betula 
Processing genus: castanea 
Processing genus: fraxinus 
Processing genus: alnus 
Processing genus: larix 
Processing genus: carpinus 
Processing genus: acer 
Processing genus: prunus 
Processing genus: tilia 
Processing genus: juniperus 
Processing genus: ulmus 
Processing genus: ostrya 
Processing genus: arbutus

# 合并所有结果为一个大的data.frame
all_species_isp <- do.call(rbind, all_results[!sapply(all_results, is.null)])
rownames(all_species_isp) <- NULL

# 查看结果
head(all_species_isp)

        species      mean       sd       n continent genus
1 pinusBanksian 0.0014600 0.002573    1134    europe pinus
2   pinusBrutia 1.3090000 0.990100     216    europe pinus
3 pinusCanarien 0.2325000 0.419100     702    europe pinus
4   pinusCembra 1.8880000 1.846000   14562    europe pinus
5 pinusContorta 0.0007107 0.001878   34290    europe pinus
6 pinusHalepens 0.2533000 0.323000 1392534    europe pinus

# 如果你想要以genus为单位查看结果，可以这样：
for(genus in genera_list) {
  cat("\n========", toupper(genus), "========\n")
  
  # Europe结果
  eu_data <- all_species_isp[all_species_isp$genus == genus & 
                              all_species_isp$continent == "europe", ]
  if(nrow(eu_data) > 0) {
    cat("\nEurope:\n")
    print(eu_data[, c("species", "mean", "sd", "n")])
  }
  
  # North America结果
  na_data <- all_species_isp[all_species_isp$genus == genus & 
                              all_species_isp$continent == "northAmerica", ]
  if(nrow(na_data) > 0) {
    cat("\nNorth America:\n")
    print(na_data[, c("species", "mean", "sd", "n")])
  }
}


======== PINUS ========

Europe:
         species      mean       sd       n
1  pinusBanksian 0.0014600 0.002573    1134
2    pinusBrutia 1.3090000 0.990100     216
3  pinusCanarien 0.2325000 0.419100     702
4    pinusCembra 1.8880000 1.846000   14562
5  pinusContorta 0.0007107 0.001878   34290
6  pinusHalepens 0.2533000 0.323000 1392534
7      pinusMugo 0.0048760 0.009245    2232
8     pinusNigra 0.1453000 0.164800 1491966
9  pinusPinaster 0.1082000 0.125400 2429226
10    pinusPinea 0.1944000 0.243600  446076
11 pinusPonderos 0.0096620 0.017790      72
12  pinusRadiata 0.0388600 0.037540  354636
13   pinusRigida 0.0103000 0.011800     198
14  pinusStrobus 0.0132600 0.022490   14274
15 pinusSylvestr 0.0074010 0.011550 8629128
16    pinusTaeda 0.0040080 0.006248     324
17 pinusUncinata 0.0377400 0.033720  282366
18     pinusUNKN 0.0469200 0.058360   44226

North America:
         species      mean        sd       n
19 pinusAlbicaul 0.0310800 0.0992900  235386
20 pinusAristata 0.0155600 0.0189200   17190
21 pinusArizonic 0.0425400 0.0431500     324
22 pinusAttenuat 0.0165100 0.0230200   11358
23 pinusBalfouri 0.0351800 0.0359800    6624
24 pinusBanksian 0.0067780 0.0078630 1795176
25 pinusCanarien 0.0123900 0.0280000     216
26 pinusCembroid 0.2320000 0.3161000   10170
27   pinusClausa 0.9519000 0.8907000  102438
28 pinusContorta 0.0047950 0.0193000 4910310
29 pinusCoulteri 0.1241000 0.1983000    2052
30 pinusDensiflo 0.0004504 0.0007962      18
31 pinusDiscolor 0.0810900 0.1064000   15300
32 pinusEchinata 0.0154800 0.0335100  720666
33   pinusEdulis 0.3437000 1.0350000 1071036
34 pinusElliotti 0.0184100 0.0413100 1479492
35 pinusEngelman 0.0556200 0.0451800     522
36 pinusFlexilis 0.0826300 0.3296000  133452
37   pinusGlabra 0.0630200 0.0808800   21204
38 pinusHalepens 0.0212300 0.0217000     432
39 pinusJeffreyi 0.0447700 0.1678000  126972
40 pinusLamberti 0.2882000 1.9370000   97092
41 pinusLeiophyl 0.0090610 0.0120700    2286
42 pinusLongaeva 0.0256900 0.0244500    5274
43 pinusMonophyl 0.5141000 1.0880000  449226
44 pinusMonticol 0.0102100 0.0137700   87138
45 pinusMuricata 0.0119700 0.0136500    2232
46    pinusNigra 0.0163600 0.0187800    5508
47 pinusPalustri 0.0945800 0.1854000  381564
48 pinusPonderos 0.0562500 0.1526000 2522898
49  pinusPungens 0.9850000 0.9185000   20610
50  pinusRadiata 0.2424000 0.1571000     990
51   pinusRemota 0.0775700 0.1138000    2484
52 pinusResinosa 0.0229300 0.0318100  900450
53   pinusRigida 0.0546600 0.0492000  191358
54 pinusSabinian 0.7366000 1.9500000   17532
55 pinusSerotina 0.4726000 0.4707000   84564
56 pinusStrobifo 0.1394000 0.1739000   20016
57  pinusStrobus 0.0007767 0.0019090 1049382
58 pinusSylvestr 0.0056690 0.0073930   88794
59    pinusTaeda 0.0016920 0.0034410 9346320
60 pinusThunberg 0.0198800 0.0153200     234
61 pinusTorreyan 0.1646000 0.3669000     108
62     pinusUNKN 0.0335300 0.0284700    6318
63 pinusVirginia 0.1024000 0.1395000  588168
64 pinusWashoens 0.0602900 0.0925300     702

======== PICEA ========

Europe:
         species     mean      sd       n
65    piceaAbies 0.009991 0.02560 5248800
66  piceaOmorika 0.050030 0.09856     720
67  piceaPungens 0.017890 0.03730     504
68 piceaSitchens 0.009411 0.01907   51894
69     piceaUNKN 0.013440 0.03289    4680

North America:
         species     mean       sd       n
70    piceaAbies 0.054020 0.077370   58302
71 piceaBreweria 0.053250 0.064650     702
72 piceaEngelman 0.117600 0.198500 1124568
73   piceaGlauca 0.001533 0.007358 2968488
74  piceaMariana 0.027370 0.075460 3939210
75  piceaPungens 0.112500 0.123200   42894
76   piceaRubens 0.004426 0.043770  589140
77 piceaSitchens 0.032510 0.095390  316692
78     piceaUNKN 0.065160 0.089560  143658

======== QUERCUS ========

Europe:
           species   mean      sd       n
79 quercusCanarien 3.0200  3.2560   18792
80   quercusCerris 2.4280  5.1070  260928
81 quercusCoccifer 1.0250  0.4979     198
82  quercusFaginea 1.0310  1.6340  235764
83 quercusFrainett 5.9160 14.9100   20412
84     quercusIlex 2.2750  3.9570 1315854
85 quercusIthabure 0.3750  0.1217      36
86 quercusPalustri 0.4794  0.3964      72
87  quercusPetraea 1.3060  2.0250 1180008
88 quercusPubescen 1.5730  5.0570  640872
89 quercusPyrenaic 0.6349  0.8588  497394
90    quercusRobur 0.6481  0.8935 1182384
91    quercusRubra 0.4323  0.7583   70452
92    quercusSuber 2.4750  2.8480  424638
93  quercusTrojana 5.1420  8.8910    3420
94     quercusUNKN 0.2773  0.4122  338148

North America:
            species     mean       sd       n
95  quercusAcutissi  0.12820  0.19850     288
96  quercusAgrifoli 12.16000 13.95000   53550
97      quercusAlba  2.36100  5.18300 1795518
98  quercusArizonic  0.15130  0.25100  156960
99   quercusBicolor  1.31300  1.46900   28908
100 quercusBuckleyi  0.50950  0.74540   17838
101 quercusChrysole 15.77000 61.91000  242028
102  quercusCinerea  0.10050  0.15190      36
103 quercusCoccinea  0.39180  0.77070  426348
104 quercusDouglasi  3.13100  7.90300   78498
105 quercusEllipsoi  0.20760  0.31790  149634
106   quercusEmoryi  0.03841  0.05844   62262
107 quercusEngelman  0.69230  0.90020     504
108  quercusFalcata  0.13870  0.41450  367218
109 quercusFusiform  0.68170  0.80970    1152
110 quercusGambelii  1.04600  3.31600  314514
111 quercusGarryana  5.14500 18.80000   99720
112 quercusGeminata  0.74540  1.67000    1116
113 quercusGracilif  0.47630  0.63440     342
114 quercusGravesii  0.57110  0.64800     288
115   quercusGrisea  0.49990  0.78010   29538
116 quercusHavardii  1.31800  0.92650      90
117 quercusHemispha  0.53100  0.92510    1332
118 quercusHypoleuc  0.42470  0.77540   25740
119 quercusIlicifol  0.03513  0.06779     612
120 quercusImbricar 11.96000 21.81000   43164
121   quercusIncana  0.44170  0.62360    5976
122 quercusKelloggi  0.58450  1.21500  148320
123   quercusLaceyi  0.37540  0.57980    5490
124   quercusLaevis  0.66200  1.17900   37674
125 quercusLaurifol  0.45160  0.75760  267570
126   quercusLobata  2.02500  6.45000    7056
127   quercusLyrata  2.80500  8.44300   47988
128 quercusMacrocar  0.58040  0.82830  242892
129 quercusMargaret  0.39260  0.88970   12672
130 quercusMariland  2.47600  4.44800  131058
131 quercusMichauxi  1.02700  2.62600   35136
132   quercusMinima  0.10180  0.20610   12762
133  quercusMontana  0.80980  2.10900  926316
134 quercusMuehlenb  1.08700  1.79300  104022
135    quercusNigra  1.40800  2.96100  747468
136 quercusOblongif  0.73220  0.87960    5994
137 quercusOglethor  0.13380  0.24440     720
138   quercusPagoda  0.73790  1.87000  101286
139 quercusPalustri  0.59540  0.75000   44910
140  quercusPhellos  0.13950  0.44570  150642
141 quercusPolymorp  0.47950  0.51050     198
142    quercusRubra  1.58100  3.29000 1191816
143   quercusRugosa  0.72920  1.17400    8424
144 quercusShumardi  2.39400 13.45000   37440
145  quercusSimilis  0.31500  0.45870     576
146  quercusSinuata  0.41580  0.64840    2484
147 quercusStellata  1.73300  3.57500  822816
148   quercusTexana  0.75450  1.12600   29448
149     quercusUNKN  0.39290  0.61820    1134
150 quercusVelutina  0.41410  0.84210  799470
151 quercusVirginia  0.14560  0.24050  267606
152 quercusWislizen  5.11700 14.08000   54126

======== FAGUS ========

Europe:
          species    mean   sd       n
153 fagusSylvatic 0.04363 0.11 3485520

North America:
          species   mean     sd      n
154 fagusGrandifo 0.1010 0.2635 999198
155 fagusSylvatic 0.1069 0.2357    180

======== ABIES ========

Europe:
          species    mean      sd      n
156     abiesAlba 0.02483 0.05976 945054
157  abiesBorisii 0.00000 0.00000   2718
158 abiesCephalon 0.23570 0.31640   6444
159  abiesGrandis 0.02777 0.02670  17226
160 abiesNordmann 0.37070 0.34440    792
161  abiesPinsapo 0.07382 0.07938   7506
162  abiesProcera 0.04316 0.02761     72
163     abiesUNKN 0.03394 0.04016   7668

North America:
          species    mean      sd       n
164 abiesAmabilis 0.02554 0.17730  521118
165 abiesBalsamea 0.01830 0.03015 3814128
166 abiesConcolor 0.01108 0.02268  865422
167  abiesFraseri 0.01424 0.02281    5328
168  abiesGrandis 0.01097 0.02017  655344
169 abiesLasiocar 0.08808 0.13590 1449360
170  abiesLowiana 0.05318 0.06235    4374
171 abiesMagnific 0.02638 0.04832  155916
172  abiesProcera 0.03261 0.05248   76752
173 abiesShastens 0.04554 0.05923   74160
174     abiesUNKN 0.05914 0.05597   11268

======== BETULA ========

Europe:
           species    mean      sd      n
175  betulaObscura 0.10990 0.04730     18
176  betulaPendula 0.03009 0.03180 719694
177 betulaPubescen 0.02002 0.02416 126810
178     betulaUNKN 0.04162 0.05752 325872

North America:
           species     mean       sd       n
179     betulaAlba 0.002213 0.004830    1026
180 betulaAlleghan 0.049000 0.134400  789318
181 betulaCordifol 0.025980 0.031960      90
182    betulaLenta 0.025080 0.057660  381474
183 betulaNeoalask 0.004571 0.006599    5832
184    betulaNigra 0.131200 0.213800   69426
185 betulaOccident 0.004092 0.006812    1476
186 betulaPapyrife 0.008643 0.012220 1916856
187  betulaPendula 0.006737 0.012470     198
188 betulaPopulifo 0.000751 0.002868   40464
189     betulaUNKN 0.021140 0.037020    5652

======== CASTANEA ========

Europe:
           species  mean    sd      n
190 castaneaSativa 8.945 5.212 771390

North America:
             species   mean      sd    n
191  castaneaDentata  27.51  98.560 1404
192 castaneaMollisim   0.00   0.000   18
193 castaneaMollissi 110.50 108.800  198
194   castaneaPumila  12.41   8.849  198

======== FRAXINUS ========

Europe:
             species     mean      sd      n
195 fraxinusAmerican 0.007665 0.01574    270
196 fraxinusAngustif 0.195700 0.41110  28998
197 fraxinusExcelsio 0.335100 0.39500 470520
198    fraxinusOrnus 0.159600 0.25480  37476
199     fraxinusUNKN 0.013890 0.02171    162

North America:
             species    mean      sd      n
200 fraxinusAmerican 0.02427 0.06217 740232
201 fraxinusBerlandi 0.39470 0.49890    450
202 fraxinusCarolini 0.28260 0.58600  10152
203 fraxinusExcelsio 0.05046 0.12610    126
204 fraxinusLatifoli 0.09724 0.13950  10926
205    fraxinusNigra 0.51420 1.24500 544932
206 fraxinusPennsylv 0.14620 0.19520 737622
207 fraxinusProfunda 0.91240 1.18700  11610
208 fraxinusQuadrang 0.16900 0.22660   9234
209 fraxinusTexensis 0.33260 0.61120   3834
210     fraxinusUNKN 0.23560 0.48020   4338
211 fraxinusVelutina 0.24510 0.27510   1278

======== ALNUS ========

Europe:
          species     mean       sd      n
212  alnusCordata 0.147400 0.090060  12546
213 alnusGlutinos 0.006122 0.002953 505818
214   alnusIncana 0.011570 0.012830  55602
215     alnusUNKN 0.475100 0.374500   7758
216  alnusViridis 0.779900 0.502200     72

North America:
          species     mean        sd      n
217 alnusGlutinos 0.001179 0.0005337   1368
218 alnusOblongif 0.019870 0.0076840    288
219 alnusRhombifo 0.006103 0.0024880   7794
220    alnusRubra 0.004466 0.0019290 352404
221     alnusUNKN 0.206600 0.0725700     18

======== LARIX ========

Europe:
          species   mean     sd      n
222  larixDecidua 0.9265 1.1320 359082
223 larixKaempfer 0.5057 0.4163  72648
224     larixUNKN 1.0660 0.8238  24732

North America:
          species   mean      sd      n
225  larixDecidua 0.7080 0.08633     36
226 larixLaricina 0.1121 0.21890 427968
227  larixLyallii 6.3730 6.56700   8892
228 larixOccident 0.3658 0.79870 389376
229     larixUNKN 1.8380 2.00800  18450

======== CARPINUS ========

Europe:
             species   mean     sd      n
230  carpinusBetulus 0.3042 0.2811 668736
231 carpinusOriental 1.4340 0.4942   2664

North America:
             species   mean      sd      n
232  carpinusBetulus 0.2678 0.09517    144
233 carpinusCarolini 1.1020 0.77430 145260

======== ACER ========

Europe:
         species   mean      sd      n
234 acerCampestr 0.3993 0.36590 114318
235  acerLobelii 1.1920 1.65900   1404
236 acerMonspess 0.1470 0.22680  17118
237  acerNegundo 4.4210 4.94700   2340
238   acerOpalus 0.2499 0.39660  20178
239 acerPlatanoi 0.8989 0.89870  36414
240 acerPseudopl 0.4145 0.37100 273690
241 acerTataricu 0.1690 0.09794     54
242     acerUNKN 0.4146 0.45050  11466

North America:
         species     mean       sd       n
243 acerBarbatum 0.108100 0.127200   34128
244 acerCircinat 0.003205 0.007572     954
245 acerFloridan 0.561400 0.349400     342
246  acerGinnala 0.465200 0.407900    1242
247  acerGlabrum 0.297000 0.376400   14256
248 acerGrandide 0.662100 0.600100   29826
249  acerGriseum 0.247700 0.272500      90
250 acerLeucoder 0.377600 0.345700     270
251 acerMacrophy 0.682900 2.065000  153630
252  acerNegundo 3.470000 3.957000  308052
253   acerNigrum 0.141100 0.151400    8748
254 acerPalmatum 0.423200 0.395700    1332
255 acerPensylva 0.026350 0.011660   58086
256 acerPlatanoi 0.467500 0.655500   16146
257 acerPseudopl 0.020070 0.106000     450
258   acerRubrum 0.553900 0.571100 4999896
259 acerSacchari 0.723000 1.395000  204516
260 acerSaccharu 0.249200 0.205700 2883636
261 acerSpicatum 0.043200 0.039310    1188
262     acerUNKN 0.127300 0.113500     306
263  acerXfreema 0.000000 0.000000    1404

======== PRUNUS ========

Europe:
           species   mean     sd     n
264 prunusAmygdalu 0.8336 0.6500    36
265    prunusAvium 0.2787 0.5066 67896
266 prunusCerasife 1.0490 0.7633    54
267  prunusCerasus 0.2624 0.3734    90
268 prunusDomestic 1.2040 1.4820  1890
269 prunusInsititi 0.5533 0.5787   162
270 prunusLusitani 0.8774 0.2298    18
271  prunusMahaleb 0.1514 0.1947  1044
272    prunusPadus 0.2677 0.2980 71010
273 prunusSerotina 0.1069 0.1465 13608
274  prunusSpinosa 0.6915 0.6221   756
275     prunusUNKN 0.1356 0.1491  3978

North America:
           species     mean       sd       n
276 prunusAmerican  3.94100  3.99300    2592
277 prunusArmeniac  1.11700  0.70680      90
278    prunusAvium  0.76910  0.68640    9108
279 prunusCarolini  5.10300  2.97600     324
280 prunusCerasife  0.27110  0.40750     288
281  prunusCerasus  0.90700  0.72250     234
282 prunusDomestic  0.23610  0.32330      90
283 prunusEmargina  0.44590  0.58310   13374
284 prunusLaurocer  1.41700  1.50200     432
285    prunusNigra  0.27760  0.34570     108
286 prunusPensylva  0.41680  0.31020   86940
287  prunusPersica 17.27000 15.77000     180
288 prunusSalicina  0.48760  0.38650      18
289 prunusSerotina  0.06001  0.09305 1041012
290 prunusSerrulat  0.09484  0.12760     540
291 prunusUmbellat  1.90600  1.20700      18
292     prunusUNKN  0.48780  0.42800    4482
293 prunusVirginia  1.07100  2.06000    4824
294  prunusXyedoen  0.00000  0.00000     216

======== TILIA ========

Europe:
          species   mean     sd     n
295  tiliaCordata 0.3345 0.3057 71532
296 tiliaPlatyphy 0.1140 0.1941 30132
297 tiliaTomentos 0.6108 0.4902  1692
298     tiliaUNKN 0.1834 0.2530 33750
299 tiliaVulgaris 0.1616 0.2145   234

North America:
          species    mean      sd      n
300 tiliaAmerican 0.03955 0.06602 542484
301  tiliaCordata 0.01129 0.01862   1908
302 tiliaTomentos 0.03310 0.03970     72
303     tiliaUNKN 0.05299 0.06612    126

======== JUNIPERUS ========

Europe:
             species    mean     sd      n
304 juniperuCommunis  1.0520 0.8673   2826
305 juniperuOxycedru  2.9300 3.2680  18162
306 juniperuPhoenice 14.4900 6.4810   2970
307 juniperuThurifer  0.4476 0.4499 132066
308     juniperuUNKN  0.4657 0.2044     18

North America:
             species     mean       sd       n
309    juniperuAshei 0.033960 0.049870  405306
310 juniperuCaliforn 0.064500 0.125800   13356
311 juniperuChinensi 0.001531 0.001572     198
312 juniperuCoahuile 0.048970 0.075760   43578
313 juniperuDeppeana 0.011220 0.018400  205416
314 juniperuFlaccida 0.117100 0.035500      18
315 juniperuMonosper 0.394000 0.443700  563094
316 juniperuOccident 0.013520 0.020320  206604
317 juniperuOsteospe 0.033850 0.069210 1145682
318 juniperuPinchoti 0.029910 0.037090  108594
319 juniperuScopulor 0.061380 0.048700  292752
320     juniperuUNKN 0.068410 0.122800    1800
321 juniperuVirginia 0.042350 0.045260  790272

======== ULMUS ========

Europe:
          species    mean      sd     n
322 ulmusCanescen 0.37980 0.21940    90
323 ulmusCarpinif 0.02184 0.04042    90
324   ulmusGlabra 0.13010 0.23880  5652
325   ulmusLaevis 0.10600 0.14220   810
326    ulmusMinor 0.07611 0.14840 26370
327   ulmusPumila 0.17880 0.03654    54
328     ulmusUNKN 0.07522 0.13730 17766

North America:
          species     mean       sd      n
329    ulmusAlata 0.003175 0.008052 308988
330 ulmusAmerican 0.009848 0.021280 780390
331 ulmusCrassifo 0.017050 0.023260  80118
332 ulmusParvifol 0.019980 0.016150    756
333  ulmusProcera 0.021620 0.026380     54
334   ulmusPumila 0.019900 0.024580  25830
335    ulmusRubra 0.046550 0.065070 214056
336 ulmusSerotina 0.004726 0.006197    468
337 ulmusThomasii 0.163000 0.230000   2646
338     ulmusUNKN 0.009922 0.016360   2862

======== OSTRYA ========

Europe:
           species   mean    sd      n
339 ostryaCarpinif 0.4102 0.179 124560

North America:
           species   mean     sd      n
340 ostryaVirginia 0.1697 0.1686 230742

======== ARBUTUS ========

Europe:
         species  mean   sd     n
341 arbutusUnedo 2.462 2.36 28278

North America:
            species    mean      sd      n
342 arbutusArizonic 18.0700 13.3700    378
343 arbutusMenziesi  2.6410  5.6290 154134
344    arbutusUnedo  0.4635  0.2615     18
345 arbutusXalapens  6.0820  5.4040    126

# 也可以创建一个宽格式的表格，方便比较
library(tidyr)
library(dplyr)

# 创建宽格式表格（可选）
wide_format <- all_species_isp %>%
  pivot_wider(
    id_cols = c(genus, species),
    names_from = continent,
    values_from = c(mean, sd, n),
    names_sep = "_"
  ) %>%
  arrange(genus, species)

print(wide_format)

# A tibble: 295 x 8
   genus species       mean_europe mean_northAmerica sd_europe sd_northAmerica
   <chr> <chr>               <dbl>             <dbl>     <dbl>           <dbl>
 1 abies abiesAlba          0.0248           NA         0.0598         NA     
 2 abies abiesAmabilis     NA                 0.0255   NA               0.177 
 3 abies abiesBalsamea     NA                 0.0183   NA               0.0302
 4 abies abiesBorisii       0                NA         0              NA     
 5 abies abiesCephalon      0.236            NA         0.316          NA     
 6 abies abiesConcolor     NA                 0.0111   NA               0.0227
 7 abies abiesFraseri      NA                 0.0142   NA               0.0228
 8 abies abiesGrandis       0.0278            0.0110    0.0267          0.0202
 9 abies abiesLasiocar     NA                 0.0881   NA               0.136 
10 abies abiesLowiana      NA                 0.0532   NA               0.0624
# i 285 more rows
# i 2 more variables: n_europe <dbl>, n_northAmerica <dbl>

write.csv(all_species_isp, "C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/species_isp_results.csv", row.names = FALSE)
write.csv(wide_format, "C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/species_isp_wide_format.csv", row.names = FALSE)

results_isp_df <- read.csv("C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_isp_df.csv")

results_isp_df <- results_isp_df %>%
  mutate(
    diff = eu_value - na_value,
    ratio = eu_value / na_value,
    sig_diff = abs(diff) > (eu_sd + na_sd)
  )

df_filtered <- results_isp_df %>%
  filter(!genus %in% c("quercus", "castanea"))

ggplot(df_filtered, aes(x = na_value, y = eu_value, color = diff)) +
  # 背景网格和参考线
  geom_abline(slope = 1, intercept = 0, 
              linetype = "dashed", 
              color = "gray30",
              linewidth = 0.5) +
  
  # 重要：先绘制error bars
  geom_errorbar(aes(
    ymin = eu_value - eu_sd,
    ymax = eu_value + eu_sd
  ),
  width = 0.05,
  linewidth = 0.3,
  alpha = 0.8,  # 提高不透明度
  color = "gray40") + # 统一颜色
  
  geom_errorbarh(aes(
    xmin = na_value - na_sd,
    xmax = na_value + na_sd
  ),
  height = 0.05,
  linewidth = 0.3,
  alpha = 0.8,  # 提高不透明度
  color = "gray40") + 
  
  geom_point(size = 3) +
  
  geom_text_repel(
    aes(label = genus,
        fontface = ifelse(sig_diff, "bold", "plain")),
    size = 3.5,
    box.padding = 0.5,
    min.segment.length = 0,
    max.overlaps = Inf,
    segment.color = "gray50",
    segment.alpha = 0.6
  ) +
  
  scale_color_gradient2(
    low = "#2c7fb8",    
    mid = "#2b2b2b",    
    high = "#7fbc41",   
    midpoint = 0,
    limits = c(-2, 2),
    name = "EU - NA difference"
  ) +
  
  scale_x_log10(limits = c(0.005, 20),
                breaks = c(0.01, 0.1, 1, 10)) +
  scale_y_log10(limits = c(0.005, 20),
                breaks = c(0.01, 0.1, 1, 10)) +
  
  annotation_logticks(sides = "bl", 
                     size = 0.3,
                     short = unit(0.07, "cm"),
                     mid = unit(0.14, "cm"),
                     long = unit(0.21, "cm")) +
  
  coord_fixed(ratio = 1) +
  
  theme_bw(base_size = 12) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    axis.text = element_text(color = "black"),
    legend.position = "right",
    legend.key.height = unit(1.5, "cm"),
    plot.title = element_text(size = 13, face = "bold"),
    plot.subtitle = element_text(size = 11, color = "gray30")
  ) +
  
  labs(
    x = "North American ISP (log scale)",
    y = "European ISP (log scale)",
    title = "Comparison of ISP Values Between NA and EU",
    subtitle = "Point size indicates measurement certainty; bold labels show significant regional differences"
  )

setwd("R:/clark/clark.unix/makeMast/maps") 
genus <- 'quercus'
tdata <- getFittedIndividual( 'europe', genus = genus )
ISP   <- tdata$fecGmMu/(tdata$diam/2)^2/pi      # weight by plot area
i1    <- ISP*tdata$wtPerHa
i2    <- ISP^2*tdata$wtPerHa
MU    <- tapply( i1, tdata$species, sum, na.rm = T )/
         tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T )
SD    <- sqrt( tapply( i2, tdata$species, sum, na.rm = T )/
               tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T ) - MU^2 )
n     <- table( tdata$species )[names(MU)]
etable <- cbind( signif(MU, 4), signif(SD, 4 ), n )

tdata <- getFittedIndividual( 'northAmerica', genus = genus )
ISP   <- tdata$fecGmMu/(tdata$diam/2)^2/pi      # weight by plot area
i1    <- ISP*tdata$wtPerHa
i2    <- ISP^2*tdata$wtPerHa
MU    <- tapply( i1, tdata$species, sum, na.rm = T )/
         tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T )
SD    <- sqrt( tapply( i2, tdata$species, sum, na.rm = T )/
               tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T ) - MU^2 )
n  <- table( tdata$species )[ names(MU) ]
ntable <- cbind( signif(MU, 4), signif(SD, 4 ), n )

results_isp_df <- results_isp_df %>%
    filter(!genus %in% c("castanea"))

# 计算最小值并确定shift值
min_val <- min(c(log(results_isp_df$eu_value), log(results_isp_df$na_value)))
shift <- ceiling(abs(min_val)) + 1  # 动态计算需要加多少

results_isp_log_df <- results_isp_df %>%
    mutate(
        log_eu_value = log(eu_value) + shift,
        log_na_value = log(na_value) + shift,
        
        cv_eu = eu_sd / eu_value,
        cv_na = na_sd / na_value,
        
        # No capping - use actual CV
        log_eu_lower = log(pmax(eu_value - eu_sd, 0.001)) + shift,
        log_eu_upper = log(eu_value + eu_sd) + shift,
        log_na_lower = log(pmax(na_value - na_sd, 0.001)) + shift,
        log_na_upper = log(na_value + na_sd) + shift,
        
        diff = log_eu_value - log_na_value,
        eu_higher = log_eu_value > log_na_value
    )

na_order <- results_isp_log_df %>%
    arrange(desc(log_na_value)) %>%
    pull(genus)

df_diff <- results_isp_log_df %>%
    mutate(
        genus = factor(genus, levels = na_order),
        x_pos = as.numeric(factor(genus, levels = na_order)),
        y_start = pmin(log_eu_value, log_na_value),
        y_end = pmax(log_eu_value, log_na_value),
        diff_color = case_when(
            eu_higher ~ "Europe Higher",
            TRUE ~ "North America Higher"
        )
    )

df_eu <- results_isp_log_df %>%
    transmute(
        genus = factor(genus, levels = na_order),
        region = "Europe",
        value = log_eu_value,
        ymin = log_eu_lower,
        ymax = log_eu_upper
    )

df_na <- results_isp_log_df %>%
    transmute(
        genus = factor(genus, levels = na_order),
        region = "North America",
        value = log_na_value,
        ymin = log_na_lower,
        ymax = log_na_upper
    )

df_long <- bind_rows(df_eu, df_na)

p <- ggplot() +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray60", linewidth = 0.5) +
    
    geom_bar(data = df_long, 
             aes(x = genus, y = value, fill = region),
             stat = "identity", 
             position = position_dodge(width = 0.8),
             width = 0.7,
             alpha = 0.7) +
    
    geom_segment(data = df_diff,
                 aes(x = x_pos - 0.2, 
                     xend = x_pos + 0.2,
                     y = y_start, 
                     yend = y_start,
                     color = diff_color),
                 linewidth = 1.5,
                 alpha = 0.8) +
    
    geom_segment(data = df_diff,
                 aes(x = x_pos - 0.2, 
                     xend = x_pos + 0.2,
                     y = y_end, 
                     yend = y_end,
                     color = diff_color),
                 linewidth = 1.5,
                 alpha = 0.8) +
    
    # Vertical connection lines WITHOUT arrows
    geom_segment(data = df_diff,
                 aes(x = x_pos, 
                     xend = x_pos,
                     y = y_start, 
                     yend = y_end,
                     color = diff_color),
                 linewidth = 2,
                 alpha = 0.9) +
    
    geom_errorbar(data = df_long,
                  aes(x = genus, ymin = ymin, ymax = ymax, group = region),
                  position = position_dodge(width = 0.8),
                  width = 0.25,
                  linewidth = 0.4,
                  color = "gray30",
                  alpha = 0.7) +
    
    scale_fill_manual(
        values = c("Europe" = "#4E79A7", "North America" = "#E15759"),
        name = "Region"
    ) +
    
    scale_color_manual(
        values = c("Europe Higher" = "#2ca02c", "North America Higher" = "#ff7f0e"),
        name = "Difference"
    ) +
    
    theme_minimal(base_size = 14) +
    theme(
        panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank(),
        axis.text.x = element_text(angle = 55, hjust = 1, size = 16, face = "bold"),
        legend.position = "right",
        plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
        plot.subtitle = element_text(size = 10, hjust = 0.5, face = "italic"),
        plot.margin = margin(20, 20, 20, 20)
    ) +
    
    labs(
        x = "Genus",
        y = "ISP (log)",
    )

print(p)

setwd("R:/clark/clark.unix/makeMast/maps")

invPath      <- '../inventories'
calculateGenusStats <- function(tdata) {
  if(nrow(tdata) == 0) {
    return(list(mean = NA, sd = NA, n = 0))
  }
  
  # Calculate ISP
  ISP <- tdata$fecGmMu/(tdata$diam/2)^2/pi
  
  # Weight by plot area
  i1 <- ISP * tdata$wtPerHa
  i2 <- ISP^2 * tdata$wtPerHa
  
  # Calculate weighted mean
  MU <- tapply(i1, tdata$species, sum, na.rm = TRUE) / 
        tapply(tdata$wtPerHa, tdata$species, sum, na.rm = TRUE)
  
  # Calculate weighted SD
  SD <- sqrt(
    tapply(i2, tdata$species, sum, na.rm = TRUE) / 
    tapply(tdata$wtPerHa, tdata$species, sum, na.rm = TRUE) - MU^2
  )
  
  n <- table(tdata$species)[names(MU)]
  
  # Calculate genus-level statistics
  weighted.mean <- sum(MU * n, na.rm = TRUE) / sum(n, na.rm = TRUE)
  weighted.var <- sum(n * (SD^2 + (MU - weighted.mean)^2), na.rm = TRUE) / 
                 sum(n, na.rm = TRUE)
  weighted.sd <- sqrt(weighted.var)
  
  return(list(
    mean = weighted.mean,
    sd = weighted.sd,
    n = sum(n, na.rm = TRUE)
  ))
}

analyzeAllGenera <- function(genera_list) {
  results <- data.frame(
    Genus = character(),
    Region = character(),
    WeightedMean = numeric(),
    WeightedSD = numeric(),
    SampleSize = numeric(),
    stringsAsFactors = FALSE
  )
  
  # Process each genus
  for(genus in genera_list) {
    tryCatch({
      # Get data for Europe
      europe_data <- getFittedIndividual('europe', genus = genus)
      euro_stats <- calculateGenusStats(europe_data)
      
      # Get data for North America
      na_data <- getFittedIndividual('northAmerica', genus = genus)
      na_stats <- calculateGenusStats(na_data)
      
      # Calculate combined statistics
      combined_data <- rbind(europe_data, na_data)
      combined_stats <- calculateGenusStats(combined_data)
      
      # Add to results
      new_rows <- data.frame(
        Genus = rep(genus, 3),
        Region = c("Europe", "North America", "Combined"),
        WeightedMean = c(euro_stats$mean, na_stats$mean, combined_stats$mean),
        WeightedSD = c(euro_stats$sd, na_stats$sd, combined_stats$sd),
        SampleSize = c(euro_stats$n, na_stats$n, combined_stats$n),
        stringsAsFactors = FALSE
      )
      
      results <- rbind(results, new_rows)
      
      cat(sprintf("Processed %s\n", genus))
      
    }, error = function(e) {
      cat(sprintf("Error processing %s: %s\n", genus, e$message))
    })
  }
  
  # Round numeric columns for better display
  results$WeightedMean <- round(results$WeightedMean, 6)
  results$WeightedSD <- round(results$WeightedSD, 6)
  
  return(results)
}

# List of genera to analyze
genera_list <- c(
  "Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
  "Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
  "Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
)

# Run the analysis
results_table <- analyzeAllGenera(genera_list)

Processed Pinus
Processed Picea
Processed Quercus
Processed Fagus
Processed Abies
Processed Betula
Processed Castanea
Processed Fraxinus
Processed Alnus
Processed Larix
Processed Carpinus
Processed Acer
Processed Prunus
Processed Tilia
Processed Juniperus
Processed Ulmus
Processed Ostrya
Processed Arbutus

# Print summary table with formatting
print_table <- function(results) {
  # Calculate max width for each column
  genus_width <- max(nchar("Genus"), max(nchar(as.character(results$Genus))))
  region_width <- max(nchar("Region"), max(nchar(as.character(results$Region))))
  mean_width <- max(nchar("WeightedMean"), nchar(format(results$WeightedMean, scientific = FALSE)))
  sd_width <- max(nchar("WeightedSD"), nchar(format(results$WeightedSD, scientific = FALSE)))
  n_width <- max(nchar("SampleSize"), nchar(format(results$SampleSize, scientific = FALSE)))
  
  # Print header
  cat(sprintf("\n%-*s | %-*s | %-*s | %-*s | %-*s\n",
              genus_width, "Genus",
              region_width, "Region",
              mean_width, "WeightedMean",
              sd_width, "WeightedSD",
              n_width, "SampleSize"))
  
  # Print separator
  cat(paste(rep("-", genus_width + region_width + mean_width + sd_width + n_width + 12), collapse = ""))
  cat("\n")
  
  # Print rows
  for(i in 1:nrow(results)) {
    cat(sprintf("%-*s | %-*s | %*.6f | %*.6f | %*d\n",
                genus_width, results$Genus[i],
                region_width, results$Region[i],
                mean_width, results$WeightedMean[i],
                sd_width, results$WeightedSD[i],
                n_width, results$SampleSize[i]))
  }
}

# Print formatted table
print_table(results_table)


Genus     | Region        | WeightedMean | WeightedSD | SampleSize
------------------------------------------------------------------
Pinus     | Europe        |     0.068552 |   0.172694 |   15138162
Pinus     | North America |     0.044963 |   0.319151 |   26531586
Pinus     | Combined      |     0.053543 |   0.275344 |   41669748
Picea     | Europe        |     0.009994 |   0.025583 |    5306598
Picea     | North America |     0.029931 |   0.096219 |    9183654
Picea     | Combined      |     0.022271 |   0.078175 |   14490252
Quercus   | Europe        |     1.433349 |   3.188612 |    6189372
Quercus   | North America |     1.741784 |  10.780373 |   10148688
Quercus   | Combined      |     1.613999 |   8.716642 |   16338060
Fagus     | Europe        |     0.043629 |   0.110019 |    3485520
Fagus     | North America |     0.101029 |   0.263519 |     999378
Fagus     | Combined      |     0.056418 |   0.159532 |    4484898
Abies     | Europe        |     0.026906 |   0.068132 |     987480
Abies     | North America |     0.031248 |   0.084268 |    7633170
Abies     | Combined      |     0.030812 |   0.082578 |    8620650
Betula    | Europe        |     0.032208 |   0.040585 |    1172394
Betula    | North America |     0.023073 |   0.080394 |    3211812
Betula    | Combined      |     0.025506 |   0.072045 |    4384206
Castanea  | Europe        |     8.945159 |   5.212435 |     771390
Castanea  | North America |    34.628286 |  97.628511 |       1818
Castanea  | Combined      |     9.005546 |   7.145863 |     773208
Fraxinus  | Europe        |     0.315077 |   0.391305 |     537426
Fraxinus  | North America |     0.204790 |   0.688452 |    2074734
Fraxinus  | Combined      |     0.227487 |   0.640270 |    2612160
Alnus     | Europe        |     0.016040 |   0.073771 |     581796
Alnus     | North America |     0.004511 |   0.002527 |     361872
Alnus     | Combined      |     0.011628 |   0.058259 |     943668
Larix     | Europe        |     0.867041 |   1.048048 |     456462
Larix     | North America |     0.332672 |   1.148497 |     844722
Larix     | Combined      |     0.523183 |   1.151350 |    1301184
Carpinus  | Europe        |     0.308661 |   0.291090 |     671400
Carpinus  | North America |     1.101020 |   0.774391 |     145404
Carpinus  | Combined      |     0.449718 |   0.517956 |     816804
Acer      | Europe        |     0.453194 |   0.644193 |     476982
Acer      | North America |     0.556240 |   1.101246 |    8718498
Acer      | Combined      |     0.551622 |   1.085231 |    9195480
Prunus    | Europe        |     0.268458 |   0.439508 |     160542
Prunus    | North America |     0.116021 |   0.471729 |    1164870
Prunus    | Combined      |     0.133765 |   0.468179 |    1325412
Tilia     | Europe        |     0.252089 |   0.293577 |     137340
Tilia     | North America |     0.039454 |   0.065930 |     544590
Tilia     | Combined      |     0.082442 |   0.167483 |     681930
Juniperus | Europe        |     1.014823 |   2.526711 |     156042
Juniperus | North America |     0.089308 |   0.219724 |    3776670
Juniperus | Combined      |     0.126029 |   0.576469 |    3932712
Ulmus     | Europe        |     0.082827 |   0.158883 |      50832
Ulmus     | North America |     0.014820 |   0.035805 |    1416168
Ulmus     | Combined      |     0.017253 |   0.047882 |    1467000
Ostrya    | Europe        |     0.410181 |   0.178992 |     124560
Ostrya    | North America |     0.169710 |   0.168628 |     230742
Ostrya    | Combined      |     0.254013 |   0.207035 |     355302
Arbutus   | Europe        |     2.462000 |   2.359686 |      28278
Arbutus   | North America |     2.681491 |   5.711852 |     154656
Arbutus   | Combined      |     2.647649 |   5.333768 |     182934

write.csv("results_table,C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_table.csv")

"","x"
"1","results_table,C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_table.csv"

CONSOLIDATE = F, CONSOLIDATE= T.

results_table <- results_table %>%
  filter(!Genus %in% c("Castanea","Quercus"))
# Plot mean ISP by genus and region
ggplot(results_table[results_table$Region != "Combined",], 
       aes(x = Genus, y = WeightedMean, fill = Region)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Mean ISP by Genus and Region",
       y = "Weighted Mean ISP",
       x = "Genus")

library(ggplot2)
library(dplyr)

plot_data <- results_table %>%
  filter(Region != "Combined") %>%
  tidyr::pivot_wider(
    id_cols = Genus,
    names_from = Region,
    values_from = c(WeightedMean, WeightedSD, SampleSize)
  ) %>%
  rename(
    eu_value = WeightedMean_Europe,
    na_value = `WeightedMean_North America`,
    eu_sd = WeightedSD_Europe,
    na_sd = `WeightedSD_North America`
  )

# Calculate EU-NA difference for color coding
plot_data$diff <- plot_data$eu_value - plot_data$na_value

# Create the visualization
ggplot(plot_data, aes(x = na_value, y = eu_value)) +
  # Reference line
  geom_abline(linetype = "dashed", color = "gray70") +
  
  # Error bars with controlled transparency
  geom_errorbarh(aes(xmin = na_value - na_sd, 
                     xmax = na_value + na_sd,
                     height = 0.05),
                 alpha = 0.3) +
  geom_errorbar(aes(ymin = eu_value - eu_sd, 
                    ymax = eu_value + eu_sd,
                    width = 0.05),
                alpha = 0.3) +
  
  # Points and labels
  geom_point(aes(color = diff), size = 3) +
  geom_text(aes(label = tolower(Genus)), 
            hjust = -0.3, 
            vjust = 0.5, 
            size = 4) +
  
  # Consistent log scales (0.01 to 10)
  scale_x_log10(
    breaks = 10^seq(-2, 1, by = 1),
    labels = c("0.01", "0.10", "1.00", "10.00"),
    limits = c(0.01, 10),
    name = "North American ISP (log scale)"
  ) +
  scale_y_log10(
    breaks = 10^seq(-2, 1, by = 1),
    labels = c("0.01", "0.10", "1.00", "10.00"),
    limits = c(0.01, 10),
    name = "European ISP (log scale)"
  ) +
  
  # Color gradient for regional differences
  scale_color_gradient2(
    name = "EU - NA difference",
    low = "darkblue",
    mid = "gray60",
    high = "darkred",
    midpoint = 1,
    limits = c(-3, 3)
  ) +
  
  # Fixed ratio and theme
  coord_fixed(ratio = 1) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    legend.position = "right",
    plot.title = element_text(hjust = 0.2),
    plot.subtitle = element_text(hjust = 0.5, size = 9),
    aspect.ratio = 1
  ) +
  labs(
    title = "Comparison of ISP Values Between NA and EU",
    subtitle = "ISP:Individual standardized productivity(gm/cm2 ba)"
  )

trait_combined <- mu_processed %>%
  mutate(
    region = case_when(
      region == "WEA" ~ "EU",
      region %in% c("ENA", "WNA") ~ "NA",
      TRUE ~ "OTHER"
    )
  ) %>%
  filter(region %in% c("EU", "NA"))

library(tidyverse)
library(ggplot2)
library(ggrepel)
library(gridExtra)
library(grid)  
library(corrplot)

process_sensitivity_data <- function(mu_data, se_data) {
  processed_data <- mu_processed %>%
    mutate(
      region = case_when(
        region == "WEA" ~ "EU",
        region %in% c("ENA", "WNA") ~ "NA",
        TRUE ~ "OTHER"
      )
    ) %>%
    filter(
      region %in% c("EU", "NA"),
      !is.na(bfec.shade),
      !is.na(bfec.tempSite),
      !is.na(bfec.defSite)
    )
  
  filter_outliers <- function(x, k = 1.5) {
    qnt <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
    H <- k * IQR(x, na.rm = TRUE)
    x[x < (qnt[1] - H) | x > (qnt[2] + H)] <- NA
    return(x)
  }
  
  processed_data <- processed_data %>%
    group_by(region) %>%
    mutate(
      bfec.shade = filter_outliers(bfec.shade),
      bfec.tempSite = filter_outliers(bfec.tempSite),
      bfec.defSite = filter_outliers(bfec.defSite)
    ) %>%
    ungroup() %>%
    filter(
      !is.na(bfec.shade),
      !is.na(bfec.tempSite),
      !is.na(bfec.defSite)
    )
  
  return(processed_data)
}

# 计算相关性矩阵
calculate_correlations <- function(data, region_val) {
  region_data <- data %>%
    filter(region == region_val) %>%
    select(bfec.shade, bfec.tempSite, bfec.defSite)
  
  cor_matrix <- cor(region_data, use = "complete.obs")
  return(cor_matrix)
}

create_scatter_matrix <- function(data) {
  theme_scatter <- theme_bw() +
    theme(
      aspect.ratio = 1,
      panel.grid.minor = element_blank(),
      axis.title = element_text(size = 12),
      axis.text = element_text(size = 10),
      legend.position = "right"
    )
  
  # Shade vs Temperature
  p1 <- ggplot(data, aes(x = bfec.shade, y = bfec.tempSite, color = region)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
    scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
    labs(
      x = "Shade sensitivity",
      y = "Temperature sensitivity"
    ) +
    # 动态设置轴范围
    scale_x_continuous(
      limits = c(min(data$bfec.shade) * 1.1, max(data$bfec.shade) * 1.1)
    ) +
    scale_y_continuous(
      limits = c(min(data$bfec.tempSite) * 1.1, max(data$bfec.tempSite) * 1.1)
    ) +
    theme_scatter
  
  # Shade vs Deficit
  p2 <- ggplot(data, aes(x = bfec.shade, y = bfec.defSite, color = region)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
    scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
    labs(
      x = "Shade sensitivity",
      y = "Deficit sensitivity"
    ) +
    # 动态设置轴范围
    scale_x_continuous(
      limits = c(min(data$bfec.shade) * 1.1, max(data$bfec.shade) * 1.1)
    ) +
    scale_y_continuous(
      limits = c(min(data$bfec.defSite) * 1.1, max(data$bfec.defSite) * 1.1)
    ) +
    theme_scatter
  
  # Temperature vs Deficit
  p3 <- ggplot(data, aes(x = bfec.tempSite, y = bfec.defSite, color = region)) +
    geom_point(alpha = 0.7, size = 2) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
    scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
    labs(
      x = "Temperature sensitivity",
      y = "Deficit sensitivity"
    ) +
    # 动态设置轴范围
    scale_x_continuous(
      limits = c(min(data$bfec.tempSite) * 1.1, max(data$bfec.tempSite) * 1.1)
    ) +
    scale_y_continuous(
      limits = c(min(data$bfec.defSite) * 1.1, max(data$bfec.defSite) * 1.1)
    ) +
    theme_scatter
  
  return(list(p1 = p1, p2 = p2, p3 = p3))
}

# 执行分析
main_analysis <- function(mu_data, se_data) {
  # 数据处理
  processed_data <- process_sensitivity_data(mu_data, se_data)
  
  # 计算区域相关性
  eu_cor <- calculate_correlations(processed_data, "EU")
  na_cor <- calculate_correlations(processed_data, "NA")
  
  # 创建可视化
  plots <- create_scatter_matrix(processed_data)
  
  # 输出结果
  cat("European correlations:\n")
  print(eu_cor)
  cat("\nNorth American correlations:\n")
  print(na_cor)
  
  # 合并图表 - 使用更简单的标题方式
  combined_plot <- grid.arrange(
    plots$p1, plots$p2, plots$p3,
    ncol = 2,
    widths = c(1, 1),
    heights = c(1, 1),
    top = "Sensitivity Relationships by Region"  # 简化标题方式
  )
  
  # 创建相关性热图
  eu_cor_plot <- corrplot(eu_cor, 
                        method = "color",
                        type = "upper",
                        addCoef.col = "black",
                        tl.col = "black",
                        tl.srt = 45,
                        diag = FALSE,
                        title = "EU Correlations",
                        mar = c(0,0,1,0))
  
  na_cor_plot <- corrplot(na_cor, 
                        method = "color",
                        type = "upper",
                        addCoef.col = "black",
                        tl.col = "black",
                        tl.srt = 45,
                        diag = FALSE,
                        title = "NA Correlations",
                        mar = c(0,0,1,0))
  
  # 通过直方图判断数据分布是否正常
  hist_plots <- list(
    shade_hist = ggplot(processed_data, aes(x = bfec.shade, fill = region)) +
      geom_histogram(alpha = 0.7, bins = 15, position = "dodge") +
      scale_fill_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
      labs(title = "Shade Sensitivity Distribution") +
      theme_bw(),
    
    temp_hist = ggplot(processed_data, aes(x = bfec.tempSite, fill = region)) +
      geom_histogram(alpha = 0.7, bins = 15, position = "dodge") +
      scale_fill_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
      labs(title = "Temperature Sensitivity Distribution") +
      theme_bw(),
    
    fec_hist = ggplot(processed_data, aes(x = bfec.defSite, fill = region)) +
      geom_histogram(alpha = 0.7, bins = 15, position = "dodge") +
      scale_fill_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
      labs(title = "Deficit Sensitivity Distribution") +
      theme_bw()
  )
  
  # 返回处理后的数据和结果
  return(list(
    data = processed_data,
    eu_correlations = eu_cor,
    na_correlations = na_cor,
    plots = plots,
    combined_plot = combined_plot,
    hist_plots = hist_plots
  ))
}

calculate_summary_stats <- function(data) {
  summary_stats <- data %>%
    group_by(region) %>%
    summarise(
      across(
        c(bfec.shade, bfec.tempSite, bfec.defSite),
        list(
          mean = ~mean(., na.rm = TRUE),
          median = ~median(., na.rm = TRUE),
          sd = ~sd(., na.rm = TRUE),
          min = ~min(., na.rm = TRUE),
          max = ~max(., na.rm = TRUE),
          q25 = ~quantile(., 0.25, na.rm = TRUE),
          q75 = ~quantile(., 0.75, na.rm = TRUE),
          n = ~sum(!is.na(.))
        )
      )
    )
  
  return(summary_stats)
}

results <- main_analysis(mu_processed, se)

European correlations:
              bfec.shade bfec.tempSite bfec.defSite
bfec.shade     1.0000000   -0.15206061  -0.23288967
bfec.tempSite -0.1520606    1.00000000  -0.09430478
bfec.defSite  -0.2328897   -0.09430478   1.00000000

North American correlations:
               bfec.shade bfec.tempSite bfec.defSite
bfec.shade     1.00000000   -0.06958247   0.11054098
bfec.tempSite -0.06958247    1.00000000   0.06712926
bfec.defSite   0.11054098    0.06712926   1.00000000

write.csv(mu_processed,"C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_data.csv")

summary_stats <- calculate_summary_stats(results$data)

print(summary_stats)

# A tibble: 2 x 25
  region bfec.shade_mean bfec.shade_median bfec.shade_sd bfec.shade_min
  <chr>            <dbl>             <dbl>         <dbl>          <dbl>
1 EU              -0.751            -0.657         0.519          -1.96
2 NA              -0.568            -0.333         0.655          -2.49
# i 20 more variables: bfec.shade_max <dbl>, bfec.shade_q25 <dbl>,
#   bfec.shade_q75 <dbl>, bfec.shade_n <int>, bfec.tempSite_mean <dbl>,
#   bfec.tempSite_median <dbl>, bfec.tempSite_sd <dbl>,
#   bfec.tempSite_min <dbl>, bfec.tempSite_max <dbl>, bfec.tempSite_q25 <dbl>,
#   bfec.tempSite_q75 <dbl>, bfec.tempSite_n <int>, bfec.defSite_mean <dbl>,
#   bfec.defSite_median <dbl>, bfec.defSite_sd <dbl>, bfec.defSite_min <dbl>,
#   bfec.defSite_max <dbl>, bfec.defSite_q25 <dbl>, bfec.defSite_q75 <dbl>, ...

eu_data <- results$data %>% filter(region == "EU")
na_data <- results$data %>% filter(region == "NA")

cor.test(eu_data$bfec.shade, eu_data$bfec.tempSite)


    Pearson's product-moment correlation

data:  eu_data$bfec.shade and eu_data$bfec.tempSite
t = -1.1615, df = 57, p-value = 0.2503
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.3928455  0.1082364
sample estimates:
       cor 
-0.1520606

cat("\n")

cor.test(eu_data$bfec.shade, eu_data$bfec.defSite)


    Pearson's product-moment correlation

data:  eu_data$bfec.shade and eu_data$bfec.defSite
t = -1.808, df = 57, p-value = 0.07588
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.46145148  0.02466351
sample estimates:
       cor 
-0.2328897

cat("\n")

cor.test(eu_data$bfec.tempSite, eu_data$bfec.defSite)


    Pearson's product-moment correlation

data:  eu_data$bfec.tempSite and eu_data$bfec.defSite
t = -0.71517, df = 57, p-value = 0.4774
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.3421248  0.1657811
sample estimates:
        cor 
-0.09430478

cor.test(na_data$bfec.shade, na_data$bfec.tempSite)


    Pearson's product-moment correlation

data:  na_data$bfec.shade and na_data$bfec.tempSite
t = -0.68697, df = 97, p-value = 0.4937
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.2633764  0.1296097
sample estimates:
        cor 
-0.06958247

cat("\n")

cor.test(na_data$bfec.shade, na_data$bfec.defSite)


    Pearson's product-moment correlation

data:  na_data$bfec.shade and na_data$bfec.defSite
t = 1.0954, df = 97, p-value = 0.276
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.08880884  0.30137615
sample estimates:
     cor 
0.110541

cat("\n")

cor.test(na_data$bfec.tempSite, na_data$bfec.defSite)


    Pearson's product-moment correlation

data:  na_data$bfec.tempSite and na_data$bfec.defSite
t = 0.66264, df = 97, p-value = 0.5091
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.1320323  0.2610812
sample estimates:
       cor 
0.06712926

significant_genera <- c("Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
  "Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
  "Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus")

genera_plots <- lapply(significant_genera, function(g) {
  genus_data <- mu_processed %>% filter(genus == g)
  

  p <- ggplot(genus_data, aes(x = bfec.shade, y = bfec.tempSite, color = region)) +
    geom_point(size = 3) +
    geom_smooth(method = "lm", se = TRUE) +
    scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
    labs(
      title = paste("Shade-Temperature Relationship:", g),
      x = "Shade sensitivity",
      y = "Temperature sensitivity"
    ) +
    theme_bw()
  
  return(p)
})

# 移除NULL值
genera_plots <- genera_plots[!sapply(genera_plots, is.null)]

# 显示属级别图表
do.call(grid.arrange, c(genera_plots, ncol =4))

# 2. 区域间差异的统计检验
# 使用Fisher's Z变换来检验两个相关系数是否有显著差异
fisher_z_test <- function(r1, r2, n1, n2) {
  z1 <- 0.5 * log((1 + r1) / (1 - r1))
  z2 <- 0.5 * log((1 + r2) / (1 - r2))
  se_diff <- sqrt(1/(n1-3) + 1/(n2-3))
  z <- (z1 - z2) / se_diff
  p <- 2 * pnorm(-abs(z))
  return(list(z = z, p = p))
}

# 计算三个相关系数的区域差异
shade_temp_test <- fisher_z_test(
  r1 = -0.17167917, r2 = -0.003961553,
  n1 = nrow(eu_data), n2 = nrow(na_data)
)

shade_fec_test <- fisher_z_test(
  r1 = -0.19157076, r2 = 0.05524851,
  n1 = nrow(eu_data), n2 = nrow(na_data)
)

temp_fec_test <- fisher_z_test(
  r1 = -0.08251622, r2 = 0.09090448,
  n1 = nrow(eu_data), n2 = nrow(na_data)
)

# 打印结果
cat("Statistical tests for regional differences in correlations:\n")

Statistical tests for regional differences in correlations:

cat("Shade-Temperature: z =", shade_temp_test$z, ", p =", shade_temp_test$p, "\n")

Shade-Temperature: z = -1.007651 , p = 0.3136219

cat("Shade-Deficit: z =", shade_fec_test$z, ", p =", shade_fec_test$p, "\n")

Shade-Deficit: z = -1.482455 , p = 0.1382193

mu_processed$genus <- sapply(strsplit(mu_processed$taxon, " "), `[`, 1)

genus_analysis <- function(data) {
  genus_correlations <- data %>%
    group_by(genus, region) %>%
    summarize(
      shade_temp_cor = cor(bfec.shade, bfec.tempSite, use = "complete.obs"),
      shade_fec_cor = cor(bfec.shade, bfec.defSite, use = "complete.obs"),
      n = n(),
      .groups = "drop"
    ) %>%
    filter(!is.na(shade_temp_cor), !is.na(shade_fec_cor))
  
  return(genus_correlations)
}

genus_cors <- genus_analysis(mu_processed)
print(genus_cors, n=33)

# A tibble: 35 x 5
   genus     region shade_temp_cor shade_fec_cor     n
   <chr>     <chr>           <dbl>         <dbl> <int>
 1 Abies     WEA            0.352        -0.594      5
 2 Abies     WNA            0.239        -0.609      6
 3 Acer      ENA           -0.352        -0.941      5
 4 Acer      WEA            0.338         0.120      5
 5 Acer      WNA           -1             1          2
 6 Aesculus  WEA           -1            -1          2
 7 Betula    ENA           -1.00          1.00       3
 8 Carya     ENA            0.0710       -0.254      4
 9 Celtis    ENA            1             1          2
10 Cornus    ENA            0.772         0.286      3
11 Corylus   WEA            1            -1          2
12 Fraxinus  ENA            0.0953       -0.982      3
13 Fraxinus  WEA            0.864        -0.958      3
14 Ilex      ENA           -0.429         0.574      6
15 Juglans   ENA           -1            -1          2
16 Juniperus ENA            1             1          2
17 Juniperus WEA           -1             1          2
18 Juniperus WNA           -1.00         -0.983      3
19 Magnolia  ENA            0.187         0.407      6
20 Picea     ENA            0.346        -0.189      3
21 Picea     WEA            0.0896        0.703      3
22 Picea     WNA            0.990         0.685      3
23 Pinus     ENA           -0.135         0.0232    12
24 Pinus     WEA            0.329         0.801      8
25 Pinus     WNA           -0.179        -0.114     12
26 Pistacia  WEA            1            -1          2
27 Prunus    ENA            0.716         0.678      4
28 Prunus    WEA            0.510         0.600      3
29 Quercus   ENA            0.285         0.371     22
30 Quercus   WEA           -0.934        -0.920     16
31 Quercus   WNA            0.363        -0.0758     6
32 Tilia     WEA            1             1          2
33 Tsuga     WNA            1             1          2
# i 2 more rows

特征与气候变量的系统发育敏感性分析

library(RANN)
library(ape)
library(phytools)
library(plotrix)
library(stringr)
library(castor)
library(corrplot)
library(dplyr)

setwd("R:/clark/clark.unix/phylogram")
paramPath   <- "../makeMast/fitLogs/output2/east_europe_west/"

source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')

vars <- getFittedVariables(continent = 'northAmerica', 
                          ppath = '../makeMast/fitLogs/', dirs = 'output2')
print(vars)

 [1] "aspect1.defSite.slope" "aspect1.slope"         "aspect2.defSite.slope"
 [4] "aspect2.slope"         "defAnom"               "defSite"              
 [7] "diam"                  "I.diam.2."             "I.tempSite.2."        
[10] "intercept"             "isolate"               "lfi10SqrtAnom"        
[13] "lfi10SqrtSite"         "lfi30SqrtAnom"         "lfi30SqrtSite"        
[16] "lfi50SqrtAnom"         "lfi50SqrtSite"         "ph"                   
[19] "shade"                 "slope"                 "tempSite"             
[22] "tpi"

traitFile <- "plantTraits.csv"
path2Traits <- "C:/Users/mh471/Downloads/radiation/"

trait_mapping <- list(
  original = c("gmPerSeed", "dispersal", "fruit", "leaves", "pollination", "sex", "thorns","gmPerCm", "leafN", "leafNP", "leafP",  
                      "maxHt", "maxDbh", "SLA", "woodSG"),
  photosynthesis_long = c("Leaf.nitrogen..N..content.per.leaf.area..g.m.2.",
                          "Photosynthetic.carboxylation.capacity..Vcmax..Vmax..per.leaf.area.at.25.C..Âµmol.mâ..Â².sâ..Â¹._x",
                          "Jmax.per.leaf.area.at.ambient.or.leaf.temperature..Âµmol.mâ..Â².sâ..Â¹._x",
                          "Leaf.photosynthesis.at.saturating.light.and.saturating.CO2.per.leaf.area..Amax...Âµmol.mâ..Â².sâ..Â¹._x"
                          ),
  photosynthesis_short = c("N_area", "Vcmax", "Jmax", "Amax")
)

# Combine all trait names
traitNames <- c(trait_mapping$original, trait_mapping$photosynthesis_long)

# Process and rename function
process_and_rename <- function(trait_table, long_names, short_names) {
  for(i in seq_along(long_names)) {
    if(long_names[i] %in% names(trait_table)) {
      names(trait_table)[names(trait_table) == long_names[i]] <- short_names[i]
    }
  }
  return(trait_table)
}

# Species level
tmp <- phyloTraits(tree = trAll, species = spp, traitNames = traitNames,
                   path2Traits = path2Traits, traitFile = traitFile)
traitTab <- process_and_rename(tmp$traitTab, 
                               trait_mapping$photosynthesis_long, 
                               trait_mapping$photosynthesis_short)
tree <- fixPhyloNames(tmp$tree)

# Add continent classification
library(dplyr)
traitTab <- traitTab %>%
  mutate(continent = case_when(
    region %in% c("WNA", "ENA") ~ "NA",
    region %in% c("WEA") ~ "EU",
    region == "AFR" ~ "AFR",
    region == "UNKN" ~ "UNKNOWN",
    grepl("^west", tolower(region)) | grepl("^east", tolower(region)) ~ "NA",
    grepl("^europe", tolower(region)) ~ "EU",
    TRUE ~ "OTHER"
  ))
# After getting species-level traitTab with continent classification
# Aggregate to genus level manually
traitGen <- traitTab %>%
  mutate(genus = columnSplit(taxon, ' ')[,1]) %>%  # Extract genus from species names
  group_by(genus, region, continent) %>%
  summarise(
    # For numeric traits, take the mean
    across(where(is.numeric), ~ mean(.x, na.rm = TRUE)),
    # For character traits, take the first non-NA value
    across(where(is.character), ~ first(na.omit(.x))),
    .groups = 'drop'
  ) %>%
  # Remove duplicate genus-region combinations if they exist
  distinct(genus, region, .keep_all = TRUE)

# If you need a phylogenetic tree at genus level
treeGen <- trAll
genus_tips <- columnSplit(treeGen$tip.label, ' ')[,1]
# Keep one species per genus
tips_to_keep <- !duplicated(genus_tips)
tips_to_drop <- which(!tips_to_keep)
if(length(tips_to_drop) > 0) {
  treeGen <- drop.tip(treeGen, tips_to_drop)
}
# Rename tips to genus names
treeGen$tip.label <- columnSplit(treeGen$tip.label, ' ')[,1]

setwd("R:/clark/clark.unix/phylogram")
paramPath <- "../makeMast/fitLogs/output2/east_europe_west/"

source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')

# Get fitted variables
vars <- getFittedVariables(continent = 'northAmerica', 
                          ppath = '../makeMast/fitLogs/', dirs = 'output2')
print(vars)

 [1] "aspect1.defSite.slope" "aspect1.slope"         "aspect2.defSite.slope"
 [4] "aspect2.slope"         "defAnom"               "defSite"              
 [7] "diam"                  "I.diam.2."             "I.tempSite.2."        
[10] "intercept"             "isolate"               "lfi10SqrtAnom"        
[13] "lfi10SqrtSite"         "lfi30SqrtAnom"         "lfi30SqrtSite"        
[16] "lfi50SqrtAnom"         "lfi50SqrtSite"         "ph"                   
[19] "shade"                 "slope"                 "tempSite"             
[22] "tpi"

tmp <- phyloSensitivity(tree = trAll, 
                       species = tree$tip.label, 
                       betaNames = c("tempSite", "shade", "defSite"),
                       paramPath = paramPath,
                       path2Traits = "C:/Users/mh471/Downloads/radiation/")  # Add this parameter

parEst   <- tmp$parEst
traitSig <- tmp$parSig
tree     <- tmp$tree

# Calculate phylogenetic distance matrix
pdist <- cophenetic(tree)

sens_data <- mu_processed %>%
  select(region, species, bfec.shade, bfec.tempSite, bfec.defSite) %>%
  distinct()

# 设置行名为物种名，便于后续匹配
rownames(sens_data) <- sens_data$species

# 计算敏感性欧氏距离矩阵
sens_dist <- as.matrix(dist(sens_data[, c("bfec.shade", "bfec.tempSite", "bfec.defSite")]))

# 创建物种名称映射函数
convert_species_name <- function(sens_name) {
  # 提取属名和种加词
  genus_match <- regexpr("^[a-z]+", sens_name)
  genus <- substr(sens_name, 1, attr(genus_match, "match.length"))
  species <- substr(sens_name, attr(genus_match, "match.length") + 1, nchar(sens_name))
  
  # 将属名首字母大写
  capitalized_genus <- paste0(toupper(substr(genus, 1, 1)), substr(genus, 2, nchar(genus)))
  
  # 将种加词小写
  lowercase_species <- tolower(species)
  
  # 组合格式为 "Genus species"
  return(paste(capitalized_genus, lowercase_species))
}

# 将sens_dist的行名转换为pdist格式
converted_names <- sapply(rownames(sens_dist), convert_species_name)

# 创建映射表
name_mapping <- data.frame(
  sens_name = rownames(sens_dist),
  pdist_name = converted_names,
  stringsAsFactors = FALSE
)

# 使用映射表查找匹配的物种
common_spp <- intersect(rownames(pdist), name_mapping$pdist_name)

# 创建sens_dist的子集，使用pdist格式的物种名
sens_dist_matched <- sens_dist
rownames(sens_dist_matched) <- converted_names
colnames(sens_dist_matched) <- converted_names

# 获取共同物种的子集
pdist_sub <- pdist[common_spp, common_spp]
sens_dist_sub <- sens_dist_matched[common_spp, common_spp]

# 检查结果维度
dim(pdist_sub)

[1] 101 101

dim(sens_dist_sub)

[1] 101 101

genus_info <- mu_processed %>%
  select(species, genus) %>%
  distinct()
rownames(genus_info) <- genus_info$species

genus_info$standard_name <- sapply(genus_info$species,convert_species_name)

# 获取属列表和具有足够物种数的属
genera <- unique(genus_info$genus[genus_info$standard_name %in% common_spp])
genera_counts <- table(genus_info$genus[genus_info$standard_name %in% common_spp])
genera_with_enough <- names(genera_counts)[genera_counts > 1]

# 可视化按属分组的比较
par(mfrow = c(3, 3), mar = c(2, 2, 2, 1), omi = c(1, 1, 0.5, 0.3))
for(g in genera_with_enough) {
  # 筛选属内物种
  spp_in_genus <- genus_info$standard_name[genus_info$genus == g & genus_info$standard_name %in% common_spp]
  
  # 提取对应的距离子矩阵
  pdist_g <- pdist_sub[spp_in_genus, spp_in_genus]
  sens_dist_g <- sens_dist_sub[spp_in_genus, spp_in_genus]
  
  # 设置对角线为NA
  diag(pdist_g) <- diag(sens_dist_g) <- NA
  
  # 创建散点图
  plot(jitter(pdist_g), sens_dist_g, 
       xlab = "", ylab = "", main = g,
       cex = 0.7, pch = 16, col = rgb(0, 0, 1, 0.4))
  
  # 计算相关性
  dist_pair <- data.frame(
    phylo = pdist_g[lower.tri(pdist_g)],
    sens = sens_dist_g[lower.tri(sens_dist_g)]
  )
  
  # 如果有足够的数据点进行相关性检验
  if(nrow(dist_pair) > 1) {
    corr_test <- cor.test(dist_pair$phylo, dist_pair$sens)
    r_val <- round(corr_test$estimate, 2)
    p_val <- corr_test$p.value
    
    # 添加相关系数和显著性标记
    label <- paste0(g, ", r=", r_val)
    if(p_val < 0.05) label <- paste0(label, "*")
    
    legend("topleft", legend = label, bty = "n", cex = 0.9)
  }
}

# 整体相关性分析
par(mfrow = c(1, 1), mar = c(4, 4, 2, 2))

# 提取下三角矩阵的值
pdist_vals <- pdist_sub[lower.tri(pdist_sub)]
sens_dist_vals <- sens_dist_sub[lower.tri(sens_dist_sub)]
# 计算整体相关性
overall_corr <- cor.test(pdist_vals, sens_dist_vals)
# 绘制整体散点图
plot(jitter(pdist_vals), sens_dist_vals,
     xlab = "Phylogenetic Distance", ylab = "Sensitivity Distance",
     main = paste0("All Species (r=", round(overall_corr$estimate, 2), 
                   ", p=", format(overall_corr$p.value, scientific = TRUE, digits = 3), ")"),
     cex = 0.5, pch = 16, col = rgb(0, 0, 1, 0.2))
abline(lm(sens_dist_vals ~ pdist_vals), col = "red", lty = 2)

# 额外：热图可视化比较
par(mfrow = c(1, 2), mar = c(1, 1, 2, 1))
# 系统发育距离热图
image(pdist_sub, main = "Phylogenetic Distance", axes = FALSE, col = rev(hcl.colors(100, "Blues")))

# 敏感性距离热图
image(sens_dist_sub, main = "Sensitivity Distance", axes = FALSE, col = rev(hcl.colors(100, "Blues")))

# Mantel测试评估两个距离矩阵的相关性
mantel_result <- mantel.test(pdist_sub, sens_dist_sub, nperm = 999)
cat("\nMantel test result:\n")


Mantel test result:

print(mantel_result)

$z.stat
[1] 3744098

$p
[1] 0.717

$alternative
[1] "two.sided"

# 检查实际有什么物种
eu_species <- sens_data$species[sens_data$region == "WEA"]  # Western Europe & Africa
na_species <- sens_data$species[sens_data$region %in% c("ENA", "WNA")]  # North America

print(head(eu_species))

[1] "abiesAlba"     NA              "abiesCephalon" "abiesCilicica"
[5] "abiesNordmann" "abiesPinsapo"

print(head(na_species))

[1] "abiesAmabilis" "abiesConcolor" "abiesFraseri"  "abiesGrandis" 
[5] "abiesLasiocar" "abiesMagnific"

# 转换物种名
eu_species_std <- sapply(eu_species, convert_species_name)
na_species_std <- sapply(na_species, convert_species_name)

# 移除 NA 值和转换失败的
eu_species_std <- eu_species_std[eu_species_std != "NANA NA" & !is.na(eu_species_std)]
na_species_std <- na_species_std[na_species_std != "NANA NA" & !is.na(na_species_std)]

print(eu_species_std)

          abiesAlba       abiesCephalon       abiesCilicica       abiesNordmann 
       "Abies alba"    "Abies cephalon"    "Abies cilicica"    "Abies nordmann" 
       abiesPinsapo        acerCampestr        acerMonspess          acerOpalus 
    "Abies pinsapo"     "Acer campestr"     "Acer monspess"       "Acer opalus" 
       acerPlatanoi        acerPseudopl    aesculusHippocas    aesculusTurbinat 
    "Acer platanoi"     "Acer pseudopl" "Aesculus hippocas" "Aesculus turbinat" 
      alnusGlutinos       betulaPendula     carpinusBetulus      castaneaSativa 
   "Alnus glutinos"    "Betula pendula"  "Carpinus betulus"   "Castanea sativa" 
       cedrusLibani      celtisAustrali      cercisSiliquas      cornusSanguine 
    "Cedrus libani"   "Celtis australi"   "Cercis siliquas"   "Cornus sanguine" 
    corylusAvellana      corylusColurna    crataeguMonogyna       diospyroLotus 
 "Corylus avellana"   "Corylus colurna" "Crataegu monogyna"    "Diospyro lotus" 
      fagusSylvatic         ficusCarica       frangulaAlnus    fraxinusAngustif 
   "Fagus sylvatic"      "Ficus carica"    "Frangula alnus" "Fraxinus angustif" 
   fraxinusExcelsio       fraxinusOrnus        ilexAquifoli        juglansRegia 
"Fraxinus excelsio"    "Fraxinus ornus"     "Ilex aquifoli"     "Juglans regia" 
   juniperuOxycedru    juniperuPhoenice        larixDecidua        oleaEuropaea 
"Juniperu oxycedru" "Juniperu phoenice"     "Larix decidua"     "Olea europaea" 
         piceaAbies        piceaOmorika       piceaOriental         pinusBrutia 
      "Picea abies"     "Picea omorika"    "Picea oriental"      "Pinus brutia" 
        pinusCembra       pinusHalepens          pinusNigra       pinusPinaster 
     "Pinus cembra"    "Pinus halepens"       "Pinus nigra"    "Pinus pinaster" 
         pinusPinea       pinusSylvestr       pinusUncinata    pistaciaLentiscu 
      "Pinus pinea"    "Pinus sylvestr"    "Pinus uncinata" "Pistacia lentiscu" 
   pistaciaTerebint    platanusOriental         prunusAvium      prunusCerasife 
"Pistacia terebint" "Platanus oriental"      "Prunus avium"   "Prunus cerasife" 
      prunusSpinosa     quercusCanarien     quercusCastanei       quercusCerris 
   "Prunus spinosa"  "Quercus canarien"  "Quercus castanei"    "Quercus cerris" 
    quercusCoccifer      quercusFaginea     quercusFrainett         quercusIlex 
 "Quercus coccifer"   "Quercus faginea"  "Quercus frainett"      "Quercus ilex" 
    quercusInfector     quercusIthabure     quercusMacranth      quercusPetraea 
 "Quercus infector"  "Quercus ithabure"  "Quercus macranth"   "Quercus petraea" 
    quercusPubescen        quercusRobur     quercusRotundif        quercusSuber 
 "Quercus pubescen"     "Quercus robur"  "Quercus rotundif"     "Quercus suber" 
     quercusTrojana      sorbusAucupari        taxusBaccata        tiliaCordata 
  "Quercus trojana"   "Sorbus aucupari"     "Taxus baccata"     "Tilia cordata" 
      tiliaPlatyphy         ulmusGlabra 
   "Tilia platyphy"      "Ulmus glabra"

print(na_species_std)

      abiesAmabilis       abiesConcolor        abiesFraseri        abiesGrandis 
   "Abies amabilis"    "Abies concolor"     "Abies fraseri"     "Abies grandis" 
      abiesLasiocar       abiesMagnific        abiesProcera        acerCircinat 
   "Abies lasiocar"    "Abies magnific"     "Abies procera"     "Acer circinat" 
       acerMacrophy         acerNegundo        acerPensylva          acerRubrum 
    "Acer macrophy"      "Acer negundo"     "Acer pensylva"       "Acer rubrum" 
       acerSaccharu        acerSpicatum      aesculusGlabra         alnusIncana 
    "Acer saccharu"     "Acer spicatum"   "Aesculus glabra"      "Alnus incana" 
         alnusRubra     amelanchArborea      betulaAlleghan         betulaLenta 
      "Alnus rubra"  "Amelanch arborea"   "Betula alleghan"      "Betula lenta" 
     betulaPapyrife    calocedrDecurren    carpinusCarolini           caryaAlba 
  "Betula papyrife" "Calocedr decurren" "Carpinus carolini"        "Carya alba" 
      caryaCordifor       caryaIllinoin          caryaOvata     castaneaDentata 
   "Carya cordifor"    "Carya illinoin"       "Carya ovata"  "Castanea dentata" 
     castaneaPumila      celtisLaevigat      celtisOccident      cercisCanadens 
  "Castanea pumila"   "Celtis laevigat"   "Celtis occident"   "Cercis canadens" 
     cornusAlternif      cornusDrummond       cornusFlorida      cornusNuttalli 
  "Cornus alternif"   "Cornus drummond"    "Cornus florida"   "Cornus nuttalli" 
     corylusCornuta    diospyroVirginia       fagusGrandifo    frangulaCarolini 
  "Corylus cornuta" "Diospyro virginia"    "Fagus grandifo" "Frangula carolini" 
   fraxinusAmerican       fraxinusNigra    fraxinusPennsylv         ilexCassine 
"Fraxinus american"    "Fraxinus nigra" "Fraxinus pennsylv"      "Ilex cassine" 
        ilexDecidua         ilexMontana           ilexOpaca        ilexVerticil 
     "Ilex decidua"      "Ilex montana"        "Ilex opaca"     "Ilex verticil" 
       ilexVomitori      juglansCinerea        juglansNigra    juniperuCommunis 
    "Ilex vomitori"   "Juglans cinerea"     "Juglans nigra" "Juniperu communis" 
   juniperuMonosper    juniperuOsteospe    juniperuScopulor    juniperuVirginia 
"Juniperu monosper" "Juniperu osteospe" "Juniperu scopulor" "Juniperu virginia" 
      larixLaricina    liriodenTulipife    magnoliaAcuminat     magnoliaFraseri 
   "Larix laricina" "Lirioden tulipife" "Magnolia acuminat"  "Magnolia fraseri" 
   magnoliaGrandifl    magnoliaMacrophy    magnoliaTripetal    magnoliaVirginia 
"Magnolia grandifl" "Magnolia macrophy" "Magnolia tripetal" "Magnolia virginia" 
      piceaEngelman         piceaGlauca        piceaMariana        piceaPungens 
   "Picea engelman"      "Picea glauca"     "Picea mariana"     "Picea pungens" 
        piceaRubens       piceaSitchens       pinusAlbicaul       pinusBanksian 
     "Picea rubens"    "Picea sitchens"    "Pinus albicaul"    "Pinus banksian" 
        pinusClausa       pinusContorta       pinusCoulteri       pinusEchinata 
     "Pinus clausa"    "Pinus contorta"    "Pinus coulteri"    "Pinus echinata" 
        pinusEdulis       pinusElliotti       pinusFlexilis       pinusJeffreyi 
     "Pinus edulis"    "Pinus elliotti"    "Pinus flexilis"    "Pinus jeffreyi" 
      pinusLamberti       pinusMonophyl       pinusMonticol       pinusPalustri 
   "Pinus lamberti"    "Pinus monophyl"    "Pinus monticol"    "Pinus palustri" 
      pinusPonderos        pinusPungens        pinusRadiata       pinusResinosa 
   "Pinus ponderos"     "Pinus pungens"     "Pinus radiata"    "Pinus resinosa" 
        pinusRigida       pinusSabinian       pinusSerotina        pinusStrobus 
     "Pinus rigida"    "Pinus sabinian"    "Pinus serotina"     "Pinus strobus" 
         pinusTaeda       pinusVirginia    platanusOccident      prunusAmerican 
      "Pinus taeda"    "Pinus virginia" "Platanus occident"   "Prunus american" 
     prunusPensylva      prunusSerotina      prunusVirginia    pseudotsMenziesi 
  "Prunus pensylva"   "Prunus serotina"   "Prunus virginia" "Pseudots menziesi" 
    quercusAgrifoli         quercusAlba      quercusBicolor     quercusChrysole 
 "Quercus agrifoli"      "Quercus alba"   "Quercus bicolor"  "Quercus chrysole" 
     quercusFalcata     quercusGarryana     quercusGeminata     quercusHemispha 
  "Quercus falcata"  "Quercus garryana"  "Quercus geminata"  "Quercus hemispha" 
    quercusImbricar       quercusIncana     quercusKelloggi       quercusLaevis 
 "Quercus imbricar"    "Quercus incana"  "Quercus kelloggi"    "Quercus laevis" 
    quercusLaurifol       quercusLobata       quercusLyrata     quercusMacrocar 
 "Quercus laurifol"    "Quercus lobata"    "Quercus lyrata"  "Quercus macrocar" 
    quercusMargaret     quercusMichauxi      quercusMontana     quercusMyrtifol 
 "Quercus margaret"  "Quercus michauxi"   "Quercus montana"  "Quercus myrtifol" 
       quercusNigra     quercusPalustri        quercusRubra     quercusShumardi 
    "Quercus nigra"  "Quercus palustri"     "Quercus rubra"  "Quercus shumardi" 
    quercusStellata     quercusVelutina     quercusVirginia     quercusWislizen 
 "Quercus stellata"  "Quercus velutina"  "Quercus virginia"  "Quercus wislizen" 
    robiniaPseudoac       taxusBrevifol       thujaOccident        thujaPlicata 
 "Robinia pseudoac"    "Taxus brevifol"    "Thuja occident"     "Thuja plicata" 
      tiliaAmerican       tsugaCanadens       tsugaHeteroph       tsugaMertensi 
   "Tilia american"    "Tsuga canadens"    "Tsuga heteroph"    "Tsuga mertensi" 
         ulmusAlata       ulmusAmerican          ulmusRubra    viburnumDentatum 
      "Ulmus alata"    "Ulmus american"       "Ulmus rubra" "Viburnum dentatum" 
   viburnumPrunifol    viburnumRufidulu 
"Viburnum prunifol" "Viburnum rufidulu"

# 计算区域内和区域间的比较
# 1. 欧洲物种之间的比较
eu_vs_eu <- expand.grid(sp1 = eu_species_std, sp2 = eu_species_std)
eu_vs_eu <- eu_vs_eu[eu_vs_eu$sp1 != eu_vs_eu$sp2, ]

# 2. 北美物种之间的比较
na_vs_na <- expand.grid(sp1 = na_species_std, sp2 = na_species_std)
na_vs_na <- na_vs_na[na_vs_na$sp1 != na_vs_na$sp2, ]

# 3. 欧洲与北美物种之间的比较(跨大陆)
cross_continent <- expand.grid(sp1 = eu_species_std, sp2 = na_species_std)

# 为每组比较计算系统发育距离和敏感性距离
calc_distances <- function(species_pairs, region_type) {
  result <- data.frame()
  
  for(i in 1:nrow(species_pairs)) {
    sp1 <- as.character(species_pairs$sp1[i])
    sp2 <- as.character(species_pairs$sp2[i])
    
    # 检查物种是否在系统发育树和敏感性数据中
    if(sp1 %in% rownames(pdist) && sp2 %in% rownames(pdist) && 
       sp1 %in% rownames(sens_dist_sub) && sp2 %in% rownames(sens_dist_sub)) {
      
      # 获取系统发育距离
      phylo_d <- pdist[sp1, sp2]
      
      # 获取敏感性距离
      sens_d <- sens_dist_sub[sp1, sp2]
      
  
      result <- rbind(result, data.frame(
        sp1 = sp1,
        sp2 = sp2,
        phylo_dist = phylo_d,
        sens_dist = sens_d,
        comparison_type = region_type
      ))
    }
  }
  
  return(result)
}

# 计算各组比较的距离
eu_eu_dist <- calc_distances(eu_vs_eu, "EU-EU")
na_na_dist <- calc_distances(na_vs_na, "NA-NA")
cross_dist <- calc_distances(cross_continent, "EU-NA")

# 合并所有比较结果
all_comparisons <- rbind(eu_eu_dist, na_na_dist, cross_dist)

# 可视化比较
par(mfrow = c(1, 1), mar = c(5, 5, 4, 2))
plot(all_comparisons$phylo_dist, all_comparisons$sens_dist,
     col = ifelse(all_comparisons$comparison_type == "EU-EU", "blue",
           ifelse(all_comparisons$comparison_type == "NA-NA", "red", "purple")),
     pch = ifelse(all_comparisons$comparison_type == "EU-EU", 16,
           ifelse(all_comparisons$comparison_type == "NA-NA", 17, 18)),
     xlab = "Phylogenetic Distance", ylab = "Sensitivity Distance",
     main = "Phylogenetic vs. Sensitivity Distance by Region")

# 添加图例
legend("topleft", legend = c("EU-EU", "NA-NA", "EU-NA"),
       col = c("blue", "red", "purple"), pch = c(16, 17, 18), bty = "n")

# 为每种比较类型添加趋势线
eu_eu_lm <- lm(sens_dist ~ phylo_dist, data = eu_eu_dist)
na_na_lm <- lm(sens_dist ~ phylo_dist, data = na_na_dist)
cross_lm <- lm(sens_dist ~ phylo_dist, data = cross_dist)

abline(eu_eu_lm, col = "blue", lty = 2)
abline(na_na_lm, col = "red", lty = 2)
abline(cross_lm, col = "purple", lty = 2)

interaction_model <- lm(sens_dist ~ phylo_dist * comparison_type, data = all_comparisons)
anova(interaction_model)

Analysis of Variance Table

Response: sens_dist
                             Df Sum Sq Mean Sq F value    Pr(>F)    
phylo_dist                    1     44  44.345  6.5166   0.01071 *  
comparison_type               2    548 273.829 40.2399 < 2.2e-16 ***
phylo_dist:comparison_type    2    137  68.339 10.0426 4.409e-05 ***
Residuals                  7590  51649   6.805                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#在相同系统发育距离下，跨大陆物种对的敏感性差异是否大于同大陆物种对

all_comparisons$cross_continent <- ifelse(all_comparisons$comparison_type == "EU-NA", 
                                         "Cross-Continental", "Within-Continental")

# ANCOVA模型
ancova_model <- lm(sens_dist ~ phylo_dist + cross_continent + phylo_dist:cross_continent, 
                  data = all_comparisons)

# 查看结果
summary(ancova_model)


Call:
lm(formula = sens_dist ~ phylo_dist + cross_continent + phylo_dist:cross_continent, 
    data = all_comparisons)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9926 -1.1946 -0.7159  0.2795 18.9815 

Coefficients:
                                               Estimate Std. Error t value
(Intercept)                                   2.0295042  0.1106103  18.348
phylo_dist                                   -0.0005287  0.0002115  -2.500
cross_continentWithin-Continental            -0.3443766  0.1327062  -2.595
phylo_dist:cross_continentWithin-Continental  0.0003379  0.0002532   1.334
                                             Pr(>|t|)    
(Intercept)                                   < 2e-16 ***
phylo_dist                                    0.01245 *  
cross_continentWithin-Continental             0.00948 ** 
phylo_dist:cross_continentWithin-Continental  0.18209    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 2.624 on 7592 degrees of freedom
Multiple R-squared:  0.002191,  Adjusted R-squared:  0.001797 
F-statistic: 5.557 on 3 and 7592 DF,  p-value: 0.0008327

# 对比平均值（估计边际均值）
library(emmeans)
emmeans(ancova_model, ~ cross_continent)

 cross_continent    emmean     SE   df lower.CL upper.CL
 Cross-Continental    1.79 0.0547 7592     1.68     1.89
 Within-Continental   1.60 0.0361 7592     1.53     1.67

Confidence level used: 0.95

# 在特定系统发育距离值处比较（例如在系统发育距离=0.1处）
emtrends(ancova_model, ~ cross_continent, var = "phylo_dist")

 cross_continent    phylo_dist.trend       SE   df  lower.CL  upper.CL
 Cross-Continental         -0.000529 0.000211 7592 -0.000943 -1.14e-04
 Within-Continental        -0.000191 0.000139 7592 -0.000464  8.22e-05

Confidence level used: 0.95

# 可视化不同组别在固定系统发育距离下的敏感性差异
small_phylo <- 0.1
pred_data <- data.frame(
  phylo_dist = small_phylo,
  cross_continent = c("Cross-Continental", "Within-Continental")
)
predict(ancova_model, newdata = pred_data, interval = "confidence")

       fit      lwr      upr
1 2.029451 1.812661 2.246242
2 1.685109 1.541399 1.828819

library(lme4)

# 假设物种嵌套在属内
# 首先，提取物种的属信息
all_comparisons$genus1 <- gsub("([A-Z][a-z]+).*", "\\1", all_comparisons$sp1)
all_comparisons$genus2 <- gsub("([A-Z][a-z]+).*", "\\1", all_comparisons$sp2)

# 创建一个物种对的唯一标识符
all_comparisons$pair_id <- paste(all_comparisons$sp1, all_comparisons$sp2, sep="_")

# 混合效应模型，将属作为随机效应
mixed_model <- lmer(sens_dist ~ phylo_dist * cross_continent + (1|genus1) + (1|genus2), 
                   data = all_comparisons)

summary(mixed_model)

Linear mixed model fit by REML ['lmerMod']
Formula: sens_dist ~ phylo_dist * cross_continent + (1 | genus1) + (1 |  
    genus2)
   Data: all_comparisons

REML criterion at convergence: 35892.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.1690 -0.4242 -0.1792  0.1165  6.9986 

Random effects:
 Groups   Name        Variance Std.Dev.
 genus1   (Intercept) 0.3192   0.5650  
 genus2   (Intercept) 0.1868   0.4322  
 Residual             6.4796   2.5455  
Number of obs: 7596, groups:  genus1, 28; genus2, 28

Fixed effects:
                                               Estimate Std. Error t value
(Intercept)                                   1.6881821  0.1819155   9.280
phylo_dist                                   -0.0003271  0.0002176  -1.503
cross_continentWithin-Continental            -0.3789481  0.1317157  -2.877
phylo_dist:cross_continentWithin-Continental  0.0004124  0.0002472   1.668

Correlation of Fixed Effects:
            (Intr) phyl_d cr_W-C
phylo_dist  -0.539              
crss_cntW-C -0.511  0.680       
phyl_d:_W-C  0.435 -0.796 -0.858

# 在固定系统发育距离下比较
library(emmeans)
emtrends(mixed_model, ~ cross_continent, var = "phylo_dist")

 cross_continent    phylo_dist.trend       SE  df asymp.LCL asymp.UCL
 Cross-Continental         -3.27e-04 0.000218 Inf -0.000754  9.95e-05
 Within-Continental         8.54e-05 0.000151 Inf -0.000211  3.82e-04

Degrees-of-freedom method: asymptotic 
Confidence level used: 0.95

# 筛选系统发育距离小于阈值的数据子集
# 检查系统发育距离的分布
summary(all_comparisons$phylo_dist)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
  0.1147 235.0695 475.1492 457.2973 704.4694 704.4694

hist(all_comparisons$phylo_dist, main="系统发育距离分布", xlab="系统发育距离")

# 查看最小的几个系统发育距离值
head(sort(all_comparisons$phylo_dist))

[1] 0.114742 0.114742 0.117820 0.117820 0.118578 0.118578

# 尝试适应数据的阈值，例如使用分布的第一个四分位数
threshold <- quantile(all_comparisons$phylo_dist, 0.25)
print(paste("新阈值:", threshold))

[1] "<U+65B0><U+9608><U+503C>: 235.069478"

small_phylo_subset <- all_comparisons[all_comparisons$phylo_dist < threshold, ]
# 检查筛选后的数据集大小
nrow(small_phylo_subset)

[1] 1897

# 检查跨大陆变量的水平数量
table(small_phylo_subset$cross_continent)


 Cross-Continental Within-Continental 
               619               1278

# 选择系统发育距离最小的前25%样本
percentile_threshold <- 0.25
n_samples <- nrow(all_comparisons)
n_select <- floor(n_samples * percentile_threshold)

ordered_samples <- all_comparisons[order(all_comparisons$phylo_dist), ]
small_phylo_subset <- ordered_samples[1:n_select, ]

# 验证数据集
nrow(small_phylo_subset)

[1] 1899

table(small_phylo_subset$cross_continent)


 Cross-Continental Within-Continental 
               619               1280

setwd("R:/clark/clark.unix/phylogram")
paramPath   <- "../makeMast/fitLogs/output2/east_europe_west/"
path2Traits <- "../traitsByGroup/"
traitFile   <- ("C:/Users/mh471/Downloads/radiation/plant_traits_final_clean.csv")
results <-read.csv("C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_data.csv")
source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')


vars <- getFittedVariables(continent = 'northAmerica', 
                          ppath = '../makeMast/fitLogs/', dirs = 'output2')
print(vars)

 [1] "aspect1.defSite.slope" "aspect1.slope"         "aspect2.defSite.slope"
 [4] "aspect2.slope"         "defAnom"               "defSite"              
 [7] "diam"                  "I.diam.2."             "I.tempSite.2."        
[10] "intercept"             "isolate"               "lfi10SqrtAnom"        
[13] "lfi10SqrtSite"         "lfi30SqrtAnom"         "lfi30SqrtSite"        
[16] "lfi50SqrtAnom"         "lfi50SqrtSite"         "ph"                   
[19] "shade"                 "slope"                 "tempSite"             
[22] "tpi"

tmp <- phyloSensitivity(tree = trAll, species = tree$tip.label, 
                       betaNames = c("tempSite", "shade", "defSite"),
                       paramPath = paramPath, path2Traits = path2Traits)

parEst   <- tmp$parEst
traitSig <- tmp$parSig
tree     <- tmp$tree


pdist <- cophenetic(tree)

sens_data <- results %>%
  select(species, bfec.shade, bfec.tempSite, bfec.defSite) %>%
  distinct()

rownames(sens_data) <- sens_data$species

sens_dist <- as.matrix(dist(sens_data))

spp <- getFittedSpecies(dirs = 'output2/east_europe_west',
                       keep = c('east', 'west', 'europe'))

trait_tmp <- phyloTraits(tree = trAll, species = spp, 
                        traitNames = c("gmPerCm", "leafN", "leafNP", "leafP", "gmPerSeed", 
                                      "maxHt", "pollination", "maxDbh", "SLA", "woodSG"))
traitTab <- trait_tmp$traitTab
trRsp <- trait_tmp$tree

x <- read.csv("R:/clark/clark.unix/phylogram/speciesByResponse.csv", row.names = 1)
rownames(x)[rownames(x) == 'caryaOvalis-g'] <- 'caryaOvalis'

convert_species_name <- function(sens_name) {
  genus_match <- regexpr("^[a-z]+", sens_name)
  genus <- substr(sens_name, 1, attr(genus_match, "match.length"))
  species <- substr(sens_name, attr(genus_match, "match.length") + 1, nchar(sens_name))
  
  capitalized_genus <- paste0(toupper(substr(genus, 1, 1)), substr(genus, 2, nchar(genus)))
  
  lowercase_species <- tolower(species)
  
  return(paste(capitalized_genus, lowercase_species))
}

sens_species <- data.frame(
  code8 = rownames(sens_dist),
  bfec.shade = as.numeric(as.character(sens_data$bfec.shade)),
  bfec.tempSite = as.numeric(as.character(sens_data$bfec.tempSite)),
  bfec.defSite = as.numeric(as.character(sens_data$bfec.defSite)),
  stringsAsFactors = FALSE
)

if("code8" %in% colnames(traitTab)) {
  combined_data <- merge(sens_species, traitTab, by = "code8", all = FALSE)
  
  combined_data$standard_name <- sapply(combined_data$code8, convert_species_name)

  combined_data$in_response <- combined_data$code8 %in% rownames(x)
  
  for(i in 1:nrow(combined_data)) {
    sp <- combined_data$code8[i]
    if(sp %in% rownames(x)) {
      anom_cols <- grep('Anom', colnames(x), value = TRUE)
      for(col in anom_cols) {
        combined_data[i, col] <- x[sp, col]
      }
    }
  }
} else {
  print("Warning: No code8 column found in traitTab. Using alternative matching method.")
  
  if(all(c("genus", "specEpith") %in% colnames(traitTab))) {
    traitTab$standard_name <- paste(traitTab$genus, traitTab$specEpith)
    
    sens_species$standard_name <- sapply(sens_species$code8, convert_species_name)
    
    combined_data <- merge(sens_species, traitTab, by = "standard_name", all = FALSE)
  } else {
    stop("Cannot find appropriate columns to match species between datasets.")
  }
}

abline_safe <- function(model_or_formula = NULL, data = NULL, col = "red", lty = 2, a = NULL, b = NULL, h = NULL, v = NULL) {
  if(!is.null(h)) {
    if(is.finite(h)) {
      abline(h = h, col = col, lty = lty)
      return(TRUE)
    } else {
      warning("无法绘制水平线：h不是有限值")
      return(FALSE)
    }
  }
  
  if(!is.null(v)) {
    if(is.finite(v)) {
      abline(v = v, col = col, lty = lty)
      return(TRUE)
    } else {
      warning("无法绘制垂直线：v不是有限值")
      return(FALSE)
    }
  }
  
  # 检查是否直接提供了a和b
  if(!is.null(a) && !is.null(b)) {
    if(is.finite(a) && is.finite(b)) {
      abline(a = a, b = b, col = col, lty = lty)
      return(TRUE)
    } else {
      warning("无法绘制回归线：a或b不是有限值")
      return(FALSE)
    }
  }
  
  # 处理模型或公式
  if(!is.null(model_or_formula)) {
    # 检查是否为公式（不使用is.formula函数）
    if(inherits(model_or_formula, "formula")) {
      # 如果输入的是公式，先拟合模型
      model <- try(lm(model_or_formula, data = data), silent = TRUE)
      if(inherits(model, "try-error")) {
        warning("拟合模型失败")
        return(FALSE)
      }
    } else {
      # 否则假设输入已经是一个模型
      model <- model_or_formula
    }
    
    # 提取系数
    coefs <- try(coef(model), silent = TRUE)
    if(inherits(coefs, "try-error") || length(coefs) < 2) {
      warning("无法提取模型系数")
      return(FALSE)
    }
    
    # 检查系数是否有限
    if(all(is.finite(coefs))) {
      abline(a = coefs[1], b = coefs[2], col = col, lty = lty)
      return(TRUE)
    } else {
      warning("无法绘制回归线：系数不是有限值")
      return(FALSE)
    }
  }
  
  warning("未提供有效参数")
  return(FALSE)
}
# 创建物种名称匹配函数，用于匹配树名
match_tree_species <- function(species_name, tree_tips) {
  # 检查是否完全匹配
  if(species_name %in% tree_tips) {
    return(species_name)
  }
  
  # 获取种名的前几个字符（属名部分应该是完整的）
  parts <- strsplit(species_name, " ")[[1]]
  if(length(parts) < 2) return(NA)
  
  genus <- parts[1]
  species_partial <- parts[2]
  
  # 查找同属的物种
  genus_matches <- grep(paste0("^", genus, " "), tree_tips, value = TRUE)
  
  if(length(genus_matches) == 0) return(NA)
  
  # 在同属物种中查找以species_partial开头的物种
  species_matches <- grep(paste0("^", genus, " ", species_partial), tree_tips, value = TRUE)
  
  if(length(species_matches) == 1) {
    return(species_matches)
  } else if(length(species_matches) > 1) {
    # 如果有多个匹配，返回第一个（可能需要更复杂的逻辑来选择最佳匹配）
    return(species_matches[1])
  } else {
    # 没有匹配，返回NA
    return(NA)
  }
}

# 为combined_data中的每个物种找到树中对应的物种
species_mapping <- sapply(combined_data$standard_name, function(sp) {
  match_tree_species(sp, tree$tip.label)
})

matched_count <- sum(!is.na(species_mapping))
cat("Total species:", length(species_mapping), "\n")

Total species: 250

cat("Successfully matched:", matched_count, "\n")

Successfully matched: 203

cat("Failed to match:", length(species_mapping) - matched_count, "\n")

Failed to match: 47

matched_data <- combined_data[!is.na(species_mapping),]
matched_data$tree_name <- species_mapping[!is.na(species_mapping)]

if("pollination" %in% colnames(matched_data)) {
  par(mar = c(10, 4, 2, 1))
  # 将特征转换为因子
  matched_data$pollination <- as.factor(matched_data$pollination)
  
  # 绘制箱线图
  boxplot(bfec.shade ~ pollination, data = matched_data, 
          main = "Shade Sensitivity by Pollination Type",
          xlab = "", ylab = "Shade Sensitivity", 
          las = 2)  # 垂直轴标签
  title(xlab = "Pollination", line = 8)  # 增加底部空间
}

if("genus" %in% colnames(matched_data)) {
  genus_counts <- table(matched_data$genus)
  genus_with_enough <- names(genus_counts)[genus_counts >= 1]
  
  genus_summary <- do.call(rbind, lapply(genus_with_enough, function(fam) {
    fam_data <- matched_data[matched_data$genus == fam, ]
    data.frame(
      genus = fam,
      mean_shade = mean(fam_data$bfec.shade, na.rm = TRUE),
      sd_shade = sd(fam_data$bfec.shade, na.rm = TRUE),
      count = nrow(fam_data)
    )
  }))
  
  par(mfrow = c(1, 1), mar = c(10, 4, 2, 2))
  genus_summary_filtered <- genus_summary[genus_summary$count >= 1, ]
  genus_summary_filtered <- genus_summary_filtered[order(genus_summary_filtered$mean_shade), ]
  
  barplot(genus_summary_filtered$mean_shade, 
          names.arg = genus_summary_filtered$genus, 
          las = 2,  # 垂直显示科名
          col = rainbow(nrow(genus_summary_filtered)),
          ylab = "Mean Shade Sensitivity",
          main = "Mean Shade Sensitivity by genus")
  abline_safe(h = 0, lty = 2)  # 添加参考线
}

[1] TRUE

# 查看traitGen中所有可用的性状
cat("=== All traits in traitGen ===\n")

=== All traits in traitGen ===

print(names(traitGen))

 [1] "genus"       "region"      "continent"   "gmPerSeed"   "gmPerCm"    
 [6] "leafN"       "leafNP"      "leafP"       "maxHt"       "maxDbh"     
[11] "SLA"         "woodSG"      "N_area"      "Vcmax"       "Jmax"       
[16] "Amax"        "code8"       "specEpith"   "subSpec"     "class"      
[21] "family"      "section"     "dispersal"   "fruit"       "leaves"     
[26] "pollination" "sex"         "thorns"      "taxon"

# 创建完整的matched_data，包含所有性状
create_complete_matched_data <- function() {
  # 使用 mu_processed 作为基础
  base_data <- mu_processed
  
  # 获取所有数值型性状列（排除标识符和分类变量）
  exclude_cols <- c("genus", "region", "continent", "code8", "specEpith", 
                    "subSpec", "class", "family", "section", "dispersal", 
                    "fruit", "leaves", "pollination", "sex", "thorns", "taxon")
  
  trait_cols <- setdiff(names(traitGen), exclude_cols)
  
  cat("Traits to merge:", paste(trait_cols, collapse = ", "), "\n\n")
  
  # 准备traitGen数据（每个属只保留一行，或按region分组）
  # 如果想要区域特异性数据，可以保留region
  # 这里我们先按genus汇总（取平均值）
  trait_summary <- traitGen %>%
    group_by(genus) %>%
    summarise(across(all_of(trait_cols), ~ mean(.x, na.rm = TRUE)),
              .groups = 'drop')
  
  cat("Merging", nrow(base_data), "sensitivity records with", 
      nrow(trait_summary), "genus trait records\n")
  
  # 合并数据
  matched_data <- merge(base_data, trait_summary, 
                       by = "genus", 
                       all.x = TRUE,
                       suffixes = c("", ".trait"))  # 避免列名冲突
  
  cat("Result:", nrow(matched_data), "records\n")
  cat("Columns added:", paste(setdiff(names(matched_data), names(base_data)), collapse = ", "), "\n")
  
  return(matched_data)
}

# 执行合并
matched_data <- create_complete_matched_data()

Traits to merge: gmPerSeed, gmPerCm, leafN, leafNP, leafP, maxHt, maxDbh, SLA, woodSG, N_area, Vcmax, Jmax, Amax 

Merging 284 sensitivity records with 45 genus trait records
Result: 284 records
Columns added: gmPerSeed.trait, gmPerCm, leafN, leafNP, leafP, maxHt, maxDbh, SLA, woodSG, N_area, Vcmax, Jmax, Amax

# 验证所有性状
all_traits <- c("gmPerSeed", "gmPerCm", "leafN", "leafNP", "leafP", 
                "maxHt", "maxDbh", "SLA", "woodSG",
                "N_area", "Vcmax", "Jmax", "Amax", "SLA_val")

cat("\n=== Data availability for all traits ===\n")


=== Data availability for all traits ===

for(trait in all_traits) {
  if(trait %in% names(matched_data)) {
    non_na <- sum(!is.na(matched_data[[trait]]))
    pct <- round(100 * non_na / nrow(matched_data), 1)
    cat(sprintf("%-15s: %4d non-NA values (%5.1f%%)\n", trait, non_na, pct))
  } else {
    cat(sprintf("%-15s: NOT FOUND\n", trait))
  }
}

gmPerSeed      :  217 non-NA values ( 76.4%)
gmPerCm        :  209 non-NA values ( 73.6%)
leafN          :  218 non-NA values ( 76.8%)
leafNP         :  218 non-NA values ( 76.8%)
leafP          :  218 non-NA values ( 76.8%)
maxHt          :  218 non-NA values ( 76.8%)
maxDbh         :  211 non-NA values ( 74.3%)
SLA            :  211 non-NA values ( 74.3%)
woodSG         :  211 non-NA values ( 74.3%)
N_area         :  218 non-NA values ( 76.8%)
Vcmax          :  209 non-NA values ( 73.6%)
Jmax           :  211 non-NA values ( 74.3%)
Amax           :  182 non-NA values ( 64.1%)
SLA_val        : NOT FOUND

# 扩展性状列表
continuous_traits <- c("gmPerSeed", "gmPerCm", "leafN", "leafNP", "leafP", 
                      "maxHt", "maxDbh", "SLA", "woodSG",
                      "N_area", "Vcmax", "Jmax", "Amax", "SLA_val")

continuous_traits_present <- continuous_traits[continuous_traits %in% colnames(matched_data)]

cat("\n=== Running comprehensive correlation analysis ===\n")


=== Running comprehensive correlation analysis ===

cat("Traits to analyze:", length(continuous_traits_present), "\n")

Traits to analyze: 13

print(continuous_traits_present)

 [1] "gmPerSeed" "gmPerCm"   "leafN"     "leafNP"    "leafP"     "maxHt"    
 [7] "maxDbh"    "SLA"       "woodSG"    "N_area"    "Vcmax"     "Jmax"     
[13] "Amax"

if(length(continuous_traits_present) > 0) {
  # 初始化结果
  trait_correlations_full <- data.frame(
    trait = character(),
    sensitivity = character(),
    r = numeric(),
    p_value = numeric(),
    n = numeric(),
    stringsAsFactors = FALSE
  )
  
  # 设置图形（创建更大的网格）
  n_plots <- min(length(continuous_traits_present), 12)  # 限制显示前12个
  par(mfrow = c(4, 3), mar = c(4, 4, 3, 1))
  
  # 只分析与shade的关系（为了简化可视化）
  for(i in 1:min(12, length(continuous_traits_present))) {
    trait <- continuous_traits_present[i]
    
    # 获取有效数据
    valid_idx <- !is.na(matched_data[[trait]]) & !is.na(matched_data$bfec.shade)
    valid_data <- matched_data[valid_idx, ]
    
    if(nrow(valid_data) >= 3) {
      # 计算相关性
      corr <- cor.test(valid_data[[trait]], valid_data$bfec.shade, 
                      method = "pearson", use = "complete.obs")
      
      # 保存结果
      trait_correlations_full <- rbind(trait_correlations_full, data.frame(
        trait = trait,
        sensitivity = "shade",
        r = corr$estimate,
        p_value = corr$p.value,
        n = nrow(valid_data),
        stringsAsFactors = FALSE
      ))
      
      # 绘图
      plot(valid_data[[trait]], valid_data$bfec.shade,
           pch = 16, col = rgb(0, 0, 1, 0.5),
           xlab = trait, ylab = "Shade Sensitivity",
           main = paste(trait, "vs Shade"))
      
      # 添加回归线
      if(var(valid_data[[trait]], na.rm = TRUE) > 0) {
        fit <- lm(bfec.shade ~ valid_data[[trait]], data = valid_data)
        abline(fit, col = "red", lty = 2)
      }
      
      # 添加统计信息
      text_pos <- par("usr")
      text(text_pos[1] + (text_pos[2] - text_pos[1]) * 0.05,
           text_pos[4] - (text_pos[4] - text_pos[3]) * 0.05,
           paste("r =", round(corr$estimate, 3),
                 "\np =", round(corr$p.value, 3)),
           pos = 4, cex = 0.8)
    }
  }
  
  # 现在计算所有组合（用于表格）
  sensitivity_vars <- c("bfec.shade", "bfec.tempSite", "bfec.defSite")
  
  trait_correlations_complete <- data.frame()
  
  for(trait in continuous_traits_present) {
    for(sens_var in sensitivity_vars) {
      valid_idx <- !is.na(matched_data[[trait]]) & !is.na(matched_data[[sens_var]])
      valid_data <- matched_data[valid_idx, ]
      
      if(nrow(valid_data) >= 3) {
        corr <- cor.test(valid_data[[trait]], valid_data[[sens_var]], 
                        method = "pearson", use = "complete.obs")
        
        trait_correlations_complete <- rbind(trait_correlations_complete, data.frame(
          trait = trait,
          sensitivity = gsub("bfec\\.", "", sens_var),
          r = corr$estimate,
          p_value = corr$p.value,
          n = nrow(valid_data),
          stringsAsFactors = FALSE
        ))
      }
    }
  }
  
  # 显示完整结果
  cat("\n=== Complete Correlation Results ===\n")
  print(trait_correlations_complete %>% 
        arrange(p_value) %>%
        head(20))  # 显示前20个最显著的
  
  # 找出显著的相关性
  sig_corr <- trait_correlations_complete[trait_correlations_complete$p_value < 0.05, ]
  if(nrow(sig_corr) > 0) {
    cat("\n=== Significant Correlations (p < 0.05) ===\n")
    print(sig_corr[order(abs(sig_corr$r), decreasing = TRUE), ])
  }
}


=== Complete Correlation Results ===
          trait sensitivity           r    p_value   n
cor22       SLA    tempSite  0.13757726 0.04592790 211
cor28    N_area    tempSite -0.11742228 0.08367661 218
cor36      Amax       shade  0.12629175 0.08934955 182
cor4    gmPerCm    tempSite  0.11515241 0.09685763 209
cor7      leafN    tempSite  0.11148804 0.10063983 218
cor38      Amax     defSite  0.11175499 0.13310266 182
cor33      Jmax       shade  0.10145443 0.14190095 211
cor25    woodSG    tempSite  0.10140338 0.14210265 211
cor23       SLA     defSite -0.08863924 0.19968954 211
cor18    maxDbh       shade  0.07733477 0.26341543 211
cor32     Vcmax     defSite  0.07680028 0.26904559 209
cor35      Jmax     defSite  0.06857570 0.32150504 211
cor29    N_area     defSite  0.06384444 0.34814680 218
cor37      Amax    tempSite -0.06876440 0.35632875 182
cor15     maxHt       shade  0.05814649 0.39293325 218
cor2  gmPerSeed     defSite  0.05705464 0.40298920 217
cor5    gmPerCm     defSite  0.05751841 0.40810551 209
cor31     Vcmax    tempSite  0.05542889 0.42537360 209
cor13     leafP    tempSite  0.05421391 0.42577470 218
cor9     leafNP       shade  0.05321859 0.43433218 218

=== Significant Correlations (p < 0.05) ===
      trait sensitivity         r   p_value   n
cor22   SLA    tempSite 0.1375773 0.0459279 211

library(ggplot2)
library(reshape2)

if(exists("trait_correlations_complete") && nrow(trait_correlations_complete) > 0) {
  # 准备热图数据
  heatmap_data <- trait_correlations_complete %>%
    mutate(
      sig_level = case_when(
        p_value < 0.001 ~ "***",
        p_value < 0.01 ~ "**",
        p_value < 0.05 ~ "*",
        p_value < 0.1 ~ ".",
        TRUE ~ ""
      )
    )
  
  # 创建矩阵
  corr_matrix <- dcast(heatmap_data, trait ~ sensitivity, value.var = "r")
  rownames(corr_matrix) <- corr_matrix$trait
  corr_matrix <- corr_matrix[,-1]
  
  # 创建ggplot热图
  heatmap_long <- melt(as.matrix(corr_matrix))
  names(heatmap_long) <- c("Trait", "Sensitivity", "Correlation")
  
  # 添加显著性标记
  sig_matrix <- dcast(heatmap_data, trait ~ sensitivity, value.var = "sig_level")
  sig_long <- melt(as.matrix(sig_matrix[,-1]))
  heatmap_long$sig <- sig_long$value
  
  p_heat <- ggplot(heatmap_long, aes(x = Sensitivity, y = Trait, fill = Correlation)) +
    geom_tile(color = "white") +
    geom_text(aes(label = round(Correlation, 2)), size = 3) +
    geom_text(aes(label = sig), size = 4, vjust = -0.5) +
    scale_fill_gradient2(low = "blue", mid = "white", high = "red",
                        midpoint = 0, limits = c(-0.3, 0.3),
                        name = "Correlation\nCoefficient") +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.text.y = element_text(size = 9)) +
    labs(title = "Trait-Sensitivity Correlations",
         subtitle = "Significance: *** p<0.001, ** p<0.01, * p<0.05, . p<0.1",
         x = "Environmental Sensitivity", y = "Functional Trait")
  
  print(p_heat)
}

# 由于traitGen有区域信息，我们可以做更精细的分析
# 重新合并，保留区域信息
create_regional_matched_data <- function() {
  base_data <- mu_processed
  
  # 直接合并，保留region信息
  matched_regional <- merge(base_data, 
                           traitGen[, !names(traitGen) %in% c("code8", "specEpith", "subSpec", 
                                                               "class", "family", "section", 
                                                               "dispersal", "fruit", "taxon")],
                           by = c("genus", "region"),
                           all.x = TRUE)
  
  return(matched_regional)
}

matched_regional <- create_regional_matched_data()

# 检查区域分布
cat("\n=== Regional distribution ===\n")


=== Regional distribution ===

table(matched_regional$continent)


    AFR      EU      NA UNKNOWN 
      1      74     142       2

# 按大陆分析
for(cont in c("EU", "NA")) {
  cont_data <- matched_regional[matched_regional$continent == cont & !is.na(matched_regional$continent), ]
  
  if(nrow(cont_data) > 10) {
    cat("\n=== Analysis for", cont, "===\n")
    cat("Number of records:", nrow(cont_data), "\n")
    
    # 选几个关键性状分析
    key_traits <- c("leafN", "maxHt", "N_area", "Vcmax")
    key_traits <- key_traits[key_traits %in% names(cont_data)]
    
    for(trait in key_traits) {
      if(sum(!is.na(cont_data[[trait]])) >= 5) {
        corr <- cor.test(cont_data[[trait]], cont_data$bfec.shade, use = "complete.obs")
        cat(sprintf("  %s vs shade: r = %.3f, p = %.3f (n = %d)\n",
                   trait, corr$estimate, corr$p.value, sum(complete.cases(cont_data[[trait]], cont_data$bfec.shade))))
      }
    }
  }
}


=== Analysis for EU ===
Number of records: 74 
  leafN vs shade: r = -0.093, p = 0.432 (n = 74)
  maxHt vs shade: r = -0.004, p = 0.976 (n = 74)
  N_area vs shade: r = -0.061, p = 0.603 (n = 74)
  Vcmax vs shade: r = -0.148, p = 0.212 (n = 73)

=== Analysis for NA ===
Number of records: 142 
  leafN vs shade: r = -0.015, p = 0.863 (n = 142)
  maxHt vs shade: r = 0.063, p = 0.460 (n = 142)
  N_area vs shade: r = -0.052, p = 0.537 (n = 142)
  Vcmax vs shade: r = 0.051, p = 0.556 (n = 134)

# 首先添加tree_name列到matched_data
# 假设tree的tip.label格式是 "Genus species"
if(exists("tree")) {
  # 创建物种名匹配
  matched_data$species_std <- paste(toupper(substr(matched_data$genus, 1, 1)), 
                                    tolower(substr(matched_data$genus, 2, nchar(matched_data$genus))),
                                    tolower(matched_data$specEpith), sep = " ")
  
  # 检查哪些物种在树中
  matched_data$in_tree <- matched_data$species_std %in% tree$tip.label
  
  cat("\nSpecies in phylogenetic tree:", sum(matched_data$in_tree), "out of", nrow(matched_data), "\n")
  
  if(sum(matched_data$in_tree) > 0) {
    # 只使用在树中的物种
    tree_data <- matched_data[matched_data$in_tree, ]
    
    # 计算系统发育信号
    library(phytools)
    
    signal_results <- data.frame()
    
    # 分析每个性状
    for(trait in c(continuous_traits_present, "bfec.shade", "bfec.tempSite", "bfec.defSite")) {
      # 准备数据
      trait_values <- setNames(tree_data[[trait]], tree_data$species_std)
      trait_values <- trait_values[!is.na(trait_values)]
      
      if(length(trait_values) >= 10) {
        # 修剪树只包含有数据的物种
        tree_subset <- keep.tip(tree, names(trait_values))
        
        # 计算Pagel's lambda
        lambda_test <- phylosig(tree_subset, trait_values, method = "lambda", test = TRUE)
        
        signal_results <- rbind(signal_results, data.frame(
          trait = trait,
          lambda = lambda_test$lambda,
          p_value = lambda_test$P,
          n_species = length(trait_values)
        ))
        
        cat(trait, ": λ =", round(lambda_test$lambda, 3), 
            ", p =", round(lambda_test$P, 4), "\n")
      }
    }
    
    if(nrow(signal_results) > 0) {
      cat("\n=== Phylogenetic Signal Summary ===\n")
      print(signal_results[order(signal_results$lambda, decreasing = TRUE), ])
    }
  }
}


Species in phylogenetic tree: 0 out of 284

standardize_species_name <- function(species_code) {
  # 假设species格式是 "genusSpecies" (如 "abiesAlba")
  # 需要转换为 "Genus species" (如 "Abies alba")
  
  # 找到大写字母的位置（除了第一个）
  capital_pos <- gregexpr("[A-Z]", species_code)[[1]]
  
  if(length(capital_pos) > 0 && capital_pos[1] != -1) {
    # 提取genus（第一部分）
    genus <- substr(species_code, 1, capital_pos[1] - 1)
    # 如果genus是小写开头的
    if(capital_pos[1] == 1) {
      # species_code 已经是 "GenusSpecies" 格式
      # 找第二个大写字母
      if(length(capital_pos) > 1) {
        genus <- substr(species_code, 1, capital_pos[2] - 1)
        species <- substr(species_code, capital_pos[2], nchar(species_code))
      } else {
        # 没有第二个大写字母，尝试其他方法
        genus <- species_code
        species <- ""
      }
    } else {
      # species_code 是 "genusSpecies" 格式
      # 找到genus和species的分界
      for(i in 2:nchar(species_code)) {
        if(substr(species_code, i, i) == toupper(substr(species_code, i, i))) {
          genus <- substr(species_code, 1, i-1)
          species <- substr(species_code, i, nchar(species_code))
          break
        }
      }
    }
    
    # 格式化：Genus species
    genus_formatted <- paste0(toupper(substr(genus, 1, 1)), 
                              tolower(substr(genus, 2, nchar(genus))))
    species_formatted <- tolower(species)
    
    return(paste(genus_formatted, species_formatted))
  } else {
    # 无法解析，返回原值
    return(species_code)
  }
}

# 更简单的方法：直接使用genus和specEpith列
if("genus" %in% names(matched_data) && "specEpith" %in% names(matched_data)) {
  # 创建标准物种名
  matched_data$species_clean <- paste(
    matched_data$genus,
    matched_data$specEpith
  )
  
  # 检查格式
  cat("\n=== Created species names (first 10) ===\n")
  print(head(matched_data$species_clean, 10))
  
  # 另一种格式（首字母大写）
  matched_data$species_capitalized <- paste(
    paste0(toupper(substr(matched_data$genus, 1, 1)), 
           tolower(substr(matched_data$genus, 2, nchar(matched_data$genus)))),
    tolower(matched_data$specEpith)
  )
  
  cat("\n=== Capitalized species names (first 10) ===\n")
  print(head(matched_data$species_capitalized, 10))
}

# 或者直接从species列转换
if("species" %in% names(matched_data)) {
  matched_data$species_std <- sapply(matched_data$species, standardize_species_name)
  
  cat("\n=== Standardized from species column (first 10) ===\n")
  print(head(matched_data$species_std, 10))
}


=== Standardized from species column (first 10) ===
 [1] "Abies amabilis" "Abies alba"     "Abies magnific" "Abies grandis" 
 [5] "Abies procera"  "Abies cephalon" "Abies fraseri"  "Abies concolor"
 [9] "Abies lasiocar" "Abies pinsapo"

# 尝试不同的格式匹配
find_matches <- function(matched_data, tree) {
  results <- list()
  
  # 尝试不同的格式
  formats_to_try <- c()
  
  if("species_clean" %in% names(matched_data)) {
    formats_to_try <- c(formats_to_try, "species_clean")
  }
  if("species_capitalized" %in% names(matched_data)) {
    formats_to_try <- c(formats_to_try, "species_capitalized")
  }
  if("species_std" %in% names(matched_data)) {
    formats_to_try <- c(formats_to_try, "species_std")
  }
  if("species" %in% names(matched_data)) {
    formats_to_try <- c(formats_to_try, "species")
  }
  
  for(format_name in formats_to_try) {
    n_matches <- sum(matched_data[[format_name]] %in% tree$tip.label)
    cat(format_name, ":", n_matches, "matches\n")
    results[[format_name]] <- n_matches
  }
  
  # 找最佳格式
  best_format <- names(which.max(unlist(results)))
  cat("\nBest format:", best_format, "with", max(unlist(results)), "matches\n")
  
  return(best_format)
}

best_format <- find_matches(matched_data, tree)

species_std : 101 matches
species : 0 matches

Best format: species_std with 101 matches

# 使用最佳格式
if(!is.null(best_format) && best_format %in% names(matched_data)) {
  matched_data$tree_name <- matched_data[[best_format]]
  matched_data$in_tree <- matched_data$tree_name %in% tree$tip.label
  
  cat("\n=== Final match results ===\n")
  cat("Species in tree:", sum(matched_data$in_tree), "out of", nrow(matched_data), "\n")
  
  if(sum(matched_data$in_tree) > 0) {
    cat("\nMatched species examples:\n")
    print(head(matched_data$tree_name[matched_data$in_tree], 10))
  } else {
    cat("\nNo matches found. Checking for partial matches...\n")
    
    # 尝试部分匹配
    for(i in 1:min(10, nrow(matched_data))) {
      species_to_check <- matched_data$tree_name[i]
      similar_in_tree <- tree$tip.label[grep(matched_data$genus[i], tree$tip.label, ignore.case = TRUE)]
      if(length(similar_in_tree) > 0) {
        cat("\nFor", species_to_check, "found similar:", similar_in_tree[1], "\n")
      }
    }
  }
}


=== Final match results ===
Species in tree: 101 out of 284 

Matched species examples:
 [1] "Abies amabilis" "Abies alba"     "Abies grandis"  "Abies procera" 
 [5] "Abies fraseri"  "Abies concolor" "Abies pinsapo"  "Abies cilicica"
 [9] "Acer negundo"   "Acer opalus"

# 系统发育信号分析
library(phytools)
library(ape)

# 创建 in_tree 列
matched_data$in_tree <- matched_data$tree_name %in% tree$tip.label
cat("Species in tree:", sum(matched_data$in_tree), "out of", nrow(matched_data), "\n")

Species in tree: 101 out of 284

# 准备数据
tree_data <- matched_data[matched_data$in_tree, ]
cat("Using", nrow(tree_data), "species for phylogenetic analysis\n")

Using 101 species for phylogenetic analysis

# 初始化结果
signal_results <- data.frame()

# 分析性状
all_traits <- c("bfec.shade", "bfec.tempSite", "bfec.defSite")

for(trait in all_traits) {
  cat("\nAnalyzing", trait, "...\n")
  
  trait_values <- setNames(tree_data[[trait]], tree_data$tree_name)
  trait_values <- trait_values[!is.na(trait_values)]
  cat("  Non-NA values:", length(trait_values), "\n")
  
  if(length(trait_values) >= 10) {
    tree_subset <- keep.tip(tree, names(trait_values))
    cat("  Tree tips:", length(tree_subset$tip.label), "\n")
    
    tryCatch({
      lambda_test <- phylosig(tree_subset, trait_values, method = "lambda", test = TRUE)
      K_test <- phylosig(tree_subset, trait_values, method = "K", test = TRUE, nsim = 999)
      
      signal_results <- rbind(signal_results, data.frame(
        trait = trait,
        lambda = lambda_test$lambda,
        lambda_p = lambda_test$P,
        K = K_test$K,
        K_p = K_test$P,
        n_species = length(trait_values)
      ))
      
      cat("  Lambda =", round(lambda_test$lambda, 3), "p =", round(lambda_test$P, 4), "\n")
      
    }, error = function(e) {
      cat("  Error:", e$message, "\n")
    })
  } else {
    cat("  Skipped - insufficient data\n")
  }
}


Analyzing bfec.shade ...
  Non-NA values: 101 
  Tree tips: 101 
  Lambda = 0 p = 1 

Analyzing bfec.tempSite ...
  Non-NA values: 101 
  Tree tips: 101 
  Lambda = 0 p = 1 

Analyzing bfec.defSite ...
  Non-NA values: 101 
  Tree tips: 101 
  Lambda = 0 p = 1

# 显示结果
if(nrow(signal_results) > 0) {
  cat("\n=== Phylogenetic Signal Summary ===\n")
  print(signal_results)
}


=== Phylogenetic Signal Summary ===
          trait       lambda lambda_p            K       K_p n_species
1    bfec.shade 7.331374e-05        1 0.0017448399 0.9389389       101
2 bfec.tempSite 7.331374e-05        1 0.0007612654 0.9479479       101
3  bfec.defSite 7.331374e-05        1 0.0008714858 0.9389389       101

# PIC分析来控制系统发育相关性
perform_pic_analysis <- function(tree_data, tree) {
  pic_results <- data.frame()
  
  # 获取功能性状和敏感性性状
  func_traits <- c("gmPerCm","gmPerSeed", "leafN","leafNP","leafP","maxHt","maxDbh","SLA","woodSG")
  sens_traits <- c("bfec.shade", "bfec.tempSite", "bfec.defSite")
  
  for(func in func_traits) {
    for(sens in sens_traits) {
      # 准备数据 - 转换为数值
      func_vals <- as.numeric(tree_data[[func]])
      sens_vals <- as.numeric(tree_data[[sens]])
      
      names(func_vals) <- tree_data$tree_name
      names(sens_vals) <- tree_data$tree_name
      
      # 找共同的非NA物种
      common_species <- intersect(names(func_vals[!is.na(func_vals)]),
                                 names(sens_vals[!is.na(sens_vals)]))
      
      if(length(common_species) >= 10) {
        # 修剪树
        tree_pic <- keep.tip(tree, common_species)
        
        # 获取对应的值
        func_pic_data <- as.numeric(func_vals[common_species])
        sens_pic_data <- as.numeric(sens_vals[common_species])
        
        tryCatch({
          # 计算PIC
          func_pic <- pic(func_pic_data, tree_pic)
          sens_pic <- pic(sens_pic_data, tree_pic)
          
          # 相关性测试
          pic_cor <- cor.test(func_pic, sens_pic)
          
          # 原始相关性（不控制系统发育）
          raw_cor <- cor.test(func_pic_data, sens_pic_data)
          
          pic_results <- rbind(pic_results, data.frame(
            functional_trait = func,
            sensitivity = gsub("bfec\\.", "", sens),
            raw_r = raw_cor$estimate,
            raw_p = raw_cor$p.value,
            pic_r = pic_cor$estimate,
            pic_p = pic_cor$p.value,
            n_species = length(common_species),
            stringsAsFactors = FALSE
          ))
        }, error = function(e) {
          cat("  Error for", func, "vs", sens, ":", e$message, "\n")
        })
      }
    }
  }
  
  return(pic_results)
}

# 运行PIC分析
pic_results <- perform_pic_analysis(tree_data, tree)

if(nrow(pic_results) > 0) {
  cat("\n=== Phylogenetic Independent Contrasts Results ===\n")
  print(head(pic_results, 15))
  
  # 显示显著的PIC相关性
  sig_pic <- pic_results[pic_results$pic_p < 0.05, ]
  if(nrow(sig_pic) > 0) {
    cat("\nSignificant correlations (pic_p < 0.05):\n")
    print(sig_pic[order(abs(sig_pic$pic_r), decreasing = TRUE), ])
  } else {
    cat("\nNo significant PIC correlations\n")
  }
} else {
  cat("No PIC results generated\n")
}


=== Phylogenetic Independent Contrasts Results ===
      functional_trait sensitivity        raw_r      raw_p       pic_r
cor            gmPerCm       shade  0.075922244 0.45983318 -0.22273847
cor1           gmPerCm    tempSite  0.172258930 0.09156544 -0.21137738
cor2           gmPerCm     defSite  0.113743919 0.26728791 -0.01168649
cor3         gmPerSeed       shade -0.018746293 0.85313311 -0.04810092
cor4         gmPerSeed    tempSite  0.053709908 0.59560607 -0.04752820
cor5         gmPerSeed     defSite  0.090198265 0.37214457  0.01666861
cor6             leafN       shade  0.027374422 0.78688851 -0.30938233
cor7             leafN    tempSite  0.019175087 0.84981265 -0.27646576
cor8             leafN     defSite  0.035140648 0.72851945 -0.02416435
cor9            leafNP       shade  0.001199049 0.99055347 -0.30482695
cor10           leafNP    tempSite  0.127948126 0.20457794 -0.27499238
cor11           leafNP     defSite  0.055458029 0.58367139 -0.03096513
cor12            leafP       shade  0.061840526 0.54105672 -0.31073530
cor13            leafP    tempSite -0.008608089 0.93226123 -0.27775120
cor14            leafP     defSite  0.018675821 0.85367907 -0.02750221
            pic_p n_species
cor   0.029163074        97
cor1  0.038700401        97
cor2  0.910024478        97
cor3  0.636360427       100
cor4  0.640386957       100
cor5  0.869923558       100
cor6  0.001832936       100
cor7  0.005604384       100
cor8  0.812336138       100
cor9  0.002156179       100
cor10 0.005874391       100
cor11 0.760930423       100
cor12 0.001745757       100
cor13 0.005377882       100
cor14 0.786991537       100

Significant correlations (pic_p < 0.05):
      functional_trait sensitivity        raw_r      raw_p      pic_r
cor12            leafP       shade  0.061840526 0.54105672 -0.3107353
cor6             leafN       shade  0.027374422 0.78688851 -0.3093823
cor9            leafNP       shade  0.001199049 0.99055347 -0.3048269
cor15            maxHt       shade -0.103222247 0.30679140  0.3030420
cor21              SLA       shade  0.009830698 0.92386346 -0.2993291
cor13            leafP    tempSite -0.008608089 0.93226123 -0.2777512
cor7             leafN    tempSite  0.019175087 0.84981265 -0.2764658
cor16            maxHt    tempSite -0.043163030 0.66981461  0.2751147
cor10           leafNP    tempSite  0.127948126 0.20457794 -0.2749924
cor22              SLA    tempSite  0.029624993 0.77330607 -0.2704302
cor18           maxDbh       shade -0.058818838 0.56305592  0.2614391
cor24           woodSG       shade  0.070732950 0.49115634 -0.2516071
cor25           woodSG    tempSite  0.185850578 0.06836390 -0.2313911
cor19           maxDbh    tempSite  0.024688017 0.80834634  0.2300730
cor            gmPerCm       shade  0.075922244 0.45983318 -0.2227385
cor1           gmPerCm    tempSite  0.172258930 0.09156544 -0.2113774
            pic_p n_species
cor12 0.001745757       100
cor6  0.001832936       100
cor9  0.002156179       100
cor15 0.002296271       100
cor21 0.003050132        97
cor13 0.005377882       100
cor7  0.005604384       100
cor16 0.005851535       100
cor10 0.005874391       100
cor22 0.007703377        97
cor18 0.009314234        99
cor24 0.013403566        97
cor25 0.023308494        97
cor19 0.022664201        99
cor   0.029163074        97
cor1  0.038700401        97

# 由于traitGen有区域信息，我们可以做更精细的分析
# 重新合并，保留区域信息
create_regional_matched_data <- function() {
  base_data <- mu_processed
  
  # 直接合并，保留region信息
  matched_regional <- merge(base_data, 
                           traitGen[, !names(traitGen) %in% c("code8", "specEpith", "subSpec", 
                                                               "class", "family", "section", 
                                                               "dispersal", "fruit", "taxon")],
                           by = c("genus", "region"),
                           all.x = TRUE)
  
  return(matched_regional)
}

matched_regional <- create_regional_matched_data()

# 检查区域分布
cat("\n=== Regional distribution ===\n")


=== Regional distribution ===

table(matched_regional$continent)


    AFR      EU      NA UNKNOWN 
      1      74     142       2

# 按大陆分析
for(cont in c("EU", "NA")) {
  cont_data <- matched_regional[matched_regional$continent == cont & !is.na(matched_regional$continent), ]
  
  if(nrow(cont_data) > 10) {
    cat("\n=== Analysis for", cont, "===\n")
    cat("Number of records:", nrow(cont_data), "\n")
    
    # 选几个关键性状分析
    key_traits <- c("leafN", "maxHt", "N_area", "Vcmax")
    key_traits <- key_traits[key_traits %in% names(cont_data)]
    
    for(trait in key_traits) {
      if(sum(!is.na(cont_data[[trait]])) >= 5) {
        corr <- cor.test(cont_data[[trait]], cont_data$bfec.shade, use = "complete.obs")
        cat(sprintf("  %s vs shade: r = %.3f, p = %.3f (n = %d)\n",
                   trait, corr$estimate, corr$p.value, sum(complete.cases(cont_data[[trait]], cont_data$bfec.shade))))
      }
    }
  }
}


=== Analysis for EU ===
Number of records: 74 
  leafN vs shade: r = -0.093, p = 0.432 (n = 74)
  maxHt vs shade: r = -0.004, p = 0.976 (n = 74)
  N_area vs shade: r = -0.061, p = 0.603 (n = 74)
  Vcmax vs shade: r = -0.148, p = 0.212 (n = 73)

=== Analysis for NA ===
Number of records: 142 
  leafN vs shade: r = -0.015, p = 0.863 (n = 142)
  maxHt vs shade: r = 0.063, p = 0.460 (n = 142)
  N_area vs shade: r = -0.052, p = 0.537 (n = 142)
  Vcmax vs shade: r = 0.051, p = 0.556 (n = 134)

# 分析PIC效应
analyze_pic_effects <- function(pic_results) {
  cat("\n=== KEY FINDINGS FROM PIC ANALYSIS ===\n\n")
  
  # 1. 系统发育掩盖的关系（PIC后相关性增强）
  masked_relationships <- pic_results %>%
    filter(abs(pic_r) > abs(raw_r) + 0.1) %>%
    arrange(desc(abs(pic_r)))
  
  if(nrow(masked_relationships) > 0) {
    cat("1. Relationships MASKED by phylogeny (stronger after PIC):\n")
    print(masked_relationships[, c("functional_trait", "sensitivity", "raw_r", "pic_r")])
    cat("\n")
  }
  
  # 2. 系统发育驱动的关系（PIC后相关性减弱）
  phylo_driven <- pic_results %>%
    filter(abs(raw_r) > abs(pic_r) + 0.1) %>%
    arrange(desc(abs(raw_r)))
  
  if(nrow(phylo_driven) > 0) {
    cat("2. Relationships DRIVEN by phylogeny (weaker after PIC):\n")
    print(phylo_driven[, c("functional_trait", "sensitivity", "raw_r", "pic_r")])
    cat("\n")
  }
  
  # 3. 方向改变的关系
  sign_change <- pic_results %>%
    filter(sign(raw_r) != sign(pic_r) & abs(pic_r) > 0.02)
  
  if(nrow(sign_change) > 0) {
    cat("3. Relationships that CHANGED DIRECTION after PIC:\n")
    print(sign_change[, c("functional_trait", "sensitivity", "raw_r", "pic_r")])
    cat("\n")
  }
}

analyze_pic_effects(pic_results)


=== KEY FINDINGS FROM PIC ANALYSIS ===

1. Relationships MASKED by phylogeny (stronger after PIC):
      functional_trait sensitivity        raw_r      pic_r
cor12            leafP       shade  0.061840526 -0.3107353
cor6             leafN       shade  0.027374422 -0.3093823
cor9            leafNP       shade  0.001199049 -0.3048269
cor15            maxHt       shade -0.103222247  0.3030420
cor21              SLA       shade  0.009830698 -0.2993291
cor13            leafP    tempSite -0.008608089 -0.2777512
cor7             leafN    tempSite  0.019175087 -0.2764658
cor16            maxHt    tempSite -0.043163030  0.2751147
cor10           leafNP    tempSite  0.127948126 -0.2749924
cor22              SLA    tempSite  0.029624993 -0.2704302
cor18           maxDbh       shade -0.058818838  0.2614391
cor24           woodSG       shade  0.070732950 -0.2516071
cor19           maxDbh    tempSite  0.024688017  0.2300730
cor            gmPerCm       shade  0.075922244 -0.2227385

2. Relationships DRIVEN by phylogeny (weaker after PIC):
      functional_trait sensitivity     raw_r       pic_r
cor26           woodSG     defSite 0.1198987 -0.01626167
cor2           gmPerCm     defSite 0.1137439 -0.01168649

3. Relationships that CHANGED DIRECTION after PIC:
      functional_trait sensitivity        raw_r       pic_r
cor            gmPerCm       shade  0.075922244 -0.22273847
cor1           gmPerCm    tempSite  0.172258930 -0.21137738
cor4         gmPerSeed    tempSite  0.053709908 -0.04752820
cor6             leafN       shade  0.027374422 -0.30938233
cor7             leafN    tempSite  0.019175087 -0.27646576
cor8             leafN     defSite  0.035140648 -0.02416435
cor9            leafNP       shade  0.001199049 -0.30482695
cor10           leafNP    tempSite  0.127948126 -0.27499238
cor11           leafNP     defSite  0.055458029 -0.03096513
cor12            leafP       shade  0.061840526 -0.31073530
cor14            leafP     defSite  0.018675821 -0.02750221
cor15            maxHt       shade -0.103222247  0.30304200
cor16            maxHt    tempSite -0.043163030  0.27511474
cor18           maxDbh       shade -0.058818838  0.26143911
cor21              SLA       shade  0.009830698 -0.29932907
cor22              SLA    tempSite  0.029624993 -0.27043021
cor24           woodSG       shade  0.070732950 -0.25160709
cor25           woodSG    tempSite  0.185850578 -0.23139111

# 确保pic_results有所需的列
if(!"delta_r" %in% names(pic_results)) {
  pic_results$delta_r <- pic_results$raw_r - pic_results$pic_r
}

if(!"phylo_effect" %in% names(pic_results)) {
  pic_results$phylo_effect <- ifelse(abs(pic_results$delta_r) > 0.1, "Strong",
                                     ifelse(abs(pic_results$delta_r) > 0.05, "Moderate", "Weak"))
}

# 检查数据结构
cat("=== PIC Results Structure ===\n")

=== PIC Results Structure ===

str(pic_results)

'data.frame':   27 obs. of  9 variables:
 $ functional_trait: chr  "gmPerCm" "gmPerCm" "gmPerCm" "gmPerSeed" ...
 $ sensitivity     : chr  "shade" "tempSite" "defSite" "shade" ...
 $ raw_r           : num  0.0759 0.1723 0.1137 -0.0187 0.0537 ...
 $ raw_p           : num  0.4598 0.0916 0.2673 0.8531 0.5956 ...
 $ pic_r           : num  -0.2227 -0.2114 -0.0117 -0.0481 -0.0475 ...
 $ pic_p           : num  0.0292 0.0387 0.91 0.6364 0.6404 ...
 $ n_species       : int  97 97 97 100 100 100 100 100 100 100 ...
 $ delta_r         : num  0.2987 0.3836 0.1254 0.0294 0.1012 ...
 $ phylo_effect    : chr  "Strong" "Strong" "Strong" "Weak" ...

cat("\nFirst few rows:\n")


First few rows:

print(head(pic_results))

     functional_trait sensitivity       raw_r      raw_p       pic_r      pic_p
cor           gmPerCm       shade  0.07592224 0.45983318 -0.22273847 0.02916307
cor1          gmPerCm    tempSite  0.17225893 0.09156544 -0.21137738 0.03870040
cor2          gmPerCm     defSite  0.11374392 0.26728791 -0.01168649 0.91002448
cor3        gmPerSeed       shade -0.01874629 0.85313311 -0.04810092 0.63636043
cor4        gmPerSeed    tempSite  0.05370991 0.59560607 -0.04752820 0.64038696
cor5        gmPerSeed     defSite  0.09019827 0.37214457  0.01666861 0.86992356
     n_species    delta_r phylo_effect
cor         97 0.29866071       Strong
cor1        97 0.38363631       Strong
cor2        97 0.12543041       Strong
cor3       100 0.02935463         Weak
cor4       100 0.10123811       Strong
cor5       100 0.07352966     Moderate

library(ggplot2)
library(ggrepel)

# 图1: 原始 vs PIC 相关性散点图
p1 <- ggplot(pic_results, aes(x = raw_r, y = pic_r)) +
  geom_hline(yintercept = 0, linetype = "solid", alpha = 0.3) +
  geom_vline(xintercept = 0, linetype = "solid", alpha = 0.3) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray50") +
  geom_point(aes(color = sensitivity, size = abs(delta_r)), alpha = 0.7) +
  scale_color_manual(values = c("shade" = "#27AE60", 
                               "tempSite" = "#E67E22", 
                               "defSite" = "#8E44AD")) +
  scale_size_continuous(range = c(2, 6), name = "|Δr|") +
  labs(
    title = "Effect of Phylogenetic Correction on Trait-Sensitivity Correlations",
    subtitle = "Points below diagonal: phylogeny inflates correlation; Above: phylogeny masks correlation",
    x = "Raw Correlation (r)",
    y = "PIC Correlation (r)",
    color = "Environmental\nSensitivity"
  ) +
  theme_minimal() +
  coord_fixed()

# 只标记大效应的点
if(any(abs(pic_results$delta_r) > 0.15)) {
  p1 <- p1 + geom_text_repel(
    data = pic_results[abs(pic_results$delta_r) > 0.15, ],
    aes(label = paste(functional_trait, "-", sensitivity)),
    size = 3, max.overlaps = 15
  )
}

print(p1)

# 图2: Delta r 条形图（选择前20个最大效应）
top_effects <- pic_results %>%
  arrange(desc(abs(delta_r))) %>%
  head(20) %>%
  mutate(
    trait_sens = paste(functional_trait, sensitivity, sep = "-"),
    trait_sens = factor(trait_sens, levels = trait_sens[order(delta_r)])
  )

p2 <- ggplot(top_effects, aes(x = trait_sens, y = delta_r, fill = phylo_effect)) +
  geom_bar(stat = "identity") +
  geom_hline(yintercept = 0, linetype = "solid") +
  coord_flip() +
  scale_fill_manual(values = c("Strong" = "#E74C3C", 
                              "Moderate" = "#F39C12", 
                              "Weak" = "#95A5A6")) +
  labs(
    title = "Top 20 Phylogenetic Effects on Correlations",
    subtitle = "Negative: phylogeny inflates correlation; Positive: phylogeny masks correlation",
    x = "Trait-Sensitivity Pair",
    y = "Change in Correlation (raw - PIC)",
    fill = "Effect\nStrength"
  ) +
  theme_minimal()

print(p2)

# 分析PIC效应
analyze_pic_effects <- function(pic_results) {
  cat("\n=== KEY FINDINGS FROM PIC ANALYSIS ===\n\n")
  
  # 1. 系统发育掩盖的关系
  masked_relationships <- pic_results %>%
    filter(abs(pic_r) > abs(raw_r) + 0.1) %>%
    arrange(desc(abs(pic_r)))
  
  if(nrow(masked_relationships) > 0) {
    cat("1. Relationships MASKED by phylogeny (stronger after PIC):\n")
    print(masked_relationships[, c("functional_trait", "sensitivity", "raw_r", "pic_r", "delta_r")])
    cat("\n")
  }
  
  # 2. 系统发育驱动的关系
  phylo_driven <- pic_results %>%
    filter(abs(raw_r) > abs(pic_r) + 0.1) %>%
    arrange(desc(abs(delta_r)))
  
  if(nrow(phylo_driven) > 0) {
    cat("2. Relationships DRIVEN by phylogeny (weaker after PIC):\n")
    print(phylo_driven[, c("functional_trait", "sensitivity", "raw_r", "pic_r", "delta_r")])
    cat("\n")
  }
  
  # 3. 方向改变的关系
  sign_change <- pic_results %>%
    filter(sign(raw_r) != sign(pic_r) & abs(pic_r) > 0.02)
  
  if(nrow(sign_change) > 0) {
    cat("3. Relationships that CHANGED DIRECTION after PIC:\n")
    print(sign_change[, c("functional_trait", "sensitivity", "raw_r", "pic_r", "delta_r")])
    cat("\n")
  }
  
  # 4. 统计总结
  cat("4. SUMMARY STATISTICS:\n")
  cat("   Total comparisons:", nrow(pic_results), "\n")
  cat("   Strong phylogenetic effects (|Δr| > 0.1):", sum(abs(pic_results$delta_r) > 0.1), "\n")
  cat("   Direction changes:", sum(sign(pic_results$raw_r) != sign(pic_results$pic_r)), "\n")
  cat("   Mean absolute change:", round(mean(abs(pic_results$delta_r)), 3), "\n")
}

analyze_pic_effects(pic_results)


=== KEY FINDINGS FROM PIC ANALYSIS ===

1. Relationships MASKED by phylogeny (stronger after PIC):
      functional_trait sensitivity        raw_r      pic_r    delta_r
cor12            leafP       shade  0.061840526 -0.3107353  0.3725758
cor6             leafN       shade  0.027374422 -0.3093823  0.3367568
cor9            leafNP       shade  0.001199049 -0.3048269  0.3060260
cor15            maxHt       shade -0.103222247  0.3030420 -0.4062642
cor21              SLA       shade  0.009830698 -0.2993291  0.3091598
cor13            leafP    tempSite -0.008608089 -0.2777512  0.2691431
cor7             leafN    tempSite  0.019175087 -0.2764658  0.2956408
cor16            maxHt    tempSite -0.043163030  0.2751147 -0.3182778
cor10           leafNP    tempSite  0.127948126 -0.2749924  0.4029405
cor22              SLA    tempSite  0.029624993 -0.2704302  0.3000552
cor18           maxDbh       shade -0.058818838  0.2614391 -0.3202579
cor24           woodSG       shade  0.070732950 -0.2516071  0.3223400
cor19           maxDbh    tempSite  0.024688017  0.2300730 -0.2053849
cor            gmPerCm       shade  0.075922244 -0.2227385  0.2986607

2. Relationships DRIVEN by phylogeny (weaker after PIC):
      functional_trait sensitivity     raw_r       pic_r   delta_r
cor26           woodSG     defSite 0.1198987 -0.01626167 0.1361604
cor2           gmPerCm     defSite 0.1137439 -0.01168649 0.1254304

3. Relationships that CHANGED DIRECTION after PIC:
      functional_trait sensitivity        raw_r       pic_r     delta_r
cor            gmPerCm       shade  0.075922244 -0.22273847  0.29866071
cor1           gmPerCm    tempSite  0.172258930 -0.21137738  0.38363631
cor4         gmPerSeed    tempSite  0.053709908 -0.04752820  0.10123811
cor6             leafN       shade  0.027374422 -0.30938233  0.33675675
cor7             leafN    tempSite  0.019175087 -0.27646576  0.29564085
cor8             leafN     defSite  0.035140648 -0.02416435  0.05930500
cor9            leafNP       shade  0.001199049 -0.30482695  0.30602600
cor10           leafNP    tempSite  0.127948126 -0.27499238  0.40294051
cor11           leafNP     defSite  0.055458029 -0.03096513  0.08642316
cor12            leafP       shade  0.061840526 -0.31073530  0.37257582
cor14            leafP     defSite  0.018675821 -0.02750221  0.04617803
cor15            maxHt       shade -0.103222247  0.30304200 -0.40626424
cor16            maxHt    tempSite -0.043163030  0.27511474 -0.31827777
cor18           maxDbh       shade -0.058818838  0.26143911 -0.32025795
cor21              SLA       shade  0.009830698 -0.29932907  0.30915977
cor22              SLA    tempSite  0.029624993 -0.27043021  0.30005520
cor24           woodSG       shade  0.070732950 -0.25160709  0.32234004
cor25           woodSG    tempSite  0.185850578 -0.23139111  0.41724168

4. SUMMARY STATISTICS:
   Total comparisons: 27 
   Strong phylogenetic effects (|<U+0394>r| > 0.1): 19 
   Direction changes: 21 
   Mean absolute change: 0.224

# 创建一个综合的4面板图
library(gridExtra)

# Panel A: 散点图
panel_a <- p1 + theme(legend.position = "none") + 
  labs(title = "A. Raw vs PIC Correlations")

# Panel B: 最强效应的条形图
top_10 <- pic_results %>%
  arrange(desc(abs(delta_r))) %>%
  head(10) %>%
  mutate(trait_sens = paste(substr(functional_trait, 1, 8), 
                            substr(sensitivity, 1, 4), sep = "-"))

panel_b <- ggplot(top_10, aes(x = reorder(trait_sens, abs(delta_r)), y = delta_r)) +
  geom_bar(stat = "identity", fill = ifelse(top_10$delta_r > 0, "#3498DB", "#E74C3C")) +
  coord_flip() +
  theme_minimal() +
  labs(title = "B. Top 10 Phylogenetic Effects", x = "", y = "Δr")

# Panel C: 按敏感性分组的箱线图
panel_c <- ggplot(pic_results, aes(x = sensitivity, y = abs(delta_r), fill = sensitivity)) +
  geom_boxplot(alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.3) +
  scale_fill_manual(values = c("shade" = "#27AE60", 
                              "tempSite" = "#E67E22", 
                              "defSite" = "#8E44AD")) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(title = "C. Effect Size by Sensitivity", x = "", y = "|Δr|")

# Panel D: 显著性比较
sig_data <- pic_results %>%
  mutate(
    raw_sig = ifelse(raw_p < 0.05, "Significant", "Not Significant"),
    pic_sig = ifelse(pic_p < 0.05, "Significant", "Not Significant"),
    category = paste(raw_sig, "→", pic_sig)
  ) %>%
  count(category)

panel_d <- ggplot(sig_data, aes(x = category, y = n, fill = category)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") +
  labs(title = "D. Significance Changes", x = "", y = "Count")

# 组合所有面板
combined_plot <- grid.arrange(panel_a, panel_b, panel_c, panel_d, 
                             ncol = 2, nrow = 2)

cat("\n=== FINAL SUMMARY ===\n\n")


=== FINAL SUMMARY ===

# 最重要的发现
important_findings <- pic_results %>%
  filter(abs(delta_r) > 0.15) %>%
  arrange(desc(abs(delta_r))) %>%
  head(5)

cat("TOP 5 PHYLOGENETIC EFFECTS:\n")

TOP 5 PHYLOGENETIC EFFECTS:

for(i in 1:nrow(important_findings)) {
  row <- important_findings[i,]
  cat(sprintf("%d. %s vs %s:\n", i, row$functional_trait, row$sensitivity))
  cat(sprintf("   Raw r = %.3f → PIC r = %.3f (Δ = %.3f)\n", 
              row$raw_r, row$pic_r, row$delta_r))
  cat(sprintf("   Interpretation: %s\n\n",
              ifelse(row$delta_r > 0, 
                     "Phylogeny masks true correlation",
                     "Phylogeny creates spurious correlation")))
}

1. woodSG vs tempSite:
   Raw r = 0.186 <U+2192> PIC r = -0.231 (<U+0394> = 0.417)
   Interpretation: Phylogeny masks true correlation

2. maxHt vs shade:
   Raw r = -0.103 <U+2192> PIC r = 0.303 (<U+0394> = -0.406)
   Interpretation: Phylogeny creates spurious correlation

3. leafNP vs tempSite:
   Raw r = 0.128 <U+2192> PIC r = -0.275 (<U+0394> = 0.403)
   Interpretation: Phylogeny masks true correlation

4. gmPerCm vs tempSite:
   Raw r = 0.172 <U+2192> PIC r = -0.211 (<U+0394> = 0.384)
   Interpretation: Phylogeny masks true correlation

5. leafP vs shade:
   Raw r = 0.062 <U+2192> PIC r = -0.311 (<U+0394> = 0.373)
   Interpretation: Phylogeny masks true correlation

# 整体模式
cat("OVERALL PATTERNS:\n")

OVERALL PATTERNS:

cat("- Mean absolute phylogenetic effect:", round(mean(abs(pic_results$delta_r)), 3), "\n")

- Mean absolute phylogenetic effect: 0.224

cat("- Proportion with strong effects (|Δr| > 0.1):", 
    round(mean(abs(pic_results$delta_r) > 0.1), 2), "\n")

- Proportion with strong effects (|<U+0394>r| > 0.1): 0.7

cat("- Proportion with direction change:", 
    round(mean(sign(pic_results$raw_r) != sign(pic_results$pic_r)), 2), "\n")

- Proportion with direction change: 0.78

# 检查tree对象是否存在
if(!exists("tree") || is.null(tree)) {
  cat("Error: Phylogenetic tree not found!\n")
  cat("Please load your tree first.\n")
  
  # 如果你的树在其他变量中（比如trAll）
  if(exists("trAll")) {
    tree <- trAll
    cat("Using trAll as phylogenetic tree\n")
  } else {
    stop("No phylogenetic tree found. Please load it first.")
  }
}

# 检查tree的类型
cat("Tree class:", class(tree), "\n")

Tree class: phylo

cat("Number of tips:", length(tree$tip.label), "\n")

Number of tips: 223

cat("First 5 species in tree:", head(tree$tip.label, 5), "\n")

First 5 species in tree: Cedrus brevifolia Cedrus libani Cedrus atlantica Tsuga mertensiana Tsuga heterophylla

# 检查matched_data
if(!exists("matched_data")) {
  stop("matched_data not found. Please run the data preparation steps first.")
}

cat("\nData dimensions:", dim(matched_data), "\n")


Data dimensions: 284 53

cat("Species with tree names:", sum(!is.na(matched_data$tree_name)), "\n")

Species with tree names: 284

# PGLS 分析：控制系统发育相关性
library(nlme)
library(ape)

# 准备数据
analysis_data <- matched_data %>%
  mutate(
    continent = case_when(
      region == "WEA" ~ "EU",
      region %in% c("ENA", "WNA") ~ "NA",
      TRUE ~ NA_character_
    )
  ) %>%
  filter(!is.na(continent)) %>%
  mutate(
    log_seed_mass = log(gmPerSeed + 0.001),
    shade_std = scale(bfec.shade)[,1]
  )

cat("Data prepared:\n")

Data prepared:

cat("  Total species:", nrow(analysis_data), "\n")

  Total species: 216

cat("  EU species:", sum(analysis_data$continent == "EU"), "\n")

  EU species: 74

cat("  NA species:", sum(analysis_data$continent == "NA"), "\n")

  NA species: 142

# 找共同物种
common_sp <- intersect(analysis_data$tree_name, tree$tip.label)
cat("\nSpecies for PGLS analysis:", length(common_sp), "\n")


Species for PGLS analysis: 100

# 筛选数据和树
data_sub <- analysis_data[analysis_data$tree_name %in% common_sp, ]
tree_sub <- keep.tip(tree, common_sp)
data_sub <- data_sub[match(tree_sub$tip.label, data_sub$tree_name), ]

# 创建系统发育相关结构
cor_brownian <- corBrownian(phy = tree_sub)

# Model 1: 无交互
model1 <- gls(
  log_seed_mass ~ shade_std + continent,
  data = data_sub,
  correlation = cor_brownian,
  method = "ML"
)

# Model 2: 有交互
model2 <- gls(
  log_seed_mass ~ shade_std * continent,
  data = data_sub,
  correlation = cor_brownian,
  method = "ML"
)

# 显示结果
cat("\n=== Model 1: Additive ===\n")


=== Model 1: Additive ===

print(summary(model1))

Generalized least squares fit by maximum likelihood
  Model: log_seed_mass ~ shade_std + continent 
  Data: data_sub 
       AIC      BIC    logLik
  487.4028 497.8235 -239.7014

Correlation Structure: corBrownian
 Formula: ~1 
 Parameter estimate(s):
numeric(0)

Coefficients:
                Value Std.Error    t-value p-value
(Intercept) -3.522907  5.447158 -0.6467423  0.5193
shade_std   -0.035933  0.084528 -0.4250954  0.6717
continentNA  0.202307  0.216001  0.9366026  0.3513

 Correlation: 
            (Intr) shd_st
shade_std    0.007       
continentNA -0.023 -0.441

Standardized residuals:
        Min          Q1         Med          Q3         Max 
-0.29173456 -0.06260276  0.07178814  0.34023621  0.56048821 

Residual standard error: 10.81914 
Degrees of freedom: 100 total; 97 residual

cat("\n=== Model 2: Interactive ===\n")


=== Model 2: Interactive ===

print(summary(model2))

Generalized least squares fit by maximum likelihood
  Model: log_seed_mass ~ shade_std * continent 
  Data: data_sub 
       AIC      BIC    logLik
  488.9864 502.0123 -239.4932

Correlation Structure: corBrownian
 Formula: ~1 
 Parameter estimate(s):
numeric(0)

Coefficients:
                          Value Std.Error    t-value p-value
(Intercept)           -3.462490  5.464901 -0.6335870  0.5279
shade_std              0.039119  0.145780  0.2683423  0.7890
continentNA            0.113781  0.257899  0.4411842  0.6601
shade_std:continentNA -0.116833  0.184602 -0.6328920  0.5283

 Correlation: 
                      (Intr) shd_st cntnNA
shade_std              0.018              
continentNA           -0.028 -0.657       
shade_std:continentNA -0.017 -0.813  0.542

Standardized residuals:
        Min          Q1         Med          Q3         Max 
-0.29559248 -0.06022049  0.07177805  0.33988159  0.56501593 

Residual standard error: 10.79664 
Degrees of freedom: 100 total; 96 residual

cat("\n=== Model Comparison ===\n")


=== Model Comparison ===

print(anova(model1, model2))

       Model df      AIC      BIC    logLik   Test   L.Ratio p-value
model1     1  4 487.4028 497.8235 -239.7014                         
model2     2  5 488.9864 502.0123 -239.4932 1 vs 2 0.4163739  0.5188

prepare_continental_data <- function(matched_data) {
  # 映射region到continent
  data_with_continent <- matched_data %>%
    mutate(
      continent = case_when(
        region == "WEA" ~ "EU",
        region %in% c("ENA", "WNA") ~ "NA",
        TRUE ~ NA_character_
      )
    ) %>%
    filter(
      !is.na(continent),
      !is.na(bfec.shade),
      !is.na(gmPerSeed),
      !is.na(tree_name)
    ) %>%
    mutate(
      log_seed_mass = log(gmPerSeed + 0.001),
      shade_std = scale(bfec.shade)[,1]
    )
  
  cat("Data prepared:\n")
  cat("  Total species:", nrow(data_with_continent), "\n")
  cat("  EU species:", sum(data_with_continent$continent == "EU"), "\n")
  cat("  NA species:", sum(data_with_continent$continent == "NA"), "\n")
  
  return(data_with_continent)
}

# 重新运行
analysis_data <- prepare_continental_data(matched_data)

Data prepared:
  Total species: 216 
  EU species: 74 
  NA species: 142

# 然后运行PGLS分析
library(nlme)
library(ape)

run_phylo_ancova <- function(data, tree) {
  # 找共同物种
  common_sp <- intersect(data$tree_name, tree$tip.label)
  cat("\nSpecies for PGLS analysis:", length(common_sp), "\n")
  
  # 筛选数据和树
  data_sub <- data[data$tree_name %in% common_sp, ]
  
  # 修剪树
  tree_sub <- keep.tip(tree, common_sp)
  
  # 按树的顺序排序数据
  data_sub <- data_sub[match(tree_sub$tip.label, data_sub$tree_name), ]
  
  # 创建系统发育相关结构
  cor_brownian <- corBrownian(phy = tree_sub)
  
  # 模型1：无交互作用
  cat("\n=== Model 1: Additive Effects ===\n")
  model1 <- gls(
    log_seed_mass ~ shade_std + continent,
    data = data_sub,
    correlation = cor_brownian,
    method = "ML"
  )
  print(summary(model1))
  
  # 模型2：有交互作用
  cat("\n=== Model 2: Interactive Effects ===\n")
  model2 <- gls(
    log_seed_mass ~ shade_std * continent,
    data = data_sub,
    correlation = cor_brownian,
    method = "ML"
  )
  print(summary(model2))
  
  # 模型比较
  cat("\n=== Model Comparison ===\n")
  print(anova(model1, model2))
  
  return(list(additive = model1, interactive = model2))
}

# 运行分析
pgls_results <- run_phylo_ancova(analysis_data, tree)


Species for PGLS analysis: 100 

=== Model 1: Additive Effects ===

Generalized least squares fit by maximum likelihood
  Model: log_seed_mass ~ shade_std + continent 
  Data: data_sub 
       AIC      BIC    logLik
  487.4028 497.8235 -239.7014

Correlation Structure: corBrownian
 Formula: ~1 
 Parameter estimate(s):
numeric(0)

Coefficients:
                Value Std.Error    t-value p-value
(Intercept) -3.522907  5.447158 -0.6467423  0.5193
shade_std   -0.035933  0.084528 -0.4250954  0.6717
continentNA  0.202307  0.216001  0.9366026  0.3513

 Correlation: 
            (Intr) shd_st
shade_std    0.007       
continentNA -0.023 -0.441

Standardized residuals:
        Min          Q1         Med          Q3         Max 
-0.29173456 -0.06260276  0.07178814  0.34023621  0.56048821 

Residual standard error: 10.81914 
Degrees of freedom: 100 total; 97 residual

=== Model 2: Interactive Effects ===

Generalized least squares fit by maximum likelihood
  Model: log_seed_mass ~ shade_std * continent 
  Data: data_sub 
       AIC      BIC    logLik
  488.9864 502.0123 -239.4932

Correlation Structure: corBrownian
 Formula: ~1 
 Parameter estimate(s):
numeric(0)

Coefficients:
                          Value Std.Error    t-value p-value
(Intercept)           -3.462490  5.464901 -0.6335870  0.5279
shade_std              0.039119  0.145780  0.2683423  0.7890
continentNA            0.113781  0.257899  0.4411842  0.6601
shade_std:continentNA -0.116833  0.184602 -0.6328920  0.5283

 Correlation: 
                      (Intr) shd_st cntnNA
shade_std              0.018              
continentNA           -0.028 -0.657       
shade_std:continentNA -0.017 -0.813  0.542

Standardized residuals:
        Min          Q1         Med          Q3         Max 
-0.29559248 -0.06022049  0.07177805  0.33988159  0.56501593 

Residual standard error: 10.79664 
Degrees of freedom: 100 total; 96 residual

=== Model Comparison ===
       Model df      AIC      BIC    logLik   Test   L.Ratio p-value
model1     1  4 487.4028 497.8235 -239.7014                         
model2     2  5 488.9864 502.0123 -239.4932 1 vs 2 0.4163739  0.5188

library(ggplot2)
library(gridExtra)

# 创建综合可视化
visualize_continental_effects <- function(data) {
  # 基础统计
  cat("\n=== Basic Statistics ===\n")
  stats <- data %>%
    group_by(continent) %>%
    summarise(
      n = n(),
      mean_shade = mean(bfec.shade),
      sd_shade = sd(bfec.shade),
      mean_seed = mean(log_seed_mass),
      sd_seed = sd(log_seed_mass),
      correlation = cor(bfec.shade, log_seed_mass)
    )
  print(stats)
  
  # 图1：散点图with回归线
  p1 <- ggplot(data, aes(x = bfec.shade, y = log_seed_mass, color = continent)) +
    geom_point(alpha = 0.6, size = 2.5) +
    geom_smooth(method = "lm", se = TRUE, size = 1.2) +
    scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C"),
                      labels = c("EU" = "Europe", "NA" = "North America")) +
    labs(
      title = "Shade Sensitivity vs Seed Mass by Continent",
      subtitle = paste("EU: n =", sum(data$continent == "EU"), 
                      "; NA: n =", sum(data$continent == "NA")),
      x = "Shade Sensitivity (βshade)",
      y = "Log(Seed Mass)",
      color = "Continent"
    ) +
    theme_minimal() +
    theme(legend.position = "top", text = element_text(size = 12))
  
  # 图2：密度图比较shade sensitivity
  p2 <- ggplot(data, aes(x = bfec.shade, fill = continent)) +
    geom_density(alpha = 0.5) +
    geom_rug(aes(color = continent), alpha = 0.3) +
    scale_fill_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
    scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
    labs(
      title = "Distribution of Shade Sensitivity",
      x = "Shade Sensitivity",
      y = "Density"
    ) +
    theme_minimal() +
    theme(legend.position = "top")
  
  # 图3：箱线图比较
  p3_data <- data %>%
    select(continent, bfec.shade, log_seed_mass) %>%
    pivot_longer(cols = c(bfec.shade, log_seed_mass), 
                 names_to = "variable", values_to = "value") %>%
    mutate(variable = recode(variable,
                             "bfec.shade" = "Shade Sensitivity",
                             "log_seed_mass" = "Log(Seed Mass)"))
  
  p3 <- ggplot(p3_data, aes(x = continent, y = value, fill = continent)) +
    geom_boxplot(alpha = 0.7) +
    geom_jitter(width = 0.1, alpha = 0.2) +
    facet_wrap(~ variable, scales = "free_y") +
    scale_fill_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
    labs(
      title = "Continental Comparison",
      x = "Continent",
      y = "Value"
    ) +
    theme_minimal() +
    theme(legend.position = "none")
  
  # 图4：斜率比较（从简单lm获取）
  eu_model <- lm(log_seed_mass ~ bfec.shade, data = filter(data, continent == "EU"))
  na_model <- lm(log_seed_mass ~ bfec.shade, data = filter(data, continent == "NA"))
  
  slope_data <- data.frame(
    Continent = c("Europe", "North America"),
    Slope = c(coef(eu_model)[2], coef(na_model)[2]),
    SE = c(summary(eu_model)$coefficients[2, 2], 
           summary(na_model)$coefficients[2, 2])
  )
  
  p4 <- ggplot(slope_data, aes(x = Continent, y = Slope, fill = Continent)) +
    geom_bar(stat = "identity", alpha = 0.7) +
    geom_errorbar(aes(ymin = Slope - SE, ymax = Slope + SE), width = 0.2) +
    scale_fill_manual(values = c("Europe" = "#3498DB", "North America" = "#E74C3C")) +
    geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
    labs(
      title = "Shade Effect on Seed Mass",
      subtitle = "(Simple linear regression)",
      y = "Regression Slope"
    ) +
    theme_minimal() +
    theme(legend.position = "none")
  
  # 组合图
  grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
  
  return(list(p1 = p1, p2 = p2, p3 = p3, p4 = p4, stats = stats))
}

# 创建可视化
viz_results <- visualize_continental_effects(analysis_data)


=== Basic Statistics ===
# A tibble: 2 x 7
  continent     n mean_shade sd_shade mean_seed sd_seed correlation
  <chr>     <int>      <dbl>    <dbl>     <dbl>   <dbl>       <dbl>
1 EU           74     -0.960    0.829     -1.88    2.25      0.192 
2 NA          142     -0.967    2.11      -2.32    2.44     -0.0150

# 创建结果总结可视化
library(ggplot2)
library(gridExtra)

# 准备数据用于可视化
create_summary_plots <- function() {
  # 效应大小比较
  effect_data <- data.frame(
    Continent = c("Europe", "North America"),
    Effect = c(0.060, 0.118),
    SE = c(0.045, 0.045),  # 近似值
    Type = "Shade Effect on Seed Mass"
  )
  
  p1 <- ggplot(effect_data, aes(x = Continent, y = Effect, fill = Continent)) +
    geom_bar(stat = "identity", alpha = 0.8) +
    geom_errorbar(aes(ymin = Effect - SE, ymax = Effect + SE), width = 0.2) +
    scale_fill_manual(values = c("Europe" = "#3498DB", "North America" = "#E74C3C")) +
    labs(title = "A. Shade → Fecundity Effect",
         subtitle = "NA effect is 2× stronger (though interaction p=0.48)",
         y = "Regression Coefficient") +
    theme_minimal() +
    theme(legend.position = "none") +
    geom_text(aes(label = round(Effect, 3)), vjust = -0.5)
  
  # 进化速率比较
  rate_data <- data.frame(
    Continent = rep(c("Europe", "North America"), 2),
    Rate = c(0.5736, 0.1423, 0.1529, 0.1971),
    Trait = rep(c("Shade Sensitivity", "Seed Mass"), each = 2)
  )
  
  p2 <- ggplot(rate_data, aes(x = Continent, y = Rate, fill = Continent)) +
    geom_bar(stat = "identity", alpha = 0.8) +
    facet_wrap(~Trait, scales = "free_y") +
    scale_fill_manual(values = c("Europe" = "#3498DB", "North America" = "#E74C3C")) +
    labs(title = "B. Evolution Rates (σ²)",
         subtitle = "EU: fast shade evolution; NA: fast seed evolution",
         y = "Evolution Rate") +
    theme_minimal() +
    theme(legend.position = "none") +
    geom_text(aes(label = round(Rate, 3)), vjust = -0.5)
  
  # PIC相关性
  pic_data <- data.frame(
    Continent = c("Europe", "North America"),
    Correlation = c(0.210, 0.207),
    P_value = c(0.096, 0.030),
    Significant = c("Marginal", "Significant")
  )
  
  p3 <- ggplot(pic_data, aes(x = Continent, y = Correlation, fill = Significant)) +
    geom_bar(stat = "identity", alpha = 0.8) +
    scale_fill_manual(values = c("Marginal" = "#95A5A6", "Significant" = "#27AE60")) +
    labs(title = "C. Evolutionary Correlation (PIC)",
         subtitle = "Shade-Seed coevolution",
         y = "PIC Correlation") +
    theme_minimal() +
    geom_text(aes(label = paste("r =", round(Correlation, 3), "\np =", round(P_value, 3))), 
              vjust = 1.5)
  
  grid.arrange(p1, p2, p3, ncol = 3)
}

create_summary_plots()

matched_data_isp <- matched_data %>%
  mutate(genus_lower = tolower(genus)) %>%
  left_join(
    results_isp_df %>%
      rename(genus_lower = genus) %>%
      select(genus_lower, eu_value, eu_sd, na_value, na_sd),
    by = "genus_lower"
  ) %>%
  select(-genus_lower) %>%
  mutate(
    ISP_mean = case_when(
      region == "WEA" ~ eu_value,
      region %in% c("ENA", "WNA") ~ na_value,
      TRUE ~ NA_real_
    ),
    ISP_sd = case_when(
      region == "WEA" ~ eu_sd,
      region %in% c("ENA", "WNA") ~ na_sd,
      TRUE ~ NA_real_
    ),
    log_ISP = log(ISP_mean + 0.001)
  ) %>%
  select(-eu_value, -eu_sd, -na_value, -na_sd)

cat("\n=== ISP Integration Results ===\n")


=== ISP Integration Results ===

cat("Total species:", nrow(matched_data_isp), "\n")

Total species: 284

cat("Species with ISP data:", sum(!is.na(matched_data_isp$ISP_mean)), "\n")

Species with ISP data: 149

cat("  EU:", sum(matched_data_isp$region == "WEA" & !is.na(matched_data_isp$ISP_mean)), "\n")

  EU: 53

cat("  NA:", sum(matched_data_isp$region %in% c("ENA", "WNA") & !is.na(matched_data_isp$ISP_mean)), "\n")

  NA: 96

# 现在做系统发育信号分析
library(phytools)

data_with_isp <- matched_data_isp %>% 
  filter(!is.na(log_ISP)) %>%
  select(tree_name, log_ISP) %>%
  distinct()

isp_values <- setNames(data_with_isp$log_ISP, data_with_isp$tree_name)
common_sp <- intersect(names(isp_values), tree$tip.label)

cat("\nSpecies for phylogenetic analysis:", length(common_sp), "\n")


Species for phylogenetic analysis: 75

if(length(common_sp) >= 10) {
  tree_isp <- keep.tip(tree, common_sp)
  isp_final <- isp_values[common_sp]
  
  lambda_test <- phylosig(tree_isp, isp_final, method = "lambda", test = TRUE)
  K_test <- phylosig(tree_isp, isp_final, method = "K", test = TRUE, nsim = 999)
  
  cat("\n=== ISP Phylogenetic Signal ===\n")
  cat("Pagel's λ =", round(lambda_test$lambda, 3), "  P =", format.pval(lambda_test$P), "\n")
  cat("Blomberg's K =", round(K_test$K, 3), "  P =", format.pval(K_test$P), "\n")
} else {
  cat("Not enough species for analysis\n")
}


=== ISP Phylogenetic Signal ===
Pagel's <U+03BB> = 0.999   P = < 2.22e-16 
Blomberg's K = 0.69   P = 0.001001

library(geiger)
library(phytools)

compare_evolutionary_patterns <- function(data, tree) {
  cat("\n=== Evolutionary Pattern Analysis ===\n")
  
  # 添加 continent 列
  data <- data %>%
    mutate(
      continent = case_when(
        region == "WEA" ~ "EU",
        region %in% c("ENA", "WNA") ~ "NA",
        TRUE ~ NA_character_
      ),
      log_seed_mass = log(gmPerSeed + 0.001)
    ) %>%
    filter(!is.na(continent))
  
  results <- list()
  
  for(cont in c("EU", "NA")) {
    cat("\nAnalyzing", cont, "...\n")
    
    cont_data <- data %>% 
      filter(continent == cont) %>%
      filter(tree_name %in% tree$tip.label)
    
    cont_species <- cont_data$tree_name
    
    if(length(cont_species) > 10) {
      tree_cont <- keep.tip(tree, cont_species)
      
      shade_vals <- cont_data$bfec.shade
      names(shade_vals) <- cont_data$tree_name
      shade_vals <- shade_vals[tree_cont$tip.label]
      
      seed_vals <- cont_data$log_seed_mass
      names(seed_vals) <- cont_data$tree_name
      seed_vals <- seed_vals[tree_cont$tip.label]
      
      tryCatch({
        fit_shade <- fitContinuous(tree_cont, shade_vals, model = "BM")
        fit_seed <- fitContinuous(tree_cont, seed_vals, model = "BM")
        
        K_shade <- phylosig(tree_cont, shade_vals, method = "K", test = TRUE, nsim = 99)
        K_seed <- phylosig(tree_cont, seed_vals, method = "K", test = TRUE, nsim = 99)
        
        pic_shade <- pic(shade_vals, tree_cont)
        pic_seed <- pic(seed_vals, tree_cont)
        pic_cor <- cor.test(pic_shade, pic_seed)
        
        results[[cont]] <- list(
          n_species = length(cont_species),
          shade_rate = fit_shade$opt$sigsq,
          seed_rate = fit_seed$opt$sigsq,
          shade_K = K_shade$K,
          seed_K = K_seed$K,
          pic_correlation = pic_cor$estimate,
          pic_p_value = pic_cor$p.value
        )
        
        cat("  N species:", length(cont_species), "\n")
        cat("  Shade evolution rate:", round(fit_shade$opt$sigsq, 4), "\n")
        cat("  Seed mass evolution rate:", round(fit_seed$opt$sigsq, 4), "\n")
        cat("  Shade K:", round(K_shade$K, 3), "p =", round(K_shade$P, 3), "\n")
        cat("  Seed K:", round(K_seed$K, 3), "p =", round(K_seed$P, 3), "\n")
        cat("  PIC correlation:", round(pic_cor$estimate, 3), 
            "p =", round(pic_cor$p.value, 3), "\n")
        
      }, error = function(e) {
        cat("  Error:", e$message, "\n")
      })
    } else {
      cat("  Not enough species with tree data\n")
    }
  }
  
  # 比较结果
  if(length(results) == 2) {
    cat("\n=== Continental Comparison ===\n")
    cat("Shade evolution rate ratio (NA/EU):", 
        round(results[["NA"]]$shade_rate / results[["EU"]]$shade_rate, 2), "\n")
    cat("Seed evolution rate ratio (NA/EU):", 
        round(results[["NA"]]$seed_rate / results[["EU"]]$seed_rate, 2), "\n")
    cat("PIC correlation difference:", 
        round(results[["NA"]]$pic_correlation - results[["EU"]]$pic_correlation, 3), "\n")
  }
  
  return(results)
}

# 运行
evol_patterns <- compare_evolutionary_patterns(matched_data_isp, tree)


=== Evolutionary Pattern Analysis ===

Analyzing EU ...
  N species: 36 
  Shade evolution rate: 0.8525 
  Seed mass evolution rate: 0.1019 
  Shade K: 0.004 p = 0.96 
  Seed K: 0.244 p = 0.01 
  PIC correlation: -0.361 p = 0.033 

Analyzing NA ...
  N species: 64 
  Shade evolution rate: 1.427 
  Seed mass evolution rate: 0.4614 
  Shade K: 0.002 p = 0.727 
  Seed K: 0.08 p = 0.01 
  PIC correlation: -0.102 p = 0.425 

=== Continental Comparison ===
Shade evolution rate ratio (NA/EU): 1.67 
Seed evolution rate ratio (NA/EU): 4.53 
PIC correlation difference: 0.259

# 修复1：比较性状的系统发育信号
compare_all_traits_phylosignal <- function(data, tree) {
  library(phytools)
  
  cat("\n=== Comparative Phylogenetic Signal Analysis ===\n")
  
  # 只用可用的数值性状
  traits <- c("log_ISP", "bfec.shade", "bfec.tempSite", "bfec.defSite",
              "gmPerCm", "gmPerSeed", "leafN", "maxHt", "maxDbh", "SLA", "woodSG")
  
  results <- data.frame()
  
  for(trait in traits) {
    if(trait %in% names(data)) {
      trait_data <- data %>% 
        filter(!is.na(!!sym(trait))) %>%
        filter(is.finite(!!sym(trait)))  # 确保是有限数值
      
      if(nrow(trait_data) < 10) next
      
      trait_vals <- as.numeric(trait_data[[trait]])
      names(trait_vals) <- trait_data$tree_name
      
      common <- intersect(names(trait_vals), tree$tip.label)
      
      if(length(common) >= 10) {
        tree_temp <- keep.tip(tree, common)
        trait_final <- trait_vals[common]
        
        tryCatch({
          lambda_res <- phylosig(tree_temp, trait_final, method = "lambda", test = TRUE)
          K_res <- phylosig(tree_temp, trait_final, method = "K", test = TRUE, nsim = 99)
          
          results <- rbind(results, data.frame(
            Trait = trait,
            Category = case_when(
              trait %in% c("bfec.shade", "bfec.tempSite", "bfec.defSite") ~ "Sensitivity",
              trait == "log_ISP" ~ "Reproduction",
              TRUE ~ "Function"
            ),
            N = length(common),
            Lambda = round(lambda_res$lambda, 3),
            Lambda_P = lambda_res$P,
            K = round(K_res$K, 3),
            K_P = K_res$P
          ))
        }, error = function(e) {
          cat("  Error with", trait, ":", e$message, "\n")
        })
      }
    }
  }
  
  results <- results %>% arrange(desc(Lambda))
  print(results)
  return(results)
}

# 修复2：大洲分析 - 正确处理region格式
analyze_continental_isp_evolution <- function(data, tree) {
  library(geiger)
  library(phytools)
  
  results <- list()
  
  # 正确映射region
  region_map <- c("WEA" = "EU", "ENA" = "NA", "WNA" = "NA")
  data$continent <- region_map[data$region]
  
  for(cont in c("EU", "NA")) {
    cat("\n━━━", cont, "━━━\n")
    
    cont_data <- data %>% 
      filter(continent == cont, !is.na(log_ISP))
    
    if(nrow(cont_data) < 10) {
      cat("Not enough data\n")
      next
    }
    
    # 准备树和数据
    cont_species <- cont_data$tree_name
    tree_cont <- keep.tip(tree, intersect(tree$tip.label, cont_species))
    
    if(length(tree_cont$tip.label) < 10) next
    
    isp_vals <- as.numeric(cont_data$log_ISP)
    names(isp_vals) <- cont_data$tree_name
    isp_vals <- isp_vals[tree_cont$tip.label]
    
    # 系统发育信号
    lambda_cont <- phylosig(tree_cont, isp_vals, method = "lambda", test = TRUE)
    K_cont <- phylosig(tree_cont, isp_vals, method = "K", test = TRUE, nsim = 99)
    
    # 基础统计
    stats <- cont_data %>%
      summarise(
        mean_ISP = mean(exp(log_ISP), na.rm = TRUE),
        sd_ISP = sd(exp(log_ISP), na.rm = TRUE),
        cv_ISP = sd_ISP / mean_ISP
      )
    
    results[[cont]] <- list(
      n = length(isp_vals),
      stats = stats,
      lambda = lambda_cont$lambda,
      K = K_cont$K
    )
    
    cat("Sample size:", length(isp_vals), "species\n")
    cat("Mean ISP:", round(stats$mean_ISP, 3), "\n")
    cat("λ =", round(lambda_cont$lambda, 3), "  K =", round(K_cont$K, 3), "\n")
  }
  
  return(results)
}

# 运行
trait_comparison <- compare_all_traits_phylosignal(matched_data_isp, tree)


=== Comparative Phylogenetic Signal Analysis ===
           Trait     Category   N Lambda     Lambda_P     K        K_P
1        gmPerCm     Function  97  1.000 8.720774e-42 0.869 0.01010101
2          leafN     Function 100  1.000 3.499974e-61 1.257 0.01010101
3          maxHt     Function 100  1.000 1.304940e-56 1.102 0.01010101
4         maxDbh     Function  99  1.000 7.146564e-26 0.298 0.01010101
5            SLA     Function  97  1.000 4.168186e-56 1.064 0.01010101
6         woodSG     Function  97  1.000 7.499977e-32 0.440 0.01010101
7        log_ISP Reproduction  75  0.999 3.260669e-40 0.690 0.01010101
8      gmPerSeed     Function 100  0.924 9.006442e-12 0.016 0.11111111
9     bfec.shade  Sensitivity 101  0.000 1.000000e+00 0.002 0.93939394
10 bfec.tempSite  Sensitivity 101  0.000 1.000000e+00 0.001 0.97979798
11  bfec.defSite  Sensitivity 101  0.000 1.000000e+00 0.001 0.90909091

continental_results <- analyze_continental_isp_evolution(matched_data_isp, tree)


<U+2501><U+2501><U+2501> EU <U+2501><U+2501><U+2501>
Sample size: 27 species
Mean ISP: 0.554 
<U+03BB> = 1.001   K = 1.193 

<U+2501><U+2501><U+2501> NA <U+2501><U+2501><U+2501>
Sample size: 48 species
Mean ISP: 0.594 
<U+03BB> = 1   K = 1.925

# 修复版本 - ISP与生态策略
analyze_isp_ecological_strategies <- function(data, tree) {
  library(nlme)
  library(ggplot2)
  
  cat("\n=== ISP and Ecological Strategies ===\n")
  
  # 准备数据 - 创建缺失的列
  eco_data <- data %>%
    filter(!is.na(log_ISP), !is.na(bfec.shade), !is.na(gmPerSeed)) %>%
    mutate(
      log_seed_mass = log(gmPerSeed),
      continent = case_when(
        region == "WEA" ~ "EU",
        region %in% c("ENA", "WNA") ~ "NA",
        TRUE ~ NA_character_
      )
    ) %>%
    filter(!is.na(continent))
  
  cat("Species for analysis:", nrow(eco_data), "\n")
  cat("  EU:", sum(eco_data$continent == "EU"), "\n")
  cat("  NA:", sum(eco_data$continent == "NA"), "\n")
  
  # 基础相关性
  cat("\n1. Basic correlations:\n")
  cor_matrix <- eco_data %>%
    select(log_ISP, bfec.shade, log_seed_mass) %>%
    cor(use = "complete.obs")
  print(round(cor_matrix, 3))
  
  # 准备PGLS
  common_sp <- intersect(eco_data$tree_name, tree$tip.label)
  if(length(common_sp) < 10) {
    cat("Not enough species for PGLS\n")
    return(NULL)
  }
  
  eco_data <- eco_data[eco_data$tree_name %in% common_sp, ]
  tree_eco <- keep.tip(tree, common_sp)
  eco_data <- eco_data[match(tree_eco$tip.label, eco_data$tree_name), ]
  
  cor_brown <- corBrownian(phy = tree_eco)
  
  cat("\n2. Phylogenetically controlled relationships:\n")
  
  # ISP ~ Shade
  tryCatch({
    m1 <- gls(log_ISP ~ bfec.shade * continent, 
              data = eco_data, 
              correlation = cor_brown, 
              method = "ML")
    
    cat("\nISP ~ Shade × Continent:\n")
    print(summary(m1)$tTable)
  }, error = function(e) {
    cat("Error in model 1:", e$message, "\n")
  })
  
  # ISP ~ Seed mass (trade-off)
  tryCatch({
    m2 <- gls(log_ISP ~ log_seed_mass * continent,
              data = eco_data,
              correlation = cor_brown,
              method = "ML")
    
    cat("\nISP ~ Seed mass × Continent:\n")
    print(summary(m2)$tTable)
  }, error = function(e) {
    cat("Error in model 2:", e$message, "\n")
  })
  
  # 简单可视化
  cat("\n3. Visualization...\n")
  
  p1 <- ggplot(eco_data, aes(x = bfec.shade, y = exp(log_ISP), color = continent)) +
    geom_point(size = 3, alpha = 0.6) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
    scale_y_log10() +
    scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
    labs(title = "ISP vs Shade Sensitivity",
         x = "Shade Sensitivity", 
         y = "ISP (seeds/cm²)",
         color = "Continent") +
    theme_minimal()
  
  p2 <- ggplot(eco_data, aes(x = exp(log_seed_mass), y = exp(log_ISP), color = continent)) +
    geom_point(size = 3, alpha = 0.6) +
    geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
    scale_x_log10() + 
    scale_y_log10() +
    scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
    labs(title = "Seed Size-Number Trade-off",
         x = "Seed Mass (mg)", 
         y = "ISP (seeds/cm²)",
         color = "Continent") +
    theme_minimal()
  
  print(p1)
  print(p2)
  
  return(list(data = eco_data, n_species = nrow(eco_data)))
}

# 运行
ecological_results <- analyze_isp_ecological_strategies(matched_data_isp, tree)


=== ISP and Ecological Strategies ===
Species for analysis: 149 
  EU: 53 
  NA: 96 

1. Basic correlations:
              log_ISP bfec.shade log_seed_mass
log_ISP         1.000     -0.023         0.790
bfec.shade     -0.023      1.000        -0.004
log_seed_mass   0.790     -0.004         1.000

2. Phylogenetically controlled relationships:


ISP ~ Shade × Continent:
                             Value  Std.Error    t-value    p-value
(Intercept)            -2.43201940 1.18621953 -2.0502271 0.04403585
bfec.shade              0.01968203 0.01827018  1.0772761 0.28500363
continentNA             0.16738539 0.07538951  2.2202743 0.02959341
bfec.shade:continentNA -0.02046622 0.02192930 -0.9332821 0.35383740


ISP ~ Seed mass × Continent:
                                Value  Std.Error   t-value      p-value
(Intercept)               -2.45654292 1.08842993 -2.256960 0.0270892460
log_seed_mass             -0.07549011 0.03408093 -2.215025 0.0299679027
continentNA                0.15181887 0.04365804  3.477455 0.0008682577
log_seed_mass:continentNA  0.10771155 0.02953383  3.647057 0.0005022262

3. Visualization...

detailed_statistical_analysis <- function(matched_data_isp, tree) {
  library(nlme)
  
  cat("\n=== Detailed Statistical Models ===\n")
  
  # 准备数据
  model_data <- matched_data_isp %>%
    filter(!is.na(log_ISP), !is.na(gmPerSeed), !is.na(bfec.shade)) %>%
    mutate(
      log_seed = log(gmPerSeed + 0.001),
      continent = case_when(
        region == "WEA" ~ "EU",
        region %in% c("ENA", "WNA") ~ "NA",
        TRUE ~ NA_character_
      )
    ) %>%
    filter(!is.na(continent))
  
  cat("Total species:", nrow(model_data), "\n")
  
  # 匹配树
  common_sp <- intersect(model_data$tree_name, tree$tip.label)
  if(length(common_sp) < 10) {
    cat("Not enough species\n")
    return(NULL)
  }
  
  model_data <- model_data[model_data$tree_name %in% common_sp, ]
  tree_model <- keep.tip(tree, common_sp)
  model_data <- model_data[match(tree_model$tip.label, model_data$tree_name), ]
  
  cor_brown <- corBrownian(phy = tree_model)
  
  # 完整模型
  cat("\n1. FULL MODEL:\n")
  m_full <- gls(log_ISP ~ log_seed * bfec.shade * continent,
                data = model_data,
                correlation = cor_brown,
                method = "ML")
  
  print(summary(m_full)$tTable)
  
  # 分别为每个大洲建模
  cat("\n2. SEPARATE MODELS BY CONTINENT:\n")
  
  for(cont in c("EU", "NA")) {
    cat("\n---", cont, "---\n")
    
    cont_data <- model_data %>% filter(continent == cont)
    cont_tree_tips <- intersect(cont_data$tree_name, tree$tip.label)
    
    if(length(cont_tree_tips) > 10) {
      cont_tree <- keep.tip(tree, cont_tree_tips)
      cont_data <- cont_data[match(cont_tree$tip.label, cont_data$tree_name), ]
      
      cor_cont <- corBrownian(phy = cont_tree)
      
      m_cont <- gls(log_ISP ~ log_seed + bfec.shade,
                    data = cont_data,
                    correlation = cor_cont,
                    method = "ML")
      
      print(summary(m_cont)$tTable)
      cat("N species:", nrow(cont_data), "\n")
    } else {
      cat("Not enough species for", cont, "\n")
    }
  }
  
  return(m_full)
}

# 修复版本 2：属级别分析
analyze_genus_contributions <- function(matched_data_isp) {
  library(dplyr)
  
  cat("\n════════════════════════════════════════════════════════════\n")
  cat("         GENUS-LEVEL ANALYSIS OF ISP PATTERNS\n")
  cat("════════════════════════════════════════════════════════════\n")
  
  # 准备数据
  analysis_data <- matched_data_isp %>%
    filter(!is.na(ISP_mean), !is.na(gmPerSeed)) %>%
    mutate(
      continent = case_when(
        region == "WEA" ~ "EU",
        region %in% c("ENA", "WNA") ~ "NA",
        TRUE ~ NA_character_
      )
    ) %>%
    filter(!is.na(continent))
  
  # 计算每个属在每个大洲的相关性
  genus_correlations <- analysis_data %>%
    group_by(genus, continent) %>%
    summarise(
      n_species = n(),
      mean_ISP = mean(ISP_mean, na.rm = TRUE),
      mean_seed = mean(gmPerSeed, na.rm = TRUE),
      sd_ISP = sd(ISP_mean, na.rm = TRUE),
      sd_seed = sd(gmPerSeed, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    filter(n_species >= 2)  # 至少2个物种
  
  # 计算相关性 - 分开处理
  for(i in 1:nrow(genus_correlations)) {
    g <- genus_correlations$genus[i]
    c <- genus_correlations$continent[i]
    
    subset_data <- analysis_data %>%
      filter(genus == g, continent == c)
    
    if(nrow(subset_data) >= 3) {
      cor_result <- tryCatch(
        cor(log(subset_data$ISP_mean + 0.001), 
            log(subset_data$gmPerSeed + 0.001),
            use = "complete.obs"),
        error = function(e) NA
      )
      genus_correlations$cor_ISP_seed[i] <- cor_result
    }
  }
  
  cat("\n=== Genus-level patterns ===\n")
  print(genus_correlations %>% arrange(continent, desc(n_species)))
  
  # 比较同一属在两个大洲的表现
  cat("\n=== SAME GENUS, DIFFERENT CONTINENTS ===\n")
  
  shared_genera <- genus_correlations %>%
    group_by(genus) %>%
    mutate(n_continents = n()) %>%
    filter(n_continents == 2) %>%
    select(genus, continent, n_species, cor_ISP_seed) %>%
    arrange(genus, continent)
  
  if(nrow(shared_genera) > 0) {
    # 手动转置
    shared_wide <- data.frame()
    for(g in unique(shared_genera$genus)) {
      eu_row <- shared_genera %>% filter(genus == g, continent == "EU")
      na_row <- shared_genera %>% filter(genus == g, continent == "NA")
      
      if(nrow(eu_row) > 0 & nrow(na_row) > 0) {
        shared_wide <- rbind(shared_wide, data.frame(
          genus = g,
          EU_n = eu_row$n_species[1],
          EU_cor = eu_row$cor_ISP_seed[1],
          NA_n = na_row$n_species[1],
          NA_cor = na_row$cor_ISP_seed[1],
          cor_diff = na_row$cor_ISP_seed[1] - eu_row$cor_ISP_seed[1]
        ))
      }
    }
    
    shared_wide <- shared_wide %>% arrange(desc(abs(cor_diff)))
    print(shared_wide)
  }
  
  return(list(
    genus_cors = genus_correlations,
    shared = shared_wide
  ))
}

# 运行
detailed_models <- detailed_statistical_analysis(matched_data_isp, tree)


=== Detailed Statistical Models ===
Total species: 149 

1. FULL MODEL:

                                      Value  Std.Error    t-value     p-value
(Intercept)                     -2.47771914 1.11232834 -2.2275070 0.029276483
log_seed                        -0.12249815 0.05525430 -2.2169887 0.030021475
bfec.shade                       0.01889367 0.03335126  0.5665055 0.572943080
continentNA                      0.12891334 0.07561499  1.7048649 0.092852791
log_seed:bfec.shade             -0.02227968 0.02877754 -0.7742036 0.441533879
log_seed:continentNA             0.15201816 0.05191035  2.9284750 0.004650767
bfec.shade:continentNA          -0.02096473 0.03373565 -0.6214415 0.536417912
log_seed:bfec.shade:continentNA  0.02452682 0.02777902  0.8829258 0.380433401

2. SEPARATE MODELS BY CONTINENT:

--- EU ---

                  Value  Std.Error   t-value    p-value
(Intercept) -1.64322173 0.94897654 -1.731573 0.09618657
log_seed     0.18571798 0.06827636  2.720092 0.01194289
bfec.shade   0.01910266 0.01789261  1.067629 0.29631003
N species: 27 

--- NA ---

                   Value   Std.Error    t-value     p-value
(Intercept) -2.501526859 0.922422325 -2.7119106 0.009440419
log_seed     0.013370726 0.017341668  0.7710173 0.444726328
bfec.shade   0.001107467 0.009603453  0.1153197 0.908704792
N species: 48

genus_results <- analyze_genus_contributions(matched_data_isp)


------------------------------------------------------------
         GENUS-LEVEL ANALYSIS OF ISP PATTERNS
------------------------------------------------------------


=== Genus-level patterns ===
# A tibble: 20 x 8
   genus     continent n_species mean_ISP mean_seed sd_ISP  sd_seed cor_ISP_seed
   <chr>     <chr>         <int>    <dbl>     <dbl>  <dbl>    <dbl>        <dbl>
 1 Quercus   EU               16  1.30     2.16          0 0.805              NA
 2 Pinus     EU                8  0.0443   0.138         0 0.220              NA
 3 Abies     EU                5  0.0264   0.0718        0 0.0259             NA
 4 Acer      EU                5  0.448    0.0889        0 0.0544             NA
 5 Fraxinus  EU                3  0.326    0.0608        0 0.0294             NA
 6 Picea     EU                3  0.0100   0.00543       0 0.00177            NA
 7 Prunus    EU                3  0.266    0.284         0 0.176              NA
 8 Juniperus EU                2  1.06     0.166         0 0.0325             NA
 9 Tilia     EU                2  0.225    0.0427        0 0.00750            NA
10 Quercus   NA               28  1.72     2.82          0 1.67               NA
11 Pinus     NA               24  0.0451   0.106         0 0.133              NA
12 Abies     NA                7  0.0302   0.0313        0 0.0203             NA
13 Acer      NA                7  0.555    0.0646        0 0.0406             NA
14 Picea     NA                6  0.0304   0.00253       0 0.00111            NA
15 Juniperus NA                5  0.0901   0.0325        0 0.0388             NA
16 Prunus    NA                4  0.136    0.230         0 0.327              NA
17 Betula    NA                3  0.0224   0.000713      0 0.000338           NA
18 Fraxinus  NA                3  0.210    0.0444        0 0.0122             NA
19 Ulmus     NA                3  0.0146   0.0069        0 0.00365            NA
20 Alnus     NA                2  0.00450  0.000791      0 0.000384           NA

=== SAME GENUS, DIFFERENT CONTINENTS ===
      genus EU_n EU_cor NA_n NA_cor cor_diff
1     Abies    5     NA    7     NA       NA
2      Acer    5     NA    7     NA       NA
3  Fraxinus    3     NA    3     NA       NA
4 Juniperus    2     NA    5     NA       NA
5     Picea    3     NA    6     NA       NA
6     Pinus    8     NA   24     NA       NA
7    Prunus    3     NA    4     NA       NA
8   Quercus   16     NA   28     NA       NA