knitr::opts_knit$set(root.dir = getwd())Phylogenies
library(RColorBrewer)
library(dplyr)
library(ggplot2)
library(RANN)
library(ape)
library(stringr)
library(castor)
library(corrplot)
library(ggplot2)
library(officer)
library(rvg)setwd("R:/clark/clark.unix/makeMast/maps")
library(RANN)
path <-"/Volumes/research/clark/clark.unix/"
dataPlotPath <- '../'
sumPlotPath <- '../inventories/continentPred/annual/'
traitPath <- '../../'
invPath <- '../inventories/'
path2Traits <- paste(traitPath, 'traitsByGroup/', sep='')
source( '../Rfunctions/mastifFunctions.r' )
source( 'R:/clark/clark.unix/makeMast/moreFunctionsOld.r' )
Rcpp::sourceCpp( '../RcppFunctions/cppFns.cpp' )source( 'R:/clark/clark.unix/makeMast/predictionMastFunctions.R' )setwd("R:/clark/clark.unix/phylogram")
source('phyloFunctions.R')setwd("R:/clark/clark.unix/phylogram")
# Load phylogenetic tree
trAll <- read.tree("Vascular_Plants_rooted.dated.tre")
trAll <- fixPhyloNames(trAll)
# Get species and traits
spp <- getFittedSpecies(
dirs = 'output2/east_europe_west',
keep = c('east', 'west', 'europe')
)
# Get trait data
tmp <- phyloTraits(
tree = trAll,
species = spp,
traitNames = c("gmPerSeed", "dispersal", "fruit",
"pollination","gmPerCm", "leafN", "leafNP", "leafP",
"maxHt", "maxDbh", "SLA", "woodSG")
)
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
specCode fixed in function nameFix:
[1] "caryaGlabra" "caryaOvalis-g"
traitTab <- tmp$traitTab
trq <- tmp$tree
tree <- tmp$treelibrary( RANN )
library( ape )
library( phytools )
library( plotrix )
library( stringr )
library( castor )
library( corrplot )
setwd("R:/clark/clark.unix/phylogram")
paramPath <- "../makeMast/fitLogs/output2/east_europe_west/"
path2Traits <- "../traitsByGroup/"
traitFile <- paste( path2Traits, 'plantTraits.csv', sep = '' )
source('../makeMast/predictionMastFunctions.r')
source('../makeMast/RFunctions/mastifFunctions.r')
source('../makeMast/moreFunctionsOld.r')
source('phyloFunctions.R')
vars <- getFittedVariables( continent = 'northAmerica',
ppath = '../makeMast/fitLogs/', dirs = 'output2')
print( vars ) [1] "aspect1.defSite.slope" "aspect1.slope" "aspect2.defSite.slope"
[4] "aspect2.slope" "defAnom" "defSite"
[7] "diam" "I.diam.2." "I.tempSite.2."
[10] "intercept" "isolate" "lfi10SqrtAnom"
[13] "lfi10SqrtSite" "lfi30SqrtAnom" "lfi30SqrtSite"
[16] "lfi50SqrtAnom" "lfi50SqrtSite" "ph"
[19] "shade" "slope" "tempSite"
[22] "tpi"
tmp <- phyloSensitivity( tree, species = tree$tip.label,
betaNames = c( "ph", "tpi", "slope", "shade", "defAnom" ),
paramPath = paramPath, path2Traits = path2Traits )
parEst <- tmp$parEst
traitSig <- tmp$parSig
tree <- tmp$tree
include <- c( 'west', 'east', 'europe' )
dfColumn <- 'region'
#trAng <- read.tree("Angiosperms_100trees.tre")
trAll <- read.tree("Vascular_Plants_rooted.dated.tre") # all vascular
trAll <- fixPhyloNames( trAll )
#trq <- fixPhyloNames( trAng )
tmp <- phyloTraits( trq, species = spp[ grep( 'Quercus', spp ) ],
traitNames = c( "gmPerSeed", "dispersal", "fruit", "leaves",
"pollination", "sex", "thorns" ) )
traitTab <- tmp$traitTab
trq <- tmp$tree
phyloSubset( tree = trq, df = traitTab, dfColumn = 'genus',
include = 'Quercus', colRamp = regionCols[1:3],
colColumn = 'region', edge.color = 'region',
taxon = 'taxon', cex = .6, legendCorner = 'topleft' )
axisPhylo( side = 1 )
mtext( 'Mya', side = 1, line = 2.5 )x <- read.csv( 'speciesByResponse.csv', row.names = 1 ) # file created in regionAnalysis.Rmd
rownames(x)[ rownames(x) == 'caryaOvalis-g' ] <- 'caryaOvalis'
spp <- code2species( rownames(x) )
tmp <- phyloTraits( tree = trAll, species = spp,
traitNames = c( "gmPerSeed", "dispersal", "fruit", "leaves",
"pollination", "sex", "thorns" ) )
traitTab <- tmp$traitTab
trRsp <- tmp$tree
species <- trRsp$tip.label
traitTab <- cbind( traitTab, x[ rownames(traitTab), ] )
rownames( traitTab ) <- species
# here is the phylogenetic distance matrix
pdist <- cophenetic( trRsp )
# here is the trait distance matrix (columns with 'Anom')
dcols <- colnames(traitTab)[ grep('Anom', colnames( traitTab )) ] # response to anomalies
tdist <- as.matrix( dist( traitTab[, dcols ] ) ) # trait distance
rownames( tdist ) <- colnames( tdist ) <- paste( traitTab$genus, traitTab$specEpith )
wp <- which( rownames( pdist ) %in% rownames( tdist ) )
pdist <- pdist[ wp, wp ]
wp <- which( rownames( tdist ) %in% rownames( pdist ) )
tdist <- tdist[ wp, wp ]
pdist <- pdist[ rownames(tdist), colnames( tdist ) ]
traitTab <- traitTab[ wp, ]
# here is a comparison of distances by family
diag( pdist ) <- diag( tdist ) <- NA
ftab <- table( traitTab$genus )
genus <- names( ftab )[ ftab > 3 ]
par( mfrow = c(3,4), mar = c(1, .5, .8, .5 ), omi = c(1,1,.3,.3), bty = 'n' )
for( k in 1:length( genus ) ){
wk <- which( traitTab$genus == genus[k] )
plot( jitter(pdist[wk,wk]), tdist[wk,wk], xlab = '', ylab = '', cex = .6, pch = 16,
xaxt = 'n', yaxt = 'n', col = .getColor( 'darkblue', .4 ) )
axis( 1, labels = F )
axis( 2, labels = F )
mk <- cbind( pdist[wk,wk][lower.tri( pdist[wk,wk] ) ],
tdist[wk,wk][lower.tri( tdist[wk,wk] ) ] )
mk <- Hmisc::rcorr( mk )
lab <- paste( genus[k], round( mk$r[2], 2 ), sep = ', ' )
if( mk$P[2] < .05 )lab <- paste( lab, '*', sep = '' )
.plotLabel( lab, 'topleft', cex = 1.2, above = T )
}mtext( 'Phylogenetic distance', 1, outer = T, line = 2 )
mtext( 'Anomaly response distance', 2, outer = T, line = 2 )
library(phylosignal)
library(phylobase)
vars <- c("shade")
tnames <- c( "gmPerSeed",
"ph","shade","capsule","fleshy","nut", "pod","samara","syncarpium",
"anemochory","hydrochory","zoochory", "animal","wind", "broad_evergreen",
"needle_deciduous","needle_evergreen","scalelike_evergreen",
"monoecious","non.monoecious" )
tdata <- as.matrix( traitTab[ , tnames ] )
pt <- phylo4d(trRsp, tdata)
# color code tips by family
fams <- sort( unique( traitTab$genus ) )
famCols <- colorRampPalette( colContrast )( length(fams) )
names( famCols ) <- fams
tip.col <- famCols[ traitTab$genus ]
names( tip.col ) <- trRsp$tip.label
# 使用 asp 参数强制1:1宽高比
plot.new()
plot.window(xlim = c(-1, 1), ylim = c(-1, 1), asp = 1)
# 然后绘制
barplot.phylo4d(pt, tree.type = "fan",
tip.cex = 0.6,
trait = vars,
tree.ladderize = TRUE,
tip.col = tip.col,
bar.col = tip.col,
bar.lwd = 4,
trait.bg.col = c( rep( '#c7eae5', 3), '#dfc27d' ),
show.trait = FALSE,
add = TRUE) # 如果支持add参数
par(pty = "s") # 's' 表示 square(正方形)
par(mar = c(1, 1, 1, 1)) # 四边留一点边距
par(oma = c(0, 0, 0, 0))barplot.phylo4d(pt, tree.type = "fan",
tip.cex = 0.6,
trait = vars,
tree.ladderize = TRUE,
tip.col = tip.col,
bar.col = tip.col,
bar.lwd = 4,center = FALSE, scale = FALSE,
trait.bg.col = c( rep( '#c7eae5', 3), '#dfc27d' ),
show.trait = FALSE)
psig <- phyloSignal(p4d = pt, method = "all")
pstats <- signif( psig$stat, 3 )
pvalue <- signif( psig$pvalue, 3 )
# 为 vars 里的每个trait提取值
trait_of_interest <- vars[1] # 你的 vars = c("shade")
k_val <- psig$stat[trait_of_interest, "K"]
k_p <- psig$pvalue[trait_of_interest, "K"]
lambda_val <- psig$stat[trait_of_interest, "Lambda"]
lambda_p <- psig$pvalue[trait_of_interest, "Lambda"]
# 然后你的 mtext 代码就能用了
mtext(side = 1, line = 2,
text = paste("Phylogenetic Signal: K =", round(k_val, 3),
ifelse(k_p < 0.05, "(significant)", "(ns)"),
" | λ =", round(lambda_val, 3),
ifelse(lambda_p < 0.05, "(significant)", "(ns)")),
cex = 1, font = 2)pdist <- cophenetic( tree ) setwd("R:/clark/clark.unix/phylogram")
mu <- read.csv('C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/parEst.csv')
se <- read.csv('C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/parSe.csv')
trAll <- read.tree("Vascular_Plants_rooted.dated.tre")
paramPath <- "../makeMast/fitLogs/output2/east_europe_west/"
#path2Traits <- "../traitsByGroup/"
#traitFile <- paste( path2Traits, 'plantTraits.csv', sep = '' )
path2Traits= "C:/Users/mh471/Downloads/radiation/"
traitFile <- read.csv("C:/Users/mh471/Downloads/radiation/plant_traits_transformed.csv")
source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')library(tidyverse)
library(ggplot2)
library(ggrepel)
library(gridExtra)
traitFile <- read.csv("C:/Users/mh471/Downloads/radiation/plant_traits_transformed.csv")
mu_processed <- mu
if(!"genus" %in% names(mu_processed)) mu_processed$genus <- NA
if(!"region" %in% names(mu_processed)) mu_processed$region <- NA
for(i in 1:nrow(mu_processed)) {
species_code <- mu_processed$species[i]
match_idx <- which(traitFile$code8 == species_code)
if(length(match_idx) > 0) {
idx <- match_idx[1]
mu_processed$N_area[i] <- traitFile$`Leaf.nitrogen..N..content.per.leaf.area..g.m.2.`[idx]
mu_processed$Vcmax[i] <- traitFile$`Photosynthetic.carboxylation.capacity..Vcmax..Vmax..per.leaf.area.at.25.C..µmol.mâ..².sâ..¹._x`[idx]
mu_processed$Jmax[i] <- traitFile$`Jmax.per.leaf.area.at.ambient.or.leaf.temperature..µmol.mâ..².sâ..¹._x`[idx]
mu_processed$Amax[i] <- traitFile$`Leaf.photosynthesis.at.saturating.light.and.saturating.CO2.per.leaf.area..Amax...µmol.mâ..².sâ..¹._x`[idx]
mu_processed$SLA_val[i] <- traitFile$SLA[idx]
mu_processed$genus[i] <- traitFile$genus[idx]
mu_processed$region[i] <- traitFile$region[idx]
}
}
# Fix region classification
mu_processed <- mu_processed %>%
mutate(continent = case_when(
grepl("^west", region) | grepl("^east", region) ~ "NA",
grepl("^europe", region) ~ "EU",
TRUE ~ "OTHER"
))
se_processed <- se %>%
left_join(mu_processed, by = c("species" = "species"))
# Generic function to process any trait
process_genus_trait_continental <- function(mu_data, se_data, genus, trait_name) {
subset_data <- mu_data[mu_data$genus == genus, ]
if(nrow(subset_data) == 0) return(NULL)
# Get trait data for both regions
eu_data <- subset_data[subset_data$continent == "EU" & !is.na(subset_data[[trait_name]]), ]
na_data <- subset_data[subset_data$continent == "NA" & !is.na(subset_data[[trait_name]]), ]
if(nrow(eu_data) == 0 || nrow(na_data) == 0) return(NULL)
result <- data.frame(
genus = genus,
na_value = mean(na_data[[trait_name]], na.rm = TRUE),
eu_value = mean(eu_data[[trait_name]], na.rm = TRUE),
na_error = sd(na_data[[trait_name]], na.rm = TRUE) / sqrt(nrow(na_data)),
eu_error = sd(eu_data[[trait_name]], na.rm = TRUE) / sqrt(nrow(eu_data)),
n_eu = nrow(eu_data),
n_na = nrow(na_data)
)
# Handle NA errors
result$na_error[is.na(result$na_error)] <- 0.05 * abs(result$na_value[is.na(result$na_error)])
result$eu_error[is.na(result$eu_error)] <- 0.05 * abs(result$eu_value[is.na(result$eu_error)])
return(result)
}
# Create plot function
create_continental_plot <- function(results_df, trait_name, unit) {
if(is.null(results_df) || nrow(results_df) == 0) return(NULL)
results_df <- results_df %>%
mutate(diff = eu_value - na_value)
# Auto-scale axes
x_range <- range(results_df$na_value, na.rm = TRUE)
y_range <- range(results_df$eu_value, na.rm = TRUE)
plot_range <- range(c(x_range, y_range))
plot_range <- plot_range + c(-0.1, 0.1) * diff(plot_range)
plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = diff)) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = diff(plot_range) * 0.01, linewidth = 0.5) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = diff(plot_range) * 0.01, linewidth = 0.5) +
geom_point(size = 4) +
geom_text_repel(aes(label = genus),
size = 3.5,
show.legend = FALSE,
box.padding = 0.3,
point.padding = 0.2,
min.segment.length = 0,
segment.color = "grey50",
segment.size = 0.5,
max.overlaps = 30) +
scale_color_gradient2(
low = "blue4",
mid = "grey50",
high = "darkred",
midpoint = 0,
name = "EU - NA"
) +
scale_x_continuous(limits = plot_range) +
scale_y_continuous(limits = plot_range) +
coord_fixed(ratio = 1) +
theme_minimal() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "grey90"),
axis.title = element_text(size = 11),
axis.text = element_text(size = 9),
legend.position = "right",
plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
plot.margin = margin(10, 10, 10, 10)
) +
labs(
x = paste0("North America, ", trait_name, " (", unit, ")"),
y = paste0("Europe, ", trait_name, " (", unit, ")"),
title = paste0(trait_name, " comparison"))
return(plot)
}
# Process only the traits that have data
traits_to_analyze <- list(
N_area = list(name = "Leaf N", unit = "g/m²"),
Vcmax = list(name = "Vcmax", unit = "μmol/m²/s"),
Jmax = list(name = "Jmax", unit = "μmol/m²/s")
)
all_plots <- list()
for(trait in names(traits_to_analyze)) {
print(paste("\n\nAnalyzing", trait, ":"))
# Check data availability - fixed to handle missing columns
continental_data <- mu_processed %>%
filter(!is.na(.[[trait]]), !is.na(genus), continent %in% c("EU", "NA")) %>%
group_by(genus, continent) %>%
summarise(n = n(), mean_val = mean(.[[trait]], na.rm = TRUE), .groups = 'drop') %>%
pivot_wider(names_from = continent, values_from = c(n, mean_val), values_fill = list(n = 0, mean_val = NA))
# Check which columns exist and filter appropriately
if("n_EU" %in% names(continental_data) && "n_NA" %in% names(continental_data)) {
both_continents <- continental_data %>%
filter(n_EU > 0 & n_NA > 0)
print(paste("Genera with", trait, "data from both continents:"))
print(both_continents)
if(nrow(both_continents) > 0) {
genera_with_both <- unique(both_continents$genus)
results <- lapply(genera_with_both, function(g)
process_genus_trait_continental(mu_processed, se_processed, g, trait))
results_df <- do.call(rbind, results[!sapply(results, is.null)])
if(!is.null(results_df) && nrow(results_df) > 0) {
plot <- create_continental_plot(results_df,
traits_to_analyze[[trait]]$name,
traits_to_analyze[[trait]]$unit)
all_plots[[trait]] <- plot
print(plot)
# Print statistics
print(paste("\nResults for", trait, ":"))
print(results_df)
# Paired t-test if enough data
if(nrow(results_df) >= 3) {
t_test <- t.test(results_df$eu_value, results_df$na_value, paired = TRUE)
print(paste("Paired t-test: p =", round(t_test$p.value, 4)))
}
}
}
} else {
print(paste("No genera have", trait, "data from both continents"))
}
}[1] "\n\nAnalyzing N_area :"
[1] "Genera with N_area data from both continents:"
# A tibble: 12 x 5
genus n_EU n_NA mean_val_EU mean_val_NA
<chr> <int> <int> <dbl> <dbl>
1 Abies 1 1 2.19 2.19
2 Acer 4 2 2.19 2.19
3 Alnus 1 1 2.19 2.19
4 Betula 1 2 2.19 2.19
5 Fraxinus 3 2 2.19 2.19
6 Juglans 1 1 2.19 2.19
7 Larix 1 1 2.19 2.19
8 Picea 1 3 2.19 2.19
9 Pinus 5 12 2.19 2.19
10 Prunus 1 1 2.19 2.19
11 Quercus 8 7 2.19 2.19
12 Tilia 2 1 2.19 2.19
[1] "\nResults for N_area :"
genus na_value eu_value na_error eu_error n_eu n_na
1 Abies 2.6208651 4.002096 0.13104326 0.20010479 1 1
2 Acer 0.6938030 1.597970 0.23212296 0.35972783 4 2
3 Alnus 1.7964072 2.424583 0.08982036 0.12122916 1 1
4 Betula 2.1179585 0.760942 0.38204150 0.03804710 1 2
5 Fraxinus 1.4417187 2.655459 0.13722868 0.33921588 3 2
6 Juglans 0.9704918 1.501604 0.04852459 0.07508018 1 1
7 Larix 1.7160000 2.459099 0.08580000 0.12295496 1 1
8 Picea 3.0375359 6.225000 0.51332510 0.31125000 1 3
9 Pinus 3.1629821 2.732445 0.44521937 0.45392105 5 12
10 Prunus 1.0742000 2.108458 0.05371000 0.10542289 1 1
11 Quercus 1.9682340 2.688094 0.13562578 0.17006554 8 7
12 Tilia 1.6483516 1.351567 0.08241758 0.45485319 2 1
[1] "Paired t-test: p = 0.0553"
[1] "\n\nAnalyzing Vcmax :"
[1] "Genera with Vcmax data from both continents:"
# A tibble: 7 x 5
genus n_EU n_NA mean_val_EU mean_val_NA
<chr> <int> <int> <dbl> <dbl>
1 Acer 2 1 45.8 45.8
2 Betula 1 1 45.8 45.8
3 Fraxinus 1 2 45.8 45.8
4 Juglans 1 1 45.8 45.8
5 Picea 1 2 45.8 45.8
6 Pinus 2 5 45.8 45.8
7 Quercus 3 3 45.8 45.8
[1] "\nResults for Vcmax :"
genus na_value eu_value na_error eu_error n_eu n_na
1 Acer 9.595942 19.57134 0.4797971 0.6566609 2 1
2 Betula 100.555556 1.00000 5.0277778 0.0500000 1 1
3 Fraxinus 19.111661 21.99600 1.5271897 1.0998000 1 2
4 Juglans 14.763158 26.85714 0.7381579 1.3428571 1 1
5 Picea 29.077500 77.79541 0.7775000 3.8897705 1 2
6 Pinus 65.273786 40.60216 17.3780500 5.8921600 2 5
7 Quercus 58.948687 60.04116 23.1511123 8.7614009 3 3
[1] "Paired t-test: p = 0.6997"
[1] "\n\nAnalyzing Jmax :"
[1] "Genera with Jmax data from both continents:"
# A tibble: 7 x 5
genus n_EU n_NA mean_val_EU mean_val_NA
<chr> <int> <int> <dbl> <dbl>
1 Acer 1 1 108. 108.
2 Betula 1 1 108. 108.
3 Fraxinus 1 1 108. 108.
4 Juglans 1 1 108. 108.
5 Picea 1 3 108. 108.
6 Pinus 4 5 108. 108.
7 Quercus 3 2 108. 108.
[1] "\nResults for Jmax :"
genus na_value eu_value na_error eu_error n_eu n_na
1 Acer 102.00000 45.1800 5.100000 2.259000 1 1
2 Betula 165.00000 93.3043 8.250000 4.665215 1 1
3 Fraxinus 211.00000 31.6000 10.550000 1.580000 1 1
4 Juglans 102.64481 46.9154 5.132241 2.345770 1 1
5 Picea 93.67134 130.6008 13.591975 6.530040 1 3
6 Pinus 102.76130 102.0039 15.304755 61.176629 4 5
7 Quercus 113.66825 136.8199 12.431749 23.416720 3 2
[1] "Paired t-test: p = 0.1682"
# Create combined plot
if(length(all_plots) >= 2) {
print("\n\nCreating combined plot...")
# Arrange plots in a grid
if(length(all_plots) == 2) {
combined <- grid.arrange(grobs = all_plots, ncol = 2)
} else if(length(all_plots) == 3) {
combined <- grid.arrange(grobs = all_plots, ncol = 2, nrow = 2)
}
}[1] "\n\nCreating combined plot..."
# Final summary
print("\n\nFINAL SUMMARY:")[1] "\n\nFINAL SUMMARY:"
print(paste("Successfully created", length(all_plots), "comparison plots"))[1] "Successfully created 3 comparison plots"
for(trait in names(all_plots)) {
print(paste("-", traits_to_analyze[[trait]]$name))
}[1] "- Leaf N"
[1] "- Vcmax"
[1] "- Jmax"
phyloTraits <- function( tree, species = tree$tip.label, fill = 'genus',
genera = NULL,
traitNames = c( "gmPerSeed", "dispersal", "fruit" ),
path2Traits = "C:/Users/mh471/Downloads/radiation/",
traitFile = "plant_traits_transformed.csv",
taxonGroup = "plants" ){ # Added taxonGroup parameter with default
require( stringr )
require( castor )
tree$tip.label <- .replaceString( tree$tip.label , '_', ' ' )
if( !is.null( species ) )species <- .replaceString( species , '_', ' ' )
if( is.null( genera ) )GENERA <- F
genSpec <- columnSplit( species, ' ' )
specNames <- paste( tolower( substr( genSpec[,1], 1, 8 ) ),
upperFirstLetter( substr( genSpec[,2], 1, 8 ) ), sep = '' )
FAMILY <- T
if( fill == 'genus' )FAMILY <- F
tnew <- numeric(0)
# Fixed: changed tnames to traitNames
for( k in 1:length( traitNames )){
tk <- getTraits( specNames = specNames, genera = genera, traitName = traitNames[k], FAMILY = FAMILY,
path2Traits = path2Traits, traitFile = traitFile )
tnew <- data.frame( cbind(tnew, tk) )
}
names( tnew ) <- traitNames
tnew$code8 <- specNames
traitTab <- tnew
# rm( tnew )
if( is.null( genera ) ){
species <- species[ specNames %in% rownames( traitTab ) ]
specNames <- specNames[ specNames %in% rownames( traitTab ) ]
GENERA <- F
# genSpec <- columnSplit( species, ' ' )
# specNames <- paste( tolower( substr( genSpec[,1], 1, 8 ) ),
# upperFirstLetter( substr( genSpec[,2], 1, 8 ) ), sep = '' )
species <- .replaceString( species, '_', ' ' )
tree$tip.label <- .replaceString( tree$tip.label , '_', ' ' )
ws <- which( species %in% tree$tip.label )
# species <- species[ species %in% tree$tip.label ]
trSpec <- tree$tip.label
wf <- which( trSpec %in% species )
wn <- which( !trSpec %in% species )
tree <- drop.tip(tree, tip = tree$tip.label[wn] )
species <- tree$tip.label
gg <- columnSplit( species, ' ' )
specNames <- species2code( gg[,1], gg[,2] )
traitTab <- traitTab[ specNames, ]
}else{
GENERA <- T
specNames <- NULL
ts <- tree$tip.label
tgen <- columnSplit( ts, ' ' )[,1]
ww <- which( tgen %in% genera )
wd <- which( duplicated( tgen ) | !tgen %in% genera )
tree <- drop.tip( tree, wd )
genera <- columnSplit( tree$tip.label, ' ' )[,1]
tree$tip.label <- genera
tnew <- numeric(0)
# Fixed: changed tnames to traitNames
for( k in 1:length( traitNames )){
tk <- getTraits( specNames = NULL, genera = genera, traitName = traitNames[k], FAMILY = FAMILY,
path2Traits = path2Traits, traitFile = traitFile )
tnew <- data.frame( cbind(tnew, tk) )
}
names( tnew ) <- traitNames
traitTab <- tnew
}
regName <- 'region'
if( taxonGroup == 'mammals' ){
regName <- 'biogeographical_realm'
rownames(traitTab) <- traitTab$scientificName
}
# get first region
if( !GENERA ){
region <- getTraits( specNames = traitTab$code8, traitName = regName,
path2Traits = path2Traits, traitFile = traitFile )
# rownames(traitTab) <- paste( traitTab$genus, traitTab$specEpith )
}else{
region <- getTraits( genera = traitTab$genus, traitName = regName,
path2Traits = path2Traits, traitFile = traitFile )
if( taxonGroup == 'mammals' )region <- .replaceString( region, ', ', '_' )
rownames(traitTab) <- traitTab$genus
}
if( taxonGroup == 'plants'){
nd <- stringi::stri_count(region, fixed = '_' )
for( k in 1:5){
wk <- which(nd == k)
if( length(wk) == 0 )next
region[ wk ] <- columnSplit( region[ wk ] )[,1]
}
region[ is.na(region) ] <- 'UNKN'
region[ region == 'asia' ] <- 'EEA'
region[ region == 'europe' ] <- 'WEA'
region[ region == 'africa' ] <- 'AFR'
region[ region == 'east' ] <- 'ENA'
region[ region == 'west' ] <- 'WNA'
}
if( taxonGroup == 'mammals' ){
gg <- grep( 'Afro', region )
region[ gg ] <- 'AFR'
gg <- grep( 'Austral', region )
region[ gg ] <- 'OCEAN'
gg <- grep( 'Oceanian', region )
region[ gg ] <- 'OCEAN'
gg <- grep( 'Nearctic', region )
region[ gg ] <- 'NA'
gg <- grep( 'Neotropical', region )
region[ gg ] <- 'SA'
gg <- grep( 'Indomalayan', region )
region[ gg ] <- 'ASIA'
gg <- grep( 'Palearctic', region )
region[ gg ] <- 'ASIA'
}
traitTab$region <- region
traitTab$taxon <- tree$tip.label
list( tree = tree, traitTab = traitTab )
}original_names <- names(traitFile)
# 清理编码问题(那些 Â 之类的字符)
clean_names <- original_names
clean_names <- gsub("Â", "", clean_names) # 移除 Â
clean_names <- gsub("µ", "u", clean_names) # µ 改为 u
clean_names <- gsub("â..", "", clean_names) # 移除其他编码问题
clean_names <- gsub("\\.{2,}", ".", clean_names) # 多个点改为单个点
clean_names <- gsub("\\.$", "", clean_names) # 移除末尾的点
clean_names <- gsub("^\\.", "", clean_names) # 移除开头的点
# 对于那些特别长的列名,可以选择性地简化
# 但保留 gmPerSeed, dispersal, fruit 这类简短的原始名称不变
for(i in 1:length(clean_names)) {
# 只处理特别长的列名(比如超过50个字符的)
if(nchar(clean_names[i]) > 50) {
# 简化光合作用相关的列名
clean_names[i] <- gsub("per.leaf.area.at.ambient.or.leaf.temperature", "", clean_names[i])
clean_names[i] <- gsub("per.leaf.dry.mass..specific.leaf.area..SLA.or.1.LMA...undefined.if.petiole.is.in..or.excluded", "", clean_names[i])
clean_names[i] <- gsub("measured.by.light.absoption..e.g..SPAD.chlorophyll.meter", "SPAD", clean_names[i])
clean_names[i] <- gsub("at.saturating.light.and.ambient.CO2.per.leaf.area", "", clean_names[i])
clean_names[i] <- gsub("at.saturating.light.and.saturating.CO2.per.leaf.area", "", clean_names[i])
clean_names[i] <- gsub("per.area.at.saturating.light.at.PAR.1200.micromol.m.2.s.1", "PAR1200", clean_names[i])
clean_names[i] <- gsub("per.area.in.shade.at.PAR.50.micromol.m.2.s.1", "PAR50", clean_names[i])
clean_names[i] <- gsub("..umol.m..2.s..1.", "", clean_names[i])
clean_names[i] <- gsub("photosynthesis.transpiration", "", clean_names[i])
clean_names[i] <- gsub("..dimensionless.", "", clean_names[i])
clean_names[i] <- gsub("..micromol.millimol.", "", clean_names[i])
}
}
# 再次清理多余的点
clean_names <- gsub("\\.{2,}", ".", clean_names)
clean_names <- gsub("\\.$", "", clean_names)
# 应用新的列名
names(traitFile) <- clean_names
# 查看部分改变的列名对比
changes <- data.frame(
old = original_names[c(14, 22, 25, 51:55, 91:95)],
new = clean_names[c(14, 22, 25, 51:55, 91:95)]
)
print(changes) old
1 gmPerSeed
2 dispersal
3 fruit
4 Jmax.per.leaf.area.at.ambient.or.leaf.temperature..µmol.mâ..².sâ..¹._x
5 Leaf.area.per.leaf.dry.mass..specific.leaf.area..SLA.or.1.LMA...undefined.if.petiole.is.in..or.excluded..mm2.mg.1.
6 Leaf.carbon..C..content.per.leaf.dry.mass..mg.g.
7 Leaf.carbon.isotope.signature..delta.13C...per.mill.
8 Leaf.carbon.nitrogen..C.N..ratio..g.g.1._x
9 Jmax.per.leaf.area.at.ambient.or.leaf.temperature..µmol.mâ..².sâ..¹._y
10 Leaf.carbon.content.per.dry.mass..mg.g.
11 Leaf.carbon.nitrogen..C.N..ratio..g.g.1._y
12 Leaf.chlorophyll.a.b.content.per.area..g.m.2._y
13 Leaf.chlorophyll.per.area..measured.by.light.absoption..e.g..SPAD.chlorophyll.meter...SPAD.Chlorophyll.Index._y
new
1 gmPerSeed
2 dispersal
3 fruit
4 Jmax.umol.m².s¹._x
5 Leaf.area.per.leaf.dry.mass.specific.leaf.area.SLA.or.1.LMA.undefined.if.petiole.is.in.or.excluded.mm2.mg.1
6 Leaf.carbon.C.content.per.leaf.dry.mass.mg.g
7 Leaf.carbon.isotope.signature.delta.13C.per.mill
8 Leaf.carbon.nitrogen.C.N.ratio.g.g.1._x
9 Jmax.umol.m².s¹._y
10 Leaf.carbon.content.per.dry.mass.mg.g
11 Leaf.carbon.nitrogen.C.N.ratio.g.g.1._y
12 Leaf.chlorophyll.a.b.content.per.area.g.m.2._y
13 Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._y
# 如果你想查看哪些列有 _x 和 _y 版本(可能是重复数据)
x_cols <- grep("_x$", names(traitFile), value = TRUE)
y_cols <- grep("_y$", names(traitFile), value = TRUE)
if(length(x_cols) > 0) {
cat("\n带 _x 后缀的列(可能来自第一个数据源):\n")
print(x_cols[1:min(5, length(x_cols))]) # 显示前5个
}
<U+5E26> _x <U+540E><U+7F00><U+7684><U+5217>(<U+53EF><U+80FD><U+6765><U+81EA><U+7B2C><U+4E00><U+4E2A><U+6570><U+636E><U+6E90>):
[1] "Jmax.umol.m².s¹._x"
[2] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._x"
[3] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._x"
[4] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._x"
[5] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._x"
if(length(y_cols) > 0) {
cat("\n带 _y 后缀的列(可能来自第二个数据源):\n")
print(y_cols[1:min(5, length(y_cols))]) # 显示前5个
}
<U+5E26> _y <U+540E><U+7F00><U+7684><U+5217>(<U+53EF><U+80FD><U+6765><U+81EA><U+7B2C><U+4E8C><U+4E2A><U+6570><U+636E><U+6E90>):
[1] "Jmax.umol.m².s¹._y"
[2] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._y"
[3] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._y"
[4] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._y"
[5] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._y"
# 查看最终的列名
colnames(traitFile) [1] "SpeciesName"
[2] "AccSpeciesID"
[3] "AccSpeciesName"
[4] "TraitID"
[5] "DataID"
[6] "species_clean"
[7] "code8"
[8] "genus"
[9] "specEpith"
[10] "subSpec"
[11] "class"
[12] "family"
[13] "section"
[14] "gmPerSeed"
[15] "gmPerSeed_Sd"
[16] "seedCoatRatio"
[17] "seedCoatRatio_Sd"
[18] "seedMoistureFraction"
[19] "seedMoistureFraction_Sd"
[20] "kCalPerGm"
[21] "degDaySeed"
[22] "dispersal"
[23] "drought"
[24] "flood"
[25] "fruit"
[26] "gmPerCm"
[27] "leafN"
[28] "leafNP"
[29] "leafP"
[30] "leaves"
[31] "maxDbh"
[32] "maxHt"
[33] "pollination"
[34] "region"
[35] "seedsPerFruit"
[36] "seedsPerFruit_Sd"
[37] "sex"
[38] "shade"
[39] "SLA"
[40] "woodSG"
[41] "xylem"
[42] "years2fruitMature"
[43] "gmPerCone"
[44] "gmPerCone_Sd"
[45] "coneRatio"
[46] "thorns"
[47] "spinoseLeaf"
[48] "pubescence"
[49] "bfec.shade.x"
[50] "bfec.shade.y"
[51] "Jmax.umol.m².s¹._x"
[52] "Leaf.area.per.leaf.dry.mass.specific.leaf.area.SLA.or.1.LMA.undefined.if.petiole.is.in.or.excluded.mm2.mg.1"
[53] "Leaf.carbon.C.content.per.leaf.dry.mass.mg.g"
[54] "Leaf.carbon.isotope.signature.delta.13C.per.mill"
[55] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._x"
[56] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._x"
[57] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._x"
[58] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._x"
[59] "Leaf.lamina.dry.matter.content.LDMC.petiole.excluded.g.g._x"
[60] "Leaf.nitrogen.N.content.per.leaf.area.g.m.2"
[61] "Leaf.nitrogen.N.content.per.leaf.dry.mass.mg.g"
[62] "Leaf.nitrogen.N.isotope.signature.delta.15N.per.mill"
[63] "Leaf.nitrogen.phosphorus.N.P.ratio.g.g._x"
[64] "Leaf.phosphorus.P.content.per.leaf.area.g.m.2"
[65] "Leaf.photosynthesis.Asat.umol.m².s¹._x"
[66] "Leaf.photosynthesis.Amax.umol.m².s¹._x"
[67] "Leaf.photosynthesis.PAR1200.Asat.umol.m².s¹._x"
[68] "Leaf.photosynthesis.PAR50.umol.m².s¹._x"
[69] "Leaf.thickness.mm._x"
[70] "Leaf.water.content.per.leaf.water.saturated.mass.LWC.1.LDMC.g.g._x"
[71] "Net.photosynthesis.at.field.capacity.umol.m².s¹._x"
[72] "Net.photosynthetic.rate.at.100.PPFD.umol.m².s¹._x"
[73] "Net.photosynthetic.rate.at.20.PPFD.umol.m².s¹._x"
[74] "Net.photosynthetic.rate.at.200.PPFD.umol.m².s¹._x"
[75] "Net.photosynthetic.rate.at.300.PPFD.umol.m².s¹._x"
[76] "Net.photosynthetic.rate.at.50.PPFD.umol.m².s¹._x"
[77] "Net.photosynthetic.rate.at.500.PPFD.umol.m².s¹._x"
[78] "Net.photosynthetic.rate.at.800.PPFD.umol.m².s¹._x"
[79] "Photosynthesis.per.leaf.area.at.leaf.temperature.Aarea.umol.m².s¹._x"
[80] "Photosynthesis.rate.per.leaf.dry.mass.micro.mol.g.1.s.1"
[81] "Photosynthesis.rate.per.leaf.transpiration.photosynthetic.water.use.effinciency.WUE.micromol.millimol"
[82] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.25.C.umol.m².s¹._x"
[83] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.leaf.temperature.umol.m².s¹._x"
[84] "Photosynthetic.electron.transport.capacity.per.leaf.area.at.25C.Jmax25area.umol.m².s¹._x"
[85] "Plant.height.m"
[86] "Plant.height.relative.growth.rate.cm.cm.1.d.1"
[87] "Plant.relative.growth.rate.g.g.1.d.1"
[88] "Species.environmental.indicator.value.according.to.Ellenberg.pH.dimensionless"
[89] "Ellenberg.indicator.value.pH.reaction.dimensionless"
[90] "Instantaneous.water.use.effinciency.WUE.micromol.millimol"
[91] "Jmax.umol.m².s¹._y"
[92] "Leaf.carbon.content.per.dry.mass.mg.g"
[93] "Leaf.carbon.nitrogen.C.N.ratio.g.g.1._y"
[94] "Leaf.chlorophyll.a.b.content.per.area.g.m.2._y"
[95] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter.SPAD.Chlorophyll.Index._y"
[96] "Leaf.delta.13C.Leaf.carbon.isotope.ratio.per.mill"
[97] "Leaf.delta.15nitrogen.per.mill"
[98] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC.g.g._y"
[99] "Leaf.lamina.dry.matter.content.LDMC.petiole.excluded.g.g._y"
[100] "Leaf.nitrogen.content.per.area.Narea.g.m.2"
[101] "Leaf.nitrogen.content.per.dry.mass.Nmass.mg.g"
[102] "Leaf.nitrogen.phosphorus.N.P.ratio.g.g._y"
[103] "Leaf.phosphorus.content.per.area.Parea.g.m.2"
[104] "Leaf.photosynthesis.Asat.umol.m².s¹._y"
[105] "Leaf.photosynthesis.Amax.umol.m².s¹._y"
[106] "Leaf.photosynthesis.PAR1200.Asat.umol.m².s¹._y"
[107] "Leaf.photosynthesis.PAR50.umol.m².s¹._y"
[108] "Leaf.thickness.mm._y"
[109] "Leaf.water.content.per.leaf.water.saturated.mass.LWC.1.LDMC.g.g._y"
[110] "Net.photosynthesis.at.field.capacity.umol.m².s¹._y"
[111] "Net.photosynthetic.rate.at.100.PPFD.umol.m².s¹._y"
[112] "Net.photosynthetic.rate.at.20.PPFD.umol.m².s¹._y"
[113] "Net.photosynthetic.rate.at.200.PPFD.umol.m².s¹._y"
[114] "Net.photosynthetic.rate.at.300.PPFD.umol.m².s¹._y"
[115] "Net.photosynthetic.rate.at.50.PPFD.umol.m².s¹._y"
[116] "Net.photosynthetic.rate.at.500.PPFD.umol.m².s¹._y"
[117] "Net.photosynthetic.rate.at.800.PPFD.umol.m².s¹._y"
[118] "Photosynthesis.per.leaf.area.at.leaf.temperature.Aarea.umol.m².s¹._y"
[119] "Photosynthesis.per.leaf.dry.mass.Amass.micro.mol.g.1.s.1"
[120] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.25.C.umol.m².s¹._y"
[121] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.leaf.temperature.umol.m².s¹._y"
[122] "Photosynthetic.electron.transport.capacity.per.leaf.area.at.25C.Jmax25area.umol.m².s¹._y"
[123] "Plant.height.at.age.of.20.years.m"
[124] "Plant.height.relative.growth.rate.RGR_height.cm.cm.1.d.1"
[125] "Plant.relative.growth.rate.RGR.g.g.1.d.1"
[126] "SLA.undefined.if.petiole.in.or.excluded.mm2.mg.1"
[127] "Ellenberg.indicator.value.pH.reaction._std"
[128] "Instantaneous.water.use.effinciency.WUE._std"
[129] "Jmax._std"
[130] "Leaf.carbon.content.per.dry.mass_std"
[131] "Leaf.carbon.nitrogen.C.N.ratio_std"
[132] "Leaf.chlorophyll.a.b.content.per.area_std"
[133] "Leaf.chlorophyll.per.area.measured.by.light.absoption.e.g.SPAD.chlorophyll.meter._std"
[134] "Leaf.delta.13C.Leaf.carbon.isotope.ratio._std"
[135] "Leaf.delta.15nitrogen_std"
[136] "Leaf.dry.matter.content.per.leaf.water.saturated.mass.LDMC._std"
[137] "Leaf.lamina.dry.matter.content.LDMC.petiole.excluded._std"
[138] "Leaf.nitrogen.content.per.area.Narea._std"
[139] "Leaf.nitrogen.content.per.dry.mass.Nmass._std"
[140] "Leaf.nitrogen.phosphorus.N.P.ratio_std"
[141] "Leaf.phosphorus.content.per.area.Parea._std"
[142] "Leaf.photosynthesis.Asat._std"
[143] "Leaf.photosynthesis.Amax._std"
[144] "Leaf.photosynthesis.PAR1200.Asat._std"
[145] "Leaf.photosynthesis.PAR50_std"
[146] "Leaf.thickness_std"
[147] "Leaf.water.content.per.leaf.water.saturated.mass.LWC.1.LDMC._std"
[148] "Net.photosynthetic.rate.at.100.PPFD_std"
[149] "Net.photosynthetic.rate.at.20.PPFD_std"
[150] "Net.photosynthetic.rate.at.200.PPFD_std"
[151] "Net.photosynthetic.rate.at.300.PPFD_std"
[152] "Net.photosynthetic.rate.at.50.PPFD_std"
[153] "Net.photosynthetic.rate.at.500.PPFD_std"
[154] "Net.photosynthetic.rate.at.800.PPFD_std"
[155] "Photosynthesis.per.leaf.area.at.leaf.temperature.Aarea._std"
[156] "Photosynthesis.per.leaf.dry.mass.Amass._std"
[157] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.25.C_std"
[158] "Photosynthetic.carboxylation.capacity.Vcmax.Vmax.per.leaf.area.at.leaf.temperature_std"
[159] "Photosynthetic.electron.transport.capacity.per.leaf.area.at.25C.Jmax25area._std"
[160] "Plant.height.relative.growth.rate.RGR_height._std"
[161] "Plant.relative.growth.rate.RGR._std"
[162] "SLA.undefined.if.petiole.in.or.excluded_std"
trAll <- fixPhyloNames( trAll )
tmp <- phyloTraits(tree = trAll, species = spp)
traitTab <- tmp$traitTab
mu_processed <- mu %>%
left_join(traitTab, by = c("species" = "code8"))
se_processed<-se%>%left_join(mu_processed, by = c("species" = "species"))write.csv(se_processed,"C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/mu_trait_0801.csv")process_genus_data <- function(mu_data, se_data, genus, min_se = 1e-6) {
subset_data <- mu_data[mu_data$genus == genus, ]
if(nrow(subset_data) == 0) return(NULL)
is_europe <- subset_data$region == "WEA"
is_na <- subset_data$region %in% c("ENA", "WNA")
species_data <- data.frame(
species = subset_data$species,
mu_shade = subset_data$bfec.shade,
se_shade = se_data$bfec.shade[match(subset_data$species, se_data$species)],
region = ifelse(is_europe, "EU",
ifelse(is_na, "NA", "OTHER"))
)
# Remove NA values, filter for EU and NA regions, and apply SE threshold
species_data <- species_data %>%
filter(!is.na(mu_shade),
!is.na(se_shade),
region %in% c("EU", "NA"),
se_shade >= min_se) # Filter out too small SE
if(nrow(species_data) == 0) return(NULL)
# Calculate weights
species_data$weight <- 1 / (species_data$se_shade^2)
regional_stats <- species_data %>%
group_by(region) %>%
summarise(
mean_shade = sum(mu_shade * weight) / sum(weight),
se_shade = sqrt(abs(sum(mu_shade^2 * weight) / sum(weight) - mean_shade^2)),
n = n(),
.groups = 'drop'
)
if(nrow(regional_stats) < 2) return(NULL)
eu_species <- sort(unique(subset_data$species[is_europe]))
na_species <- sort(unique(subset_data$species[is_na]))
n_eu <- length(eu_species)
n_na <- length(na_species)
result <- data.frame(
genus = genus,
na_value = regional_stats$mean_shade[regional_stats$region == "NA"],
eu_value = regional_stats$mean_shade[regional_stats$region == "EU"],
na_error = regional_stats$se_shade[regional_stats$region == "NA"],
eu_error = regional_stats$se_shade[regional_stats$region == "EU"],
n_eu = n_eu,
n_na = n_na
)
return(result)
}
genera_list <- c("Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
"Fraxinus", "Alnus", "Larix", "Carpinus", "Acer", "Prunus", "Tilia",
"Juniperus", "Ulmus", "Ostrya", "Arbutus")
results <- lapply(genera_list, function(g) process_genus_data(mu_processed, se, g, min_se=1e-6))
results_df <- do.call(rbind, results[!sapply(results, is.null)])
mu_processed$genus <- sapply(strsplit(mu_processed$taxon, " "), `[`, 1)
se$genus <- mu_processed$genus[match(se$species, mu_processed$species)]
results <- lapply(genera_list, function(g) process_genus_data(mu_processed, se, g, min_se=1e-6))
results_df <- do.call(rbind, results[!sapply(results, is.null)])
genera_list <- c(
"Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
"Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
"Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
)results <- lapply(genera_list, function(g) process_genus_data(mu_processed, se, g, min_se=1e-6))
results_df <- do.call(rbind, results[!sapply(results, is.null)])results_df <- results_df %>%
mutate(diff = eu_value - na_value)
plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = diff)) +
# 1:1 虚线参考线
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = 0.02) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = 0.02) +
geom_point(size = 3) +
geom_text_repel(aes(label = genus),
size = 4,
show.legend = FALSE,
box.padding = 0.3,
point.padding = 0.3,
min.segment.length = 0,
segment.color="grey80") +
scale_color_gradient2(
low = "blue4",
mid = "grey70",
high = "red4",
midpoint = 0
) +
scale_x_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
scale_y_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
# 1:1 纵横比
coord_fixed(ratio = 1) +
theme_bw() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position="right"
) +
labs(
x = "North America, shade sensitivity",
y = "Europe, shade sensitivity",
title = "Shade sensitivity comparison of genus")
plotresults_df <- results_df %>%
mutate(diff = eu_value - na_value)
plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = diff)) +
# 1:1 reference line
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
# Error bars
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = 0.02, linewidth = 0.5) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = 0.02, linewidth = 0.5) +
# Points with only color aesthetic (no fill)
geom_point(size = 4) +
# Improved text labels
geom_text_repel(aes(label = genus),
size = 4.5,
fontface = "bold",
show.legend = FALSE,
box.padding = 0.5,
point.padding = 0.3,
min.segment.length = 0,
segment.color = "grey50",
segment.size = 0.5,
max.overlaps = 30) +
# Deeper color gradient (only one scale)
scale_color_gradient2(
low = "blue4",
mid = "grey50",
high = "darkred",
midpoint = 0,
name = "EU - NA difference"
) +
scale_x_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
scale_y_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
# 1:1 aspect ratio
coord_fixed(ratio = 1) +
# Cleaner theme with no border
theme_minimal() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "grey90"),
axis.title = element_text(size = 14, face = "bold"),
axis.text = element_text(size = 12),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.margin = margin(20, 20, 20, 20)
) +
labs(
x = "North America, shade sensitivity",
y = "Europe, shade sensitivity",
title = "Shade sensitivity comparison by genus")
plotdiet diversity
mu_processed <- mu %>%
left_join(traitTab, by = c("species" = "code8"))
fruit_df <- mu_processed %>%
select(species, region, fruit, bfec.shade) %>%
left_join(
se %>% select(species, bfec.shade),
by = "species",
suffix = c(".mu", ".se")
) %>%
mutate(region = case_when(
region == "WEA" ~ "EU", # West Europe
region %in% c("ENA", "WNA") ~ "NA", # East/West North America
TRUE ~ "OTHER"
)) %>%
filter(region %in% c("EU", "NA")) %>%
filter(!fruit %in% c("achenosum"))%>%
filter(
!is.na(bfec.shade.mu),
!is.na(bfec.shade.se),
bfec.shade.se >= 1e-6
) %>%
mutate(weight = 1 / (bfec.shade.se^2))
# 对 (fruit, region) 两列进行分组汇总
fruit_results_df <- fruit_df %>%
group_by(fruit, region) %>%
summarise(
mean_shade = sum(bfec.shade.mu * weight) / sum(weight),
se_shade = sqrt(abs(
sum((bfec.shade.mu^2) * weight) / sum(weight) - mean_shade^2
)),
n = n(),
.groups = "drop"
) %>%
pivot_wider(
names_from = region,
values_from = c(mean_shade, se_shade, n)
) %>%
rename(
eu_value = mean_shade_EU,
na_value = mean_shade_NA,
eu_error = se_shade_EU,
na_error = se_shade_NA,
n_eu = n_EU,
n_na = n_NA
) %>%
filter(!is.na(eu_value), !is.na(na_value)) %>%
mutate(diff = eu_value - na_value)
fruit_results_df# A tibble: 10 x 8
fruit eu_value na_value eu_error na_error n_eu n_na diff
<chr> <dbl> <dbl> <dbl> <dbl> <int> <int> <dbl>
1 "" -1.87 -0.256 0.470 0.324 2 8 -1.61
2 "berry" -0.630 -0.225 0.00482 0.0920 3 3 -0.405
3 "berry-cone" -1.37 -0.314 0.270 0.180 2 4 -1.05
4 "capsule" -0.00409 -2.73 0.00698 0 2 1 2.73
5 "drupe" -0.454 -0.113 0.369 0.0807 8 10 -0.341
6 "nut" -0.182 -0.0160 0.657 0.0816 23 42 -0.166
7 "nutlet" -2.59 -2.49 0.714 0 2 1 -0.0991
8 "pod" -0.731 -0.0182 0 0.0192 1 2 -0.713
9 "pome" -1.52 -0.431 0.137 0 2 1 -1.09
10 "samara" -0.0132 -0.0000393 0.0882 0.00598 28 56 -0.0132
plot_fruit <- ggplot(fruit_results_df, aes(x = na_value, y = eu_value, color = diff)) +
# 1:1 虚线参考线
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = 0.02) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = 0.02) +
geom_point(size = 3) +
geom_text_repel(aes(label = fruit),
size = 4,
show.legend = FALSE,
box.padding = 0.3,
point.padding = 0.3,
min.segment.length = 0,
segment.color = "grey80") +
scale_color_gradient2(
low = "blue4",
mid = "grey70",
high = "red4",
midpoint = 0
) +
scale_x_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
scale_y_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
coord_fixed(ratio = 1) +
theme_bw() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "right"
) +
labs(
x = "North America",
y = "Europe",
title = "Shade sensitivity comparison of diet diverisity"
)
plot_fruitresults_df <- results_df %>%
mutate(diff = eu_value - na_value,
comparison = ifelse(eu_value < na_value, "Europe < North America", "Europe > North America"),
label_text = paste0(genus, "\n(n=", n_eu + n_na, ")"))
plot <- ggplot(results_df, aes(x = na_value, y = eu_value, color = comparison)) +
# 添加背景色分区 - 使用多边形
annotate("polygon",
x = c(0.3, -3.2, -3.2, 0.3),
y = c(0.3, -3.2, 0.3, 0.3),
fill = "#E8F4F8", alpha = 0.5) + # 上方区域(Europe > North America)
annotate("polygon",
x = c(0.3, -3.2, -3.2, 0.3),
y = c(0.3, -3.2, -3.2, -3.2),
fill = "#F0F8E8", alpha = 0.5) + # 下方区域(Europe < North America)
# 1:1 虚线参考线
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30", size = 0.8) +
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = 0.02,
size = 0.7,
alpha = 0.6) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = 0.02,
size = 0.7,
alpha = 0.6) +
# 数据点 - 根据分组着色
geom_point(size = 4,
alpha = 0.9) +
# 标签 - 放大字体,允许伸出边界
geom_text_repel(aes(label = label_text ), # 如果要用species,改为label = species
size = 6, # 增大字体
show.legend = FALSE,
box.padding = 0.3,
point.padding = 0.3,
min.segment.length = 0.05,
segment.color = "grey60",
segment.size = 0.4,
color = "grey30",
fontface = "italic", # 斜体(物种名常用)
max.overlaps = Inf, # 允许所有标签显示
force = 3,
force_pull = 0.8,
max.time = 3,
max.iter = 20000,
direction = "both",
xlim = c(-Inf, Inf), # 允许标签超出x轴范围
ylim = c(-Inf, Inf), # 允许标签超出y轴范围
seed = 42) +
# 手动设置颜色
scale_color_manual(values = c("Europe < North America" = "#2E7D32", # 深绿色
"Europe > North America" = "#1565C0"), # 深蓝色
name = "Comparison") +
# 反转X轴:只显示首尾刻度
scale_x_reverse(
limits = c(0.3, -3.2),
breaks = c(0, -3), # 只显示首尾
labels = c("0", "-3"),
expand = expansion(mult = 0.02)
) +
# 反转Y轴:只显示首尾刻度
scale_y_reverse(
limits = c(0.3, -3.2),
breaks = c(0, -3), # 只显示首尾
labels = c("0", "-3"),
expand = expansion(mult = 0.02)
) +
# 1:1 纵横比
coord_fixed(ratio = 1, clip = "off") + # clip = "off" 允许标签超出图表区域
theme_classic() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_rect(fill = "white", color = NA),
axis.title = element_text(size = 16, face = "bold"), # 把 13 改为 16 或更大
axis.text = element_text(size = 16),
axis.line = element_line(size = 0.8),
plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
plot.margin = margin(20, 30, 20, 20),
legend.position = "none"
)+
labs(
x = "North America",
y = "Europe")
plotplot_fruit <- ggplot(fruit_results_df, aes(x = na_value, y = eu_value, color = diff)) +
# 1:1 reference line
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
# Error bars with thicker lines
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = 0.02, linewidth = 0.5) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = 0.02, linewidth = 0.5) +
# Larger points with better visibility
geom_point(size = 4) +
# Improved text labels
geom_text_repel(aes(label = fruit),
size = 4.5,
fontface = "bold",
show.legend = FALSE,
box.padding = 0.5,
point.padding = 0.3,
min.segment.length = 0,
segment.color = "grey50",
segment.size = 0.5,
max.overlaps = 30) +
# Deeper color gradient
scale_color_gradient2(
low = "blue4",
mid = "grey50",
high = "darkred",
midpoint = 0,
name = "EU - NA difference"
) +
scale_x_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
scale_y_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
# 1:1 aspect ratio
coord_fixed(ratio = 1) +
# Cleaner theme with no border
theme_minimal() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "grey90"),
axis.title = element_text(size = 14, face = "bold"),
axis.text = element_text(size = 12),
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
plot.margin = margin(20, 20, 20, 20)
) +
labs(
x = "North America, shade sensitivity",
y = "Europe, shade sensitivity",
title = "Shade sensitivity comparison by fruit type")
plot_fruitdispersal
mu_processed <- mu %>%
left_join(traitTab, by = c("species" = "code8"))
dispersal_df <- mu_processed %>%
select(species, region, dispersal, bfec.shade) %>%
left_join(
se %>% select(species, bfec.shade),
by = "species",
suffix = c(".mu", ".se") # 区分合并后重名列
) %>%
# region 转化为 EU/NA, 其他的就标记为 OTHER
mutate(region = case_when(
region == "WEA" ~ "EU", # West Europe
region %in% c("ENA", "WNA") ~ "NA", # East/West North America
TRUE ~ "OTHER"
)) %>%
# 只保留 EU 和 NA
filter(region %in% c("EU", "NA")) %>%
filter(!dispersal %in% c("hydrochory"))%>%
# 去除 NA 并做最小 SE 的过滤
filter(
!is.na(bfec.shade.mu),
!is.na(bfec.shade.se),
bfec.shade.se >= 1e-6
) %>%
# 计算权重 (1/SE^2)
mutate(weight = 1 / (bfec.shade.se^2))
# 按 (dispersal, region) 两列分组,求加权平均值和标准误
dispersal_results_df <- dispersal_df %>%
group_by(dispersal, region) %>%
summarise(
mean_shade = sum(bfec.shade.mu * weight) / sum(weight),
se_shade = sqrt(abs(
sum((bfec.shade.mu^2) * weight) / sum(weight) - mean_shade^2
)),
n = n(),
.groups = "drop"
) %>%
# 宽表展开,以便得到 EU 和 NA 两列
pivot_wider(
names_from = region,
values_from = c(mean_shade, se_shade, n)
) %>%
# 重命名,方便可视化
rename(
eu_value = mean_shade_EU,
na_value = mean_shade_NA,
eu_error = se_shade_EU,
na_error = se_shade_NA,
n_eu = n_EU,
n_na = n_NA
) %>%
# 只保留同时有 EU 与 NA 信息的 dispersal
filter(!is.na(eu_value), !is.na(na_value)) %>%
# (可选)计算差值
mutate(diff = eu_value - na_value)
# 查看结果
dispersal_results_df# A tibble: 4 x 8
dispersal eu_value na_value eu_error na_error n_eu n_na diff
<chr> <dbl> <dbl> <dbl> <dbl> <int> <int> <dbl>
1 "" -0.00486 -0.00635 0.0299 0.316 10 11 0.00149
2 "anemochory" -0.0126 -0.0000375 0.0873 0.00624 24 47 -0.0126
3 "barochory" -0.598 -0.295 0.307 0.258 11 4 -0.303
4 "zoochory" -0.207 -0.00296 0.674 0.0292 28 71 -0.204
library(ggrepel)
plot_dispersal <- ggplot(dispersal_results_df, aes(x = na_value, y = eu_value, color = diff)) +
# 1:1 虚线参考线
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "gray30") +
geom_errorbar(aes(ymin = eu_value - eu_error, ymax = eu_value + eu_error),
width = 0.02) +
geom_errorbarh(aes(xmin = na_value - na_error, xmax = na_value + na_error),
height = 0.02) +
geom_point(size = 3) +
# 用 dispersal 作为文本标签
geom_text_repel(aes(label = dispersal),
size = 4,
show.legend = FALSE,
box.padding = 0.3,
point.padding = 0.3,
min.segment.length = 0,
segment.color = "grey80") +
scale_color_gradient2(
low = "blue4",
mid = "grey70",
high = "red4",
midpoint = 0
) +
# 这里演示与原先类似的坐标范围,你可酌情调整
scale_x_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
scale_y_continuous(
limits = c(-3, 0.5),
breaks = seq(-3, 0.5, 0.5),
expand = expansion(mult = 0.05)
) +
# 1:1
coord_fixed(ratio = 1) +
theme_bw() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "right"
) +
labs(
x = "North America",
y = "Europe",
title = "Shade sensitivity comparison of dispersal type"
)
plot_dispersalsetwd("R:/clark/clark.unix/makeMast/maps")
get_genus_isp <- function(continent = c("europe", "northAmerica"),
genus) {
# continent: "europe" or "northAmerica"
# genus: character, e.g. "Abies"
tdata <- getFittedIndividual(continent, genus = genus)
if(nrow(tdata) == 0) {
return( list(mean = NA, sd = NA) )
}
# 2) 计算 ISP
ISP <- tdata$fecGmMu / ( (tdata$diam / 2)^2 * pi )
# 3) 加权累加
i1 <- ISP * tdata$wtPerHa
i2 <- (ISP^2) * tdata$wtPerHa
# 4) 加权平均
sum_wt <- sum(tdata$wtPerHa, na.rm = TRUE)
if(sum_wt == 0) {
# 避免除以 0
return( list(mean = NA, sd = NA) )
}
MU <- sum(i1, na.rm = TRUE) / sum_wt
SD <- sqrt( (sum(i2, na.rm = TRUE) / sum_wt) - MU^2 )
# 以列表形式返回
return( list(mean = MU, sd = SD) )
}setwd("R:/clark/clark.unix/makeMast/maps")
invPath <- '../inventories'
genera_list <- c(
"Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula","Castanea",
"Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
"Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
)
genera_list <- tolower(genera_list)
genera_list [1] "pinus" "picea" "quercus" "fagus" "abies" "betula"
[7] "castanea" "fraxinus" "alnus" "larix" "carpinus" "acer"
[13] "prunus" "tilia" "juniperus" "ulmus" "ostrya" "arbutus"
# 遍历每个 genus,分别取 "europe", "northAmerica" 的 ISP
results_isp <- lapply(genera_list, function(g) {
eu_res <- get_genus_isp("europe", g)
na_res <- get_genus_isp("northAmerica", g)
data.frame(
genus = g,
eu_value = eu_res$mean,
eu_sd = eu_res$sd,
na_value = na_res$mean,
na_sd = na_res$sd
)
})
# 把列表组合成 data.frame
results_isp_df <- do.call(rbind, results_isp)
results_isp_df genus eu_value eu_sd na_value na_sd
1 pinus 0.04425046 0.13947398 0.045085542 0.311828218
2 picea 0.01000835 0.02563520 0.030352770 0.094867611
3 quercus 1.30462745 3.02823910 1.723802997 10.762136778
4 fagus 0.04362936 0.11001884 0.101029352 0.263519792
5 abies 0.02639642 0.06519988 0.030244262 0.079348122
6 betula 0.02992234 0.03382724 0.022416140 0.079033977
7 castanea 8.94515864 5.21243526 34.683751169 97.696610288
8 fraxinus 0.32642941 0.39375472 0.209692801 0.698789285
9 alnus 0.01800777 0.08891566 0.004502417 0.002379182
10 larix 0.83748591 1.02968040 0.339876710 1.167653518
11 carpinus 0.30568051 0.28451363 1.101144459 0.774381414
12 acer 0.44759677 0.59375091 0.555037358 1.103731557
13 prunus 0.26633540 0.42573410 0.135853083 0.598744166
14 tilia 0.22495711 0.27789523 0.039440871 0.065918767
15 juniperus 1.06111389 2.48942540 0.090142002 0.220846257
16 ulmus 0.07950197 0.15224935 0.014576553 0.034995716
17 ostrya 0.41018058 0.17899184 0.169710244 0.168627560
18 arbutus 2.46200024 2.35968589 2.681926391 5.712747273
results_isp_df <- results_isp_df %>%
mutate(
diff = eu_value - na_value,
comparison = ifelse(eu_value < na_value, "Europe < North America", "Europe > North America")
)
df_filtered <- results_isp_df %>%
filter(!genus %in% c("quercus", "castanea"))
plot_isp <- ggplot(df_filtered, aes(x = na_value, y = eu_value, color = comparison)) +
# 1:1 参考线
geom_abline(slope = 1, intercept = 0,
linetype = "dashed",
color = "gray30",
linewidth = 0.8) +
# Error bars - 根据分组着色
geom_errorbar(aes(ymin = eu_value - eu_sd, ymax = eu_value + eu_sd),
width = 0.05,
linewidth = 0.7,
alpha = 0.6) +
geom_errorbarh(aes(xmin = na_value - na_sd, xmax = na_value + na_sd),
height = 0.05,
linewidth = 0.7,
alpha = 0.6) +
# 数据点
geom_point(size = 3.5, alpha = 0.8) +
# 标签
geom_text_repel(
aes(label = genus),
size = 3.5,
show.legend = FALSE,
box.padding = 0.5,
point.padding = 0.4,
min.segment.length = 0.1,
segment.color = "grey70",
segment.size = 0.4,
color = "black",
max.overlaps = 20,
force = 2,
seed = 42
) +
# 手动设置颜色 - 与第一个图一致
scale_color_manual(values = c("Europe < North America" = "darkgreen",
"Europe > North America" = "darkblue"),
name = "Comparison") +
scale_x_log10(limits = c(0.005, 20),
breaks = c(10^-2, 10^-1, 10^0, 10^1)) +
scale_y_log10(limits = c(0.005, 20),
breaks = c(10^-2, 10^-1, 10^0, 10^1)) +
annotation_logticks(sides = "bl",
size = 0.3,
short = unit(0.07, "cm"),
mid = unit(0.14, "cm"),
long = unit(0.21, "cm")) +
coord_fixed(ratio = 1) +
theme_classic() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray90"),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
plot.margin = margin(10, 15, 10, 10),
legend.position = "bottom",
legend.title = element_text(size = 11, face = "bold"),
legend.text = element_text(size = 10)
) +
labs(
x = "North America fecundity(g seed/cm² basal area)",
y = "Europe fecundity(g seed/cm² basal area)",
title = "ISP comparison of genus"
)
plot_ispwrite.csv(results_isp_df,"C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_isp_df.csv")setwd("R:/clark/clark.unix/makeMast/maps")
# 定义函数来计算单个genus在单个continent的species-level ISP
get_species_isp <- function(continent, genus) {
tdata <- getFittedIndividual(continent, genus = genus)
# 如果没有数据,返回空
if(nrow(tdata) == 0) {
return(NULL)
}
# 计算ISP
ISP <- tdata$fecGmMu / ((tdata$diam/2)^2 * pi)
i1 <- ISP * tdata$wtPerHa
i2 <- ISP^2 * tdata$wtPerHa
# 按species分组计算
MU <- tapply(i1, tdata$species, sum, na.rm = T) /
tapply(tdata$wtPerHa, tdata$species, sum, na.rm = T)
SD <- sqrt(tapply(i2, tdata$species, sum, na.rm = T) /
tapply(tdata$wtPerHa, tdata$species, sum, na.rm = T) - MU^2)
n <- table(tdata$species)[names(MU)]
# 组合结果
result <- data.frame(
species = names(MU),
mean = signif(MU, 4),
sd = signif(SD, 4),
n = as.numeric(n),
row.names = NULL
)
return(result)
}
# genus列表
genera_list <- tolower(c(
"Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
"Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
"Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
))
# 创建一个列表来存储所有结果
all_results <- list()
# 遍历每个genus
for(genus in genera_list) {
cat("Processing genus:", genus, "\n")
# Europe数据
eu_result <- get_species_isp("europe", genus)
if(!is.null(eu_result)) {
eu_result$continent <- "europe"
eu_result$genus <- genus
}
# North America数据
na_result <- get_species_isp("northAmerica", genus)
if(!is.null(na_result)) {
na_result$continent <- "northAmerica"
na_result$genus <- genus
}
# 存储结果
all_results[[paste0(genus, "_europe")]] <- eu_result
all_results[[paste0(genus, "_northAmerica")]] <- na_result
}Processing genus: pinus
Processing genus: picea
Processing genus: quercus
Processing genus: fagus
Processing genus: abies
Processing genus: betula
Processing genus: castanea
Processing genus: fraxinus
Processing genus: alnus
Processing genus: larix
Processing genus: carpinus
Processing genus: acer
Processing genus: prunus
Processing genus: tilia
Processing genus: juniperus
Processing genus: ulmus
Processing genus: ostrya
Processing genus: arbutus
# 合并所有结果为一个大的data.frame
all_species_isp <- do.call(rbind, all_results[!sapply(all_results, is.null)])
rownames(all_species_isp) <- NULL
# 查看结果
head(all_species_isp) species mean sd n continent genus
1 pinusBanksian 0.0014600 0.002573 1134 europe pinus
2 pinusBrutia 1.3090000 0.990100 216 europe pinus
3 pinusCanarien 0.2325000 0.419100 702 europe pinus
4 pinusCembra 1.8880000 1.846000 14562 europe pinus
5 pinusContorta 0.0007107 0.001878 34290 europe pinus
6 pinusHalepens 0.2533000 0.323000 1392534 europe pinus
# 如果你想要以genus为单位查看结果,可以这样:
for(genus in genera_list) {
cat("\n========", toupper(genus), "========\n")
# Europe结果
eu_data <- all_species_isp[all_species_isp$genus == genus &
all_species_isp$continent == "europe", ]
if(nrow(eu_data) > 0) {
cat("\nEurope:\n")
print(eu_data[, c("species", "mean", "sd", "n")])
}
# North America结果
na_data <- all_species_isp[all_species_isp$genus == genus &
all_species_isp$continent == "northAmerica", ]
if(nrow(na_data) > 0) {
cat("\nNorth America:\n")
print(na_data[, c("species", "mean", "sd", "n")])
}
}
======== PINUS ========
Europe:
species mean sd n
1 pinusBanksian 0.0014600 0.002573 1134
2 pinusBrutia 1.3090000 0.990100 216
3 pinusCanarien 0.2325000 0.419100 702
4 pinusCembra 1.8880000 1.846000 14562
5 pinusContorta 0.0007107 0.001878 34290
6 pinusHalepens 0.2533000 0.323000 1392534
7 pinusMugo 0.0048760 0.009245 2232
8 pinusNigra 0.1453000 0.164800 1491966
9 pinusPinaster 0.1082000 0.125400 2429226
10 pinusPinea 0.1944000 0.243600 446076
11 pinusPonderos 0.0096620 0.017790 72
12 pinusRadiata 0.0388600 0.037540 354636
13 pinusRigida 0.0103000 0.011800 198
14 pinusStrobus 0.0132600 0.022490 14274
15 pinusSylvestr 0.0074010 0.011550 8629128
16 pinusTaeda 0.0040080 0.006248 324
17 pinusUncinata 0.0377400 0.033720 282366
18 pinusUNKN 0.0469200 0.058360 44226
North America:
species mean sd n
19 pinusAlbicaul 0.0310800 0.0992900 235386
20 pinusAristata 0.0155600 0.0189200 17190
21 pinusArizonic 0.0425400 0.0431500 324
22 pinusAttenuat 0.0165100 0.0230200 11358
23 pinusBalfouri 0.0351800 0.0359800 6624
24 pinusBanksian 0.0067780 0.0078630 1795176
25 pinusCanarien 0.0123900 0.0280000 216
26 pinusCembroid 0.2320000 0.3161000 10170
27 pinusClausa 0.9519000 0.8907000 102438
28 pinusContorta 0.0047950 0.0193000 4910310
29 pinusCoulteri 0.1241000 0.1983000 2052
30 pinusDensiflo 0.0004504 0.0007962 18
31 pinusDiscolor 0.0810900 0.1064000 15300
32 pinusEchinata 0.0154800 0.0335100 720666
33 pinusEdulis 0.3437000 1.0350000 1071036
34 pinusElliotti 0.0184100 0.0413100 1479492
35 pinusEngelman 0.0556200 0.0451800 522
36 pinusFlexilis 0.0826300 0.3296000 133452
37 pinusGlabra 0.0630200 0.0808800 21204
38 pinusHalepens 0.0212300 0.0217000 432
39 pinusJeffreyi 0.0447700 0.1678000 126972
40 pinusLamberti 0.2882000 1.9370000 97092
41 pinusLeiophyl 0.0090610 0.0120700 2286
42 pinusLongaeva 0.0256900 0.0244500 5274
43 pinusMonophyl 0.5141000 1.0880000 449226
44 pinusMonticol 0.0102100 0.0137700 87138
45 pinusMuricata 0.0119700 0.0136500 2232
46 pinusNigra 0.0163600 0.0187800 5508
47 pinusPalustri 0.0945800 0.1854000 381564
48 pinusPonderos 0.0562500 0.1526000 2522898
49 pinusPungens 0.9850000 0.9185000 20610
50 pinusRadiata 0.2424000 0.1571000 990
51 pinusRemota 0.0775700 0.1138000 2484
52 pinusResinosa 0.0229300 0.0318100 900450
53 pinusRigida 0.0546600 0.0492000 191358
54 pinusSabinian 0.7366000 1.9500000 17532
55 pinusSerotina 0.4726000 0.4707000 84564
56 pinusStrobifo 0.1394000 0.1739000 20016
57 pinusStrobus 0.0007767 0.0019090 1049382
58 pinusSylvestr 0.0056690 0.0073930 88794
59 pinusTaeda 0.0016920 0.0034410 9346320
60 pinusThunberg 0.0198800 0.0153200 234
61 pinusTorreyan 0.1646000 0.3669000 108
62 pinusUNKN 0.0335300 0.0284700 6318
63 pinusVirginia 0.1024000 0.1395000 588168
64 pinusWashoens 0.0602900 0.0925300 702
======== PICEA ========
Europe:
species mean sd n
65 piceaAbies 0.009991 0.02560 5248800
66 piceaOmorika 0.050030 0.09856 720
67 piceaPungens 0.017890 0.03730 504
68 piceaSitchens 0.009411 0.01907 51894
69 piceaUNKN 0.013440 0.03289 4680
North America:
species mean sd n
70 piceaAbies 0.054020 0.077370 58302
71 piceaBreweria 0.053250 0.064650 702
72 piceaEngelman 0.117600 0.198500 1124568
73 piceaGlauca 0.001533 0.007358 2968488
74 piceaMariana 0.027370 0.075460 3939210
75 piceaPungens 0.112500 0.123200 42894
76 piceaRubens 0.004426 0.043770 589140
77 piceaSitchens 0.032510 0.095390 316692
78 piceaUNKN 0.065160 0.089560 143658
======== QUERCUS ========
Europe:
species mean sd n
79 quercusCanarien 3.0200 3.2560 18792
80 quercusCerris 2.4280 5.1070 260928
81 quercusCoccifer 1.0250 0.4979 198
82 quercusFaginea 1.0310 1.6340 235764
83 quercusFrainett 5.9160 14.9100 20412
84 quercusIlex 2.2750 3.9570 1315854
85 quercusIthabure 0.3750 0.1217 36
86 quercusPalustri 0.4794 0.3964 72
87 quercusPetraea 1.3060 2.0250 1180008
88 quercusPubescen 1.5730 5.0570 640872
89 quercusPyrenaic 0.6349 0.8588 497394
90 quercusRobur 0.6481 0.8935 1182384
91 quercusRubra 0.4323 0.7583 70452
92 quercusSuber 2.4750 2.8480 424638
93 quercusTrojana 5.1420 8.8910 3420
94 quercusUNKN 0.2773 0.4122 338148
North America:
species mean sd n
95 quercusAcutissi 0.12820 0.19850 288
96 quercusAgrifoli 12.16000 13.95000 53550
97 quercusAlba 2.36100 5.18300 1795518
98 quercusArizonic 0.15130 0.25100 156960
99 quercusBicolor 1.31300 1.46900 28908
100 quercusBuckleyi 0.50950 0.74540 17838
101 quercusChrysole 15.77000 61.91000 242028
102 quercusCinerea 0.10050 0.15190 36
103 quercusCoccinea 0.39180 0.77070 426348
104 quercusDouglasi 3.13100 7.90300 78498
105 quercusEllipsoi 0.20760 0.31790 149634
106 quercusEmoryi 0.03841 0.05844 62262
107 quercusEngelman 0.69230 0.90020 504
108 quercusFalcata 0.13870 0.41450 367218
109 quercusFusiform 0.68170 0.80970 1152
110 quercusGambelii 1.04600 3.31600 314514
111 quercusGarryana 5.14500 18.80000 99720
112 quercusGeminata 0.74540 1.67000 1116
113 quercusGracilif 0.47630 0.63440 342
114 quercusGravesii 0.57110 0.64800 288
115 quercusGrisea 0.49990 0.78010 29538
116 quercusHavardii 1.31800 0.92650 90
117 quercusHemispha 0.53100 0.92510 1332
118 quercusHypoleuc 0.42470 0.77540 25740
119 quercusIlicifol 0.03513 0.06779 612
120 quercusImbricar 11.96000 21.81000 43164
121 quercusIncana 0.44170 0.62360 5976
122 quercusKelloggi 0.58450 1.21500 148320
123 quercusLaceyi 0.37540 0.57980 5490
124 quercusLaevis 0.66200 1.17900 37674
125 quercusLaurifol 0.45160 0.75760 267570
126 quercusLobata 2.02500 6.45000 7056
127 quercusLyrata 2.80500 8.44300 47988
128 quercusMacrocar 0.58040 0.82830 242892
129 quercusMargaret 0.39260 0.88970 12672
130 quercusMariland 2.47600 4.44800 131058
131 quercusMichauxi 1.02700 2.62600 35136
132 quercusMinima 0.10180 0.20610 12762
133 quercusMontana 0.80980 2.10900 926316
134 quercusMuehlenb 1.08700 1.79300 104022
135 quercusNigra 1.40800 2.96100 747468
136 quercusOblongif 0.73220 0.87960 5994
137 quercusOglethor 0.13380 0.24440 720
138 quercusPagoda 0.73790 1.87000 101286
139 quercusPalustri 0.59540 0.75000 44910
140 quercusPhellos 0.13950 0.44570 150642
141 quercusPolymorp 0.47950 0.51050 198
142 quercusRubra 1.58100 3.29000 1191816
143 quercusRugosa 0.72920 1.17400 8424
144 quercusShumardi 2.39400 13.45000 37440
145 quercusSimilis 0.31500 0.45870 576
146 quercusSinuata 0.41580 0.64840 2484
147 quercusStellata 1.73300 3.57500 822816
148 quercusTexana 0.75450 1.12600 29448
149 quercusUNKN 0.39290 0.61820 1134
150 quercusVelutina 0.41410 0.84210 799470
151 quercusVirginia 0.14560 0.24050 267606
152 quercusWislizen 5.11700 14.08000 54126
======== FAGUS ========
Europe:
species mean sd n
153 fagusSylvatic 0.04363 0.11 3485520
North America:
species mean sd n
154 fagusGrandifo 0.1010 0.2635 999198
155 fagusSylvatic 0.1069 0.2357 180
======== ABIES ========
Europe:
species mean sd n
156 abiesAlba 0.02483 0.05976 945054
157 abiesBorisii 0.00000 0.00000 2718
158 abiesCephalon 0.23570 0.31640 6444
159 abiesGrandis 0.02777 0.02670 17226
160 abiesNordmann 0.37070 0.34440 792
161 abiesPinsapo 0.07382 0.07938 7506
162 abiesProcera 0.04316 0.02761 72
163 abiesUNKN 0.03394 0.04016 7668
North America:
species mean sd n
164 abiesAmabilis 0.02554 0.17730 521118
165 abiesBalsamea 0.01830 0.03015 3814128
166 abiesConcolor 0.01108 0.02268 865422
167 abiesFraseri 0.01424 0.02281 5328
168 abiesGrandis 0.01097 0.02017 655344
169 abiesLasiocar 0.08808 0.13590 1449360
170 abiesLowiana 0.05318 0.06235 4374
171 abiesMagnific 0.02638 0.04832 155916
172 abiesProcera 0.03261 0.05248 76752
173 abiesShastens 0.04554 0.05923 74160
174 abiesUNKN 0.05914 0.05597 11268
======== BETULA ========
Europe:
species mean sd n
175 betulaObscura 0.10990 0.04730 18
176 betulaPendula 0.03009 0.03180 719694
177 betulaPubescen 0.02002 0.02416 126810
178 betulaUNKN 0.04162 0.05752 325872
North America:
species mean sd n
179 betulaAlba 0.002213 0.004830 1026
180 betulaAlleghan 0.049000 0.134400 789318
181 betulaCordifol 0.025980 0.031960 90
182 betulaLenta 0.025080 0.057660 381474
183 betulaNeoalask 0.004571 0.006599 5832
184 betulaNigra 0.131200 0.213800 69426
185 betulaOccident 0.004092 0.006812 1476
186 betulaPapyrife 0.008643 0.012220 1916856
187 betulaPendula 0.006737 0.012470 198
188 betulaPopulifo 0.000751 0.002868 40464
189 betulaUNKN 0.021140 0.037020 5652
======== CASTANEA ========
Europe:
species mean sd n
190 castaneaSativa 8.945 5.212 771390
North America:
species mean sd n
191 castaneaDentata 27.51 98.560 1404
192 castaneaMollisim 0.00 0.000 18
193 castaneaMollissi 110.50 108.800 198
194 castaneaPumila 12.41 8.849 198
======== FRAXINUS ========
Europe:
species mean sd n
195 fraxinusAmerican 0.007665 0.01574 270
196 fraxinusAngustif 0.195700 0.41110 28998
197 fraxinusExcelsio 0.335100 0.39500 470520
198 fraxinusOrnus 0.159600 0.25480 37476
199 fraxinusUNKN 0.013890 0.02171 162
North America:
species mean sd n
200 fraxinusAmerican 0.02427 0.06217 740232
201 fraxinusBerlandi 0.39470 0.49890 450
202 fraxinusCarolini 0.28260 0.58600 10152
203 fraxinusExcelsio 0.05046 0.12610 126
204 fraxinusLatifoli 0.09724 0.13950 10926
205 fraxinusNigra 0.51420 1.24500 544932
206 fraxinusPennsylv 0.14620 0.19520 737622
207 fraxinusProfunda 0.91240 1.18700 11610
208 fraxinusQuadrang 0.16900 0.22660 9234
209 fraxinusTexensis 0.33260 0.61120 3834
210 fraxinusUNKN 0.23560 0.48020 4338
211 fraxinusVelutina 0.24510 0.27510 1278
======== ALNUS ========
Europe:
species mean sd n
212 alnusCordata 0.147400 0.090060 12546
213 alnusGlutinos 0.006122 0.002953 505818
214 alnusIncana 0.011570 0.012830 55602
215 alnusUNKN 0.475100 0.374500 7758
216 alnusViridis 0.779900 0.502200 72
North America:
species mean sd n
217 alnusGlutinos 0.001179 0.0005337 1368
218 alnusOblongif 0.019870 0.0076840 288
219 alnusRhombifo 0.006103 0.0024880 7794
220 alnusRubra 0.004466 0.0019290 352404
221 alnusUNKN 0.206600 0.0725700 18
======== LARIX ========
Europe:
species mean sd n
222 larixDecidua 0.9265 1.1320 359082
223 larixKaempfer 0.5057 0.4163 72648
224 larixUNKN 1.0660 0.8238 24732
North America:
species mean sd n
225 larixDecidua 0.7080 0.08633 36
226 larixLaricina 0.1121 0.21890 427968
227 larixLyallii 6.3730 6.56700 8892
228 larixOccident 0.3658 0.79870 389376
229 larixUNKN 1.8380 2.00800 18450
======== CARPINUS ========
Europe:
species mean sd n
230 carpinusBetulus 0.3042 0.2811 668736
231 carpinusOriental 1.4340 0.4942 2664
North America:
species mean sd n
232 carpinusBetulus 0.2678 0.09517 144
233 carpinusCarolini 1.1020 0.77430 145260
======== ACER ========
Europe:
species mean sd n
234 acerCampestr 0.3993 0.36590 114318
235 acerLobelii 1.1920 1.65900 1404
236 acerMonspess 0.1470 0.22680 17118
237 acerNegundo 4.4210 4.94700 2340
238 acerOpalus 0.2499 0.39660 20178
239 acerPlatanoi 0.8989 0.89870 36414
240 acerPseudopl 0.4145 0.37100 273690
241 acerTataricu 0.1690 0.09794 54
242 acerUNKN 0.4146 0.45050 11466
North America:
species mean sd n
243 acerBarbatum 0.108100 0.127200 34128
244 acerCircinat 0.003205 0.007572 954
245 acerFloridan 0.561400 0.349400 342
246 acerGinnala 0.465200 0.407900 1242
247 acerGlabrum 0.297000 0.376400 14256
248 acerGrandide 0.662100 0.600100 29826
249 acerGriseum 0.247700 0.272500 90
250 acerLeucoder 0.377600 0.345700 270
251 acerMacrophy 0.682900 2.065000 153630
252 acerNegundo 3.470000 3.957000 308052
253 acerNigrum 0.141100 0.151400 8748
254 acerPalmatum 0.423200 0.395700 1332
255 acerPensylva 0.026350 0.011660 58086
256 acerPlatanoi 0.467500 0.655500 16146
257 acerPseudopl 0.020070 0.106000 450
258 acerRubrum 0.553900 0.571100 4999896
259 acerSacchari 0.723000 1.395000 204516
260 acerSaccharu 0.249200 0.205700 2883636
261 acerSpicatum 0.043200 0.039310 1188
262 acerUNKN 0.127300 0.113500 306
263 acerXfreema 0.000000 0.000000 1404
======== PRUNUS ========
Europe:
species mean sd n
264 prunusAmygdalu 0.8336 0.6500 36
265 prunusAvium 0.2787 0.5066 67896
266 prunusCerasife 1.0490 0.7633 54
267 prunusCerasus 0.2624 0.3734 90
268 prunusDomestic 1.2040 1.4820 1890
269 prunusInsititi 0.5533 0.5787 162
270 prunusLusitani 0.8774 0.2298 18
271 prunusMahaleb 0.1514 0.1947 1044
272 prunusPadus 0.2677 0.2980 71010
273 prunusSerotina 0.1069 0.1465 13608
274 prunusSpinosa 0.6915 0.6221 756
275 prunusUNKN 0.1356 0.1491 3978
North America:
species mean sd n
276 prunusAmerican 3.94100 3.99300 2592
277 prunusArmeniac 1.11700 0.70680 90
278 prunusAvium 0.76910 0.68640 9108
279 prunusCarolini 5.10300 2.97600 324
280 prunusCerasife 0.27110 0.40750 288
281 prunusCerasus 0.90700 0.72250 234
282 prunusDomestic 0.23610 0.32330 90
283 prunusEmargina 0.44590 0.58310 13374
284 prunusLaurocer 1.41700 1.50200 432
285 prunusNigra 0.27760 0.34570 108
286 prunusPensylva 0.41680 0.31020 86940
287 prunusPersica 17.27000 15.77000 180
288 prunusSalicina 0.48760 0.38650 18
289 prunusSerotina 0.06001 0.09305 1041012
290 prunusSerrulat 0.09484 0.12760 540
291 prunusUmbellat 1.90600 1.20700 18
292 prunusUNKN 0.48780 0.42800 4482
293 prunusVirginia 1.07100 2.06000 4824
294 prunusXyedoen 0.00000 0.00000 216
======== TILIA ========
Europe:
species mean sd n
295 tiliaCordata 0.3345 0.3057 71532
296 tiliaPlatyphy 0.1140 0.1941 30132
297 tiliaTomentos 0.6108 0.4902 1692
298 tiliaUNKN 0.1834 0.2530 33750
299 tiliaVulgaris 0.1616 0.2145 234
North America:
species mean sd n
300 tiliaAmerican 0.03955 0.06602 542484
301 tiliaCordata 0.01129 0.01862 1908
302 tiliaTomentos 0.03310 0.03970 72
303 tiliaUNKN 0.05299 0.06612 126
======== JUNIPERUS ========
Europe:
species mean sd n
304 juniperuCommunis 1.0520 0.8673 2826
305 juniperuOxycedru 2.9300 3.2680 18162
306 juniperuPhoenice 14.4900 6.4810 2970
307 juniperuThurifer 0.4476 0.4499 132066
308 juniperuUNKN 0.4657 0.2044 18
North America:
species mean sd n
309 juniperuAshei 0.033960 0.049870 405306
310 juniperuCaliforn 0.064500 0.125800 13356
311 juniperuChinensi 0.001531 0.001572 198
312 juniperuCoahuile 0.048970 0.075760 43578
313 juniperuDeppeana 0.011220 0.018400 205416
314 juniperuFlaccida 0.117100 0.035500 18
315 juniperuMonosper 0.394000 0.443700 563094
316 juniperuOccident 0.013520 0.020320 206604
317 juniperuOsteospe 0.033850 0.069210 1145682
318 juniperuPinchoti 0.029910 0.037090 108594
319 juniperuScopulor 0.061380 0.048700 292752
320 juniperuUNKN 0.068410 0.122800 1800
321 juniperuVirginia 0.042350 0.045260 790272
======== ULMUS ========
Europe:
species mean sd n
322 ulmusCanescen 0.37980 0.21940 90
323 ulmusCarpinif 0.02184 0.04042 90
324 ulmusGlabra 0.13010 0.23880 5652
325 ulmusLaevis 0.10600 0.14220 810
326 ulmusMinor 0.07611 0.14840 26370
327 ulmusPumila 0.17880 0.03654 54
328 ulmusUNKN 0.07522 0.13730 17766
North America:
species mean sd n
329 ulmusAlata 0.003175 0.008052 308988
330 ulmusAmerican 0.009848 0.021280 780390
331 ulmusCrassifo 0.017050 0.023260 80118
332 ulmusParvifol 0.019980 0.016150 756
333 ulmusProcera 0.021620 0.026380 54
334 ulmusPumila 0.019900 0.024580 25830
335 ulmusRubra 0.046550 0.065070 214056
336 ulmusSerotina 0.004726 0.006197 468
337 ulmusThomasii 0.163000 0.230000 2646
338 ulmusUNKN 0.009922 0.016360 2862
======== OSTRYA ========
Europe:
species mean sd n
339 ostryaCarpinif 0.4102 0.179 124560
North America:
species mean sd n
340 ostryaVirginia 0.1697 0.1686 230742
======== ARBUTUS ========
Europe:
species mean sd n
341 arbutusUnedo 2.462 2.36 28278
North America:
species mean sd n
342 arbutusArizonic 18.0700 13.3700 378
343 arbutusMenziesi 2.6410 5.6290 154134
344 arbutusUnedo 0.4635 0.2615 18
345 arbutusXalapens 6.0820 5.4040 126
# 也可以创建一个宽格式的表格,方便比较
library(tidyr)
library(dplyr)
# 创建宽格式表格(可选)
wide_format <- all_species_isp %>%
pivot_wider(
id_cols = c(genus, species),
names_from = continent,
values_from = c(mean, sd, n),
names_sep = "_"
) %>%
arrange(genus, species)
print(wide_format)# A tibble: 295 x 8
genus species mean_europe mean_northAmerica sd_europe sd_northAmerica
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 abies abiesAlba 0.0248 NA 0.0598 NA
2 abies abiesAmabilis NA 0.0255 NA 0.177
3 abies abiesBalsamea NA 0.0183 NA 0.0302
4 abies abiesBorisii 0 NA 0 NA
5 abies abiesCephalon 0.236 NA 0.316 NA
6 abies abiesConcolor NA 0.0111 NA 0.0227
7 abies abiesFraseri NA 0.0142 NA 0.0228
8 abies abiesGrandis 0.0278 0.0110 0.0267 0.0202
9 abies abiesLasiocar NA 0.0881 NA 0.136
10 abies abiesLowiana NA 0.0532 NA 0.0624
# i 285 more rows
# i 2 more variables: n_europe <dbl>, n_northAmerica <dbl>
write.csv(all_species_isp, "C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/species_isp_results.csv", row.names = FALSE)
write.csv(wide_format, "C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/species_isp_wide_format.csv", row.names = FALSE)results_isp_df <- read.csv("C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_isp_df.csv")results_isp_df <- results_isp_df %>%
mutate(
diff = eu_value - na_value,
ratio = eu_value / na_value,
sig_diff = abs(diff) > (eu_sd + na_sd)
)
df_filtered <- results_isp_df %>%
filter(!genus %in% c("quercus", "castanea"))
ggplot(df_filtered, aes(x = na_value, y = eu_value, color = diff)) +
# 背景网格和参考线
geom_abline(slope = 1, intercept = 0,
linetype = "dashed",
color = "gray30",
linewidth = 0.5) +
# 重要:先绘制error bars
geom_errorbar(aes(
ymin = eu_value - eu_sd,
ymax = eu_value + eu_sd
),
width = 0.05,
linewidth = 0.3,
alpha = 0.8, # 提高不透明度
color = "gray40") + # 统一颜色
geom_errorbarh(aes(
xmin = na_value - na_sd,
xmax = na_value + na_sd
),
height = 0.05,
linewidth = 0.3,
alpha = 0.8, # 提高不透明度
color = "gray40") +
geom_point(size = 3) +
geom_text_repel(
aes(label = genus,
fontface = ifelse(sig_diff, "bold", "plain")),
size = 3.5,
box.padding = 0.5,
min.segment.length = 0,
max.overlaps = Inf,
segment.color = "gray50",
segment.alpha = 0.6
) +
scale_color_gradient2(
low = "#2c7fb8",
mid = "#2b2b2b",
high = "#7fbc41",
midpoint = 0,
limits = c(-2, 2),
name = "EU - NA difference"
) +
scale_x_log10(limits = c(0.005, 20),
breaks = c(0.01, 0.1, 1, 10)) +
scale_y_log10(limits = c(0.005, 20),
breaks = c(0.01, 0.1, 1, 10)) +
annotation_logticks(sides = "bl",
size = 0.3,
short = unit(0.07, "cm"),
mid = unit(0.14, "cm"),
long = unit(0.21, "cm")) +
coord_fixed(ratio = 1) +
theme_bw(base_size = 12) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray90"),
axis.text = element_text(color = "black"),
legend.position = "right",
legend.key.height = unit(1.5, "cm"),
plot.title = element_text(size = 13, face = "bold"),
plot.subtitle = element_text(size = 11, color = "gray30")
) +
labs(
x = "North American ISP (log scale)",
y = "European ISP (log scale)",
title = "Comparison of ISP Values Between NA and EU",
subtitle = "Point size indicates measurement certainty; bold labels show significant regional differences"
)setwd("R:/clark/clark.unix/makeMast/maps")
genus <- 'quercus'
tdata <- getFittedIndividual( 'europe', genus = genus )
ISP <- tdata$fecGmMu/(tdata$diam/2)^2/pi # weight by plot area
i1 <- ISP*tdata$wtPerHa
i2 <- ISP^2*tdata$wtPerHa
MU <- tapply( i1, tdata$species, sum, na.rm = T )/
tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T )
SD <- sqrt( tapply( i2, tdata$species, sum, na.rm = T )/
tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T ) - MU^2 )
n <- table( tdata$species )[names(MU)]
etable <- cbind( signif(MU, 4), signif(SD, 4 ), n )
tdata <- getFittedIndividual( 'northAmerica', genus = genus )
ISP <- tdata$fecGmMu/(tdata$diam/2)^2/pi # weight by plot area
i1 <- ISP*tdata$wtPerHa
i2 <- ISP^2*tdata$wtPerHa
MU <- tapply( i1, tdata$species, sum, na.rm = T )/
tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T )
SD <- sqrt( tapply( i2, tdata$species, sum, na.rm = T )/
tapply( tdata$wtPerHa, tdata$species, sum, na.rm = T ) - MU^2 )
n <- table( tdata$species )[ names(MU) ]
ntable <- cbind( signif(MU, 4), signif(SD, 4 ), n )results_isp_df <- results_isp_df %>%
filter(!genus %in% c("castanea"))
# 计算最小值并确定shift值
min_val <- min(c(log(results_isp_df$eu_value), log(results_isp_df$na_value)))
shift <- ceiling(abs(min_val)) + 1 # 动态计算需要加多少
results_isp_log_df <- results_isp_df %>%
mutate(
log_eu_value = log(eu_value) + shift,
log_na_value = log(na_value) + shift,
cv_eu = eu_sd / eu_value,
cv_na = na_sd / na_value,
# No capping - use actual CV
log_eu_lower = log(pmax(eu_value - eu_sd, 0.001)) + shift,
log_eu_upper = log(eu_value + eu_sd) + shift,
log_na_lower = log(pmax(na_value - na_sd, 0.001)) + shift,
log_na_upper = log(na_value + na_sd) + shift,
diff = log_eu_value - log_na_value,
eu_higher = log_eu_value > log_na_value
)
na_order <- results_isp_log_df %>%
arrange(desc(log_na_value)) %>%
pull(genus)
df_diff <- results_isp_log_df %>%
mutate(
genus = factor(genus, levels = na_order),
x_pos = as.numeric(factor(genus, levels = na_order)),
y_start = pmin(log_eu_value, log_na_value),
y_end = pmax(log_eu_value, log_na_value),
diff_color = case_when(
eu_higher ~ "Europe Higher",
TRUE ~ "North America Higher"
)
)
df_eu <- results_isp_log_df %>%
transmute(
genus = factor(genus, levels = na_order),
region = "Europe",
value = log_eu_value,
ymin = log_eu_lower,
ymax = log_eu_upper
)
df_na <- results_isp_log_df %>%
transmute(
genus = factor(genus, levels = na_order),
region = "North America",
value = log_na_value,
ymin = log_na_lower,
ymax = log_na_upper
)
df_long <- bind_rows(df_eu, df_na)
p <- ggplot() +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray60", linewidth = 0.5) +
geom_bar(data = df_long,
aes(x = genus, y = value, fill = region),
stat = "identity",
position = position_dodge(width = 0.8),
width = 0.7,
alpha = 0.7) +
geom_segment(data = df_diff,
aes(x = x_pos - 0.2,
xend = x_pos + 0.2,
y = y_start,
yend = y_start,
color = diff_color),
linewidth = 1.5,
alpha = 0.8) +
geom_segment(data = df_diff,
aes(x = x_pos - 0.2,
xend = x_pos + 0.2,
y = y_end,
yend = y_end,
color = diff_color),
linewidth = 1.5,
alpha = 0.8) +
# Vertical connection lines WITHOUT arrows
geom_segment(data = df_diff,
aes(x = x_pos,
xend = x_pos,
y = y_start,
yend = y_end,
color = diff_color),
linewidth = 2,
alpha = 0.9) +
geom_errorbar(data = df_long,
aes(x = genus, ymin = ymin, ymax = ymax, group = region),
position = position_dodge(width = 0.8),
width = 0.25,
linewidth = 0.4,
color = "gray30",
alpha = 0.7) +
scale_fill_manual(
values = c("Europe" = "#4E79A7", "North America" = "#E15759"),
name = "Region"
) +
scale_color_manual(
values = c("Europe Higher" = "#2ca02c", "North America Higher" = "#ff7f0e"),
name = "Difference"
) +
theme_minimal(base_size = 14) +
theme(
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
axis.text.x = element_text(angle = 55, hjust = 1, size = 16, face = "bold"),
legend.position = "right",
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5, face = "italic"),
plot.margin = margin(20, 20, 20, 20)
) +
labs(
x = "Genus",
y = "ISP (log)",
)
print(p)setwd("R:/clark/clark.unix/makeMast/maps")
invPath <- '../inventories'
calculateGenusStats <- function(tdata) {
if(nrow(tdata) == 0) {
return(list(mean = NA, sd = NA, n = 0))
}
# Calculate ISP
ISP <- tdata$fecGmMu/(tdata$diam/2)^2/pi
# Weight by plot area
i1 <- ISP * tdata$wtPerHa
i2 <- ISP^2 * tdata$wtPerHa
# Calculate weighted mean
MU <- tapply(i1, tdata$species, sum, na.rm = TRUE) /
tapply(tdata$wtPerHa, tdata$species, sum, na.rm = TRUE)
# Calculate weighted SD
SD <- sqrt(
tapply(i2, tdata$species, sum, na.rm = TRUE) /
tapply(tdata$wtPerHa, tdata$species, sum, na.rm = TRUE) - MU^2
)
n <- table(tdata$species)[names(MU)]
# Calculate genus-level statistics
weighted.mean <- sum(MU * n, na.rm = TRUE) / sum(n, na.rm = TRUE)
weighted.var <- sum(n * (SD^2 + (MU - weighted.mean)^2), na.rm = TRUE) /
sum(n, na.rm = TRUE)
weighted.sd <- sqrt(weighted.var)
return(list(
mean = weighted.mean,
sd = weighted.sd,
n = sum(n, na.rm = TRUE)
))
}
analyzeAllGenera <- function(genera_list) {
results <- data.frame(
Genus = character(),
Region = character(),
WeightedMean = numeric(),
WeightedSD = numeric(),
SampleSize = numeric(),
stringsAsFactors = FALSE
)
# Process each genus
for(genus in genera_list) {
tryCatch({
# Get data for Europe
europe_data <- getFittedIndividual('europe', genus = genus)
euro_stats <- calculateGenusStats(europe_data)
# Get data for North America
na_data <- getFittedIndividual('northAmerica', genus = genus)
na_stats <- calculateGenusStats(na_data)
# Calculate combined statistics
combined_data <- rbind(europe_data, na_data)
combined_stats <- calculateGenusStats(combined_data)
# Add to results
new_rows <- data.frame(
Genus = rep(genus, 3),
Region = c("Europe", "North America", "Combined"),
WeightedMean = c(euro_stats$mean, na_stats$mean, combined_stats$mean),
WeightedSD = c(euro_stats$sd, na_stats$sd, combined_stats$sd),
SampleSize = c(euro_stats$n, na_stats$n, combined_stats$n),
stringsAsFactors = FALSE
)
results <- rbind(results, new_rows)
cat(sprintf("Processed %s\n", genus))
}, error = function(e) {
cat(sprintf("Error processing %s: %s\n", genus, e$message))
})
}
# Round numeric columns for better display
results$WeightedMean <- round(results$WeightedMean, 6)
results$WeightedSD <- round(results$WeightedSD, 6)
return(results)
}
# List of genera to analyze
genera_list <- c(
"Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
"Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
"Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus"
)
# Run the analysis
results_table <- analyzeAllGenera(genera_list)Processed Pinus
Processed Picea
Processed Quercus
Processed Fagus
Processed Abies
Processed Betula
Processed Castanea
Processed Fraxinus
Processed Alnus
Processed Larix
Processed Carpinus
Processed Acer
Processed Prunus
Processed Tilia
Processed Juniperus
Processed Ulmus
Processed Ostrya
Processed Arbutus
# Print summary table with formatting
print_table <- function(results) {
# Calculate max width for each column
genus_width <- max(nchar("Genus"), max(nchar(as.character(results$Genus))))
region_width <- max(nchar("Region"), max(nchar(as.character(results$Region))))
mean_width <- max(nchar("WeightedMean"), nchar(format(results$WeightedMean, scientific = FALSE)))
sd_width <- max(nchar("WeightedSD"), nchar(format(results$WeightedSD, scientific = FALSE)))
n_width <- max(nchar("SampleSize"), nchar(format(results$SampleSize, scientific = FALSE)))
# Print header
cat(sprintf("\n%-*s | %-*s | %-*s | %-*s | %-*s\n",
genus_width, "Genus",
region_width, "Region",
mean_width, "WeightedMean",
sd_width, "WeightedSD",
n_width, "SampleSize"))
# Print separator
cat(paste(rep("-", genus_width + region_width + mean_width + sd_width + n_width + 12), collapse = ""))
cat("\n")
# Print rows
for(i in 1:nrow(results)) {
cat(sprintf("%-*s | %-*s | %*.6f | %*.6f | %*d\n",
genus_width, results$Genus[i],
region_width, results$Region[i],
mean_width, results$WeightedMean[i],
sd_width, results$WeightedSD[i],
n_width, results$SampleSize[i]))
}
}
# Print formatted table
print_table(results_table)
Genus | Region | WeightedMean | WeightedSD | SampleSize
------------------------------------------------------------------
Pinus | Europe | 0.068552 | 0.172694 | 15138162
Pinus | North America | 0.044963 | 0.319151 | 26531586
Pinus | Combined | 0.053543 | 0.275344 | 41669748
Picea | Europe | 0.009994 | 0.025583 | 5306598
Picea | North America | 0.029931 | 0.096219 | 9183654
Picea | Combined | 0.022271 | 0.078175 | 14490252
Quercus | Europe | 1.433349 | 3.188612 | 6189372
Quercus | North America | 1.741784 | 10.780373 | 10148688
Quercus | Combined | 1.613999 | 8.716642 | 16338060
Fagus | Europe | 0.043629 | 0.110019 | 3485520
Fagus | North America | 0.101029 | 0.263519 | 999378
Fagus | Combined | 0.056418 | 0.159532 | 4484898
Abies | Europe | 0.026906 | 0.068132 | 987480
Abies | North America | 0.031248 | 0.084268 | 7633170
Abies | Combined | 0.030812 | 0.082578 | 8620650
Betula | Europe | 0.032208 | 0.040585 | 1172394
Betula | North America | 0.023073 | 0.080394 | 3211812
Betula | Combined | 0.025506 | 0.072045 | 4384206
Castanea | Europe | 8.945159 | 5.212435 | 771390
Castanea | North America | 34.628286 | 97.628511 | 1818
Castanea | Combined | 9.005546 | 7.145863 | 773208
Fraxinus | Europe | 0.315077 | 0.391305 | 537426
Fraxinus | North America | 0.204790 | 0.688452 | 2074734
Fraxinus | Combined | 0.227487 | 0.640270 | 2612160
Alnus | Europe | 0.016040 | 0.073771 | 581796
Alnus | North America | 0.004511 | 0.002527 | 361872
Alnus | Combined | 0.011628 | 0.058259 | 943668
Larix | Europe | 0.867041 | 1.048048 | 456462
Larix | North America | 0.332672 | 1.148497 | 844722
Larix | Combined | 0.523183 | 1.151350 | 1301184
Carpinus | Europe | 0.308661 | 0.291090 | 671400
Carpinus | North America | 1.101020 | 0.774391 | 145404
Carpinus | Combined | 0.449718 | 0.517956 | 816804
Acer | Europe | 0.453194 | 0.644193 | 476982
Acer | North America | 0.556240 | 1.101246 | 8718498
Acer | Combined | 0.551622 | 1.085231 | 9195480
Prunus | Europe | 0.268458 | 0.439508 | 160542
Prunus | North America | 0.116021 | 0.471729 | 1164870
Prunus | Combined | 0.133765 | 0.468179 | 1325412
Tilia | Europe | 0.252089 | 0.293577 | 137340
Tilia | North America | 0.039454 | 0.065930 | 544590
Tilia | Combined | 0.082442 | 0.167483 | 681930
Juniperus | Europe | 1.014823 | 2.526711 | 156042
Juniperus | North America | 0.089308 | 0.219724 | 3776670
Juniperus | Combined | 0.126029 | 0.576469 | 3932712
Ulmus | Europe | 0.082827 | 0.158883 | 50832
Ulmus | North America | 0.014820 | 0.035805 | 1416168
Ulmus | Combined | 0.017253 | 0.047882 | 1467000
Ostrya | Europe | 0.410181 | 0.178992 | 124560
Ostrya | North America | 0.169710 | 0.168628 | 230742
Ostrya | Combined | 0.254013 | 0.207035 | 355302
Arbutus | Europe | 2.462000 | 2.359686 | 28278
Arbutus | North America | 2.681491 | 5.711852 | 154656
Arbutus | Combined | 2.647649 | 5.333768 | 182934
write.csv("results_table,C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_table.csv")"","x"
"1","results_table,C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_table.csv"
CONSOLIDATE = F, CONSOLIDATE= T.
results_table <- results_table %>%
filter(!Genus %in% c("Castanea","Quercus"))
# Plot mean ISP by genus and region
ggplot(results_table[results_table$Region != "Combined",],
aes(x = Genus, y = WeightedMean, fill = Region)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(title = "Mean ISP by Genus and Region",
y = "Weighted Mean ISP",
x = "Genus")library(ggplot2)
library(dplyr)
plot_data <- results_table %>%
filter(Region != "Combined") %>%
tidyr::pivot_wider(
id_cols = Genus,
names_from = Region,
values_from = c(WeightedMean, WeightedSD, SampleSize)
) %>%
rename(
eu_value = WeightedMean_Europe,
na_value = `WeightedMean_North America`,
eu_sd = WeightedSD_Europe,
na_sd = `WeightedSD_North America`
)
# Calculate EU-NA difference for color coding
plot_data$diff <- plot_data$eu_value - plot_data$na_value
# Create the visualization
ggplot(plot_data, aes(x = na_value, y = eu_value)) +
# Reference line
geom_abline(linetype = "dashed", color = "gray70") +
# Error bars with controlled transparency
geom_errorbarh(aes(xmin = na_value - na_sd,
xmax = na_value + na_sd,
height = 0.05),
alpha = 0.3) +
geom_errorbar(aes(ymin = eu_value - eu_sd,
ymax = eu_value + eu_sd,
width = 0.05),
alpha = 0.3) +
# Points and labels
geom_point(aes(color = diff), size = 3) +
geom_text(aes(label = tolower(Genus)),
hjust = -0.3,
vjust = 0.5,
size = 4) +
# Consistent log scales (0.01 to 10)
scale_x_log10(
breaks = 10^seq(-2, 1, by = 1),
labels = c("0.01", "0.10", "1.00", "10.00"),
limits = c(0.01, 10),
name = "North American ISP (log scale)"
) +
scale_y_log10(
breaks = 10^seq(-2, 1, by = 1),
labels = c("0.01", "0.10", "1.00", "10.00"),
limits = c(0.01, 10),
name = "European ISP (log scale)"
) +
# Color gradient for regional differences
scale_color_gradient2(
name = "EU - NA difference",
low = "darkblue",
mid = "gray60",
high = "darkred",
midpoint = 1,
limits = c(-3, 3)
) +
# Fixed ratio and theme
coord_fixed(ratio = 1) +
theme_minimal() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray90"),
legend.position = "right",
plot.title = element_text(hjust = 0.2),
plot.subtitle = element_text(hjust = 0.5, size = 9),
aspect.ratio = 1
) +
labs(
title = "Comparison of ISP Values Between NA and EU",
subtitle = "ISP:Individual standardized productivity(gm/cm2 ba)"
)trait_combined <- mu_processed %>%
mutate(
region = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ "OTHER"
)
) %>%
filter(region %in% c("EU", "NA"))library(tidyverse)
library(ggplot2)
library(ggrepel)
library(gridExtra)
library(grid)
library(corrplot)
process_sensitivity_data <- function(mu_data, se_data) {
processed_data <- mu_processed %>%
mutate(
region = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ "OTHER"
)
) %>%
filter(
region %in% c("EU", "NA"),
!is.na(bfec.shade),
!is.na(bfec.tempSite),
!is.na(bfec.defSite)
)
filter_outliers <- function(x, k = 1.5) {
qnt <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
H <- k * IQR(x, na.rm = TRUE)
x[x < (qnt[1] - H) | x > (qnt[2] + H)] <- NA
return(x)
}
processed_data <- processed_data %>%
group_by(region) %>%
mutate(
bfec.shade = filter_outliers(bfec.shade),
bfec.tempSite = filter_outliers(bfec.tempSite),
bfec.defSite = filter_outliers(bfec.defSite)
) %>%
ungroup() %>%
filter(
!is.na(bfec.shade),
!is.na(bfec.tempSite),
!is.na(bfec.defSite)
)
return(processed_data)
}
# 计算相关性矩阵
calculate_correlations <- function(data, region_val) {
region_data <- data %>%
filter(region == region_val) %>%
select(bfec.shade, bfec.tempSite, bfec.defSite)
cor_matrix <- cor(region_data, use = "complete.obs")
return(cor_matrix)
}
create_scatter_matrix <- function(data) {
theme_scatter <- theme_bw() +
theme(
aspect.ratio = 1,
panel.grid.minor = element_blank(),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
legend.position = "right"
)
# Shade vs Temperature
p1 <- ggplot(data, aes(x = bfec.shade, y = bfec.tempSite, color = region)) +
geom_point(alpha = 0.7, size = 2) +
geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(
x = "Shade sensitivity",
y = "Temperature sensitivity"
) +
# 动态设置轴范围
scale_x_continuous(
limits = c(min(data$bfec.shade) * 1.1, max(data$bfec.shade) * 1.1)
) +
scale_y_continuous(
limits = c(min(data$bfec.tempSite) * 1.1, max(data$bfec.tempSite) * 1.1)
) +
theme_scatter
# Shade vs Deficit
p2 <- ggplot(data, aes(x = bfec.shade, y = bfec.defSite, color = region)) +
geom_point(alpha = 0.7, size = 2) +
geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(
x = "Shade sensitivity",
y = "Deficit sensitivity"
) +
# 动态设置轴范围
scale_x_continuous(
limits = c(min(data$bfec.shade) * 1.1, max(data$bfec.shade) * 1.1)
) +
scale_y_continuous(
limits = c(min(data$bfec.defSite) * 1.1, max(data$bfec.defSite) * 1.1)
) +
theme_scatter
# Temperature vs Deficit
p3 <- ggplot(data, aes(x = bfec.tempSite, y = bfec.defSite, color = region)) +
geom_point(alpha = 0.7, size = 2) +
geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(
x = "Temperature sensitivity",
y = "Deficit sensitivity"
) +
# 动态设置轴范围
scale_x_continuous(
limits = c(min(data$bfec.tempSite) * 1.1, max(data$bfec.tempSite) * 1.1)
) +
scale_y_continuous(
limits = c(min(data$bfec.defSite) * 1.1, max(data$bfec.defSite) * 1.1)
) +
theme_scatter
return(list(p1 = p1, p2 = p2, p3 = p3))
}
# 执行分析
main_analysis <- function(mu_data, se_data) {
# 数据处理
processed_data <- process_sensitivity_data(mu_data, se_data)
# 计算区域相关性
eu_cor <- calculate_correlations(processed_data, "EU")
na_cor <- calculate_correlations(processed_data, "NA")
# 创建可视化
plots <- create_scatter_matrix(processed_data)
# 输出结果
cat("European correlations:\n")
print(eu_cor)
cat("\nNorth American correlations:\n")
print(na_cor)
# 合并图表 - 使用更简单的标题方式
combined_plot <- grid.arrange(
plots$p1, plots$p2, plots$p3,
ncol = 2,
widths = c(1, 1),
heights = c(1, 1),
top = "Sensitivity Relationships by Region" # 简化标题方式
)
# 创建相关性热图
eu_cor_plot <- corrplot(eu_cor,
method = "color",
type = "upper",
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
diag = FALSE,
title = "EU Correlations",
mar = c(0,0,1,0))
na_cor_plot <- corrplot(na_cor,
method = "color",
type = "upper",
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
diag = FALSE,
title = "NA Correlations",
mar = c(0,0,1,0))
# 通过直方图判断数据分布是否正常
hist_plots <- list(
shade_hist = ggplot(processed_data, aes(x = bfec.shade, fill = region)) +
geom_histogram(alpha = 0.7, bins = 15, position = "dodge") +
scale_fill_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(title = "Shade Sensitivity Distribution") +
theme_bw(),
temp_hist = ggplot(processed_data, aes(x = bfec.tempSite, fill = region)) +
geom_histogram(alpha = 0.7, bins = 15, position = "dodge") +
scale_fill_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(title = "Temperature Sensitivity Distribution") +
theme_bw(),
fec_hist = ggplot(processed_data, aes(x = bfec.defSite, fill = region)) +
geom_histogram(alpha = 0.7, bins = 15, position = "dodge") +
scale_fill_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(title = "Deficit Sensitivity Distribution") +
theme_bw()
)
# 返回处理后的数据和结果
return(list(
data = processed_data,
eu_correlations = eu_cor,
na_correlations = na_cor,
plots = plots,
combined_plot = combined_plot,
hist_plots = hist_plots
))
}
calculate_summary_stats <- function(data) {
summary_stats <- data %>%
group_by(region) %>%
summarise(
across(
c(bfec.shade, bfec.tempSite, bfec.defSite),
list(
mean = ~mean(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE),
n = ~sum(!is.na(.))
)
)
)
return(summary_stats)
}
results <- main_analysis(mu_processed, se)European correlations:
bfec.shade bfec.tempSite bfec.defSite
bfec.shade 1.0000000 -0.15206061 -0.23288967
bfec.tempSite -0.1520606 1.00000000 -0.09430478
bfec.defSite -0.2328897 -0.09430478 1.00000000
North American correlations:
bfec.shade bfec.tempSite bfec.defSite
bfec.shade 1.00000000 -0.06958247 0.11054098
bfec.tempSite -0.06958247 1.00000000 0.06712926
bfec.defSite 0.11054098 0.06712926 1.00000000
write.csv(mu_processed,"C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_data.csv")summary_stats <- calculate_summary_stats(results$data)
print(summary_stats)# A tibble: 2 x 25
region bfec.shade_mean bfec.shade_median bfec.shade_sd bfec.shade_min
<chr> <dbl> <dbl> <dbl> <dbl>
1 EU -0.751 -0.657 0.519 -1.96
2 NA -0.568 -0.333 0.655 -2.49
# i 20 more variables: bfec.shade_max <dbl>, bfec.shade_q25 <dbl>,
# bfec.shade_q75 <dbl>, bfec.shade_n <int>, bfec.tempSite_mean <dbl>,
# bfec.tempSite_median <dbl>, bfec.tempSite_sd <dbl>,
# bfec.tempSite_min <dbl>, bfec.tempSite_max <dbl>, bfec.tempSite_q25 <dbl>,
# bfec.tempSite_q75 <dbl>, bfec.tempSite_n <int>, bfec.defSite_mean <dbl>,
# bfec.defSite_median <dbl>, bfec.defSite_sd <dbl>, bfec.defSite_min <dbl>,
# bfec.defSite_max <dbl>, bfec.defSite_q25 <dbl>, bfec.defSite_q75 <dbl>, ...
eu_data <- results$data %>% filter(region == "EU")
na_data <- results$data %>% filter(region == "NA")
cor.test(eu_data$bfec.shade, eu_data$bfec.tempSite)
Pearson's product-moment correlation
data: eu_data$bfec.shade and eu_data$bfec.tempSite
t = -1.1615, df = 57, p-value = 0.2503
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3928455 0.1082364
sample estimates:
cor
-0.1520606
cat("\n")cor.test(eu_data$bfec.shade, eu_data$bfec.defSite)
Pearson's product-moment correlation
data: eu_data$bfec.shade and eu_data$bfec.defSite
t = -1.808, df = 57, p-value = 0.07588
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.46145148 0.02466351
sample estimates:
cor
-0.2328897
cat("\n")cor.test(eu_data$bfec.tempSite, eu_data$bfec.defSite)
Pearson's product-moment correlation
data: eu_data$bfec.tempSite and eu_data$bfec.defSite
t = -0.71517, df = 57, p-value = 0.4774
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3421248 0.1657811
sample estimates:
cor
-0.09430478
cor.test(na_data$bfec.shade, na_data$bfec.tempSite)
Pearson's product-moment correlation
data: na_data$bfec.shade and na_data$bfec.tempSite
t = -0.68697, df = 97, p-value = 0.4937
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.2633764 0.1296097
sample estimates:
cor
-0.06958247
cat("\n")cor.test(na_data$bfec.shade, na_data$bfec.defSite)
Pearson's product-moment correlation
data: na_data$bfec.shade and na_data$bfec.defSite
t = 1.0954, df = 97, p-value = 0.276
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.08880884 0.30137615
sample estimates:
cor
0.110541
cat("\n")cor.test(na_data$bfec.tempSite, na_data$bfec.defSite)
Pearson's product-moment correlation
data: na_data$bfec.tempSite and na_data$bfec.defSite
t = 0.66264, df = 97, p-value = 0.5091
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1320323 0.2610812
sample estimates:
cor
0.06712926
significant_genera <- c("Pinus", "Picea", "Quercus", "Fagus", "Abies", "Betula", "Castanea",
"Fraxinus", "Alnus", "Larix", "Carpinus", "Acer",
"Prunus", "Tilia", "Juniperus", "Ulmus", "Ostrya", "Arbutus")
genera_plots <- lapply(significant_genera, function(g) {
genus_data <- mu_processed %>% filter(genus == g)
p <- ggplot(genus_data, aes(x = bfec.shade, y = bfec.tempSite, color = region)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = TRUE) +
scale_color_manual(values = c("EU" = "blue3", "NA" = "darkred")) +
labs(
title = paste("Shade-Temperature Relationship:", g),
x = "Shade sensitivity",
y = "Temperature sensitivity"
) +
theme_bw()
return(p)
})
# 移除NULL值
genera_plots <- genera_plots[!sapply(genera_plots, is.null)]
# 显示属级别图表
do.call(grid.arrange, c(genera_plots, ncol =4))# 2. 区域间差异的统计检验
# 使用Fisher's Z变换来检验两个相关系数是否有显著差异
fisher_z_test <- function(r1, r2, n1, n2) {
z1 <- 0.5 * log((1 + r1) / (1 - r1))
z2 <- 0.5 * log((1 + r2) / (1 - r2))
se_diff <- sqrt(1/(n1-3) + 1/(n2-3))
z <- (z1 - z2) / se_diff
p <- 2 * pnorm(-abs(z))
return(list(z = z, p = p))
}
# 计算三个相关系数的区域差异
shade_temp_test <- fisher_z_test(
r1 = -0.17167917, r2 = -0.003961553,
n1 = nrow(eu_data), n2 = nrow(na_data)
)
shade_fec_test <- fisher_z_test(
r1 = -0.19157076, r2 = 0.05524851,
n1 = nrow(eu_data), n2 = nrow(na_data)
)
temp_fec_test <- fisher_z_test(
r1 = -0.08251622, r2 = 0.09090448,
n1 = nrow(eu_data), n2 = nrow(na_data)
)
# 打印结果
cat("Statistical tests for regional differences in correlations:\n")Statistical tests for regional differences in correlations:
cat("Shade-Temperature: z =", shade_temp_test$z, ", p =", shade_temp_test$p, "\n")Shade-Temperature: z = -1.007651 , p = 0.3136219
cat("Shade-Deficit: z =", shade_fec_test$z, ", p =", shade_fec_test$p, "\n")Shade-Deficit: z = -1.482455 , p = 0.1382193
mu_processed$genus <- sapply(strsplit(mu_processed$taxon, " "), `[`, 1)
genus_analysis <- function(data) {
genus_correlations <- data %>%
group_by(genus, region) %>%
summarize(
shade_temp_cor = cor(bfec.shade, bfec.tempSite, use = "complete.obs"),
shade_fec_cor = cor(bfec.shade, bfec.defSite, use = "complete.obs"),
n = n(),
.groups = "drop"
) %>%
filter(!is.na(shade_temp_cor), !is.na(shade_fec_cor))
return(genus_correlations)
}
genus_cors <- genus_analysis(mu_processed)
print(genus_cors, n=33)# A tibble: 35 x 5
genus region shade_temp_cor shade_fec_cor n
<chr> <chr> <dbl> <dbl> <int>
1 Abies WEA 0.352 -0.594 5
2 Abies WNA 0.239 -0.609 6
3 Acer ENA -0.352 -0.941 5
4 Acer WEA 0.338 0.120 5
5 Acer WNA -1 1 2
6 Aesculus WEA -1 -1 2
7 Betula ENA -1.00 1.00 3
8 Carya ENA 0.0710 -0.254 4
9 Celtis ENA 1 1 2
10 Cornus ENA 0.772 0.286 3
11 Corylus WEA 1 -1 2
12 Fraxinus ENA 0.0953 -0.982 3
13 Fraxinus WEA 0.864 -0.958 3
14 Ilex ENA -0.429 0.574 6
15 Juglans ENA -1 -1 2
16 Juniperus ENA 1 1 2
17 Juniperus WEA -1 1 2
18 Juniperus WNA -1.00 -0.983 3
19 Magnolia ENA 0.187 0.407 6
20 Picea ENA 0.346 -0.189 3
21 Picea WEA 0.0896 0.703 3
22 Picea WNA 0.990 0.685 3
23 Pinus ENA -0.135 0.0232 12
24 Pinus WEA 0.329 0.801 8
25 Pinus WNA -0.179 -0.114 12
26 Pistacia WEA 1 -1 2
27 Prunus ENA 0.716 0.678 4
28 Prunus WEA 0.510 0.600 3
29 Quercus ENA 0.285 0.371 22
30 Quercus WEA -0.934 -0.920 16
31 Quercus WNA 0.363 -0.0758 6
32 Tilia WEA 1 1 2
33 Tsuga WNA 1 1 2
# i 2 more rows
特征与气候变量的系统发育敏感性分析
library(RANN)
library(ape)
library(phytools)
library(plotrix)
library(stringr)
library(castor)
library(corrplot)
library(dplyr)
setwd("R:/clark/clark.unix/phylogram")
paramPath <- "../makeMast/fitLogs/output2/east_europe_west/"
source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')
vars <- getFittedVariables(continent = 'northAmerica',
ppath = '../makeMast/fitLogs/', dirs = 'output2')
print(vars) [1] "aspect1.defSite.slope" "aspect1.slope" "aspect2.defSite.slope"
[4] "aspect2.slope" "defAnom" "defSite"
[7] "diam" "I.diam.2." "I.tempSite.2."
[10] "intercept" "isolate" "lfi10SqrtAnom"
[13] "lfi10SqrtSite" "lfi30SqrtAnom" "lfi30SqrtSite"
[16] "lfi50SqrtAnom" "lfi50SqrtSite" "ph"
[19] "shade" "slope" "tempSite"
[22] "tpi"
traitFile <- "plantTraits.csv"
path2Traits <- "C:/Users/mh471/Downloads/radiation/"
trait_mapping <- list(
original = c("gmPerSeed", "dispersal", "fruit", "leaves", "pollination", "sex", "thorns","gmPerCm", "leafN", "leafNP", "leafP",
"maxHt", "maxDbh", "SLA", "woodSG"),
photosynthesis_long = c("Leaf.nitrogen..N..content.per.leaf.area..g.m.2.",
"Photosynthetic.carboxylation.capacity..Vcmax..Vmax..per.leaf.area.at.25.C..µmol.mâ..².sâ..¹._x",
"Jmax.per.leaf.area.at.ambient.or.leaf.temperature..µmol.mâ..².sâ..¹._x",
"Leaf.photosynthesis.at.saturating.light.and.saturating.CO2.per.leaf.area..Amax...µmol.mâ..².sâ..¹._x"
),
photosynthesis_short = c("N_area", "Vcmax", "Jmax", "Amax")
)
# Combine all trait names
traitNames <- c(trait_mapping$original, trait_mapping$photosynthesis_long)
# Process and rename function
process_and_rename <- function(trait_table, long_names, short_names) {
for(i in seq_along(long_names)) {
if(long_names[i] %in% names(trait_table)) {
names(trait_table)[names(trait_table) == long_names[i]] <- short_names[i]
}
}
return(trait_table)
}
# Species level
tmp <- phyloTraits(tree = trAll, species = spp, traitNames = traitNames,
path2Traits = path2Traits, traitFile = traitFile)
traitTab <- process_and_rename(tmp$traitTab,
trait_mapping$photosynthesis_long,
trait_mapping$photosynthesis_short)
tree <- fixPhyloNames(tmp$tree)
# Add continent classification
library(dplyr)
traitTab <- traitTab %>%
mutate(continent = case_when(
region %in% c("WNA", "ENA") ~ "NA",
region %in% c("WEA") ~ "EU",
region == "AFR" ~ "AFR",
region == "UNKN" ~ "UNKNOWN",
grepl("^west", tolower(region)) | grepl("^east", tolower(region)) ~ "NA",
grepl("^europe", tolower(region)) ~ "EU",
TRUE ~ "OTHER"
))
# After getting species-level traitTab with continent classification
# Aggregate to genus level manually
traitGen <- traitTab %>%
mutate(genus = columnSplit(taxon, ' ')[,1]) %>% # Extract genus from species names
group_by(genus, region, continent) %>%
summarise(
# For numeric traits, take the mean
across(where(is.numeric), ~ mean(.x, na.rm = TRUE)),
# For character traits, take the first non-NA value
across(where(is.character), ~ first(na.omit(.x))),
.groups = 'drop'
) %>%
# Remove duplicate genus-region combinations if they exist
distinct(genus, region, .keep_all = TRUE)
# If you need a phylogenetic tree at genus level
treeGen <- trAll
genus_tips <- columnSplit(treeGen$tip.label, ' ')[,1]
# Keep one species per genus
tips_to_keep <- !duplicated(genus_tips)
tips_to_drop <- which(!tips_to_keep)
if(length(tips_to_drop) > 0) {
treeGen <- drop.tip(treeGen, tips_to_drop)
}
# Rename tips to genus names
treeGen$tip.label <- columnSplit(treeGen$tip.label, ' ')[,1]setwd("R:/clark/clark.unix/phylogram")
paramPath <- "../makeMast/fitLogs/output2/east_europe_west/"
source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')
# Get fitted variables
vars <- getFittedVariables(continent = 'northAmerica',
ppath = '../makeMast/fitLogs/', dirs = 'output2')
print(vars) [1] "aspect1.defSite.slope" "aspect1.slope" "aspect2.defSite.slope"
[4] "aspect2.slope" "defAnom" "defSite"
[7] "diam" "I.diam.2." "I.tempSite.2."
[10] "intercept" "isolate" "lfi10SqrtAnom"
[13] "lfi10SqrtSite" "lfi30SqrtAnom" "lfi30SqrtSite"
[16] "lfi50SqrtAnom" "lfi50SqrtSite" "ph"
[19] "shade" "slope" "tempSite"
[22] "tpi"
tmp <- phyloSensitivity(tree = trAll,
species = tree$tip.label,
betaNames = c("tempSite", "shade", "defSite"),
paramPath = paramPath,
path2Traits = "C:/Users/mh471/Downloads/radiation/") # Add this parameter
parEst <- tmp$parEst
traitSig <- tmp$parSig
tree <- tmp$tree
# Calculate phylogenetic distance matrix
pdist <- cophenetic(tree)sens_data <- mu_processed %>%
select(region, species, bfec.shade, bfec.tempSite, bfec.defSite) %>%
distinct()
# 设置行名为物种名,便于后续匹配
rownames(sens_data) <- sens_data$species
# 计算敏感性欧氏距离矩阵
sens_dist <- as.matrix(dist(sens_data[, c("bfec.shade", "bfec.tempSite", "bfec.defSite")]))
# 创建物种名称映射函数
convert_species_name <- function(sens_name) {
# 提取属名和种加词
genus_match <- regexpr("^[a-z]+", sens_name)
genus <- substr(sens_name, 1, attr(genus_match, "match.length"))
species <- substr(sens_name, attr(genus_match, "match.length") + 1, nchar(sens_name))
# 将属名首字母大写
capitalized_genus <- paste0(toupper(substr(genus, 1, 1)), substr(genus, 2, nchar(genus)))
# 将种加词小写
lowercase_species <- tolower(species)
# 组合格式为 "Genus species"
return(paste(capitalized_genus, lowercase_species))
}
# 将sens_dist的行名转换为pdist格式
converted_names <- sapply(rownames(sens_dist), convert_species_name)
# 创建映射表
name_mapping <- data.frame(
sens_name = rownames(sens_dist),
pdist_name = converted_names,
stringsAsFactors = FALSE
)
# 使用映射表查找匹配的物种
common_spp <- intersect(rownames(pdist), name_mapping$pdist_name)
# 创建sens_dist的子集,使用pdist格式的物种名
sens_dist_matched <- sens_dist
rownames(sens_dist_matched) <- converted_names
colnames(sens_dist_matched) <- converted_names
# 获取共同物种的子集
pdist_sub <- pdist[common_spp, common_spp]
sens_dist_sub <- sens_dist_matched[common_spp, common_spp]
# 检查结果维度
dim(pdist_sub)[1] 101 101
dim(sens_dist_sub)[1] 101 101
genus_info <- mu_processed %>%
select(species, genus) %>%
distinct()
rownames(genus_info) <- genus_info$species
genus_info$standard_name <- sapply(genus_info$species,convert_species_name)
# 获取属列表和具有足够物种数的属
genera <- unique(genus_info$genus[genus_info$standard_name %in% common_spp])
genera_counts <- table(genus_info$genus[genus_info$standard_name %in% common_spp])
genera_with_enough <- names(genera_counts)[genera_counts > 1]
# 可视化按属分组的比较
par(mfrow = c(3, 3), mar = c(2, 2, 2, 1), omi = c(1, 1, 0.5, 0.3))
for(g in genera_with_enough) {
# 筛选属内物种
spp_in_genus <- genus_info$standard_name[genus_info$genus == g & genus_info$standard_name %in% common_spp]
# 提取对应的距离子矩阵
pdist_g <- pdist_sub[spp_in_genus, spp_in_genus]
sens_dist_g <- sens_dist_sub[spp_in_genus, spp_in_genus]
# 设置对角线为NA
diag(pdist_g) <- diag(sens_dist_g) <- NA
# 创建散点图
plot(jitter(pdist_g), sens_dist_g,
xlab = "", ylab = "", main = g,
cex = 0.7, pch = 16, col = rgb(0, 0, 1, 0.4))
# 计算相关性
dist_pair <- data.frame(
phylo = pdist_g[lower.tri(pdist_g)],
sens = sens_dist_g[lower.tri(sens_dist_g)]
)
# 如果有足够的数据点进行相关性检验
if(nrow(dist_pair) > 1) {
corr_test <- cor.test(dist_pair$phylo, dist_pair$sens)
r_val <- round(corr_test$estimate, 2)
p_val <- corr_test$p.value
# 添加相关系数和显著性标记
label <- paste0(g, ", r=", r_val)
if(p_val < 0.05) label <- paste0(label, "*")
legend("topleft", legend = label, bty = "n", cex = 0.9)
}
}# 整体相关性分析
par(mfrow = c(1, 1), mar = c(4, 4, 2, 2))# 提取下三角矩阵的值
pdist_vals <- pdist_sub[lower.tri(pdist_sub)]
sens_dist_vals <- sens_dist_sub[lower.tri(sens_dist_sub)]
# 计算整体相关性
overall_corr <- cor.test(pdist_vals, sens_dist_vals)
# 绘制整体散点图
plot(jitter(pdist_vals), sens_dist_vals,
xlab = "Phylogenetic Distance", ylab = "Sensitivity Distance",
main = paste0("All Species (r=", round(overall_corr$estimate, 2),
", p=", format(overall_corr$p.value, scientific = TRUE, digits = 3), ")"),
cex = 0.5, pch = 16, col = rgb(0, 0, 1, 0.2))
abline(lm(sens_dist_vals ~ pdist_vals), col = "red", lty = 2)# 额外:热图可视化比较
par(mfrow = c(1, 2), mar = c(1, 1, 2, 1))
# 系统发育距离热图
image(pdist_sub, main = "Phylogenetic Distance", axes = FALSE, col = rev(hcl.colors(100, "Blues")))
# 敏感性距离热图
image(sens_dist_sub, main = "Sensitivity Distance", axes = FALSE, col = rev(hcl.colors(100, "Blues")))# Mantel测试评估两个距离矩阵的相关性
mantel_result <- mantel.test(pdist_sub, sens_dist_sub, nperm = 999)
cat("\nMantel test result:\n")
Mantel test result:
print(mantel_result)$z.stat
[1] 3744098
$p
[1] 0.717
$alternative
[1] "two.sided"
# 检查实际有什么物种
eu_species <- sens_data$species[sens_data$region == "WEA"] # Western Europe & Africa
na_species <- sens_data$species[sens_data$region %in% c("ENA", "WNA")] # North America
print(head(eu_species))[1] "abiesAlba" NA "abiesCephalon" "abiesCilicica"
[5] "abiesNordmann" "abiesPinsapo"
print(head(na_species))[1] "abiesAmabilis" "abiesConcolor" "abiesFraseri" "abiesGrandis"
[5] "abiesLasiocar" "abiesMagnific"
# 转换物种名
eu_species_std <- sapply(eu_species, convert_species_name)
na_species_std <- sapply(na_species, convert_species_name)
# 移除 NA 值和转换失败的
eu_species_std <- eu_species_std[eu_species_std != "NANA NA" & !is.na(eu_species_std)]
na_species_std <- na_species_std[na_species_std != "NANA NA" & !is.na(na_species_std)]
print(eu_species_std) abiesAlba abiesCephalon abiesCilicica abiesNordmann
"Abies alba" "Abies cephalon" "Abies cilicica" "Abies nordmann"
abiesPinsapo acerCampestr acerMonspess acerOpalus
"Abies pinsapo" "Acer campestr" "Acer monspess" "Acer opalus"
acerPlatanoi acerPseudopl aesculusHippocas aesculusTurbinat
"Acer platanoi" "Acer pseudopl" "Aesculus hippocas" "Aesculus turbinat"
alnusGlutinos betulaPendula carpinusBetulus castaneaSativa
"Alnus glutinos" "Betula pendula" "Carpinus betulus" "Castanea sativa"
cedrusLibani celtisAustrali cercisSiliquas cornusSanguine
"Cedrus libani" "Celtis australi" "Cercis siliquas" "Cornus sanguine"
corylusAvellana corylusColurna crataeguMonogyna diospyroLotus
"Corylus avellana" "Corylus colurna" "Crataegu monogyna" "Diospyro lotus"
fagusSylvatic ficusCarica frangulaAlnus fraxinusAngustif
"Fagus sylvatic" "Ficus carica" "Frangula alnus" "Fraxinus angustif"
fraxinusExcelsio fraxinusOrnus ilexAquifoli juglansRegia
"Fraxinus excelsio" "Fraxinus ornus" "Ilex aquifoli" "Juglans regia"
juniperuOxycedru juniperuPhoenice larixDecidua oleaEuropaea
"Juniperu oxycedru" "Juniperu phoenice" "Larix decidua" "Olea europaea"
piceaAbies piceaOmorika piceaOriental pinusBrutia
"Picea abies" "Picea omorika" "Picea oriental" "Pinus brutia"
pinusCembra pinusHalepens pinusNigra pinusPinaster
"Pinus cembra" "Pinus halepens" "Pinus nigra" "Pinus pinaster"
pinusPinea pinusSylvestr pinusUncinata pistaciaLentiscu
"Pinus pinea" "Pinus sylvestr" "Pinus uncinata" "Pistacia lentiscu"
pistaciaTerebint platanusOriental prunusAvium prunusCerasife
"Pistacia terebint" "Platanus oriental" "Prunus avium" "Prunus cerasife"
prunusSpinosa quercusCanarien quercusCastanei quercusCerris
"Prunus spinosa" "Quercus canarien" "Quercus castanei" "Quercus cerris"
quercusCoccifer quercusFaginea quercusFrainett quercusIlex
"Quercus coccifer" "Quercus faginea" "Quercus frainett" "Quercus ilex"
quercusInfector quercusIthabure quercusMacranth quercusPetraea
"Quercus infector" "Quercus ithabure" "Quercus macranth" "Quercus petraea"
quercusPubescen quercusRobur quercusRotundif quercusSuber
"Quercus pubescen" "Quercus robur" "Quercus rotundif" "Quercus suber"
quercusTrojana sorbusAucupari taxusBaccata tiliaCordata
"Quercus trojana" "Sorbus aucupari" "Taxus baccata" "Tilia cordata"
tiliaPlatyphy ulmusGlabra
"Tilia platyphy" "Ulmus glabra"
print(na_species_std) abiesAmabilis abiesConcolor abiesFraseri abiesGrandis
"Abies amabilis" "Abies concolor" "Abies fraseri" "Abies grandis"
abiesLasiocar abiesMagnific abiesProcera acerCircinat
"Abies lasiocar" "Abies magnific" "Abies procera" "Acer circinat"
acerMacrophy acerNegundo acerPensylva acerRubrum
"Acer macrophy" "Acer negundo" "Acer pensylva" "Acer rubrum"
acerSaccharu acerSpicatum aesculusGlabra alnusIncana
"Acer saccharu" "Acer spicatum" "Aesculus glabra" "Alnus incana"
alnusRubra amelanchArborea betulaAlleghan betulaLenta
"Alnus rubra" "Amelanch arborea" "Betula alleghan" "Betula lenta"
betulaPapyrife calocedrDecurren carpinusCarolini caryaAlba
"Betula papyrife" "Calocedr decurren" "Carpinus carolini" "Carya alba"
caryaCordifor caryaIllinoin caryaOvata castaneaDentata
"Carya cordifor" "Carya illinoin" "Carya ovata" "Castanea dentata"
castaneaPumila celtisLaevigat celtisOccident cercisCanadens
"Castanea pumila" "Celtis laevigat" "Celtis occident" "Cercis canadens"
cornusAlternif cornusDrummond cornusFlorida cornusNuttalli
"Cornus alternif" "Cornus drummond" "Cornus florida" "Cornus nuttalli"
corylusCornuta diospyroVirginia fagusGrandifo frangulaCarolini
"Corylus cornuta" "Diospyro virginia" "Fagus grandifo" "Frangula carolini"
fraxinusAmerican fraxinusNigra fraxinusPennsylv ilexCassine
"Fraxinus american" "Fraxinus nigra" "Fraxinus pennsylv" "Ilex cassine"
ilexDecidua ilexMontana ilexOpaca ilexVerticil
"Ilex decidua" "Ilex montana" "Ilex opaca" "Ilex verticil"
ilexVomitori juglansCinerea juglansNigra juniperuCommunis
"Ilex vomitori" "Juglans cinerea" "Juglans nigra" "Juniperu communis"
juniperuMonosper juniperuOsteospe juniperuScopulor juniperuVirginia
"Juniperu monosper" "Juniperu osteospe" "Juniperu scopulor" "Juniperu virginia"
larixLaricina liriodenTulipife magnoliaAcuminat magnoliaFraseri
"Larix laricina" "Lirioden tulipife" "Magnolia acuminat" "Magnolia fraseri"
magnoliaGrandifl magnoliaMacrophy magnoliaTripetal magnoliaVirginia
"Magnolia grandifl" "Magnolia macrophy" "Magnolia tripetal" "Magnolia virginia"
piceaEngelman piceaGlauca piceaMariana piceaPungens
"Picea engelman" "Picea glauca" "Picea mariana" "Picea pungens"
piceaRubens piceaSitchens pinusAlbicaul pinusBanksian
"Picea rubens" "Picea sitchens" "Pinus albicaul" "Pinus banksian"
pinusClausa pinusContorta pinusCoulteri pinusEchinata
"Pinus clausa" "Pinus contorta" "Pinus coulteri" "Pinus echinata"
pinusEdulis pinusElliotti pinusFlexilis pinusJeffreyi
"Pinus edulis" "Pinus elliotti" "Pinus flexilis" "Pinus jeffreyi"
pinusLamberti pinusMonophyl pinusMonticol pinusPalustri
"Pinus lamberti" "Pinus monophyl" "Pinus monticol" "Pinus palustri"
pinusPonderos pinusPungens pinusRadiata pinusResinosa
"Pinus ponderos" "Pinus pungens" "Pinus radiata" "Pinus resinosa"
pinusRigida pinusSabinian pinusSerotina pinusStrobus
"Pinus rigida" "Pinus sabinian" "Pinus serotina" "Pinus strobus"
pinusTaeda pinusVirginia platanusOccident prunusAmerican
"Pinus taeda" "Pinus virginia" "Platanus occident" "Prunus american"
prunusPensylva prunusSerotina prunusVirginia pseudotsMenziesi
"Prunus pensylva" "Prunus serotina" "Prunus virginia" "Pseudots menziesi"
quercusAgrifoli quercusAlba quercusBicolor quercusChrysole
"Quercus agrifoli" "Quercus alba" "Quercus bicolor" "Quercus chrysole"
quercusFalcata quercusGarryana quercusGeminata quercusHemispha
"Quercus falcata" "Quercus garryana" "Quercus geminata" "Quercus hemispha"
quercusImbricar quercusIncana quercusKelloggi quercusLaevis
"Quercus imbricar" "Quercus incana" "Quercus kelloggi" "Quercus laevis"
quercusLaurifol quercusLobata quercusLyrata quercusMacrocar
"Quercus laurifol" "Quercus lobata" "Quercus lyrata" "Quercus macrocar"
quercusMargaret quercusMichauxi quercusMontana quercusMyrtifol
"Quercus margaret" "Quercus michauxi" "Quercus montana" "Quercus myrtifol"
quercusNigra quercusPalustri quercusRubra quercusShumardi
"Quercus nigra" "Quercus palustri" "Quercus rubra" "Quercus shumardi"
quercusStellata quercusVelutina quercusVirginia quercusWislizen
"Quercus stellata" "Quercus velutina" "Quercus virginia" "Quercus wislizen"
robiniaPseudoac taxusBrevifol thujaOccident thujaPlicata
"Robinia pseudoac" "Taxus brevifol" "Thuja occident" "Thuja plicata"
tiliaAmerican tsugaCanadens tsugaHeteroph tsugaMertensi
"Tilia american" "Tsuga canadens" "Tsuga heteroph" "Tsuga mertensi"
ulmusAlata ulmusAmerican ulmusRubra viburnumDentatum
"Ulmus alata" "Ulmus american" "Ulmus rubra" "Viburnum dentatum"
viburnumPrunifol viburnumRufidulu
"Viburnum prunifol" "Viburnum rufidulu"
# 计算区域内和区域间的比较
# 1. 欧洲物种之间的比较
eu_vs_eu <- expand.grid(sp1 = eu_species_std, sp2 = eu_species_std)
eu_vs_eu <- eu_vs_eu[eu_vs_eu$sp1 != eu_vs_eu$sp2, ]
# 2. 北美物种之间的比较
na_vs_na <- expand.grid(sp1 = na_species_std, sp2 = na_species_std)
na_vs_na <- na_vs_na[na_vs_na$sp1 != na_vs_na$sp2, ]
# 3. 欧洲与北美物种之间的比较(跨大陆)
cross_continent <- expand.grid(sp1 = eu_species_std, sp2 = na_species_std)
# 为每组比较计算系统发育距离和敏感性距离
calc_distances <- function(species_pairs, region_type) {
result <- data.frame()
for(i in 1:nrow(species_pairs)) {
sp1 <- as.character(species_pairs$sp1[i])
sp2 <- as.character(species_pairs$sp2[i])
# 检查物种是否在系统发育树和敏感性数据中
if(sp1 %in% rownames(pdist) && sp2 %in% rownames(pdist) &&
sp1 %in% rownames(sens_dist_sub) && sp2 %in% rownames(sens_dist_sub)) {
# 获取系统发育距离
phylo_d <- pdist[sp1, sp2]
# 获取敏感性距离
sens_d <- sens_dist_sub[sp1, sp2]
result <- rbind(result, data.frame(
sp1 = sp1,
sp2 = sp2,
phylo_dist = phylo_d,
sens_dist = sens_d,
comparison_type = region_type
))
}
}
return(result)
}
# 计算各组比较的距离
eu_eu_dist <- calc_distances(eu_vs_eu, "EU-EU")
na_na_dist <- calc_distances(na_vs_na, "NA-NA")
cross_dist <- calc_distances(cross_continent, "EU-NA")
# 合并所有比较结果
all_comparisons <- rbind(eu_eu_dist, na_na_dist, cross_dist)
# 可视化比较
par(mfrow = c(1, 1), mar = c(5, 5, 4, 2))
plot(all_comparisons$phylo_dist, all_comparisons$sens_dist,
col = ifelse(all_comparisons$comparison_type == "EU-EU", "blue",
ifelse(all_comparisons$comparison_type == "NA-NA", "red", "purple")),
pch = ifelse(all_comparisons$comparison_type == "EU-EU", 16,
ifelse(all_comparisons$comparison_type == "NA-NA", 17, 18)),
xlab = "Phylogenetic Distance", ylab = "Sensitivity Distance",
main = "Phylogenetic vs. Sensitivity Distance by Region")
# 添加图例
legend("topleft", legend = c("EU-EU", "NA-NA", "EU-NA"),
col = c("blue", "red", "purple"), pch = c(16, 17, 18), bty = "n")
# 为每种比较类型添加趋势线
eu_eu_lm <- lm(sens_dist ~ phylo_dist, data = eu_eu_dist)
na_na_lm <- lm(sens_dist ~ phylo_dist, data = na_na_dist)
cross_lm <- lm(sens_dist ~ phylo_dist, data = cross_dist)
abline(eu_eu_lm, col = "blue", lty = 2)
abline(na_na_lm, col = "red", lty = 2)
abline(cross_lm, col = "purple", lty = 2)interaction_model <- lm(sens_dist ~ phylo_dist * comparison_type, data = all_comparisons)
anova(interaction_model)Analysis of Variance Table
Response: sens_dist
Df Sum Sq Mean Sq F value Pr(>F)
phylo_dist 1 44 44.345 6.5166 0.01071 *
comparison_type 2 548 273.829 40.2399 < 2.2e-16 ***
phylo_dist:comparison_type 2 137 68.339 10.0426 4.409e-05 ***
Residuals 7590 51649 6.805
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#在相同系统发育距离下,跨大陆物种对的敏感性差异是否大于同大陆物种对
all_comparisons$cross_continent <- ifelse(all_comparisons$comparison_type == "EU-NA",
"Cross-Continental", "Within-Continental")
# ANCOVA模型
ancova_model <- lm(sens_dist ~ phylo_dist + cross_continent + phylo_dist:cross_continent,
data = all_comparisons)
# 查看结果
summary(ancova_model)
Call:
lm(formula = sens_dist ~ phylo_dist + cross_continent + phylo_dist:cross_continent,
data = all_comparisons)
Residuals:
Min 1Q Median 3Q Max
-1.9926 -1.1946 -0.7159 0.2795 18.9815
Coefficients:
Estimate Std. Error t value
(Intercept) 2.0295042 0.1106103 18.348
phylo_dist -0.0005287 0.0002115 -2.500
cross_continentWithin-Continental -0.3443766 0.1327062 -2.595
phylo_dist:cross_continentWithin-Continental 0.0003379 0.0002532 1.334
Pr(>|t|)
(Intercept) < 2e-16 ***
phylo_dist 0.01245 *
cross_continentWithin-Continental 0.00948 **
phylo_dist:cross_continentWithin-Continental 0.18209
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.624 on 7592 degrees of freedom
Multiple R-squared: 0.002191, Adjusted R-squared: 0.001797
F-statistic: 5.557 on 3 and 7592 DF, p-value: 0.0008327
# 对比平均值(估计边际均值)
library(emmeans)
emmeans(ancova_model, ~ cross_continent) cross_continent emmean SE df lower.CL upper.CL
Cross-Continental 1.79 0.0547 7592 1.68 1.89
Within-Continental 1.60 0.0361 7592 1.53 1.67
Confidence level used: 0.95
# 在特定系统发育距离值处比较(例如在系统发育距离=0.1处)
emtrends(ancova_model, ~ cross_continent, var = "phylo_dist") cross_continent phylo_dist.trend SE df lower.CL upper.CL
Cross-Continental -0.000529 0.000211 7592 -0.000943 -1.14e-04
Within-Continental -0.000191 0.000139 7592 -0.000464 8.22e-05
Confidence level used: 0.95
# 可视化不同组别在固定系统发育距离下的敏感性差异
small_phylo <- 0.1
pred_data <- data.frame(
phylo_dist = small_phylo,
cross_continent = c("Cross-Continental", "Within-Continental")
)
predict(ancova_model, newdata = pred_data, interval = "confidence") fit lwr upr
1 2.029451 1.812661 2.246242
2 1.685109 1.541399 1.828819
library(lme4)
# 假设物种嵌套在属内
# 首先,提取物种的属信息
all_comparisons$genus1 <- gsub("([A-Z][a-z]+).*", "\\1", all_comparisons$sp1)
all_comparisons$genus2 <- gsub("([A-Z][a-z]+).*", "\\1", all_comparisons$sp2)
# 创建一个物种对的唯一标识符
all_comparisons$pair_id <- paste(all_comparisons$sp1, all_comparisons$sp2, sep="_")
# 混合效应模型,将属作为随机效应
mixed_model <- lmer(sens_dist ~ phylo_dist * cross_continent + (1|genus1) + (1|genus2),
data = all_comparisons)
summary(mixed_model)Linear mixed model fit by REML ['lmerMod']
Formula: sens_dist ~ phylo_dist * cross_continent + (1 | genus1) + (1 |
genus2)
Data: all_comparisons
REML criterion at convergence: 35892.6
Scaled residuals:
Min 1Q Median 3Q Max
-1.1690 -0.4242 -0.1792 0.1165 6.9986
Random effects:
Groups Name Variance Std.Dev.
genus1 (Intercept) 0.3192 0.5650
genus2 (Intercept) 0.1868 0.4322
Residual 6.4796 2.5455
Number of obs: 7596, groups: genus1, 28; genus2, 28
Fixed effects:
Estimate Std. Error t value
(Intercept) 1.6881821 0.1819155 9.280
phylo_dist -0.0003271 0.0002176 -1.503
cross_continentWithin-Continental -0.3789481 0.1317157 -2.877
phylo_dist:cross_continentWithin-Continental 0.0004124 0.0002472 1.668
Correlation of Fixed Effects:
(Intr) phyl_d cr_W-C
phylo_dist -0.539
crss_cntW-C -0.511 0.680
phyl_d:_W-C 0.435 -0.796 -0.858
# 在固定系统发育距离下比较
library(emmeans)
emtrends(mixed_model, ~ cross_continent, var = "phylo_dist") cross_continent phylo_dist.trend SE df asymp.LCL asymp.UCL
Cross-Continental -3.27e-04 0.000218 Inf -0.000754 9.95e-05
Within-Continental 8.54e-05 0.000151 Inf -0.000211 3.82e-04
Degrees-of-freedom method: asymptotic
Confidence level used: 0.95
# 筛选系统发育距离小于阈值的数据子集
# 检查系统发育距离的分布
summary(all_comparisons$phylo_dist) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.1147 235.0695 475.1492 457.2973 704.4694 704.4694
hist(all_comparisons$phylo_dist, main="系统发育距离分布", xlab="系统发育距离")# 查看最小的几个系统发育距离值
head(sort(all_comparisons$phylo_dist))[1] 0.114742 0.114742 0.117820 0.117820 0.118578 0.118578
# 尝试适应数据的阈值,例如使用分布的第一个四分位数
threshold <- quantile(all_comparisons$phylo_dist, 0.25)
print(paste("新阈值:", threshold))[1] "<U+65B0><U+9608><U+503C>: 235.069478"
small_phylo_subset <- all_comparisons[all_comparisons$phylo_dist < threshold, ]
# 检查筛选后的数据集大小
nrow(small_phylo_subset)[1] 1897
# 检查跨大陆变量的水平数量
table(small_phylo_subset$cross_continent)
Cross-Continental Within-Continental
619 1278
# 选择系统发育距离最小的前25%样本
percentile_threshold <- 0.25
n_samples <- nrow(all_comparisons)
n_select <- floor(n_samples * percentile_threshold)
ordered_samples <- all_comparisons[order(all_comparisons$phylo_dist), ]
small_phylo_subset <- ordered_samples[1:n_select, ]
# 验证数据集
nrow(small_phylo_subset)[1] 1899
table(small_phylo_subset$cross_continent)
Cross-Continental Within-Continental
619 1280
setwd("R:/clark/clark.unix/phylogram")
paramPath <- "../makeMast/fitLogs/output2/east_europe_west/"
path2Traits <- "../traitsByGroup/"
traitFile <- ("C:/Users/mh471/Downloads/radiation/plant_traits_final_clean.csv")
results <-read.csv("C:/Users/mh471/Downloads/continent comparison/phylogram/east_europe_west/results_data.csv")
source('../makeMast/predictionMastFunctions.R')
source('../makeMast/RFunctions/mastifFunctions.R')
source('../makeMast/moreFunctionsOld.R')
source('phyloFunctions.R')
vars <- getFittedVariables(continent = 'northAmerica',
ppath = '../makeMast/fitLogs/', dirs = 'output2')
print(vars) [1] "aspect1.defSite.slope" "aspect1.slope" "aspect2.defSite.slope"
[4] "aspect2.slope" "defAnom" "defSite"
[7] "diam" "I.diam.2." "I.tempSite.2."
[10] "intercept" "isolate" "lfi10SqrtAnom"
[13] "lfi10SqrtSite" "lfi30SqrtAnom" "lfi30SqrtSite"
[16] "lfi50SqrtAnom" "lfi50SqrtSite" "ph"
[19] "shade" "slope" "tempSite"
[22] "tpi"
tmp <- phyloSensitivity(tree = trAll, species = tree$tip.label,
betaNames = c("tempSite", "shade", "defSite"),
paramPath = paramPath, path2Traits = path2Traits)
parEst <- tmp$parEst
traitSig <- tmp$parSig
tree <- tmp$tree
pdist <- cophenetic(tree)
sens_data <- results %>%
select(species, bfec.shade, bfec.tempSite, bfec.defSite) %>%
distinct()
rownames(sens_data) <- sens_data$species
sens_dist <- as.matrix(dist(sens_data))
spp <- getFittedSpecies(dirs = 'output2/east_europe_west',
keep = c('east', 'west', 'europe'))
trait_tmp <- phyloTraits(tree = trAll, species = spp,
traitNames = c("gmPerCm", "leafN", "leafNP", "leafP", "gmPerSeed",
"maxHt", "pollination", "maxDbh", "SLA", "woodSG"))
traitTab <- trait_tmp$traitTab
trRsp <- trait_tmp$treex <- read.csv("R:/clark/clark.unix/phylogram/speciesByResponse.csv", row.names = 1)
rownames(x)[rownames(x) == 'caryaOvalis-g'] <- 'caryaOvalis'
convert_species_name <- function(sens_name) {
genus_match <- regexpr("^[a-z]+", sens_name)
genus <- substr(sens_name, 1, attr(genus_match, "match.length"))
species <- substr(sens_name, attr(genus_match, "match.length") + 1, nchar(sens_name))
capitalized_genus <- paste0(toupper(substr(genus, 1, 1)), substr(genus, 2, nchar(genus)))
lowercase_species <- tolower(species)
return(paste(capitalized_genus, lowercase_species))
}sens_species <- data.frame(
code8 = rownames(sens_dist),
bfec.shade = as.numeric(as.character(sens_data$bfec.shade)),
bfec.tempSite = as.numeric(as.character(sens_data$bfec.tempSite)),
bfec.defSite = as.numeric(as.character(sens_data$bfec.defSite)),
stringsAsFactors = FALSE
)if("code8" %in% colnames(traitTab)) {
combined_data <- merge(sens_species, traitTab, by = "code8", all = FALSE)
combined_data$standard_name <- sapply(combined_data$code8, convert_species_name)
combined_data$in_response <- combined_data$code8 %in% rownames(x)
for(i in 1:nrow(combined_data)) {
sp <- combined_data$code8[i]
if(sp %in% rownames(x)) {
anom_cols <- grep('Anom', colnames(x), value = TRUE)
for(col in anom_cols) {
combined_data[i, col] <- x[sp, col]
}
}
}
} else {
print("Warning: No code8 column found in traitTab. Using alternative matching method.")
if(all(c("genus", "specEpith") %in% colnames(traitTab))) {
traitTab$standard_name <- paste(traitTab$genus, traitTab$specEpith)
sens_species$standard_name <- sapply(sens_species$code8, convert_species_name)
combined_data <- merge(sens_species, traitTab, by = "standard_name", all = FALSE)
} else {
stop("Cannot find appropriate columns to match species between datasets.")
}
}abline_safe <- function(model_or_formula = NULL, data = NULL, col = "red", lty = 2, a = NULL, b = NULL, h = NULL, v = NULL) {
if(!is.null(h)) {
if(is.finite(h)) {
abline(h = h, col = col, lty = lty)
return(TRUE)
} else {
warning("无法绘制水平线:h不是有限值")
return(FALSE)
}
}
if(!is.null(v)) {
if(is.finite(v)) {
abline(v = v, col = col, lty = lty)
return(TRUE)
} else {
warning("无法绘制垂直线:v不是有限值")
return(FALSE)
}
}
# 检查是否直接提供了a和b
if(!is.null(a) && !is.null(b)) {
if(is.finite(a) && is.finite(b)) {
abline(a = a, b = b, col = col, lty = lty)
return(TRUE)
} else {
warning("无法绘制回归线:a或b不是有限值")
return(FALSE)
}
}
# 处理模型或公式
if(!is.null(model_or_formula)) {
# 检查是否为公式(不使用is.formula函数)
if(inherits(model_or_formula, "formula")) {
# 如果输入的是公式,先拟合模型
model <- try(lm(model_or_formula, data = data), silent = TRUE)
if(inherits(model, "try-error")) {
warning("拟合模型失败")
return(FALSE)
}
} else {
# 否则假设输入已经是一个模型
model <- model_or_formula
}
# 提取系数
coefs <- try(coef(model), silent = TRUE)
if(inherits(coefs, "try-error") || length(coefs) < 2) {
warning("无法提取模型系数")
return(FALSE)
}
# 检查系数是否有限
if(all(is.finite(coefs))) {
abline(a = coefs[1], b = coefs[2], col = col, lty = lty)
return(TRUE)
} else {
warning("无法绘制回归线:系数不是有限值")
return(FALSE)
}
}
warning("未提供有效参数")
return(FALSE)
}
# 创建物种名称匹配函数,用于匹配树名
match_tree_species <- function(species_name, tree_tips) {
# 检查是否完全匹配
if(species_name %in% tree_tips) {
return(species_name)
}
# 获取种名的前几个字符(属名部分应该是完整的)
parts <- strsplit(species_name, " ")[[1]]
if(length(parts) < 2) return(NA)
genus <- parts[1]
species_partial <- parts[2]
# 查找同属的物种
genus_matches <- grep(paste0("^", genus, " "), tree_tips, value = TRUE)
if(length(genus_matches) == 0) return(NA)
# 在同属物种中查找以species_partial开头的物种
species_matches <- grep(paste0("^", genus, " ", species_partial), tree_tips, value = TRUE)
if(length(species_matches) == 1) {
return(species_matches)
} else if(length(species_matches) > 1) {
# 如果有多个匹配,返回第一个(可能需要更复杂的逻辑来选择最佳匹配)
return(species_matches[1])
} else {
# 没有匹配,返回NA
return(NA)
}
}
# 为combined_data中的每个物种找到树中对应的物种
species_mapping <- sapply(combined_data$standard_name, function(sp) {
match_tree_species(sp, tree$tip.label)
})
matched_count <- sum(!is.na(species_mapping))
cat("Total species:", length(species_mapping), "\n")Total species: 250
cat("Successfully matched:", matched_count, "\n")Successfully matched: 203
cat("Failed to match:", length(species_mapping) - matched_count, "\n")Failed to match: 47
matched_data <- combined_data[!is.na(species_mapping),]
matched_data$tree_name <- species_mapping[!is.na(species_mapping)]if("pollination" %in% colnames(matched_data)) {
par(mar = c(10, 4, 2, 1))
# 将特征转换为因子
matched_data$pollination <- as.factor(matched_data$pollination)
# 绘制箱线图
boxplot(bfec.shade ~ pollination, data = matched_data,
main = "Shade Sensitivity by Pollination Type",
xlab = "", ylab = "Shade Sensitivity",
las = 2) # 垂直轴标签
title(xlab = "Pollination", line = 8) # 增加底部空间
}if("genus" %in% colnames(matched_data)) {
genus_counts <- table(matched_data$genus)
genus_with_enough <- names(genus_counts)[genus_counts >= 1]
genus_summary <- do.call(rbind, lapply(genus_with_enough, function(fam) {
fam_data <- matched_data[matched_data$genus == fam, ]
data.frame(
genus = fam,
mean_shade = mean(fam_data$bfec.shade, na.rm = TRUE),
sd_shade = sd(fam_data$bfec.shade, na.rm = TRUE),
count = nrow(fam_data)
)
}))
par(mfrow = c(1, 1), mar = c(10, 4, 2, 2))
genus_summary_filtered <- genus_summary[genus_summary$count >= 1, ]
genus_summary_filtered <- genus_summary_filtered[order(genus_summary_filtered$mean_shade), ]
barplot(genus_summary_filtered$mean_shade,
names.arg = genus_summary_filtered$genus,
las = 2, # 垂直显示科名
col = rainbow(nrow(genus_summary_filtered)),
ylab = "Mean Shade Sensitivity",
main = "Mean Shade Sensitivity by genus")
abline_safe(h = 0, lty = 2) # 添加参考线
}[1] TRUE
# 查看traitGen中所有可用的性状
cat("=== All traits in traitGen ===\n")=== All traits in traitGen ===
print(names(traitGen)) [1] "genus" "region" "continent" "gmPerSeed" "gmPerCm"
[6] "leafN" "leafNP" "leafP" "maxHt" "maxDbh"
[11] "SLA" "woodSG" "N_area" "Vcmax" "Jmax"
[16] "Amax" "code8" "specEpith" "subSpec" "class"
[21] "family" "section" "dispersal" "fruit" "leaves"
[26] "pollination" "sex" "thorns" "taxon"
# 创建完整的matched_data,包含所有性状
create_complete_matched_data <- function() {
# 使用 mu_processed 作为基础
base_data <- mu_processed
# 获取所有数值型性状列(排除标识符和分类变量)
exclude_cols <- c("genus", "region", "continent", "code8", "specEpith",
"subSpec", "class", "family", "section", "dispersal",
"fruit", "leaves", "pollination", "sex", "thorns", "taxon")
trait_cols <- setdiff(names(traitGen), exclude_cols)
cat("Traits to merge:", paste(trait_cols, collapse = ", "), "\n\n")
# 准备traitGen数据(每个属只保留一行,或按region分组)
# 如果想要区域特异性数据,可以保留region
# 这里我们先按genus汇总(取平均值)
trait_summary <- traitGen %>%
group_by(genus) %>%
summarise(across(all_of(trait_cols), ~ mean(.x, na.rm = TRUE)),
.groups = 'drop')
cat("Merging", nrow(base_data), "sensitivity records with",
nrow(trait_summary), "genus trait records\n")
# 合并数据
matched_data <- merge(base_data, trait_summary,
by = "genus",
all.x = TRUE,
suffixes = c("", ".trait")) # 避免列名冲突
cat("Result:", nrow(matched_data), "records\n")
cat("Columns added:", paste(setdiff(names(matched_data), names(base_data)), collapse = ", "), "\n")
return(matched_data)
}
# 执行合并
matched_data <- create_complete_matched_data()Traits to merge: gmPerSeed, gmPerCm, leafN, leafNP, leafP, maxHt, maxDbh, SLA, woodSG, N_area, Vcmax, Jmax, Amax
Merging 284 sensitivity records with 45 genus trait records
Result: 284 records
Columns added: gmPerSeed.trait, gmPerCm, leafN, leafNP, leafP, maxHt, maxDbh, SLA, woodSG, N_area, Vcmax, Jmax, Amax
# 验证所有性状
all_traits <- c("gmPerSeed", "gmPerCm", "leafN", "leafNP", "leafP",
"maxHt", "maxDbh", "SLA", "woodSG",
"N_area", "Vcmax", "Jmax", "Amax", "SLA_val")
cat("\n=== Data availability for all traits ===\n")
=== Data availability for all traits ===
for(trait in all_traits) {
if(trait %in% names(matched_data)) {
non_na <- sum(!is.na(matched_data[[trait]]))
pct <- round(100 * non_na / nrow(matched_data), 1)
cat(sprintf("%-15s: %4d non-NA values (%5.1f%%)\n", trait, non_na, pct))
} else {
cat(sprintf("%-15s: NOT FOUND\n", trait))
}
}gmPerSeed : 217 non-NA values ( 76.4%)
gmPerCm : 209 non-NA values ( 73.6%)
leafN : 218 non-NA values ( 76.8%)
leafNP : 218 non-NA values ( 76.8%)
leafP : 218 non-NA values ( 76.8%)
maxHt : 218 non-NA values ( 76.8%)
maxDbh : 211 non-NA values ( 74.3%)
SLA : 211 non-NA values ( 74.3%)
woodSG : 211 non-NA values ( 74.3%)
N_area : 218 non-NA values ( 76.8%)
Vcmax : 209 non-NA values ( 73.6%)
Jmax : 211 non-NA values ( 74.3%)
Amax : 182 non-NA values ( 64.1%)
SLA_val : NOT FOUND
# 扩展性状列表
continuous_traits <- c("gmPerSeed", "gmPerCm", "leafN", "leafNP", "leafP",
"maxHt", "maxDbh", "SLA", "woodSG",
"N_area", "Vcmax", "Jmax", "Amax", "SLA_val")
continuous_traits_present <- continuous_traits[continuous_traits %in% colnames(matched_data)]
cat("\n=== Running comprehensive correlation analysis ===\n")
=== Running comprehensive correlation analysis ===
cat("Traits to analyze:", length(continuous_traits_present), "\n")Traits to analyze: 13
print(continuous_traits_present) [1] "gmPerSeed" "gmPerCm" "leafN" "leafNP" "leafP" "maxHt"
[7] "maxDbh" "SLA" "woodSG" "N_area" "Vcmax" "Jmax"
[13] "Amax"
if(length(continuous_traits_present) > 0) {
# 初始化结果
trait_correlations_full <- data.frame(
trait = character(),
sensitivity = character(),
r = numeric(),
p_value = numeric(),
n = numeric(),
stringsAsFactors = FALSE
)
# 设置图形(创建更大的网格)
n_plots <- min(length(continuous_traits_present), 12) # 限制显示前12个
par(mfrow = c(4, 3), mar = c(4, 4, 3, 1))
# 只分析与shade的关系(为了简化可视化)
for(i in 1:min(12, length(continuous_traits_present))) {
trait <- continuous_traits_present[i]
# 获取有效数据
valid_idx <- !is.na(matched_data[[trait]]) & !is.na(matched_data$bfec.shade)
valid_data <- matched_data[valid_idx, ]
if(nrow(valid_data) >= 3) {
# 计算相关性
corr <- cor.test(valid_data[[trait]], valid_data$bfec.shade,
method = "pearson", use = "complete.obs")
# 保存结果
trait_correlations_full <- rbind(trait_correlations_full, data.frame(
trait = trait,
sensitivity = "shade",
r = corr$estimate,
p_value = corr$p.value,
n = nrow(valid_data),
stringsAsFactors = FALSE
))
# 绘图
plot(valid_data[[trait]], valid_data$bfec.shade,
pch = 16, col = rgb(0, 0, 1, 0.5),
xlab = trait, ylab = "Shade Sensitivity",
main = paste(trait, "vs Shade"))
# 添加回归线
if(var(valid_data[[trait]], na.rm = TRUE) > 0) {
fit <- lm(bfec.shade ~ valid_data[[trait]], data = valid_data)
abline(fit, col = "red", lty = 2)
}
# 添加统计信息
text_pos <- par("usr")
text(text_pos[1] + (text_pos[2] - text_pos[1]) * 0.05,
text_pos[4] - (text_pos[4] - text_pos[3]) * 0.05,
paste("r =", round(corr$estimate, 3),
"\np =", round(corr$p.value, 3)),
pos = 4, cex = 0.8)
}
}
# 现在计算所有组合(用于表格)
sensitivity_vars <- c("bfec.shade", "bfec.tempSite", "bfec.defSite")
trait_correlations_complete <- data.frame()
for(trait in continuous_traits_present) {
for(sens_var in sensitivity_vars) {
valid_idx <- !is.na(matched_data[[trait]]) & !is.na(matched_data[[sens_var]])
valid_data <- matched_data[valid_idx, ]
if(nrow(valid_data) >= 3) {
corr <- cor.test(valid_data[[trait]], valid_data[[sens_var]],
method = "pearson", use = "complete.obs")
trait_correlations_complete <- rbind(trait_correlations_complete, data.frame(
trait = trait,
sensitivity = gsub("bfec\\.", "", sens_var),
r = corr$estimate,
p_value = corr$p.value,
n = nrow(valid_data),
stringsAsFactors = FALSE
))
}
}
}
# 显示完整结果
cat("\n=== Complete Correlation Results ===\n")
print(trait_correlations_complete %>%
arrange(p_value) %>%
head(20)) # 显示前20个最显著的
# 找出显著的相关性
sig_corr <- trait_correlations_complete[trait_correlations_complete$p_value < 0.05, ]
if(nrow(sig_corr) > 0) {
cat("\n=== Significant Correlations (p < 0.05) ===\n")
print(sig_corr[order(abs(sig_corr$r), decreasing = TRUE), ])
}
}
=== Complete Correlation Results ===
trait sensitivity r p_value n
cor22 SLA tempSite 0.13757726 0.04592790 211
cor28 N_area tempSite -0.11742228 0.08367661 218
cor36 Amax shade 0.12629175 0.08934955 182
cor4 gmPerCm tempSite 0.11515241 0.09685763 209
cor7 leafN tempSite 0.11148804 0.10063983 218
cor38 Amax defSite 0.11175499 0.13310266 182
cor33 Jmax shade 0.10145443 0.14190095 211
cor25 woodSG tempSite 0.10140338 0.14210265 211
cor23 SLA defSite -0.08863924 0.19968954 211
cor18 maxDbh shade 0.07733477 0.26341543 211
cor32 Vcmax defSite 0.07680028 0.26904559 209
cor35 Jmax defSite 0.06857570 0.32150504 211
cor29 N_area defSite 0.06384444 0.34814680 218
cor37 Amax tempSite -0.06876440 0.35632875 182
cor15 maxHt shade 0.05814649 0.39293325 218
cor2 gmPerSeed defSite 0.05705464 0.40298920 217
cor5 gmPerCm defSite 0.05751841 0.40810551 209
cor31 Vcmax tempSite 0.05542889 0.42537360 209
cor13 leafP tempSite 0.05421391 0.42577470 218
cor9 leafNP shade 0.05321859 0.43433218 218
=== Significant Correlations (p < 0.05) ===
trait sensitivity r p_value n
cor22 SLA tempSite 0.1375773 0.0459279 211
library(ggplot2)
library(reshape2)
if(exists("trait_correlations_complete") && nrow(trait_correlations_complete) > 0) {
# 准备热图数据
heatmap_data <- trait_correlations_complete %>%
mutate(
sig_level = case_when(
p_value < 0.001 ~ "***",
p_value < 0.01 ~ "**",
p_value < 0.05 ~ "*",
p_value < 0.1 ~ ".",
TRUE ~ ""
)
)
# 创建矩阵
corr_matrix <- dcast(heatmap_data, trait ~ sensitivity, value.var = "r")
rownames(corr_matrix) <- corr_matrix$trait
corr_matrix <- corr_matrix[,-1]
# 创建ggplot热图
heatmap_long <- melt(as.matrix(corr_matrix))
names(heatmap_long) <- c("Trait", "Sensitivity", "Correlation")
# 添加显著性标记
sig_matrix <- dcast(heatmap_data, trait ~ sensitivity, value.var = "sig_level")
sig_long <- melt(as.matrix(sig_matrix[,-1]))
heatmap_long$sig <- sig_long$value
p_heat <- ggplot(heatmap_long, aes(x = Sensitivity, y = Trait, fill = Correlation)) +
geom_tile(color = "white") +
geom_text(aes(label = round(Correlation, 2)), size = 3) +
geom_text(aes(label = sig), size = 4, vjust = -0.5) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red",
midpoint = 0, limits = c(-0.3, 0.3),
name = "Correlation\nCoefficient") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(size = 9)) +
labs(title = "Trait-Sensitivity Correlations",
subtitle = "Significance: *** p<0.001, ** p<0.01, * p<0.05, . p<0.1",
x = "Environmental Sensitivity", y = "Functional Trait")
print(p_heat)
}# 由于traitGen有区域信息,我们可以做更精细的分析
# 重新合并,保留区域信息
create_regional_matched_data <- function() {
base_data <- mu_processed
# 直接合并,保留region信息
matched_regional <- merge(base_data,
traitGen[, !names(traitGen) %in% c("code8", "specEpith", "subSpec",
"class", "family", "section",
"dispersal", "fruit", "taxon")],
by = c("genus", "region"),
all.x = TRUE)
return(matched_regional)
}
matched_regional <- create_regional_matched_data()
# 检查区域分布
cat("\n=== Regional distribution ===\n")
=== Regional distribution ===
table(matched_regional$continent)
AFR EU NA UNKNOWN
1 74 142 2
# 按大陆分析
for(cont in c("EU", "NA")) {
cont_data <- matched_regional[matched_regional$continent == cont & !is.na(matched_regional$continent), ]
if(nrow(cont_data) > 10) {
cat("\n=== Analysis for", cont, "===\n")
cat("Number of records:", nrow(cont_data), "\n")
# 选几个关键性状分析
key_traits <- c("leafN", "maxHt", "N_area", "Vcmax")
key_traits <- key_traits[key_traits %in% names(cont_data)]
for(trait in key_traits) {
if(sum(!is.na(cont_data[[trait]])) >= 5) {
corr <- cor.test(cont_data[[trait]], cont_data$bfec.shade, use = "complete.obs")
cat(sprintf(" %s vs shade: r = %.3f, p = %.3f (n = %d)\n",
trait, corr$estimate, corr$p.value, sum(complete.cases(cont_data[[trait]], cont_data$bfec.shade))))
}
}
}
}
=== Analysis for EU ===
Number of records: 74
leafN vs shade: r = -0.093, p = 0.432 (n = 74)
maxHt vs shade: r = -0.004, p = 0.976 (n = 74)
N_area vs shade: r = -0.061, p = 0.603 (n = 74)
Vcmax vs shade: r = -0.148, p = 0.212 (n = 73)
=== Analysis for NA ===
Number of records: 142
leafN vs shade: r = -0.015, p = 0.863 (n = 142)
maxHt vs shade: r = 0.063, p = 0.460 (n = 142)
N_area vs shade: r = -0.052, p = 0.537 (n = 142)
Vcmax vs shade: r = 0.051, p = 0.556 (n = 134)
# 首先添加tree_name列到matched_data
# 假设tree的tip.label格式是 "Genus species"
if(exists("tree")) {
# 创建物种名匹配
matched_data$species_std <- paste(toupper(substr(matched_data$genus, 1, 1)),
tolower(substr(matched_data$genus, 2, nchar(matched_data$genus))),
tolower(matched_data$specEpith), sep = " ")
# 检查哪些物种在树中
matched_data$in_tree <- matched_data$species_std %in% tree$tip.label
cat("\nSpecies in phylogenetic tree:", sum(matched_data$in_tree), "out of", nrow(matched_data), "\n")
if(sum(matched_data$in_tree) > 0) {
# 只使用在树中的物种
tree_data <- matched_data[matched_data$in_tree, ]
# 计算系统发育信号
library(phytools)
signal_results <- data.frame()
# 分析每个性状
for(trait in c(continuous_traits_present, "bfec.shade", "bfec.tempSite", "bfec.defSite")) {
# 准备数据
trait_values <- setNames(tree_data[[trait]], tree_data$species_std)
trait_values <- trait_values[!is.na(trait_values)]
if(length(trait_values) >= 10) {
# 修剪树只包含有数据的物种
tree_subset <- keep.tip(tree, names(trait_values))
# 计算Pagel's lambda
lambda_test <- phylosig(tree_subset, trait_values, method = "lambda", test = TRUE)
signal_results <- rbind(signal_results, data.frame(
trait = trait,
lambda = lambda_test$lambda,
p_value = lambda_test$P,
n_species = length(trait_values)
))
cat(trait, ": λ =", round(lambda_test$lambda, 3),
", p =", round(lambda_test$P, 4), "\n")
}
}
if(nrow(signal_results) > 0) {
cat("\n=== Phylogenetic Signal Summary ===\n")
print(signal_results[order(signal_results$lambda, decreasing = TRUE), ])
}
}
}
Species in phylogenetic tree: 0 out of 284
standardize_species_name <- function(species_code) {
# 假设species格式是 "genusSpecies" (如 "abiesAlba")
# 需要转换为 "Genus species" (如 "Abies alba")
# 找到大写字母的位置(除了第一个)
capital_pos <- gregexpr("[A-Z]", species_code)[[1]]
if(length(capital_pos) > 0 && capital_pos[1] != -1) {
# 提取genus(第一部分)
genus <- substr(species_code, 1, capital_pos[1] - 1)
# 如果genus是小写开头的
if(capital_pos[1] == 1) {
# species_code 已经是 "GenusSpecies" 格式
# 找第二个大写字母
if(length(capital_pos) > 1) {
genus <- substr(species_code, 1, capital_pos[2] - 1)
species <- substr(species_code, capital_pos[2], nchar(species_code))
} else {
# 没有第二个大写字母,尝试其他方法
genus <- species_code
species <- ""
}
} else {
# species_code 是 "genusSpecies" 格式
# 找到genus和species的分界
for(i in 2:nchar(species_code)) {
if(substr(species_code, i, i) == toupper(substr(species_code, i, i))) {
genus <- substr(species_code, 1, i-1)
species <- substr(species_code, i, nchar(species_code))
break
}
}
}
# 格式化:Genus species
genus_formatted <- paste0(toupper(substr(genus, 1, 1)),
tolower(substr(genus, 2, nchar(genus))))
species_formatted <- tolower(species)
return(paste(genus_formatted, species_formatted))
} else {
# 无法解析,返回原值
return(species_code)
}
}
# 更简单的方法:直接使用genus和specEpith列
if("genus" %in% names(matched_data) && "specEpith" %in% names(matched_data)) {
# 创建标准物种名
matched_data$species_clean <- paste(
matched_data$genus,
matched_data$specEpith
)
# 检查格式
cat("\n=== Created species names (first 10) ===\n")
print(head(matched_data$species_clean, 10))
# 另一种格式(首字母大写)
matched_data$species_capitalized <- paste(
paste0(toupper(substr(matched_data$genus, 1, 1)),
tolower(substr(matched_data$genus, 2, nchar(matched_data$genus)))),
tolower(matched_data$specEpith)
)
cat("\n=== Capitalized species names (first 10) ===\n")
print(head(matched_data$species_capitalized, 10))
}
# 或者直接从species列转换
if("species" %in% names(matched_data)) {
matched_data$species_std <- sapply(matched_data$species, standardize_species_name)
cat("\n=== Standardized from species column (first 10) ===\n")
print(head(matched_data$species_std, 10))
}
=== Standardized from species column (first 10) ===
[1] "Abies amabilis" "Abies alba" "Abies magnific" "Abies grandis"
[5] "Abies procera" "Abies cephalon" "Abies fraseri" "Abies concolor"
[9] "Abies lasiocar" "Abies pinsapo"
# 尝试不同的格式匹配
find_matches <- function(matched_data, tree) {
results <- list()
# 尝试不同的格式
formats_to_try <- c()
if("species_clean" %in% names(matched_data)) {
formats_to_try <- c(formats_to_try, "species_clean")
}
if("species_capitalized" %in% names(matched_data)) {
formats_to_try <- c(formats_to_try, "species_capitalized")
}
if("species_std" %in% names(matched_data)) {
formats_to_try <- c(formats_to_try, "species_std")
}
if("species" %in% names(matched_data)) {
formats_to_try <- c(formats_to_try, "species")
}
for(format_name in formats_to_try) {
n_matches <- sum(matched_data[[format_name]] %in% tree$tip.label)
cat(format_name, ":", n_matches, "matches\n")
results[[format_name]] <- n_matches
}
# 找最佳格式
best_format <- names(which.max(unlist(results)))
cat("\nBest format:", best_format, "with", max(unlist(results)), "matches\n")
return(best_format)
}
best_format <- find_matches(matched_data, tree)species_std : 101 matches
species : 0 matches
Best format: species_std with 101 matches
# 使用最佳格式
if(!is.null(best_format) && best_format %in% names(matched_data)) {
matched_data$tree_name <- matched_data[[best_format]]
matched_data$in_tree <- matched_data$tree_name %in% tree$tip.label
cat("\n=== Final match results ===\n")
cat("Species in tree:", sum(matched_data$in_tree), "out of", nrow(matched_data), "\n")
if(sum(matched_data$in_tree) > 0) {
cat("\nMatched species examples:\n")
print(head(matched_data$tree_name[matched_data$in_tree], 10))
} else {
cat("\nNo matches found. Checking for partial matches...\n")
# 尝试部分匹配
for(i in 1:min(10, nrow(matched_data))) {
species_to_check <- matched_data$tree_name[i]
similar_in_tree <- tree$tip.label[grep(matched_data$genus[i], tree$tip.label, ignore.case = TRUE)]
if(length(similar_in_tree) > 0) {
cat("\nFor", species_to_check, "found similar:", similar_in_tree[1], "\n")
}
}
}
}
=== Final match results ===
Species in tree: 101 out of 284
Matched species examples:
[1] "Abies amabilis" "Abies alba" "Abies grandis" "Abies procera"
[5] "Abies fraseri" "Abies concolor" "Abies pinsapo" "Abies cilicica"
[9] "Acer negundo" "Acer opalus"
# 系统发育信号分析
library(phytools)
library(ape)
# 创建 in_tree 列
matched_data$in_tree <- matched_data$tree_name %in% tree$tip.label
cat("Species in tree:", sum(matched_data$in_tree), "out of", nrow(matched_data), "\n")Species in tree: 101 out of 284
# 准备数据
tree_data <- matched_data[matched_data$in_tree, ]
cat("Using", nrow(tree_data), "species for phylogenetic analysis\n")Using 101 species for phylogenetic analysis
# 初始化结果
signal_results <- data.frame()
# 分析性状
all_traits <- c("bfec.shade", "bfec.tempSite", "bfec.defSite")
for(trait in all_traits) {
cat("\nAnalyzing", trait, "...\n")
trait_values <- setNames(tree_data[[trait]], tree_data$tree_name)
trait_values <- trait_values[!is.na(trait_values)]
cat(" Non-NA values:", length(trait_values), "\n")
if(length(trait_values) >= 10) {
tree_subset <- keep.tip(tree, names(trait_values))
cat(" Tree tips:", length(tree_subset$tip.label), "\n")
tryCatch({
lambda_test <- phylosig(tree_subset, trait_values, method = "lambda", test = TRUE)
K_test <- phylosig(tree_subset, trait_values, method = "K", test = TRUE, nsim = 999)
signal_results <- rbind(signal_results, data.frame(
trait = trait,
lambda = lambda_test$lambda,
lambda_p = lambda_test$P,
K = K_test$K,
K_p = K_test$P,
n_species = length(trait_values)
))
cat(" Lambda =", round(lambda_test$lambda, 3), "p =", round(lambda_test$P, 4), "\n")
}, error = function(e) {
cat(" Error:", e$message, "\n")
})
} else {
cat(" Skipped - insufficient data\n")
}
}
Analyzing bfec.shade ...
Non-NA values: 101
Tree tips: 101
Lambda = 0 p = 1
Analyzing bfec.tempSite ...
Non-NA values: 101
Tree tips: 101
Lambda = 0 p = 1
Analyzing bfec.defSite ...
Non-NA values: 101
Tree tips: 101
Lambda = 0 p = 1
# 显示结果
if(nrow(signal_results) > 0) {
cat("\n=== Phylogenetic Signal Summary ===\n")
print(signal_results)
}
=== Phylogenetic Signal Summary ===
trait lambda lambda_p K K_p n_species
1 bfec.shade 7.331374e-05 1 0.0017448399 0.9389389 101
2 bfec.tempSite 7.331374e-05 1 0.0007612654 0.9479479 101
3 bfec.defSite 7.331374e-05 1 0.0008714858 0.9389389 101
# PIC分析来控制系统发育相关性
perform_pic_analysis <- function(tree_data, tree) {
pic_results <- data.frame()
# 获取功能性状和敏感性性状
func_traits <- c("gmPerCm","gmPerSeed", "leafN","leafNP","leafP","maxHt","maxDbh","SLA","woodSG")
sens_traits <- c("bfec.shade", "bfec.tempSite", "bfec.defSite")
for(func in func_traits) {
for(sens in sens_traits) {
# 准备数据 - 转换为数值
func_vals <- as.numeric(tree_data[[func]])
sens_vals <- as.numeric(tree_data[[sens]])
names(func_vals) <- tree_data$tree_name
names(sens_vals) <- tree_data$tree_name
# 找共同的非NA物种
common_species <- intersect(names(func_vals[!is.na(func_vals)]),
names(sens_vals[!is.na(sens_vals)]))
if(length(common_species) >= 10) {
# 修剪树
tree_pic <- keep.tip(tree, common_species)
# 获取对应的值
func_pic_data <- as.numeric(func_vals[common_species])
sens_pic_data <- as.numeric(sens_vals[common_species])
tryCatch({
# 计算PIC
func_pic <- pic(func_pic_data, tree_pic)
sens_pic <- pic(sens_pic_data, tree_pic)
# 相关性测试
pic_cor <- cor.test(func_pic, sens_pic)
# 原始相关性(不控制系统发育)
raw_cor <- cor.test(func_pic_data, sens_pic_data)
pic_results <- rbind(pic_results, data.frame(
functional_trait = func,
sensitivity = gsub("bfec\\.", "", sens),
raw_r = raw_cor$estimate,
raw_p = raw_cor$p.value,
pic_r = pic_cor$estimate,
pic_p = pic_cor$p.value,
n_species = length(common_species),
stringsAsFactors = FALSE
))
}, error = function(e) {
cat(" Error for", func, "vs", sens, ":", e$message, "\n")
})
}
}
}
return(pic_results)
}
# 运行PIC分析
pic_results <- perform_pic_analysis(tree_data, tree)
if(nrow(pic_results) > 0) {
cat("\n=== Phylogenetic Independent Contrasts Results ===\n")
print(head(pic_results, 15))
# 显示显著的PIC相关性
sig_pic <- pic_results[pic_results$pic_p < 0.05, ]
if(nrow(sig_pic) > 0) {
cat("\nSignificant correlations (pic_p < 0.05):\n")
print(sig_pic[order(abs(sig_pic$pic_r), decreasing = TRUE), ])
} else {
cat("\nNo significant PIC correlations\n")
}
} else {
cat("No PIC results generated\n")
}
=== Phylogenetic Independent Contrasts Results ===
functional_trait sensitivity raw_r raw_p pic_r
cor gmPerCm shade 0.075922244 0.45983318 -0.22273847
cor1 gmPerCm tempSite 0.172258930 0.09156544 -0.21137738
cor2 gmPerCm defSite 0.113743919 0.26728791 -0.01168649
cor3 gmPerSeed shade -0.018746293 0.85313311 -0.04810092
cor4 gmPerSeed tempSite 0.053709908 0.59560607 -0.04752820
cor5 gmPerSeed defSite 0.090198265 0.37214457 0.01666861
cor6 leafN shade 0.027374422 0.78688851 -0.30938233
cor7 leafN tempSite 0.019175087 0.84981265 -0.27646576
cor8 leafN defSite 0.035140648 0.72851945 -0.02416435
cor9 leafNP shade 0.001199049 0.99055347 -0.30482695
cor10 leafNP tempSite 0.127948126 0.20457794 -0.27499238
cor11 leafNP defSite 0.055458029 0.58367139 -0.03096513
cor12 leafP shade 0.061840526 0.54105672 -0.31073530
cor13 leafP tempSite -0.008608089 0.93226123 -0.27775120
cor14 leafP defSite 0.018675821 0.85367907 -0.02750221
pic_p n_species
cor 0.029163074 97
cor1 0.038700401 97
cor2 0.910024478 97
cor3 0.636360427 100
cor4 0.640386957 100
cor5 0.869923558 100
cor6 0.001832936 100
cor7 0.005604384 100
cor8 0.812336138 100
cor9 0.002156179 100
cor10 0.005874391 100
cor11 0.760930423 100
cor12 0.001745757 100
cor13 0.005377882 100
cor14 0.786991537 100
Significant correlations (pic_p < 0.05):
functional_trait sensitivity raw_r raw_p pic_r
cor12 leafP shade 0.061840526 0.54105672 -0.3107353
cor6 leafN shade 0.027374422 0.78688851 -0.3093823
cor9 leafNP shade 0.001199049 0.99055347 -0.3048269
cor15 maxHt shade -0.103222247 0.30679140 0.3030420
cor21 SLA shade 0.009830698 0.92386346 -0.2993291
cor13 leafP tempSite -0.008608089 0.93226123 -0.2777512
cor7 leafN tempSite 0.019175087 0.84981265 -0.2764658
cor16 maxHt tempSite -0.043163030 0.66981461 0.2751147
cor10 leafNP tempSite 0.127948126 0.20457794 -0.2749924
cor22 SLA tempSite 0.029624993 0.77330607 -0.2704302
cor18 maxDbh shade -0.058818838 0.56305592 0.2614391
cor24 woodSG shade 0.070732950 0.49115634 -0.2516071
cor25 woodSG tempSite 0.185850578 0.06836390 -0.2313911
cor19 maxDbh tempSite 0.024688017 0.80834634 0.2300730
cor gmPerCm shade 0.075922244 0.45983318 -0.2227385
cor1 gmPerCm tempSite 0.172258930 0.09156544 -0.2113774
pic_p n_species
cor12 0.001745757 100
cor6 0.001832936 100
cor9 0.002156179 100
cor15 0.002296271 100
cor21 0.003050132 97
cor13 0.005377882 100
cor7 0.005604384 100
cor16 0.005851535 100
cor10 0.005874391 100
cor22 0.007703377 97
cor18 0.009314234 99
cor24 0.013403566 97
cor25 0.023308494 97
cor19 0.022664201 99
cor 0.029163074 97
cor1 0.038700401 97
# 由于traitGen有区域信息,我们可以做更精细的分析
# 重新合并,保留区域信息
create_regional_matched_data <- function() {
base_data <- mu_processed
# 直接合并,保留region信息
matched_regional <- merge(base_data,
traitGen[, !names(traitGen) %in% c("code8", "specEpith", "subSpec",
"class", "family", "section",
"dispersal", "fruit", "taxon")],
by = c("genus", "region"),
all.x = TRUE)
return(matched_regional)
}
matched_regional <- create_regional_matched_data()
# 检查区域分布
cat("\n=== Regional distribution ===\n")
=== Regional distribution ===
table(matched_regional$continent)
AFR EU NA UNKNOWN
1 74 142 2
# 按大陆分析
for(cont in c("EU", "NA")) {
cont_data <- matched_regional[matched_regional$continent == cont & !is.na(matched_regional$continent), ]
if(nrow(cont_data) > 10) {
cat("\n=== Analysis for", cont, "===\n")
cat("Number of records:", nrow(cont_data), "\n")
# 选几个关键性状分析
key_traits <- c("leafN", "maxHt", "N_area", "Vcmax")
key_traits <- key_traits[key_traits %in% names(cont_data)]
for(trait in key_traits) {
if(sum(!is.na(cont_data[[trait]])) >= 5) {
corr <- cor.test(cont_data[[trait]], cont_data$bfec.shade, use = "complete.obs")
cat(sprintf(" %s vs shade: r = %.3f, p = %.3f (n = %d)\n",
trait, corr$estimate, corr$p.value, sum(complete.cases(cont_data[[trait]], cont_data$bfec.shade))))
}
}
}
}
=== Analysis for EU ===
Number of records: 74
leafN vs shade: r = -0.093, p = 0.432 (n = 74)
maxHt vs shade: r = -0.004, p = 0.976 (n = 74)
N_area vs shade: r = -0.061, p = 0.603 (n = 74)
Vcmax vs shade: r = -0.148, p = 0.212 (n = 73)
=== Analysis for NA ===
Number of records: 142
leafN vs shade: r = -0.015, p = 0.863 (n = 142)
maxHt vs shade: r = 0.063, p = 0.460 (n = 142)
N_area vs shade: r = -0.052, p = 0.537 (n = 142)
Vcmax vs shade: r = 0.051, p = 0.556 (n = 134)
# 分析PIC效应
analyze_pic_effects <- function(pic_results) {
cat("\n=== KEY FINDINGS FROM PIC ANALYSIS ===\n\n")
# 1. 系统发育掩盖的关系(PIC后相关性增强)
masked_relationships <- pic_results %>%
filter(abs(pic_r) > abs(raw_r) + 0.1) %>%
arrange(desc(abs(pic_r)))
if(nrow(masked_relationships) > 0) {
cat("1. Relationships MASKED by phylogeny (stronger after PIC):\n")
print(masked_relationships[, c("functional_trait", "sensitivity", "raw_r", "pic_r")])
cat("\n")
}
# 2. 系统发育驱动的关系(PIC后相关性减弱)
phylo_driven <- pic_results %>%
filter(abs(raw_r) > abs(pic_r) + 0.1) %>%
arrange(desc(abs(raw_r)))
if(nrow(phylo_driven) > 0) {
cat("2. Relationships DRIVEN by phylogeny (weaker after PIC):\n")
print(phylo_driven[, c("functional_trait", "sensitivity", "raw_r", "pic_r")])
cat("\n")
}
# 3. 方向改变的关系
sign_change <- pic_results %>%
filter(sign(raw_r) != sign(pic_r) & abs(pic_r) > 0.02)
if(nrow(sign_change) > 0) {
cat("3. Relationships that CHANGED DIRECTION after PIC:\n")
print(sign_change[, c("functional_trait", "sensitivity", "raw_r", "pic_r")])
cat("\n")
}
}
analyze_pic_effects(pic_results)
=== KEY FINDINGS FROM PIC ANALYSIS ===
1. Relationships MASKED by phylogeny (stronger after PIC):
functional_trait sensitivity raw_r pic_r
cor12 leafP shade 0.061840526 -0.3107353
cor6 leafN shade 0.027374422 -0.3093823
cor9 leafNP shade 0.001199049 -0.3048269
cor15 maxHt shade -0.103222247 0.3030420
cor21 SLA shade 0.009830698 -0.2993291
cor13 leafP tempSite -0.008608089 -0.2777512
cor7 leafN tempSite 0.019175087 -0.2764658
cor16 maxHt tempSite -0.043163030 0.2751147
cor10 leafNP tempSite 0.127948126 -0.2749924
cor22 SLA tempSite 0.029624993 -0.2704302
cor18 maxDbh shade -0.058818838 0.2614391
cor24 woodSG shade 0.070732950 -0.2516071
cor19 maxDbh tempSite 0.024688017 0.2300730
cor gmPerCm shade 0.075922244 -0.2227385
2. Relationships DRIVEN by phylogeny (weaker after PIC):
functional_trait sensitivity raw_r pic_r
cor26 woodSG defSite 0.1198987 -0.01626167
cor2 gmPerCm defSite 0.1137439 -0.01168649
3. Relationships that CHANGED DIRECTION after PIC:
functional_trait sensitivity raw_r pic_r
cor gmPerCm shade 0.075922244 -0.22273847
cor1 gmPerCm tempSite 0.172258930 -0.21137738
cor4 gmPerSeed tempSite 0.053709908 -0.04752820
cor6 leafN shade 0.027374422 -0.30938233
cor7 leafN tempSite 0.019175087 -0.27646576
cor8 leafN defSite 0.035140648 -0.02416435
cor9 leafNP shade 0.001199049 -0.30482695
cor10 leafNP tempSite 0.127948126 -0.27499238
cor11 leafNP defSite 0.055458029 -0.03096513
cor12 leafP shade 0.061840526 -0.31073530
cor14 leafP defSite 0.018675821 -0.02750221
cor15 maxHt shade -0.103222247 0.30304200
cor16 maxHt tempSite -0.043163030 0.27511474
cor18 maxDbh shade -0.058818838 0.26143911
cor21 SLA shade 0.009830698 -0.29932907
cor22 SLA tempSite 0.029624993 -0.27043021
cor24 woodSG shade 0.070732950 -0.25160709
cor25 woodSG tempSite 0.185850578 -0.23139111
# 确保pic_results有所需的列
if(!"delta_r" %in% names(pic_results)) {
pic_results$delta_r <- pic_results$raw_r - pic_results$pic_r
}
if(!"phylo_effect" %in% names(pic_results)) {
pic_results$phylo_effect <- ifelse(abs(pic_results$delta_r) > 0.1, "Strong",
ifelse(abs(pic_results$delta_r) > 0.05, "Moderate", "Weak"))
}
# 检查数据结构
cat("=== PIC Results Structure ===\n")=== PIC Results Structure ===
str(pic_results)'data.frame': 27 obs. of 9 variables:
$ functional_trait: chr "gmPerCm" "gmPerCm" "gmPerCm" "gmPerSeed" ...
$ sensitivity : chr "shade" "tempSite" "defSite" "shade" ...
$ raw_r : num 0.0759 0.1723 0.1137 -0.0187 0.0537 ...
$ raw_p : num 0.4598 0.0916 0.2673 0.8531 0.5956 ...
$ pic_r : num -0.2227 -0.2114 -0.0117 -0.0481 -0.0475 ...
$ pic_p : num 0.0292 0.0387 0.91 0.6364 0.6404 ...
$ n_species : int 97 97 97 100 100 100 100 100 100 100 ...
$ delta_r : num 0.2987 0.3836 0.1254 0.0294 0.1012 ...
$ phylo_effect : chr "Strong" "Strong" "Strong" "Weak" ...
cat("\nFirst few rows:\n")
First few rows:
print(head(pic_results)) functional_trait sensitivity raw_r raw_p pic_r pic_p
cor gmPerCm shade 0.07592224 0.45983318 -0.22273847 0.02916307
cor1 gmPerCm tempSite 0.17225893 0.09156544 -0.21137738 0.03870040
cor2 gmPerCm defSite 0.11374392 0.26728791 -0.01168649 0.91002448
cor3 gmPerSeed shade -0.01874629 0.85313311 -0.04810092 0.63636043
cor4 gmPerSeed tempSite 0.05370991 0.59560607 -0.04752820 0.64038696
cor5 gmPerSeed defSite 0.09019827 0.37214457 0.01666861 0.86992356
n_species delta_r phylo_effect
cor 97 0.29866071 Strong
cor1 97 0.38363631 Strong
cor2 97 0.12543041 Strong
cor3 100 0.02935463 Weak
cor4 100 0.10123811 Strong
cor5 100 0.07352966 Moderate
library(ggplot2)
library(ggrepel)
# 图1: 原始 vs PIC 相关性散点图
p1 <- ggplot(pic_results, aes(x = raw_r, y = pic_r)) +
geom_hline(yintercept = 0, linetype = "solid", alpha = 0.3) +
geom_vline(xintercept = 0, linetype = "solid", alpha = 0.3) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray50") +
geom_point(aes(color = sensitivity, size = abs(delta_r)), alpha = 0.7) +
scale_color_manual(values = c("shade" = "#27AE60",
"tempSite" = "#E67E22",
"defSite" = "#8E44AD")) +
scale_size_continuous(range = c(2, 6), name = "|Δr|") +
labs(
title = "Effect of Phylogenetic Correction on Trait-Sensitivity Correlations",
subtitle = "Points below diagonal: phylogeny inflates correlation; Above: phylogeny masks correlation",
x = "Raw Correlation (r)",
y = "PIC Correlation (r)",
color = "Environmental\nSensitivity"
) +
theme_minimal() +
coord_fixed()
# 只标记大效应的点
if(any(abs(pic_results$delta_r) > 0.15)) {
p1 <- p1 + geom_text_repel(
data = pic_results[abs(pic_results$delta_r) > 0.15, ],
aes(label = paste(functional_trait, "-", sensitivity)),
size = 3, max.overlaps = 15
)
}
print(p1)# 图2: Delta r 条形图(选择前20个最大效应)
top_effects <- pic_results %>%
arrange(desc(abs(delta_r))) %>%
head(20) %>%
mutate(
trait_sens = paste(functional_trait, sensitivity, sep = "-"),
trait_sens = factor(trait_sens, levels = trait_sens[order(delta_r)])
)
p2 <- ggplot(top_effects, aes(x = trait_sens, y = delta_r, fill = phylo_effect)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 0, linetype = "solid") +
coord_flip() +
scale_fill_manual(values = c("Strong" = "#E74C3C",
"Moderate" = "#F39C12",
"Weak" = "#95A5A6")) +
labs(
title = "Top 20 Phylogenetic Effects on Correlations",
subtitle = "Negative: phylogeny inflates correlation; Positive: phylogeny masks correlation",
x = "Trait-Sensitivity Pair",
y = "Change in Correlation (raw - PIC)",
fill = "Effect\nStrength"
) +
theme_minimal()
print(p2)# 分析PIC效应
analyze_pic_effects <- function(pic_results) {
cat("\n=== KEY FINDINGS FROM PIC ANALYSIS ===\n\n")
# 1. 系统发育掩盖的关系
masked_relationships <- pic_results %>%
filter(abs(pic_r) > abs(raw_r) + 0.1) %>%
arrange(desc(abs(pic_r)))
if(nrow(masked_relationships) > 0) {
cat("1. Relationships MASKED by phylogeny (stronger after PIC):\n")
print(masked_relationships[, c("functional_trait", "sensitivity", "raw_r", "pic_r", "delta_r")])
cat("\n")
}
# 2. 系统发育驱动的关系
phylo_driven <- pic_results %>%
filter(abs(raw_r) > abs(pic_r) + 0.1) %>%
arrange(desc(abs(delta_r)))
if(nrow(phylo_driven) > 0) {
cat("2. Relationships DRIVEN by phylogeny (weaker after PIC):\n")
print(phylo_driven[, c("functional_trait", "sensitivity", "raw_r", "pic_r", "delta_r")])
cat("\n")
}
# 3. 方向改变的关系
sign_change <- pic_results %>%
filter(sign(raw_r) != sign(pic_r) & abs(pic_r) > 0.02)
if(nrow(sign_change) > 0) {
cat("3. Relationships that CHANGED DIRECTION after PIC:\n")
print(sign_change[, c("functional_trait", "sensitivity", "raw_r", "pic_r", "delta_r")])
cat("\n")
}
# 4. 统计总结
cat("4. SUMMARY STATISTICS:\n")
cat(" Total comparisons:", nrow(pic_results), "\n")
cat(" Strong phylogenetic effects (|Δr| > 0.1):", sum(abs(pic_results$delta_r) > 0.1), "\n")
cat(" Direction changes:", sum(sign(pic_results$raw_r) != sign(pic_results$pic_r)), "\n")
cat(" Mean absolute change:", round(mean(abs(pic_results$delta_r)), 3), "\n")
}
analyze_pic_effects(pic_results)
=== KEY FINDINGS FROM PIC ANALYSIS ===
1. Relationships MASKED by phylogeny (stronger after PIC):
functional_trait sensitivity raw_r pic_r delta_r
cor12 leafP shade 0.061840526 -0.3107353 0.3725758
cor6 leafN shade 0.027374422 -0.3093823 0.3367568
cor9 leafNP shade 0.001199049 -0.3048269 0.3060260
cor15 maxHt shade -0.103222247 0.3030420 -0.4062642
cor21 SLA shade 0.009830698 -0.2993291 0.3091598
cor13 leafP tempSite -0.008608089 -0.2777512 0.2691431
cor7 leafN tempSite 0.019175087 -0.2764658 0.2956408
cor16 maxHt tempSite -0.043163030 0.2751147 -0.3182778
cor10 leafNP tempSite 0.127948126 -0.2749924 0.4029405
cor22 SLA tempSite 0.029624993 -0.2704302 0.3000552
cor18 maxDbh shade -0.058818838 0.2614391 -0.3202579
cor24 woodSG shade 0.070732950 -0.2516071 0.3223400
cor19 maxDbh tempSite 0.024688017 0.2300730 -0.2053849
cor gmPerCm shade 0.075922244 -0.2227385 0.2986607
2. Relationships DRIVEN by phylogeny (weaker after PIC):
functional_trait sensitivity raw_r pic_r delta_r
cor26 woodSG defSite 0.1198987 -0.01626167 0.1361604
cor2 gmPerCm defSite 0.1137439 -0.01168649 0.1254304
3. Relationships that CHANGED DIRECTION after PIC:
functional_trait sensitivity raw_r pic_r delta_r
cor gmPerCm shade 0.075922244 -0.22273847 0.29866071
cor1 gmPerCm tempSite 0.172258930 -0.21137738 0.38363631
cor4 gmPerSeed tempSite 0.053709908 -0.04752820 0.10123811
cor6 leafN shade 0.027374422 -0.30938233 0.33675675
cor7 leafN tempSite 0.019175087 -0.27646576 0.29564085
cor8 leafN defSite 0.035140648 -0.02416435 0.05930500
cor9 leafNP shade 0.001199049 -0.30482695 0.30602600
cor10 leafNP tempSite 0.127948126 -0.27499238 0.40294051
cor11 leafNP defSite 0.055458029 -0.03096513 0.08642316
cor12 leafP shade 0.061840526 -0.31073530 0.37257582
cor14 leafP defSite 0.018675821 -0.02750221 0.04617803
cor15 maxHt shade -0.103222247 0.30304200 -0.40626424
cor16 maxHt tempSite -0.043163030 0.27511474 -0.31827777
cor18 maxDbh shade -0.058818838 0.26143911 -0.32025795
cor21 SLA shade 0.009830698 -0.29932907 0.30915977
cor22 SLA tempSite 0.029624993 -0.27043021 0.30005520
cor24 woodSG shade 0.070732950 -0.25160709 0.32234004
cor25 woodSG tempSite 0.185850578 -0.23139111 0.41724168
4. SUMMARY STATISTICS:
Total comparisons: 27
Strong phylogenetic effects (|<U+0394>r| > 0.1): 19
Direction changes: 21
Mean absolute change: 0.224
# 创建一个综合的4面板图
library(gridExtra)
# Panel A: 散点图
panel_a <- p1 + theme(legend.position = "none") +
labs(title = "A. Raw vs PIC Correlations")
# Panel B: 最强效应的条形图
top_10 <- pic_results %>%
arrange(desc(abs(delta_r))) %>%
head(10) %>%
mutate(trait_sens = paste(substr(functional_trait, 1, 8),
substr(sensitivity, 1, 4), sep = "-"))
panel_b <- ggplot(top_10, aes(x = reorder(trait_sens, abs(delta_r)), y = delta_r)) +
geom_bar(stat = "identity", fill = ifelse(top_10$delta_r > 0, "#3498DB", "#E74C3C")) +
coord_flip() +
theme_minimal() +
labs(title = "B. Top 10 Phylogenetic Effects", x = "", y = "Δr")
# Panel C: 按敏感性分组的箱线图
panel_c <- ggplot(pic_results, aes(x = sensitivity, y = abs(delta_r), fill = sensitivity)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.3) +
scale_fill_manual(values = c("shade" = "#27AE60",
"tempSite" = "#E67E22",
"defSite" = "#8E44AD")) +
theme_minimal() +
theme(legend.position = "none") +
labs(title = "C. Effect Size by Sensitivity", x = "", y = "|Δr|")
# Panel D: 显著性比较
sig_data <- pic_results %>%
mutate(
raw_sig = ifelse(raw_p < 0.05, "Significant", "Not Significant"),
pic_sig = ifelse(pic_p < 0.05, "Significant", "Not Significant"),
category = paste(raw_sig, "→", pic_sig)
) %>%
count(category)
panel_d <- ggplot(sig_data, aes(x = category, y = n, fill = category)) +
geom_bar(stat = "identity") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none") +
labs(title = "D. Significance Changes", x = "", y = "Count")
# 组合所有面板
combined_plot <- grid.arrange(panel_a, panel_b, panel_c, panel_d,
ncol = 2, nrow = 2)cat("\n=== FINAL SUMMARY ===\n\n")
=== FINAL SUMMARY ===
# 最重要的发现
important_findings <- pic_results %>%
filter(abs(delta_r) > 0.15) %>%
arrange(desc(abs(delta_r))) %>%
head(5)
cat("TOP 5 PHYLOGENETIC EFFECTS:\n")TOP 5 PHYLOGENETIC EFFECTS:
for(i in 1:nrow(important_findings)) {
row <- important_findings[i,]
cat(sprintf("%d. %s vs %s:\n", i, row$functional_trait, row$sensitivity))
cat(sprintf(" Raw r = %.3f → PIC r = %.3f (Δ = %.3f)\n",
row$raw_r, row$pic_r, row$delta_r))
cat(sprintf(" Interpretation: %s\n\n",
ifelse(row$delta_r > 0,
"Phylogeny masks true correlation",
"Phylogeny creates spurious correlation")))
}1. woodSG vs tempSite:
Raw r = 0.186 <U+2192> PIC r = -0.231 (<U+0394> = 0.417)
Interpretation: Phylogeny masks true correlation
2. maxHt vs shade:
Raw r = -0.103 <U+2192> PIC r = 0.303 (<U+0394> = -0.406)
Interpretation: Phylogeny creates spurious correlation
3. leafNP vs tempSite:
Raw r = 0.128 <U+2192> PIC r = -0.275 (<U+0394> = 0.403)
Interpretation: Phylogeny masks true correlation
4. gmPerCm vs tempSite:
Raw r = 0.172 <U+2192> PIC r = -0.211 (<U+0394> = 0.384)
Interpretation: Phylogeny masks true correlation
5. leafP vs shade:
Raw r = 0.062 <U+2192> PIC r = -0.311 (<U+0394> = 0.373)
Interpretation: Phylogeny masks true correlation
# 整体模式
cat("OVERALL PATTERNS:\n")OVERALL PATTERNS:
cat("- Mean absolute phylogenetic effect:", round(mean(abs(pic_results$delta_r)), 3), "\n")- Mean absolute phylogenetic effect: 0.224
cat("- Proportion with strong effects (|Δr| > 0.1):",
round(mean(abs(pic_results$delta_r) > 0.1), 2), "\n")- Proportion with strong effects (|<U+0394>r| > 0.1): 0.7
cat("- Proportion with direction change:",
round(mean(sign(pic_results$raw_r) != sign(pic_results$pic_r)), 2), "\n")- Proportion with direction change: 0.78
# 检查tree对象是否存在
if(!exists("tree") || is.null(tree)) {
cat("Error: Phylogenetic tree not found!\n")
cat("Please load your tree first.\n")
# 如果你的树在其他变量中(比如trAll)
if(exists("trAll")) {
tree <- trAll
cat("Using trAll as phylogenetic tree\n")
} else {
stop("No phylogenetic tree found. Please load it first.")
}
}
# 检查tree的类型
cat("Tree class:", class(tree), "\n")Tree class: phylo
cat("Number of tips:", length(tree$tip.label), "\n")Number of tips: 223
cat("First 5 species in tree:", head(tree$tip.label, 5), "\n")First 5 species in tree: Cedrus brevifolia Cedrus libani Cedrus atlantica Tsuga mertensiana Tsuga heterophylla
# 检查matched_data
if(!exists("matched_data")) {
stop("matched_data not found. Please run the data preparation steps first.")
}
cat("\nData dimensions:", dim(matched_data), "\n")
Data dimensions: 284 53
cat("Species with tree names:", sum(!is.na(matched_data$tree_name)), "\n")Species with tree names: 284
# PGLS 分析:控制系统发育相关性
library(nlme)
library(ape)
# 准备数据
analysis_data <- matched_data %>%
mutate(
continent = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(continent)) %>%
mutate(
log_seed_mass = log(gmPerSeed + 0.001),
shade_std = scale(bfec.shade)[,1]
)
cat("Data prepared:\n")Data prepared:
cat(" Total species:", nrow(analysis_data), "\n") Total species: 216
cat(" EU species:", sum(analysis_data$continent == "EU"), "\n") EU species: 74
cat(" NA species:", sum(analysis_data$continent == "NA"), "\n") NA species: 142
# 找共同物种
common_sp <- intersect(analysis_data$tree_name, tree$tip.label)
cat("\nSpecies for PGLS analysis:", length(common_sp), "\n")
Species for PGLS analysis: 100
# 筛选数据和树
data_sub <- analysis_data[analysis_data$tree_name %in% common_sp, ]
tree_sub <- keep.tip(tree, common_sp)
data_sub <- data_sub[match(tree_sub$tip.label, data_sub$tree_name), ]
# 创建系统发育相关结构
cor_brownian <- corBrownian(phy = tree_sub)
# Model 1: 无交互
model1 <- gls(
log_seed_mass ~ shade_std + continent,
data = data_sub,
correlation = cor_brownian,
method = "ML"
)
# Model 2: 有交互
model2 <- gls(
log_seed_mass ~ shade_std * continent,
data = data_sub,
correlation = cor_brownian,
method = "ML"
)
# 显示结果
cat("\n=== Model 1: Additive ===\n")
=== Model 1: Additive ===
print(summary(model1))Generalized least squares fit by maximum likelihood
Model: log_seed_mass ~ shade_std + continent
Data: data_sub
AIC BIC logLik
487.4028 497.8235 -239.7014
Correlation Structure: corBrownian
Formula: ~1
Parameter estimate(s):
numeric(0)
Coefficients:
Value Std.Error t-value p-value
(Intercept) -3.522907 5.447158 -0.6467423 0.5193
shade_std -0.035933 0.084528 -0.4250954 0.6717
continentNA 0.202307 0.216001 0.9366026 0.3513
Correlation:
(Intr) shd_st
shade_std 0.007
continentNA -0.023 -0.441
Standardized residuals:
Min Q1 Med Q3 Max
-0.29173456 -0.06260276 0.07178814 0.34023621 0.56048821
Residual standard error: 10.81914
Degrees of freedom: 100 total; 97 residual
cat("\n=== Model 2: Interactive ===\n")
=== Model 2: Interactive ===
print(summary(model2))Generalized least squares fit by maximum likelihood
Model: log_seed_mass ~ shade_std * continent
Data: data_sub
AIC BIC logLik
488.9864 502.0123 -239.4932
Correlation Structure: corBrownian
Formula: ~1
Parameter estimate(s):
numeric(0)
Coefficients:
Value Std.Error t-value p-value
(Intercept) -3.462490 5.464901 -0.6335870 0.5279
shade_std 0.039119 0.145780 0.2683423 0.7890
continentNA 0.113781 0.257899 0.4411842 0.6601
shade_std:continentNA -0.116833 0.184602 -0.6328920 0.5283
Correlation:
(Intr) shd_st cntnNA
shade_std 0.018
continentNA -0.028 -0.657
shade_std:continentNA -0.017 -0.813 0.542
Standardized residuals:
Min Q1 Med Q3 Max
-0.29559248 -0.06022049 0.07177805 0.33988159 0.56501593
Residual standard error: 10.79664
Degrees of freedom: 100 total; 96 residual
cat("\n=== Model Comparison ===\n")
=== Model Comparison ===
print(anova(model1, model2)) Model df AIC BIC logLik Test L.Ratio p-value
model1 1 4 487.4028 497.8235 -239.7014
model2 2 5 488.9864 502.0123 -239.4932 1 vs 2 0.4163739 0.5188
prepare_continental_data <- function(matched_data) {
# 映射region到continent
data_with_continent <- matched_data %>%
mutate(
continent = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ NA_character_
)
) %>%
filter(
!is.na(continent),
!is.na(bfec.shade),
!is.na(gmPerSeed),
!is.na(tree_name)
) %>%
mutate(
log_seed_mass = log(gmPerSeed + 0.001),
shade_std = scale(bfec.shade)[,1]
)
cat("Data prepared:\n")
cat(" Total species:", nrow(data_with_continent), "\n")
cat(" EU species:", sum(data_with_continent$continent == "EU"), "\n")
cat(" NA species:", sum(data_with_continent$continent == "NA"), "\n")
return(data_with_continent)
}
# 重新运行
analysis_data <- prepare_continental_data(matched_data)Data prepared:
Total species: 216
EU species: 74
NA species: 142
# 然后运行PGLS分析
library(nlme)
library(ape)
run_phylo_ancova <- function(data, tree) {
# 找共同物种
common_sp <- intersect(data$tree_name, tree$tip.label)
cat("\nSpecies for PGLS analysis:", length(common_sp), "\n")
# 筛选数据和树
data_sub <- data[data$tree_name %in% common_sp, ]
# 修剪树
tree_sub <- keep.tip(tree, common_sp)
# 按树的顺序排序数据
data_sub <- data_sub[match(tree_sub$tip.label, data_sub$tree_name), ]
# 创建系统发育相关结构
cor_brownian <- corBrownian(phy = tree_sub)
# 模型1:无交互作用
cat("\n=== Model 1: Additive Effects ===\n")
model1 <- gls(
log_seed_mass ~ shade_std + continent,
data = data_sub,
correlation = cor_brownian,
method = "ML"
)
print(summary(model1))
# 模型2:有交互作用
cat("\n=== Model 2: Interactive Effects ===\n")
model2 <- gls(
log_seed_mass ~ shade_std * continent,
data = data_sub,
correlation = cor_brownian,
method = "ML"
)
print(summary(model2))
# 模型比较
cat("\n=== Model Comparison ===\n")
print(anova(model1, model2))
return(list(additive = model1, interactive = model2))
}
# 运行分析
pgls_results <- run_phylo_ancova(analysis_data, tree)
Species for PGLS analysis: 100
=== Model 1: Additive Effects ===
Generalized least squares fit by maximum likelihood
Model: log_seed_mass ~ shade_std + continent
Data: data_sub
AIC BIC logLik
487.4028 497.8235 -239.7014
Correlation Structure: corBrownian
Formula: ~1
Parameter estimate(s):
numeric(0)
Coefficients:
Value Std.Error t-value p-value
(Intercept) -3.522907 5.447158 -0.6467423 0.5193
shade_std -0.035933 0.084528 -0.4250954 0.6717
continentNA 0.202307 0.216001 0.9366026 0.3513
Correlation:
(Intr) shd_st
shade_std 0.007
continentNA -0.023 -0.441
Standardized residuals:
Min Q1 Med Q3 Max
-0.29173456 -0.06260276 0.07178814 0.34023621 0.56048821
Residual standard error: 10.81914
Degrees of freedom: 100 total; 97 residual
=== Model 2: Interactive Effects ===
Generalized least squares fit by maximum likelihood
Model: log_seed_mass ~ shade_std * continent
Data: data_sub
AIC BIC logLik
488.9864 502.0123 -239.4932
Correlation Structure: corBrownian
Formula: ~1
Parameter estimate(s):
numeric(0)
Coefficients:
Value Std.Error t-value p-value
(Intercept) -3.462490 5.464901 -0.6335870 0.5279
shade_std 0.039119 0.145780 0.2683423 0.7890
continentNA 0.113781 0.257899 0.4411842 0.6601
shade_std:continentNA -0.116833 0.184602 -0.6328920 0.5283
Correlation:
(Intr) shd_st cntnNA
shade_std 0.018
continentNA -0.028 -0.657
shade_std:continentNA -0.017 -0.813 0.542
Standardized residuals:
Min Q1 Med Q3 Max
-0.29559248 -0.06022049 0.07177805 0.33988159 0.56501593
Residual standard error: 10.79664
Degrees of freedom: 100 total; 96 residual
=== Model Comparison ===
Model df AIC BIC logLik Test L.Ratio p-value
model1 1 4 487.4028 497.8235 -239.7014
model2 2 5 488.9864 502.0123 -239.4932 1 vs 2 0.4163739 0.5188
library(ggplot2)
library(gridExtra)
# 创建综合可视化
visualize_continental_effects <- function(data) {
# 基础统计
cat("\n=== Basic Statistics ===\n")
stats <- data %>%
group_by(continent) %>%
summarise(
n = n(),
mean_shade = mean(bfec.shade),
sd_shade = sd(bfec.shade),
mean_seed = mean(log_seed_mass),
sd_seed = sd(log_seed_mass),
correlation = cor(bfec.shade, log_seed_mass)
)
print(stats)
# 图1:散点图with回归线
p1 <- ggplot(data, aes(x = bfec.shade, y = log_seed_mass, color = continent)) +
geom_point(alpha = 0.6, size = 2.5) +
geom_smooth(method = "lm", se = TRUE, size = 1.2) +
scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C"),
labels = c("EU" = "Europe", "NA" = "North America")) +
labs(
title = "Shade Sensitivity vs Seed Mass by Continent",
subtitle = paste("EU: n =", sum(data$continent == "EU"),
"; NA: n =", sum(data$continent == "NA")),
x = "Shade Sensitivity (βshade)",
y = "Log(Seed Mass)",
color = "Continent"
) +
theme_minimal() +
theme(legend.position = "top", text = element_text(size = 12))
# 图2:密度图比较shade sensitivity
p2 <- ggplot(data, aes(x = bfec.shade, fill = continent)) +
geom_density(alpha = 0.5) +
geom_rug(aes(color = continent), alpha = 0.3) +
scale_fill_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
labs(
title = "Distribution of Shade Sensitivity",
x = "Shade Sensitivity",
y = "Density"
) +
theme_minimal() +
theme(legend.position = "top")
# 图3:箱线图比较
p3_data <- data %>%
select(continent, bfec.shade, log_seed_mass) %>%
pivot_longer(cols = c(bfec.shade, log_seed_mass),
names_to = "variable", values_to = "value") %>%
mutate(variable = recode(variable,
"bfec.shade" = "Shade Sensitivity",
"log_seed_mass" = "Log(Seed Mass)"))
p3 <- ggplot(p3_data, aes(x = continent, y = value, fill = continent)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.1, alpha = 0.2) +
facet_wrap(~ variable, scales = "free_y") +
scale_fill_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
labs(
title = "Continental Comparison",
x = "Continent",
y = "Value"
) +
theme_minimal() +
theme(legend.position = "none")
# 图4:斜率比较(从简单lm获取)
eu_model <- lm(log_seed_mass ~ bfec.shade, data = filter(data, continent == "EU"))
na_model <- lm(log_seed_mass ~ bfec.shade, data = filter(data, continent == "NA"))
slope_data <- data.frame(
Continent = c("Europe", "North America"),
Slope = c(coef(eu_model)[2], coef(na_model)[2]),
SE = c(summary(eu_model)$coefficients[2, 2],
summary(na_model)$coefficients[2, 2])
)
p4 <- ggplot(slope_data, aes(x = Continent, y = Slope, fill = Continent)) +
geom_bar(stat = "identity", alpha = 0.7) +
geom_errorbar(aes(ymin = Slope - SE, ymax = Slope + SE), width = 0.2) +
scale_fill_manual(values = c("Europe" = "#3498DB", "North America" = "#E74C3C")) +
geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
labs(
title = "Shade Effect on Seed Mass",
subtitle = "(Simple linear regression)",
y = "Regression Slope"
) +
theme_minimal() +
theme(legend.position = "none")
# 组合图
grid.arrange(p1, p2, p3, p4, ncol = 2, nrow = 2)
return(list(p1 = p1, p2 = p2, p3 = p3, p4 = p4, stats = stats))
}
# 创建可视化
viz_results <- visualize_continental_effects(analysis_data)
=== Basic Statistics ===
# A tibble: 2 x 7
continent n mean_shade sd_shade mean_seed sd_seed correlation
<chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 EU 74 -0.960 0.829 -1.88 2.25 0.192
2 NA 142 -0.967 2.11 -2.32 2.44 -0.0150
# 创建结果总结可视化
library(ggplot2)
library(gridExtra)
# 准备数据用于可视化
create_summary_plots <- function() {
# 效应大小比较
effect_data <- data.frame(
Continent = c("Europe", "North America"),
Effect = c(0.060, 0.118),
SE = c(0.045, 0.045), # 近似值
Type = "Shade Effect on Seed Mass"
)
p1 <- ggplot(effect_data, aes(x = Continent, y = Effect, fill = Continent)) +
geom_bar(stat = "identity", alpha = 0.8) +
geom_errorbar(aes(ymin = Effect - SE, ymax = Effect + SE), width = 0.2) +
scale_fill_manual(values = c("Europe" = "#3498DB", "North America" = "#E74C3C")) +
labs(title = "A. Shade → Fecundity Effect",
subtitle = "NA effect is 2× stronger (though interaction p=0.48)",
y = "Regression Coefficient") +
theme_minimal() +
theme(legend.position = "none") +
geom_text(aes(label = round(Effect, 3)), vjust = -0.5)
# 进化速率比较
rate_data <- data.frame(
Continent = rep(c("Europe", "North America"), 2),
Rate = c(0.5736, 0.1423, 0.1529, 0.1971),
Trait = rep(c("Shade Sensitivity", "Seed Mass"), each = 2)
)
p2 <- ggplot(rate_data, aes(x = Continent, y = Rate, fill = Continent)) +
geom_bar(stat = "identity", alpha = 0.8) +
facet_wrap(~Trait, scales = "free_y") +
scale_fill_manual(values = c("Europe" = "#3498DB", "North America" = "#E74C3C")) +
labs(title = "B. Evolution Rates (σ²)",
subtitle = "EU: fast shade evolution; NA: fast seed evolution",
y = "Evolution Rate") +
theme_minimal() +
theme(legend.position = "none") +
geom_text(aes(label = round(Rate, 3)), vjust = -0.5)
# PIC相关性
pic_data <- data.frame(
Continent = c("Europe", "North America"),
Correlation = c(0.210, 0.207),
P_value = c(0.096, 0.030),
Significant = c("Marginal", "Significant")
)
p3 <- ggplot(pic_data, aes(x = Continent, y = Correlation, fill = Significant)) +
geom_bar(stat = "identity", alpha = 0.8) +
scale_fill_manual(values = c("Marginal" = "#95A5A6", "Significant" = "#27AE60")) +
labs(title = "C. Evolutionary Correlation (PIC)",
subtitle = "Shade-Seed coevolution",
y = "PIC Correlation") +
theme_minimal() +
geom_text(aes(label = paste("r =", round(Correlation, 3), "\np =", round(P_value, 3))),
vjust = 1.5)
grid.arrange(p1, p2, p3, ncol = 3)
}
create_summary_plots()matched_data_isp <- matched_data %>%
mutate(genus_lower = tolower(genus)) %>%
left_join(
results_isp_df %>%
rename(genus_lower = genus) %>%
select(genus_lower, eu_value, eu_sd, na_value, na_sd),
by = "genus_lower"
) %>%
select(-genus_lower) %>%
mutate(
ISP_mean = case_when(
region == "WEA" ~ eu_value,
region %in% c("ENA", "WNA") ~ na_value,
TRUE ~ NA_real_
),
ISP_sd = case_when(
region == "WEA" ~ eu_sd,
region %in% c("ENA", "WNA") ~ na_sd,
TRUE ~ NA_real_
),
log_ISP = log(ISP_mean + 0.001)
) %>%
select(-eu_value, -eu_sd, -na_value, -na_sd)
cat("\n=== ISP Integration Results ===\n")
=== ISP Integration Results ===
cat("Total species:", nrow(matched_data_isp), "\n")Total species: 284
cat("Species with ISP data:", sum(!is.na(matched_data_isp$ISP_mean)), "\n")Species with ISP data: 149
cat(" EU:", sum(matched_data_isp$region == "WEA" & !is.na(matched_data_isp$ISP_mean)), "\n") EU: 53
cat(" NA:", sum(matched_data_isp$region %in% c("ENA", "WNA") & !is.na(matched_data_isp$ISP_mean)), "\n") NA: 96
# 现在做系统发育信号分析
library(phytools)
data_with_isp <- matched_data_isp %>%
filter(!is.na(log_ISP)) %>%
select(tree_name, log_ISP) %>%
distinct()
isp_values <- setNames(data_with_isp$log_ISP, data_with_isp$tree_name)
common_sp <- intersect(names(isp_values), tree$tip.label)
cat("\nSpecies for phylogenetic analysis:", length(common_sp), "\n")
Species for phylogenetic analysis: 75
if(length(common_sp) >= 10) {
tree_isp <- keep.tip(tree, common_sp)
isp_final <- isp_values[common_sp]
lambda_test <- phylosig(tree_isp, isp_final, method = "lambda", test = TRUE)
K_test <- phylosig(tree_isp, isp_final, method = "K", test = TRUE, nsim = 999)
cat("\n=== ISP Phylogenetic Signal ===\n")
cat("Pagel's λ =", round(lambda_test$lambda, 3), " P =", format.pval(lambda_test$P), "\n")
cat("Blomberg's K =", round(K_test$K, 3), " P =", format.pval(K_test$P), "\n")
} else {
cat("Not enough species for analysis\n")
}
=== ISP Phylogenetic Signal ===
Pagel's <U+03BB> = 0.999 P = < 2.22e-16
Blomberg's K = 0.69 P = 0.001001
library(geiger)
library(phytools)
compare_evolutionary_patterns <- function(data, tree) {
cat("\n=== Evolutionary Pattern Analysis ===\n")
# 添加 continent 列
data <- data %>%
mutate(
continent = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ NA_character_
),
log_seed_mass = log(gmPerSeed + 0.001)
) %>%
filter(!is.na(continent))
results <- list()
for(cont in c("EU", "NA")) {
cat("\nAnalyzing", cont, "...\n")
cont_data <- data %>%
filter(continent == cont) %>%
filter(tree_name %in% tree$tip.label)
cont_species <- cont_data$tree_name
if(length(cont_species) > 10) {
tree_cont <- keep.tip(tree, cont_species)
shade_vals <- cont_data$bfec.shade
names(shade_vals) <- cont_data$tree_name
shade_vals <- shade_vals[tree_cont$tip.label]
seed_vals <- cont_data$log_seed_mass
names(seed_vals) <- cont_data$tree_name
seed_vals <- seed_vals[tree_cont$tip.label]
tryCatch({
fit_shade <- fitContinuous(tree_cont, shade_vals, model = "BM")
fit_seed <- fitContinuous(tree_cont, seed_vals, model = "BM")
K_shade <- phylosig(tree_cont, shade_vals, method = "K", test = TRUE, nsim = 99)
K_seed <- phylosig(tree_cont, seed_vals, method = "K", test = TRUE, nsim = 99)
pic_shade <- pic(shade_vals, tree_cont)
pic_seed <- pic(seed_vals, tree_cont)
pic_cor <- cor.test(pic_shade, pic_seed)
results[[cont]] <- list(
n_species = length(cont_species),
shade_rate = fit_shade$opt$sigsq,
seed_rate = fit_seed$opt$sigsq,
shade_K = K_shade$K,
seed_K = K_seed$K,
pic_correlation = pic_cor$estimate,
pic_p_value = pic_cor$p.value
)
cat(" N species:", length(cont_species), "\n")
cat(" Shade evolution rate:", round(fit_shade$opt$sigsq, 4), "\n")
cat(" Seed mass evolution rate:", round(fit_seed$opt$sigsq, 4), "\n")
cat(" Shade K:", round(K_shade$K, 3), "p =", round(K_shade$P, 3), "\n")
cat(" Seed K:", round(K_seed$K, 3), "p =", round(K_seed$P, 3), "\n")
cat(" PIC correlation:", round(pic_cor$estimate, 3),
"p =", round(pic_cor$p.value, 3), "\n")
}, error = function(e) {
cat(" Error:", e$message, "\n")
})
} else {
cat(" Not enough species with tree data\n")
}
}
# 比较结果
if(length(results) == 2) {
cat("\n=== Continental Comparison ===\n")
cat("Shade evolution rate ratio (NA/EU):",
round(results[["NA"]]$shade_rate / results[["EU"]]$shade_rate, 2), "\n")
cat("Seed evolution rate ratio (NA/EU):",
round(results[["NA"]]$seed_rate / results[["EU"]]$seed_rate, 2), "\n")
cat("PIC correlation difference:",
round(results[["NA"]]$pic_correlation - results[["EU"]]$pic_correlation, 3), "\n")
}
return(results)
}
# 运行
evol_patterns <- compare_evolutionary_patterns(matched_data_isp, tree)
=== Evolutionary Pattern Analysis ===
Analyzing EU ...
N species: 36
Shade evolution rate: 0.8525
Seed mass evolution rate: 0.1019
Shade K: 0.004 p = 0.96
Seed K: 0.244 p = 0.01
PIC correlation: -0.361 p = 0.033
Analyzing NA ...
N species: 64
Shade evolution rate: 1.427
Seed mass evolution rate: 0.4614
Shade K: 0.002 p = 0.727
Seed K: 0.08 p = 0.01
PIC correlation: -0.102 p = 0.425
=== Continental Comparison ===
Shade evolution rate ratio (NA/EU): 1.67
Seed evolution rate ratio (NA/EU): 4.53
PIC correlation difference: 0.259
# 修复1:比较性状的系统发育信号
compare_all_traits_phylosignal <- function(data, tree) {
library(phytools)
cat("\n=== Comparative Phylogenetic Signal Analysis ===\n")
# 只用可用的数值性状
traits <- c("log_ISP", "bfec.shade", "bfec.tempSite", "bfec.defSite",
"gmPerCm", "gmPerSeed", "leafN", "maxHt", "maxDbh", "SLA", "woodSG")
results <- data.frame()
for(trait in traits) {
if(trait %in% names(data)) {
trait_data <- data %>%
filter(!is.na(!!sym(trait))) %>%
filter(is.finite(!!sym(trait))) # 确保是有限数值
if(nrow(trait_data) < 10) next
trait_vals <- as.numeric(trait_data[[trait]])
names(trait_vals) <- trait_data$tree_name
common <- intersect(names(trait_vals), tree$tip.label)
if(length(common) >= 10) {
tree_temp <- keep.tip(tree, common)
trait_final <- trait_vals[common]
tryCatch({
lambda_res <- phylosig(tree_temp, trait_final, method = "lambda", test = TRUE)
K_res <- phylosig(tree_temp, trait_final, method = "K", test = TRUE, nsim = 99)
results <- rbind(results, data.frame(
Trait = trait,
Category = case_when(
trait %in% c("bfec.shade", "bfec.tempSite", "bfec.defSite") ~ "Sensitivity",
trait == "log_ISP" ~ "Reproduction",
TRUE ~ "Function"
),
N = length(common),
Lambda = round(lambda_res$lambda, 3),
Lambda_P = lambda_res$P,
K = round(K_res$K, 3),
K_P = K_res$P
))
}, error = function(e) {
cat(" Error with", trait, ":", e$message, "\n")
})
}
}
}
results <- results %>% arrange(desc(Lambda))
print(results)
return(results)
}
# 修复2:大洲分析 - 正确处理region格式
analyze_continental_isp_evolution <- function(data, tree) {
library(geiger)
library(phytools)
results <- list()
# 正确映射region
region_map <- c("WEA" = "EU", "ENA" = "NA", "WNA" = "NA")
data$continent <- region_map[data$region]
for(cont in c("EU", "NA")) {
cat("\n━━━", cont, "━━━\n")
cont_data <- data %>%
filter(continent == cont, !is.na(log_ISP))
if(nrow(cont_data) < 10) {
cat("Not enough data\n")
next
}
# 准备树和数据
cont_species <- cont_data$tree_name
tree_cont <- keep.tip(tree, intersect(tree$tip.label, cont_species))
if(length(tree_cont$tip.label) < 10) next
isp_vals <- as.numeric(cont_data$log_ISP)
names(isp_vals) <- cont_data$tree_name
isp_vals <- isp_vals[tree_cont$tip.label]
# 系统发育信号
lambda_cont <- phylosig(tree_cont, isp_vals, method = "lambda", test = TRUE)
K_cont <- phylosig(tree_cont, isp_vals, method = "K", test = TRUE, nsim = 99)
# 基础统计
stats <- cont_data %>%
summarise(
mean_ISP = mean(exp(log_ISP), na.rm = TRUE),
sd_ISP = sd(exp(log_ISP), na.rm = TRUE),
cv_ISP = sd_ISP / mean_ISP
)
results[[cont]] <- list(
n = length(isp_vals),
stats = stats,
lambda = lambda_cont$lambda,
K = K_cont$K
)
cat("Sample size:", length(isp_vals), "species\n")
cat("Mean ISP:", round(stats$mean_ISP, 3), "\n")
cat("λ =", round(lambda_cont$lambda, 3), " K =", round(K_cont$K, 3), "\n")
}
return(results)
}
# 运行
trait_comparison <- compare_all_traits_phylosignal(matched_data_isp, tree)
=== Comparative Phylogenetic Signal Analysis ===
Trait Category N Lambda Lambda_P K K_P
1 gmPerCm Function 97 1.000 8.720774e-42 0.869 0.01010101
2 leafN Function 100 1.000 3.499974e-61 1.257 0.01010101
3 maxHt Function 100 1.000 1.304940e-56 1.102 0.01010101
4 maxDbh Function 99 1.000 7.146564e-26 0.298 0.01010101
5 SLA Function 97 1.000 4.168186e-56 1.064 0.01010101
6 woodSG Function 97 1.000 7.499977e-32 0.440 0.01010101
7 log_ISP Reproduction 75 0.999 3.260669e-40 0.690 0.01010101
8 gmPerSeed Function 100 0.924 9.006442e-12 0.016 0.11111111
9 bfec.shade Sensitivity 101 0.000 1.000000e+00 0.002 0.93939394
10 bfec.tempSite Sensitivity 101 0.000 1.000000e+00 0.001 0.97979798
11 bfec.defSite Sensitivity 101 0.000 1.000000e+00 0.001 0.90909091
continental_results <- analyze_continental_isp_evolution(matched_data_isp, tree)
<U+2501><U+2501><U+2501> EU <U+2501><U+2501><U+2501>
Sample size: 27 species
Mean ISP: 0.554
<U+03BB> = 1.001 K = 1.193
<U+2501><U+2501><U+2501> NA <U+2501><U+2501><U+2501>
Sample size: 48 species
Mean ISP: 0.594
<U+03BB> = 1 K = 1.925
# 修复版本 - ISP与生态策略
analyze_isp_ecological_strategies <- function(data, tree) {
library(nlme)
library(ggplot2)
cat("\n=== ISP and Ecological Strategies ===\n")
# 准备数据 - 创建缺失的列
eco_data <- data %>%
filter(!is.na(log_ISP), !is.na(bfec.shade), !is.na(gmPerSeed)) %>%
mutate(
log_seed_mass = log(gmPerSeed),
continent = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(continent))
cat("Species for analysis:", nrow(eco_data), "\n")
cat(" EU:", sum(eco_data$continent == "EU"), "\n")
cat(" NA:", sum(eco_data$continent == "NA"), "\n")
# 基础相关性
cat("\n1. Basic correlations:\n")
cor_matrix <- eco_data %>%
select(log_ISP, bfec.shade, log_seed_mass) %>%
cor(use = "complete.obs")
print(round(cor_matrix, 3))
# 准备PGLS
common_sp <- intersect(eco_data$tree_name, tree$tip.label)
if(length(common_sp) < 10) {
cat("Not enough species for PGLS\n")
return(NULL)
}
eco_data <- eco_data[eco_data$tree_name %in% common_sp, ]
tree_eco <- keep.tip(tree, common_sp)
eco_data <- eco_data[match(tree_eco$tip.label, eco_data$tree_name), ]
cor_brown <- corBrownian(phy = tree_eco)
cat("\n2. Phylogenetically controlled relationships:\n")
# ISP ~ Shade
tryCatch({
m1 <- gls(log_ISP ~ bfec.shade * continent,
data = eco_data,
correlation = cor_brown,
method = "ML")
cat("\nISP ~ Shade × Continent:\n")
print(summary(m1)$tTable)
}, error = function(e) {
cat("Error in model 1:", e$message, "\n")
})
# ISP ~ Seed mass (trade-off)
tryCatch({
m2 <- gls(log_ISP ~ log_seed_mass * continent,
data = eco_data,
correlation = cor_brown,
method = "ML")
cat("\nISP ~ Seed mass × Continent:\n")
print(summary(m2)$tTable)
}, error = function(e) {
cat("Error in model 2:", e$message, "\n")
})
# 简单可视化
cat("\n3. Visualization...\n")
p1 <- ggplot(eco_data, aes(x = bfec.shade, y = exp(log_ISP), color = continent)) +
geom_point(size = 3, alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
scale_y_log10() +
scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
labs(title = "ISP vs Shade Sensitivity",
x = "Shade Sensitivity",
y = "ISP (seeds/cm²)",
color = "Continent") +
theme_minimal()
p2 <- ggplot(eco_data, aes(x = exp(log_seed_mass), y = exp(log_ISP), color = continent)) +
geom_point(size = 3, alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE, alpha = 0.2) +
scale_x_log10() +
scale_y_log10() +
scale_color_manual(values = c("EU" = "#3498DB", "NA" = "#E74C3C")) +
labs(title = "Seed Size-Number Trade-off",
x = "Seed Mass (mg)",
y = "ISP (seeds/cm²)",
color = "Continent") +
theme_minimal()
print(p1)
print(p2)
return(list(data = eco_data, n_species = nrow(eco_data)))
}
# 运行
ecological_results <- analyze_isp_ecological_strategies(matched_data_isp, tree)
=== ISP and Ecological Strategies ===
Species for analysis: 149
EU: 53
NA: 96
1. Basic correlations:
log_ISP bfec.shade log_seed_mass
log_ISP 1.000 -0.023 0.790
bfec.shade -0.023 1.000 -0.004
log_seed_mass 0.790 -0.004 1.000
2. Phylogenetically controlled relationships:
ISP ~ Shade × Continent:
Value Std.Error t-value p-value
(Intercept) -2.43201940 1.18621953 -2.0502271 0.04403585
bfec.shade 0.01968203 0.01827018 1.0772761 0.28500363
continentNA 0.16738539 0.07538951 2.2202743 0.02959341
bfec.shade:continentNA -0.02046622 0.02192930 -0.9332821 0.35383740
ISP ~ Seed mass × Continent:
Value Std.Error t-value p-value
(Intercept) -2.45654292 1.08842993 -2.256960 0.0270892460
log_seed_mass -0.07549011 0.03408093 -2.215025 0.0299679027
continentNA 0.15181887 0.04365804 3.477455 0.0008682577
log_seed_mass:continentNA 0.10771155 0.02953383 3.647057 0.0005022262
3. Visualization...
detailed_statistical_analysis <- function(matched_data_isp, tree) {
library(nlme)
cat("\n=== Detailed Statistical Models ===\n")
# 准备数据
model_data <- matched_data_isp %>%
filter(!is.na(log_ISP), !is.na(gmPerSeed), !is.na(bfec.shade)) %>%
mutate(
log_seed = log(gmPerSeed + 0.001),
continent = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(continent))
cat("Total species:", nrow(model_data), "\n")
# 匹配树
common_sp <- intersect(model_data$tree_name, tree$tip.label)
if(length(common_sp) < 10) {
cat("Not enough species\n")
return(NULL)
}
model_data <- model_data[model_data$tree_name %in% common_sp, ]
tree_model <- keep.tip(tree, common_sp)
model_data <- model_data[match(tree_model$tip.label, model_data$tree_name), ]
cor_brown <- corBrownian(phy = tree_model)
# 完整模型
cat("\n1. FULL MODEL:\n")
m_full <- gls(log_ISP ~ log_seed * bfec.shade * continent,
data = model_data,
correlation = cor_brown,
method = "ML")
print(summary(m_full)$tTable)
# 分别为每个大洲建模
cat("\n2. SEPARATE MODELS BY CONTINENT:\n")
for(cont in c("EU", "NA")) {
cat("\n---", cont, "---\n")
cont_data <- model_data %>% filter(continent == cont)
cont_tree_tips <- intersect(cont_data$tree_name, tree$tip.label)
if(length(cont_tree_tips) > 10) {
cont_tree <- keep.tip(tree, cont_tree_tips)
cont_data <- cont_data[match(cont_tree$tip.label, cont_data$tree_name), ]
cor_cont <- corBrownian(phy = cont_tree)
m_cont <- gls(log_ISP ~ log_seed + bfec.shade,
data = cont_data,
correlation = cor_cont,
method = "ML")
print(summary(m_cont)$tTable)
cat("N species:", nrow(cont_data), "\n")
} else {
cat("Not enough species for", cont, "\n")
}
}
return(m_full)
}
# 修复版本 2:属级别分析
analyze_genus_contributions <- function(matched_data_isp) {
library(dplyr)
cat("\n════════════════════════════════════════════════════════════\n")
cat(" GENUS-LEVEL ANALYSIS OF ISP PATTERNS\n")
cat("════════════════════════════════════════════════════════════\n")
# 准备数据
analysis_data <- matched_data_isp %>%
filter(!is.na(ISP_mean), !is.na(gmPerSeed)) %>%
mutate(
continent = case_when(
region == "WEA" ~ "EU",
region %in% c("ENA", "WNA") ~ "NA",
TRUE ~ NA_character_
)
) %>%
filter(!is.na(continent))
# 计算每个属在每个大洲的相关性
genus_correlations <- analysis_data %>%
group_by(genus, continent) %>%
summarise(
n_species = n(),
mean_ISP = mean(ISP_mean, na.rm = TRUE),
mean_seed = mean(gmPerSeed, na.rm = TRUE),
sd_ISP = sd(ISP_mean, na.rm = TRUE),
sd_seed = sd(gmPerSeed, na.rm = TRUE),
.groups = 'drop'
) %>%
filter(n_species >= 2) # 至少2个物种
# 计算相关性 - 分开处理
for(i in 1:nrow(genus_correlations)) {
g <- genus_correlations$genus[i]
c <- genus_correlations$continent[i]
subset_data <- analysis_data %>%
filter(genus == g, continent == c)
if(nrow(subset_data) >= 3) {
cor_result <- tryCatch(
cor(log(subset_data$ISP_mean + 0.001),
log(subset_data$gmPerSeed + 0.001),
use = "complete.obs"),
error = function(e) NA
)
genus_correlations$cor_ISP_seed[i] <- cor_result
}
}
cat("\n=== Genus-level patterns ===\n")
print(genus_correlations %>% arrange(continent, desc(n_species)))
# 比较同一属在两个大洲的表现
cat("\n=== SAME GENUS, DIFFERENT CONTINENTS ===\n")
shared_genera <- genus_correlations %>%
group_by(genus) %>%
mutate(n_continents = n()) %>%
filter(n_continents == 2) %>%
select(genus, continent, n_species, cor_ISP_seed) %>%
arrange(genus, continent)
if(nrow(shared_genera) > 0) {
# 手动转置
shared_wide <- data.frame()
for(g in unique(shared_genera$genus)) {
eu_row <- shared_genera %>% filter(genus == g, continent == "EU")
na_row <- shared_genera %>% filter(genus == g, continent == "NA")
if(nrow(eu_row) > 0 & nrow(na_row) > 0) {
shared_wide <- rbind(shared_wide, data.frame(
genus = g,
EU_n = eu_row$n_species[1],
EU_cor = eu_row$cor_ISP_seed[1],
NA_n = na_row$n_species[1],
NA_cor = na_row$cor_ISP_seed[1],
cor_diff = na_row$cor_ISP_seed[1] - eu_row$cor_ISP_seed[1]
))
}
}
shared_wide <- shared_wide %>% arrange(desc(abs(cor_diff)))
print(shared_wide)
}
return(list(
genus_cors = genus_correlations,
shared = shared_wide
))
}
# 运行
detailed_models <- detailed_statistical_analysis(matched_data_isp, tree)
=== Detailed Statistical Models ===
Total species: 149
1. FULL MODEL:
Value Std.Error t-value p-value
(Intercept) -2.47771914 1.11232834 -2.2275070 0.029276483
log_seed -0.12249815 0.05525430 -2.2169887 0.030021475
bfec.shade 0.01889367 0.03335126 0.5665055 0.572943080
continentNA 0.12891334 0.07561499 1.7048649 0.092852791
log_seed:bfec.shade -0.02227968 0.02877754 -0.7742036 0.441533879
log_seed:continentNA 0.15201816 0.05191035 2.9284750 0.004650767
bfec.shade:continentNA -0.02096473 0.03373565 -0.6214415 0.536417912
log_seed:bfec.shade:continentNA 0.02452682 0.02777902 0.8829258 0.380433401
2. SEPARATE MODELS BY CONTINENT:
--- EU ---
Value Std.Error t-value p-value
(Intercept) -1.64322173 0.94897654 -1.731573 0.09618657
log_seed 0.18571798 0.06827636 2.720092 0.01194289
bfec.shade 0.01910266 0.01789261 1.067629 0.29631003
N species: 27
--- NA ---
Value Std.Error t-value p-value
(Intercept) -2.501526859 0.922422325 -2.7119106 0.009440419
log_seed 0.013370726 0.017341668 0.7710173 0.444726328
bfec.shade 0.001107467 0.009603453 0.1153197 0.908704792
N species: 48
genus_results <- analyze_genus_contributions(matched_data_isp)
------------------------------------------------------------
GENUS-LEVEL ANALYSIS OF ISP PATTERNS
------------------------------------------------------------
=== Genus-level patterns ===
# A tibble: 20 x 8
genus continent n_species mean_ISP mean_seed sd_ISP sd_seed cor_ISP_seed
<chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Quercus EU 16 1.30 2.16 0 0.805 NA
2 Pinus EU 8 0.0443 0.138 0 0.220 NA
3 Abies EU 5 0.0264 0.0718 0 0.0259 NA
4 Acer EU 5 0.448 0.0889 0 0.0544 NA
5 Fraxinus EU 3 0.326 0.0608 0 0.0294 NA
6 Picea EU 3 0.0100 0.00543 0 0.00177 NA
7 Prunus EU 3 0.266 0.284 0 0.176 NA
8 Juniperus EU 2 1.06 0.166 0 0.0325 NA
9 Tilia EU 2 0.225 0.0427 0 0.00750 NA
10 Quercus NA 28 1.72 2.82 0 1.67 NA
11 Pinus NA 24 0.0451 0.106 0 0.133 NA
12 Abies NA 7 0.0302 0.0313 0 0.0203 NA
13 Acer NA 7 0.555 0.0646 0 0.0406 NA
14 Picea NA 6 0.0304 0.00253 0 0.00111 NA
15 Juniperus NA 5 0.0901 0.0325 0 0.0388 NA
16 Prunus NA 4 0.136 0.230 0 0.327 NA
17 Betula NA 3 0.0224 0.000713 0 0.000338 NA
18 Fraxinus NA 3 0.210 0.0444 0 0.0122 NA
19 Ulmus NA 3 0.0146 0.0069 0 0.00365 NA
20 Alnus NA 2 0.00450 0.000791 0 0.000384 NA
=== SAME GENUS, DIFFERENT CONTINENTS ===
genus EU_n EU_cor NA_n NA_cor cor_diff
1 Abies 5 NA 7 NA NA
2 Acer 5 NA 7 NA NA
3 Fraxinus 3 NA 3 NA NA
4 Juniperus 2 NA 5 NA NA
5 Picea 3 NA 6 NA NA
6 Pinus 8 NA 24 NA NA
7 Prunus 3 NA 4 NA NA
8 Quercus 16 NA 28 NA NA