Load packages

library(ggplot2)
library(dplyr)
library(colourvalues)
library(pheatmap)
library(knitr)
library(RColorBrewer)
library(corrplot)
library(PoiClaClu)
library(stats)
library(reshape2)
library(ggpubr)
library(tidyr)
library(cowplot)
library(viridis)
library(Hmisc)
library(stringr)
library(ggfortify)

Functions been created in the analysis

### Z-score
scale_rows = function(x){
    m = apply(x, 1, mean, na.rm = T)
    s = apply(x, 1, sd, na.rm = T)
    return((x - m) / s)
}

### Summary 
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      sd = sd(x[[col]], na.rm=TRUE))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,
                  varname)
  data_sum <- rename(data_sum, c("mean" = varname))
 return(data_sum)
}

### Correlation
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
}

Figure 1 Physiological data

LI-6800 data

Soil microbiome data

Soil <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/01_Module/Soil-moisture.csv", header = T)

df_long <- Soil %>%
  pivot_longer(
    cols = starts_with("cm_"),
    names_to = "Depth",
    values_to = "Moisture"
  )

# 确保深度顺序正确
df_long$Depth <- factor(
  df_long$Depth,
  levels = paste0("cm_", seq(10, 70, by = 10))
)

# 作图
ggplot(df_long,
       aes(x = Day,
           y = Moisture,
           color = Depth,
           linetype = Treatment,
           group = interaction(Depth, Treatment))) +
  geom_line(linewidth = 1) +
  scale_linetype_manual(
    values = c(Control = "solid", Drought = "dashed")
  ) +
  labs(
    x = "Day",
    y = "Soil moisture",
    color = "Soil depth",
    linetype = "Treatment"
  ) +
  scale_color_manual(values = c("cm_10" = "grey70","cm_30" = "grey50","cm_50" = "grey30", "cm_70" = "grey10")) +
  theme_classic(base_size = 13) +
    geom_vline(
    xintercept = c(7, 21, 35, 49),
    linetype = "dotted",
    color = "red",
    linewidth = 0.6
  )

Plot Morphological-Data

weight <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Weight.csv")
Root <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Root-score.csv")

head(Root)

Root$Condition <- factor(Root$Condition , levels = c("WW", "WL"))

 ggplot(Root, aes(x = Condition, y = height, fill = Genotype)) +
      geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
      theme_classic() +
      scale_fill_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
      stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                 geom = "point", shape = 4, size = 1,
                 show.legend = FALSE) +   theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")

 ggplot(Root, aes(x = Condition, y = width, fill = Genotype)) +
      geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
      theme_classic() +
      scale_fill_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
      stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                 geom = "point", shape = 4, size = 1,
                 show.legend = FALSE)

Plot the Biomass data

head(weight)

weight$Treat <- factor(weight$Treat  , levels = c("WW", "WL"))

 ggplot(weight, aes(x = Treat, y = biomass_total, fill = Genotype)) +
      geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
      theme_classic() +
      scale_fill_manual(values = c(A  = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
      stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                 geom = "point", shape = 4, size = 1,
                 show.legend = FALSE) +
   theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")

  ggplot(weight, aes(x = Treat, y = Three_leaves, fill = Genotype)) +
      geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
      theme_classic() +
      scale_fill_manual(values = c(A  = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
      scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
      scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
      stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                 geom = "point", shape = 4, size = 1,
                 show.legend = FALSE) +
   theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "right")

morning <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-morning.csv")
afternoon <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-afternoon.csv")

Plot the LI-6800 data

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.5
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ Hmisc::src()       masks dplyr::src()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# 2. 定义性状与颜色
#------------------------------------------------
traits <- c("WUEi", "Ci","Tair", "Tleaf", "gsw", "Fv..Fm.", "A", "VPDleaf")   # ⚠ 根据实际列名检查

genotype_colors <- c(
  A = "#C25759",
  B = "#92A5D1",
  C = "grey70",
  D = "#DAA87C",
  E = "#C9DCC4",
  F = "#377483"
)

df <- bind_rows(morning, afternoon)
#------------------------------------------------
# 3. 计算各 trait 在 Genotype × Treatment × Time × Period 下的 mean ± SE
#------------------------------------------------
df_summary <- df %>%
  pivot_longer(cols = all_of(traits), names_to = "Trait", values_to = "Value") %>%
  group_by(Genotype, Condition, Time, Trait) %>%
  summarise(
    mean = mean(Value, na.rm = TRUE),
    se = sd(Value, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

#------------------------------------------------
# 4. 绘图函数：每个 trait 单独画一张包含 WL / WW 的图
#------------------------------------------------
plot_trait <- function(trait_name) {
  df_summary[df_summary$Condition == "WL",] %>%
    filter(Trait == trait_name) %>%
    ggplot(aes(x = Time, y = mean, fill = Genotype)) +
    geom_col(position = position_dodge(width = 0.9), width = 0.8) +
    geom_errorbar(
      aes(ymin = mean - 0.8*se, ymax = mean + 0.8*se),
      position = position_dodge(width = 0.9),
      width = 0.3
    ) +
    scale_fill_manual(values = genotype_colors) +
    labs(
      x = "",
      y = trait_name
    ) +
    theme_classic(base_size = 14) +
    theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 
}

#------------------------------------------------
# 5. 生成四个 trait 的 WL/WW 图
#------------------------------------------------

plot_trait("gsw")

plot_trait("A")

plot_trait("Ci")

plot_trait("Tleaf")

plot_trait("Tair")

plot_trait("Fv..Fm.")

plot_trait("VPDleaf")

# Figure 2 Omics data ## Number of DEGs

A_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_A.csv", header =T)
B_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_B.csv", header =T)
C_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_C.csv", header =T)
D_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_D.csv", header =T)
E_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_E.csv", header =T)
F_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_F.csv", header =T)


Combine <- na.omit(rbind(A_Group,B_Group,C_Group,D_Group,E_Group,F_Group))
names(Combine)[8] = "GeneID"
head(Combine, 50)

### Filter differentially expressed genes based on following creteria
donw_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange < -1,]
donw_gene <- donw_gene %>% group_by(Sample) %>% mutate(Down = n())
donw_summary <- unique(donw_gene[,c("Sample","Down")])

up_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange > 1,]
up_gene <- up_gene %>% group_by(Sample) %>% mutate(up = n())
up_summary <- unique(up_gene[,c("Sample","up")])

combind_summary <- left_join(up_summary, donw_summary, by = "Sample")
head(combind_summary, 20)

### Plot line graph of DEGs at differene time

Plot_DEGs <- melt(combind_summary, id.vars = c("Sample"))
plot_summary <- Plot_DEGs %>% separate(Sample, c("Accession", "Time"))
plot_summary$Index <- paste0(plot_summary$Accession, "_", plot_summary$variable)

plot_summary[is.na(plot_summary)] <- 0

ggplot(data=plot_summary, aes(x = Time, y = value, color = Accession, group = Index)) +
  geom_point(size = 3, aes(alpha = Accession, shape = variable)) +
  geom_line(size = 0.5, aes(group = Index, lty= variable, color = Accession)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90)) +
  ylab("Numbers of DEGs")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Oxidative <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/01_Oxidative_clean-20220324-update.csv")

Cluster heatmap of oxidative stress

custom_order <- c("polyph", "flav", "ascorbate","glutathione", 
           "MDA", "LOX", "pro", "GOX","HPR",
           "SOD", "AO", "POX", "CAT", "APX", "DHAR", "MDHAR", "GPX","GR", "Trxs","Frxs", "Grxs", "Prxs",
           "TAC", "caro")

Oxidative_cor <- Oxidative
rownames(Oxidative_cor) <- Oxidative_cor$Name
Oxidative_cor <- Oxidative_cor[,c(7:30)]
Oxidative_mat <- cor(Oxidative_cor)

Oxidative_remat <- Oxidative_mat[custom_order, custom_order]


Cor_plot1 <- corrplot(Oxidative_remat, method = 'color',
         addrect = 4, rect.col = 'blue', rect.lwd = 2,)

Line graph of oxidative stress

colnames(Oxidative)

##  [1] "Name"        "Genotype"    "Condition"   "Time"        "ID"         
##  [6] "Rep"         "caro"        "polyph"      "flav"        "pro"        
## [11] "TAC"         "MDA"         "LOX"         "GOX"         "HPR"        
## [16] "POX"         "CAT"         "AO"          "APX"         "MDHAR"      
## [21] "DHAR"        "GR"          "GPX"         "SOD"         "Prxs"       
## [26] "Grxs"        "Trxs"        "Frxs"        "ascorbate"   "glutathione"

Oxidative_ave <- Oxidative %>% group_by(ID) %>% dplyr::summarize(across(
  c("polyph", "flav", "ascorbate","glutathione", 
           "MDA", "LOX", "pro", "GOX","HPR",
           "SOD", "AO", "POX", "CAT", "APX", "DHAR", "MDHAR", "GPX","GR", "Trxs","Frxs", "Grxs", "Prxs",
           "TAC", "caro"), mean, .names = "mean_{.col}"))
Oxidative_ave_clean <- separate(Oxidative_ave, ID, c("Genotype", "Condition", "Time"))
Oxidative_ave_clean$Index <- paste0(Oxidative_ave_clean$Genotype, "_", Oxidative_ave_clean$Condition)
head(Oxidative_ave_clean)

Oxidative_ave_clean$Condition <- factor(Oxidative_ave_clean$Condition , levels = c("WW", "WL"))


B1 <- ggplot(Oxidative_ave_clean,
             aes(x = Time,y =mean_flav, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  facet_wrap( ~ Condition) +
  scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
  scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
 theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 


B2 <- ggplot(Oxidative_ave_clean,
             aes(x = Time,y =mean_HPR, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  facet_wrap( ~ Condition) +
  scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
  scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
 theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 

B3 <- ggplot(Oxidative_ave_clean,
             aes(x = Time,y =mean_POX, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  facet_wrap( ~ Condition) +
  scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
  scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
 theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 


B4 <- ggplot(Oxidative_ave_clean,
             aes(x = Time,y =mean_Frxs, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  facet_wrap( ~ Condition) +
  scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
  scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
                                    D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
 theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 


plot_grid(B1, B2, B3, B4)

Figure 3 Module selection

Plot diagnol plot

Replot <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/01_Module/GS-MM_darkmegenta.csv", header = T)
head(Replot)

### Plot the data
ggplot(Replot, aes(x=MM, y=GS, color=Group, size = Group, alpha = Group, label = GeneID)) +
      geom_point() +
      scale_alpha_manual(values = c(Fill = 1, A=1,  TAIR = 1, 
                                    Flav= 1, TAC= 1, Stress = 1,  Met = 1)) +
      scale_size_manual(values = c(Fill = 1, A=2,  TAIR = 2, 
                                    Flav= 2, TAC= 2, Stress = 2,  Met = 2, TF =3)) +
      scale_colour_manual(values=c(Fill = "grey70", A = "#a6c9e1", TAIR = "#e89151",
                                   Flav = "#d1d2e6", TAC = "#f1bda5", Stress = "#1B9E77", 
                                   Met = "#fbfa50", TF = "midnightblue")) + 
      theme_bw() + 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership"), 
           y=paste0("Gene significance")) +
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
       theme(legend.position = 'none') +
      geom_hline(aes(yintercept=0.7),colour="black",lwd=0.5,linetype=5)+
      geom_vline(aes(xintercept=0.75),colour="black",lwd=0.5,linetype=5)

Figure 5 network Boxplot and SmoothS spline plot

calculate_tpm <- function(count_matrix, gene_lengths) {
  # Step 1: Normalize counts by gene length (reads per kilobase, RPK)
  rpk <- count_matrix / (gene_lengths / 1000)
  
  # Step 2: Compute per-sample scaling factor (sum of all RPK values per sample)
  scaling_factors <- colSums(rpk)
  
  # Step 3: Divide RPK values by the scaling factor and multiply by 1 million
  tpm <- sweep(rpk, 2, scaling_factors, FUN = "/") * 1e6
  
  return(tpm)
}

Range.FC <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-FC.csv", header = T, row.names = 1)

slac.count <- Range.FC
head(slac.count)

gene_lengths <- slac.count$Length
count_matrix <- slac.count[2:97]
head(count_matrix )

tpm_values <- calculate_tpm(count_matrix, gene_lengths)
head(tpm_values)

write.csv(tpm_values, "~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM.csv", quote=FALSE)

Range.TPM <- tpm_values

#Reorder samples orders by colnames
Range.TPM <- Range.TPM[ , order(names(Range.TPM))]
TPM_clean <- data.frame(matrix(ncol = 48, nrow = nrow(Range.TPM)))

Trait <- read.csv("/Users/leon/Documents/Project/Sorghum/1_Phenotype/data/Sorghum_oxidative_stress-Feb2022.csv", 
                  header = T, row.names = 1)

vector <- unique(Trait$ID)
rownames(TPM_clean) <- rownames(Range.TPM)
colnames(TPM_clean) <- vector

#Loop calculation for each accesion average
for (i in 1:length(vector)){
  TPM_clean[,grepl(vector[i],colnames(TPM_clean))] <- rowMeans(Range.TPM[,grepl(vector[i],colnames(Range.TPM))])
}

head(TPM_clean)

#write.csv(TPM_clean, "~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM-Mean.csv")

Range.meta <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/01_Range_metadata.csv", header = T)
Range.TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM-Mean.csv", header = T)

#==============================
# 3) TPM → long，并用 Name match
#==============================
gene_col <- intersect(names(Range.TPM), c("gene","Gene","gene_id","GeneID"))[1]
if (is.na(gene_col)) gene_col <- names(Range.TPM)[1]

tpm_long <- Range.TPM  %>%
  pivot_longer(
    cols = -all_of(gene_col),
    names_to = "Name",
    values_to = "TPM"
  ) %>%
  rename(Gene = all_of(gene_col)) %>%
  mutate(Name = as.character(Name))

tpm_anno <- tpm_long %>%
  inner_join(Range.meta, by = "Name")

log2fc_range <- tpm_anno %>%
  filter(Condition %in% c("WL", "WW")) %>%
  select(Gene, Genotype, Time, Condition, TPM) %>%

  # WL / WW 展开成两列
  pivot_wider(
    names_from  = Condition,
    values_from = TPM
  ) %>%

  # 计算 log2FC
  mutate(
    log2FC_WL_vs_WW = log2((WL + 1) / (WW + 1))   # pseudocount = 1
  ) %>%
  arrange(Gene, Genotype, Time)


log2fc_range <- log2fc_range %>%
  mutate(Time = as.character(gsub("^TP", "Week", Time)))

library(tidyverse)
library(ggpubr)

Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group.csv", header = TRUE)
Gene.select <- Gene.group[Gene.group$class == "A",]
#----------------------------
# 2) subset log2fc_tbl
#----------------------------
plot_df <- log2fc_range %>%
  filter(Gene %in% Gene.select$gene)
week_grid <- sort(unique(plot_df$Time))

plot_df <- plot_df %>%
  mutate(Genotype = ifelse(Genotype == "A", "A", "Other"))

 ggplot(plot_df, aes(x = Time, y = log2FC_WL_vs_WW)) +
      geom_boxplot(aes(fill = Genotype), outlier.size = 0.3, outlier.shape = 21) +
      theme_classic() +
      scale_fill_manual(values = c(A  = "#C25759",Other = "grey")) +
      stat_summary(fun = mean, aes(group = Genotype), color = "darkred", position = position_dodge(0.75),
                 geom = "point", shape = 4, size = 1,
                 show.legend = FALSE) +
      stat_compare_means(aes(group = Genotype),method = "wilcox.test", label = "p.signif") +
      theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") + ylim(-3,3)

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_summary()`).

## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).

Range.TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM-Mean.csv", header = T)
CDF.range <- Range.TPM[Range.TPM$Transcripts == "SORBI_3002G421900",]
 
CDF.range.plot <- CDF.range %>% 
  melt() %>% separate(variable, c("Accession", "Condition", "Time"))

## Using Transcripts as id variables

CDF.range.plot$Index <- paste0(CDF.range.plot$Accession, "_", CDF.range.plot$Condition)

CDF.range.plot$Condition <- factor(
  CDF.range.plot$Condition,
  levels = c("WW", "WL")
)

ggplot(CDF.range.plot,
       aes(x = Time, y = value, color = Accession, group = Index)) +
  geom_point(size = 3,
             aes(alpha = Accession, shape = Condition)) +
  geom_line(size = 0.5,
            aes(linetype = Condition, group = Index)) +
  facet_wrap(~ Condition, labeller = labeller(Condition = c(WW = "WW", WL = "WL"))) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(
    A = "#C25759", B = "#92A5D1", C = "grey70",
    D = "#DAA87C", E = "#C9DCC4", F = "#377483"
  )) +
  scale_linetype_manual(values = c(WW = "solid", WL = "dashed")) +
  scale_shape_manual(values = c(WW = 16, WL = 17)) +
  theme(
    axis.text.x = element_text(angle = 0),
    axis.text.y = element_text(angle = 90)
  ) +
  ylab("CDF3 Relative expression (TPM)")

Calculate mean score per transcripts

library(tidyverse)

Drought_TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/03_sorghum-Leaves-TPM.csv", header= T)
Drought_meta <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/01_sorghum_largeDrought_metadata.csv", header = T)


tpm  <- Drought_TPM
meta <- Drought_meta 

# ==== 2) 基因列识别（自动）====
gene_col <- intersect(names(tpm), c("gene", "Gene", "gene_id", "GeneID"))[1]
if (is.na(gene_col)) gene_col <- names(tpm)[1]

gene_df <- tpm %>% select(all_of(gene_col))
tpm_mat <- tpm %>% select(-all_of(gene_col))

# ==== 3) 准备 index 向量 ====
index_vec <- unique(as.character(meta$Index))

# ==== 4) 对每个 index：在 TPM 列名中搜索 contains(index)，并取均值 ====

tpm_by_index <- map_dfc(index_vec, function(idx) {

  # 找到 TPM 中包含该 index 的所有列
  cols <- names(tpm_mat)[str_detect(names(tpm_mat), fixed(idx))]

  if (length(cols) == 0) {
    warning(paste("No TPM columns matched index:", idx))
    return(tibble(!!idx := NA_real_))
  }

  # 对这些列逐行取均值
  rowMeans(tpm_mat[, cols, drop = FALSE], na.rm = TRUE) %>%
    as_tibble() %>%
    setNames(idx)
})

# ==== 5) 合并 gene 列，形成最终 TPM-like dataframe ====
tpm_index_mean <- bind_cols(
  gene_df,
  tpm_by_index
)

# ==== 6) 查看 & 保存 ====
#tpm_index_mean
#write.csv(tpm_index_mean,"~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/03_sorghum-Leaves-Mean-TPM.csv", row.names = F)

Calculate the log2FC

tpm_long <- tpm_index_mean %>%
  pivot_longer(
    cols = -all_of(gene_col),
    names_to  = "Index",
    values_to = "TPM"
  ) %>%
  rename(gene = all_of(gene_col)) %>%
  mutate(index = as.character(Index))

#----------------------------
# 2) 准备 meta（确保唯一）
#----------------------------
meta_use <- meta %>%
  distinct(Index, Genotype, Tissue, Week, Condition) %>%
  mutate(Index = as.character(Index))

#----------------------------
# 3) merge TPM + meta
#----------------------------
tpm_anno <- tpm_long %>%
  left_join(meta_use, by = "Index")

#----------------------------
# 4) PRFD vs Control → log2FC
#----------------------------
log2fc_tbl <- tpm_anno %>%
  filter(Condition %in% c("POFD", "Control")) %>%
  group_by(gene, Genotype, Tissue, Week, Condition) %>%
  summarise(
    TPM = mean(TPM, na.rm = TRUE),   # 如果一个条件下有多个 index
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from  = Condition,
    values_from = TPM
  ) %>%
  mutate(
    log2FC_POFD_vs_Control = log2((POFD + 1) / (Control + 1))
  ) %>%
  arrange(gene, Genotype, Tissue, Week)

#----------------------------
# 5) 结果
#----------------------------
log2fc_tbl

Plot time-series logFC post flowering

library(tidyverse)

Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group.csv", header = TRUE)
Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/04_Correlation-comp-target.csv", header = TRUE)
Gene.select <- Gene.group[Gene.group$class == "Test",]
#----------------------------
# 2) subset log2fc_tbl
#----------------------------
plot_df <- log2fc_tbl[log2fc_tbl$Week > 9 , ] %>%
  filter(gene %in% Gene.select$gene) %>%
  filter(!is.na(log2FC_POFD_vs_Control))

#----------------------------
# 3) smooth spline + mean ± sd
#----------------------------

# 原始 week（用于刻度=1）
week_grid <- sort(unique(plot_df$Week))

# dense week（用于画更平滑的曲线）
week_grid_dense <- seq(min(week_grid), max(week_grid), by = 0.1)

# 平滑参数（越大越平滑：0~1）
spar_val <- 0.3

spline_summary <- plot_df %>%
  group_by(Genotype) %>%
  group_modify(~ {

    df <- .x %>% arrange(Week)

    spline_pred <- df %>%
      group_by(gene) %>%
      group_modify(~ {
        d <- .x
        if (nrow(d) < 4) return(tibble())  # spline 至少需要 4 个点

        sp <- smooth.spline(
          x = d$Week,
          y = d$log2FC_POFD_vs_Control,
          spar = spar_val
        )

        pred <- predict(sp, x = week_grid_dense)

        tibble(
          Week = pred$x,
          yhat = pred$y
        )
      }) %>%
      ungroup()

    spline_pred %>%
      group_by(Week) %>%
      summarise(
        mean = mean(yhat, na.rm = TRUE),
        sd   = sd(yhat, na.rm = TRUE),
        .groups = "drop"
      )
  }) %>%
  ungroup()

#----------------------------
# 4) gene-level smooth spline
#----------------------------
gene_spline_df <- plot_df %>%
  group_by(Genotype, gene) %>%
  group_modify(~ {
    d <- .x %>% arrange(Week)
    if (nrow(d) < 4) return(tibble())

    sp <- smooth.spline(
      x = d$Week,
      y = d$log2FC_POFD_vs_Control,
      spar = spar_val
    )

    pred <- predict(sp, x = week_grid_dense)

    tibble(
      Week = pred$x,
      yhat = pred$y
    )
  }) %>%
  ungroup()

 ggplot(plot_df , aes(x = Genotype, y = log2FC_POFD_vs_Control)) +
      geom_boxplot(aes(fill = Genotype), outlier.size = 0.3, outlier.shape = 21) +
      theme_classic() +
      scale_fill_manual(values = c(RTX430  = "#e31a1c",BTX642 = "grey")) +
      stat_summary(fun = mean, aes(group = Genotype), color = "darkred", position = position_dodge(0.75),
                 geom = "point", shape = 4, size = 1,
                 show.legend = FALSE) +
      stat_compare_means(aes(group = Genotype),method = "wilcox.test") +
      theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")

p <- ggplot() +

  # gene-level smooth spline（所有 genotype 共用，淡灰）
  geom_line(
    data = gene_spline_df,
    aes(color = Genotype,
      x = Week,
      y = yhat,
      group = interaction(Genotype, gene)
    ),
    linewidth = 0.6,
    alpha = 0.15
  ) +
  # mean ± sd ribbon（按 Genotype 上色）
  geom_ribbon(
    data = spline_summary,
    aes(
      x = Week,
      ymin = mean - sd,
      ymax = mean + sd,
      fill = Genotype
    ),
    alpha = 0.25,
    color = NA
  ) +
  # mean smooth spline（按 Genotype 上色）
  geom_line(
    data = spline_summary,
    aes(
      x = Week,
      y = mean,
      color = Genotype
    ),
    linewidth = 1.2
  ) +

  # X 轴刻度每 1 week
  scale_x_continuous(
    breaks = seq(min(week_grid), max(week_grid), by = 1),
    expand = expansion(mult = c(0.01, 0.01))
  ) +

  # 颜色你可以按论文风格随便换
  scale_color_manual(
    values = c(
      "RTX430" = "#e31a1c",
      "BTX642" = "grey20"
    )
  ) +
  scale_fill_manual(
    values = c(
      "RTX430" = "#e31a1c",
      "BTX642" = "grey20"
    )
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_blank(),
    axis.text.y = element_text(angle = 90, vjust = 0.5),
    axis.line = element_line(color = "black")
  ) +

  labs(
    x = "Week",
    y = expression(log[2](POFD / Control))
  ) 

p

CDF3 linegraph

# 需要包含：Name, Genotype, Week, Condition（以及可选 Tissue）
meta2 <- meta %>%
  rename(
    sample_name = Name,
    Genotype    = Genotype,
    Week        = Week,
    Condition   = Condition
    # Tissue    = Tissue   # 如果你也要按 tissue 分组/筛选，就打开并确保列名一致
  ) %>%
  mutate(
    sample_name = as.character(sample_name),
    Genotype = as.character(Genotype),
    Condition = as.character(Condition),
    Week = as.numeric(Week)
  )

# ========= 2) TPM 转 long，并用 meta$Name 匹配 =========
gene_col <- intersect(names(tpm), c("gene","Gene","gene_id","GeneID"))[1]
if (is.na(gene_col)) gene_col <- names(tpm)[1]

tpm_long <- tpm %>%
  pivot_longer(
    cols = -all_of(gene_col),
    names_to = "sample_name",
    values_to = "TPM"
  ) %>%
  rename(gene = all_of(gene_col)) %>%
  mutate(sample_name = as.character(sample_name))

tpm_anno <- tpm_long %>%
  inner_join(meta2, by = "sample_name")  # 只保留 meta 中存在的样本

# ========= 3) 单基因作图函数 =========
plot_single_gene_tpm <- function(gene_id,
                                 weeks = 3:17,
                                 conditions = c("Control","POFD"),
                                 error = c("SE","SD"),
                                 spar_val = 0.6) {
  error <- match.arg(error)

  df <- tpm_anno %>%
    filter(gene == gene_id) %>%
    filter(Week %in% weeks) %>%
    filter(Condition %in% conditions) %>%
    filter(!is.na(TPM))

  if (nrow(df) == 0) stop("No data after filtering. Check gene id / Week / Condition names.")

  # 每个 Genotype × Week × Condition 计算均值和误差
  sum_df <- df %>%
    group_by(Genotype, Week, Condition) %>%
    summarise(
      n = sum(!is.na(TPM)),
      mean = mean(TPM, na.rm = TRUE),
      sd = sd(TPM, na.rm = TRUE),
      se = sd / sqrt(n),
      .groups = "drop"
    ) %>%
    mutate(err = ifelse(error == "SE", se, sd))

  # spline 预测：每个 Genotype × Condition 单独拟合
  week_grid_dense <- seq(min(weeks), max(weeks), by = 0.1)

  spline_df <- sum_df %>%
    group_by(Genotype, Condition) %>%
    group_modify(~{
      d <- .x %>% arrange(Week)
      if (nrow(d) < 4) return(tibble())

      sp <- smooth.spline(
        x = d$Week,
        y = d$mean,
        spar = spar_val
      )
      pred <- predict(sp, x = week_grid_dense)

      tibble(
        Week = pred$x,
        yhat = pred$y
      )
    }) %>%
    ungroup()

  # 作图（无 facet；颜色=Genotype；线型=Condition；点之间连线）
  ggplot(sum_df, aes(x = Week, y = mean)) +

    # 原始均值点的连线（按 Genotype×Condition）
    geom_line(
      aes(group = interaction(Genotype, Condition),
          color = Genotype,
          linetype = Condition),
      linewidth = 0.8,
      alpha = 0.8
    ) +

    # 均值点
    geom_point(
      aes(color = Genotype, shape = Condition),
      size = 2.6
    ) +

    # error bar
    geom_errorbar(
      aes(ymin = mean - err, ymax = mean + err,
          color = Genotype),
      width = 0.15,
      linewidth = 0.6,
      alpha = 0.9
    ) +
    # 颜色：RTX430 黑；BTX642 红
    scale_color_manual(
      values = c(
        "RTX430" = "#e31a1c",
        "BTX642" = "black"
      )
    ) +

    # 线型：用来区分 PRFD vs Control（你可按喜好调换）
    scale_linetype_manual(
      values = c(
        "Control" = "solid",
        "POFD" = "dashed"
      )
    ) +

    theme_classic(base_size = 14) +
    theme(
      legend.title = element_blank(),
      axis.line = element_line(color = "black")
    ) +
    labs(
      title = gene_id,
      x = "Week",
      y = "TPM"
    ) + xlim(10,17)
}

# ========= 4) 调用示例 =========
p1 <- plot_single_gene_tpm("SORBI_3002G421900", error = "SE", spar_val = 0.6)
p1

## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_point()`).

Figure 6 HeatMap and pathways

Range28

annotation_colors = list(
    Time = c(TP1="#1F2E82", TP3="grey", TP5="#E22859", TP7="#1D88BD"),
    Accession = c(A  = "#C25759",B = "#92A5D1",C = "grey70",D = "#DAA87C",E = "#C9DCC4",F = "#377483"),
    Condition = c(WL = "#674B95", WW="#DCDD3B")
    )

sample_meta_col <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/Heatmap_Meta-cols.csv", header = T, row.names = 1)
head(sample_meta_col)

Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group-plot.csv", header = TRUE)
colnames(Gene.group) <- c("Transcripts", "class")

library(pheatmap)
HeatMap_df <- right_join(Range.TPM, Gene.group[Gene.group$class == "Enrich",] , by = "Transcripts")
rownames(HeatMap_df) <- HeatMap_df$Transcripts

sample_meta_col$Accession <- factor(
  sample_meta_col$Accession,
  levels = c("A", "B", "C", "D", "E")   # ← 你想要的顺序
)
sample_meta_col <- sample_meta_col[order(sample_meta_col$Accession), ]
mat <- HeatMap_df[, 2:49]
mat <- mat[, rownames(sample_meta_col)]


pheatmap(scale_rows(mat) %>% na.omit(),
               clustering_distance_r = "correlation", 
               annotation_col = sample_meta_col,
               annotation_colors = annotation_colors,
               cutree_cols = 5,
               show_colnames =T,show_rownames = T,cluster_rows = F, cluster_cols = F,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)

Large scale Heatmap

annotation_colors_public = list(
    Genotype = c(RTX430  = "#e31a1c",BTX642 = "black"),
    Condition = c(POFD = "#674B95", Control="#DCDD3B"))

sample_meta_col_public <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/LargeScale-Heatmap_Meta-cols.csv", header = T, row.names = 1)

Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group-plot.csv", header = TRUE)
colnames(Gene.group) <- c("Transcripts", "class")

Public.TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/03_sorghum-Leaves-Mean-TPM.csv", header = T)

Public.TPM.select <- Public.TPM[, c(
  "Transcripts",
  colnames(Public.TPM)[colnames(Public.TPM) %in% rownames(sample_meta_col_public)]
)]

HeatMap_df <- right_join(Public.TPM.select , Gene.group[Gene.group$class == "Met",] , by = "Transcripts")
rownames(HeatMap_df) <- HeatMap_df$Transcripts

sample_meta_col_public$Genotype <- factor(
  sample_meta_col_public$Genotype,
  levels = c("RTX430", "BTX642")   # ← 你想要的顺序
)

sample_meta_col_public <- sample_meta_col_public [order(sample_meta_col_public $Genotype), ]
mat <- HeatMap_df[,2:31]
mat <- mat[, rownames(sample_meta_col_public)]


pheatmap(scale_rows(mat ) %>% na.omit(),,
               clustering_distance_r = "correlation", 
               annotation_col = sample_meta_col_public[,c("Genotype", "Condition")],
               annotation_colors = annotation_colors_public,
               show_colnames =T,show_rownames = T,cluster_rows = T, cluster_cols = F,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)

#write.csv(mat, "~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/test2.csv")

Sorghum-ICP

Li’ang Yu, Andrew Nelson

2025-12-09

Load packages

Functions been created in the analysis

Figure 1 Physiological data

LI-6800 data

Soil microbiome data

Plot Morphological-Data

Plot the Biomass data

Plot the LI-6800 data

Cluster heatmap of oxidative stress

Line graph of oxidative stress

Figure 3 Module selection

Plot diagnol plot

Figure 5 network Boxplot and SmoothS spline plot

Calculate mean score per transcripts

Calculate the log2FC

Plot time-series logFC post flowering

CDF3 linegraph

Figure 6 HeatMap and pathways

Range28

Large scale Heatmap