Load packages
Functions
Figure.1-Genetic-phenotypic-diversity
- Figure 1D Licor-6800 data
- Figure 1F Metabolites profile
Figure.2-Module Selection
- Figure.2D-Trait-DHU correlation
Figure.3-Blue module DHU response
Figure.4-Characterization of DUS2 mutant
Figure.5-Characterization of dus2 stress response

Load packages

library(ggplot2)
library(dplyr)
library(colourvalues)
library(pheatmap)
library(knitr)
library(RColorBrewer)
library(corrplot)
library(PoiClaClu)
library(stats)
library(reshape2)
library(ggpubr)
library(tidyr)
library(cowplot)
library(viridis)
library(Hmisc)
library(stringr)
library(ggfortify)

Functions

### Z-score
scale_rows = function(x){
    m = apply(x, 1, mean, na.rm = T)
    s = apply(x, 1, sd, na.rm = T)
    return((x - m) / s)
}

### Summary 
data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      sd = sd(x[[col]], na.rm=TRUE))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,
                  varname)
  data_sum <- rename(data_sum, c("mean" = varname))
 return(data_sum)
}

### Correlation
flattenCorrMatrix <- function(cormat, pmat) {
  ut <- upper.tri(cormat)
  data.frame(
    row = rownames(cormat)[row(cormat)[ut]],
    column = rownames(cormat)[col(cormat)[ut]],
    cor  =(cormat)[ut],
    p = pmat[ut]
    )
}

Figure.1-Genetic-phenotypic-diversity

Oxdative <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/01_Oxidative_clean-20220324-update.csv")
LIM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-morning.csv")
LIA <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-afternoon.csv")
Metabolites <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Metabolites-2024.csv", header = T)

head(Oxdative)

head(LIM)

head(LIA)

head(Metabolites)

Figure 1D Licor-6800 data

Afternoon dataset

### Afternoon dataset 
head(LIA)

colnames(LIA)

##  [1] "Name"      "Genotype"  "Condition" "Time"      "Rep"       "ID"       
##  [7] "Period"    "WUEi"      "A"         "Ci"        "gsw"       "VPDleaf"  
## [13] "Fm."       "Fv..Fm."   "Fo."       "Fv."       "H2O_r"     "Tair"     
## [19] "Tleaf"

LIA_ave <- LIA %>% 
  group_by(ID) %>% 
  dplyr::summarize(across(c("WUEi", "A", "Ci", "gsw", "VPDleaf", "Fv..Fm.", "H2O_r", "Tair", "Tleaf"), 
                   mean, .names = "mean_{.col}"))

LIA_ave_clean <- separate(LIA_ave, ID, c("Genotype", "Condition", "Time"))
LIA_ave_clean$Index <- paste0(LIA_ave_clean$Genotype, "_", LIA_ave_clean$Condition)
head(LIA_ave_clean)

Afternoon dataset plot

A1 <- ggplot(LIA_ave_clean[LIA_ave_clean$Condition == "WL",],aes(x = Time,y =mean_WUEi, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A="#E81B25", B="#76b5c5", 
                                C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 


A2 <- ggplot(LIA_ave_clean[LIA_ave_clean$Condition == "WL",],aes(x = Time,y =mean_Fv..Fm., color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A="#E81B25", B="#76b5c5", 
                                C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 


A3 <- ggplot(LIA_ave_clean[LIA_ave_clean$Condition == "WL",],aes(x = Time,y =mean_Ci, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A="#E81B25", B="#76b5c5", 
                                C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")

Morning dataset

head(LIM)

colnames(LIM)

##  [1] "Name"      "Genotype"  "Condition" "Time"      "Rep"       "ID"       
##  [7] "Period"    "WUEi"      "A"         "Ci"        "gsw"       "VPDleaf"  
## [13] "Fm."       "Fv..Fm."   "Fo."       "Fv."       "H2O_r"     "Tair"     
## [19] "Tleaf"

LIM_ave <- LIM %>% group_by(ID) %>%  dplyr::summarize(across(c("WUEi", "A", "Ci", "gsw", 
                                                       "VPDleaf", "Fv..Fm.", "H2O_r",
                                                       "Tair", "Tair", "Tleaf"), mean, .names = "mean_{.col}"))
LIM_ave_clean <- separate(LIM_ave, ID, c("Genotype", "Condition", "Time"))
LIM_ave_clean$Index <- paste0(LIM_ave_clean$Genotype, "_", LIM_ave_clean$Condition)
head(LIM_ave_clean)

Morning dataset plot

A4 <- ggplot(LIM_ave_clean[LIM_ave_clean$Condition == "WL",],aes(x = Time,y =mean_WUEi, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A="#E81B25", B="#76b5c5", 
                                C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 

A5 <-ggplot(LIM_ave_clean[LIM_ave_clean$Condition == "WL",],aes(x = Time,y =mean_Fv..Fm., color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A="#E81B25", B="#76b5c5", 
                                C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 

A6 <-ggplot(LIM_ave_clean[LIM_ave_clean$Condition == "WL",],aes(x = Time,y =mean_Ci, color = Genotype, Group = Index)) +
  geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
  geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
  theme_classic() +
  scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
  scale_color_manual(values = c(A="#E81B25", B="#76b5c5", 
                                C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") 

plot_grid(A1, A2, A3, A4, A5, A6)

Figure 1F Metabolites profile

Met_field <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Metabolites-2024-all.csv",  header = T, row.names = 1)

Met_field <-Met_field %>%
  filter(!str_detect(Time, "TP2|TP4|TP6|TP8"))
datMet_field <- Met_field[,6:57]

Met_field_ave <- data.frame(matrix(ncol = 52, nrow = 48))
vector <- unique(Met_field$ID)
colnames(Met_field_ave) <- colnames(datMet_field)
rownames(Met_field_ave) <- vector
head(datMet_field,30)

datTrait <- datMet_field

for (i in 1:length(vector)){
  Met_field_ave[grepl(vector[i],rownames(Met_field_ave)),] <- colMeans(datTrait[grepl(vector[i],rownames(datTrait)),])
}

### Oxdative stress
Met_PCA <- data.frame(Met_field_ave)
Met_results <- prcomp(Met_PCA, scale = TRUE)
Met_field_ave.data <- Met_field_ave
Met_field_ave.data$Sample <- rownames(Met_field_ave.data)
Met_field_ave.data <- separate(Met_field_ave.data, Sample, c("Genotype", "Condition", "Time"))

Met_field_ave.data$Condition <- factor(Met_field_ave.data$Condition , levels = c("WW", "WL"))


autoplot(Met_results ,data = Met_field_ave.data, colour = 'Genotype', shape = "Condition",size = 3) +
 scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) + 
 theme_classic() +
 theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
  geom_text(aes(label = Time), vjust = -1, hjust = 0.5)

Figure.2-Module Selection

Figure.2D-Trait-DHU correlation

DHU.trait.cor <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DHU/DHU-transcripts-correlation.csv", header = T)

ggplot(DHU.trait.cor, aes(x = Group, y = R, color = Group)) +
              geom_boxplot(outlier.size = 0.3, outlier.shape = NA) +
              theme_classic() +
              facet_wrap(~ Trait) +
              scale_color_manual(values = c("Non-DHU" = '#999999',DHU = '#E69F00')) +
              stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                         geom = "point", shape = 4, size = 1,
                         show.legend = FALSE)  + xlab("Conidtions") + ylab("Trait-transcripts correlation") +
              stat_compare_means(method = "t.test") +
              theme(axis.text.y=element_text(angle=90), legend.position = "none")

Figure.3-Blue module DHU response

Figure.3C- PCA of DHU levels in the module

library(plotly)
library(ggfortify)


Blue <- read.csv("/Users/leon/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DHU/01_DHU-blue-PCA.csv", header = T, row.names = 1)
Blue[is.na(Blue)] <- 0

Blue.PCA <- data.frame(t(Blue)) 
Blue.PCA$Sample <- rownames(Blue.PCA)
Blue.PCA.Plot <-  separate(Blue.PCA, Sample, c("Genotype", "Condition", "Time"))

pca_res <- prcomp(Blue.PCA.Plot[,1:326], scale. = TRUE)

  
  autoplot(pca_res, data = Blue.PCA.Plot, colour = 'Genotype',shape = "Condition", size = 2) +
    scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) + 
    theme_classic() + 
    theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
    geom_text(aes(label = Time), vjust = -2, hjust = 0.5)

Figure.3C- Boxplot of DHU level and trancript abundance of 29 transcripts

### Plot logFC of transcripts
SelectGene <- read.csv("/Users/leon/Documents/Project/Sorghum/4_FeatureCounts/Select_HeatMap-DHU.csv")
tail(SelectGene, 10)

logFC <- read.csv("/Users/leon/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_logFC.combine.csv", header = T)

stress.logFC <- left_join(SelectGene, logFC, by = "Name") 
stress.logFC.plot <- separate(stress.logFC, Sample, c("Genotype", "Time"))
stress.logFC.plot$log2FoldChange <- as.numeric(stress.logFC.plot$log2FoldChange)

ggplot(stress.logFC.plot, aes(x = Time, y = log2FoldChange, fill = Genotype)) +
      geom_boxplot(outlier.size = 0.3, outlier.shape = NA) +
      theme_classic() + 
      scale_fill_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
      stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                         geom = "point", shape = 4, size = 1,
                         show.legend = FALSE) + ylim(-3,3)

### Plot logFC of DHU level
stress.logFC.DHU <- read.csv("/Users/leon/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DHU/logFC-DHU.csv", header = T)
head(stress.logFC.DHU)

ggplot(stress.logFC.DHU, aes(x = Time, y = logFC, fill = Genotype)) +
      geom_boxplot(outlier.size = 0.3, outlier.shape = NA) +
      theme_classic() + 
      scale_fill_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
      stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                         geom = "point", shape = 4, size = 1,
                         show.legend = FALSE) + ylim(-0.75,0.75)

Figure.3D global DHU level in range28 and the DUS2 transcript abundance

range28.DHU <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DHU/02_Range28_DHU-DUS-genotype.csv", header = T)
head(range28.DHU)

range28.DHU$Condition <- factor(range28.DHU$Condition , levels = c("Control", "Drought"))

ggplot(range28.DHU[range28.DHU$Type == "DHU" & range28.DHU$Gene == "DUS2",], 
       aes(x = Genotype, y = Level, color = Condition)) +
  geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
  theme_classic() +
  scale_color_manual(values = c(Drought = "#674B95", Control="#DCDD3B")) +
  stat_summary(fun = mean, color = "darkred",  aes(group = Condition),position = position_dodge(0.75),
               geom = "point", shape = 4, size = 1, show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 0),
        axis.text.y = element_text(angle = 90),
        legend.position = "none") +
  stat_compare_means(aes(group = Condition), 
                     method = "t.test", 
                     label = "p.format", 
                     size = 3) +
   ylab("Mean DHU level")

ggplot(range28.DHU[range28.DHU$Type == "TPM" & range28.DHU$Gene == "DUS2",], 
       aes(x = Genotype, y = Level, color = Condition)) +
  geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
  theme_classic() +
  scale_color_manual(values = c(Drought = "#674B95", Control="#DCDD3B")) +
  stat_summary(fun = mean, color = "darkred",  aes(group = Condition), 
               position = position_dodge(0.75),
               geom = "point", shape = 4, size = 1, show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 0),
        axis.text.y = element_text(angle = 90),
        legend.position = "none") +
  stat_compare_means(aes(group = Condition), 
                     method = "t.test", 
                     label = "p.format", 
                     size = 3) +
   ylab("TPM score of SbDUS2")

Figure.4-Characterization of DUS2 mutant

Figure.4B-DHU level confirmation of Col-0 and dus2

DUS.MS <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/11_MS/MS-DUS2.csv",header = T)
head(DUS.MS)

ggplot(DUS.MS, aes(x = Genotype, y = Dihydrouridine_level, color = Genotype)) +
            geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
            theme_classic() +
            scale_color_manual(values = c("Col-0" = '#999999',DUS2 = '#E69F00')) +
            stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                       geom = "point", shape = 4, size = 1,
                       show.legend = FALSE)  + xlab("Genotype") + ylab("5'6-dihydrouridine concentration (nM)") +
            geom_jitter() +   stat_compare_means(method = "t.test") +
            theme(axis.text.y=element_text(angle=90), legend.position = "none")

Figure.4C-Germination rate comparison between Col-0 and dus2

data_summary <- function(data, varname, groupnames){
  require(plyr)
  summary_func <- function(x, col){
    c(mean = mean(x[[col]], na.rm=TRUE),
      sd = sd(x[[col]], na.rm=TRUE))
  }
  data_sum<-ddply(data, groupnames, .fun=summary_func,
                  varname)
  data_sum <- rename(data_sum, c("mean" = varname))
 return(data_sum)
}

DUS.germination <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/01_Phenotype/DUS2-germination.csv", header = T)

df2 <- data_summary(DUS.germination, varname="Germination", 
                    groupnames=c("Time", "Genotype"))
# Convert dose to a factor variable
df2$Time=factor(df2$Time, levels = DUS.germination$Time %>% unique())
head(df2)

ggplot(df2, aes(x=Time, y=Germination, group=Genotype, color=Genotype)) + 
  geom_line() +
  geom_point()+
  geom_errorbar(aes(ymin=Germination-sd*0.25, ymax=Germination+sd*0.25), width=.2,
                 position=position_dodge(0.05)) +
 labs(x="Time for germination (hour)", y = "Germination rate (%)")+
   theme_classic() +
   scale_color_manual(values=c('#999999','#E69F00'))

Figure.4D-Transcripts abundace comparison between Col-0 and dus2 of KEGG enriched proteins

KEGG.protein <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/15_Protein/04_Protein-mRNA-kegg-boxplot.csv", header = T)
head(KEGG.protein)

ggplot(KEGG.protein[KEGG.protein$Type == "Transcripts", ], 
       aes(x = Genotype, y = Value,  color = Genotype)) +
  geom_boxplot(outlier.size = 0, outlier.shape = NA) +
  theme_classic() +
  scale_color_manual(values = c("Col-0" = '#999999', DUS2 = '#E69F00')) +
  stat_summary(fun = mean, color = "darkred", aes(group = Genotype), position = position_dodge(0.75),
               geom = "point", shape = 4, size = 3, show.legend = FALSE) +
  xlab("Groups") + ylab("Protein abundance") +
  stat_compare_means(method = "t.test", 
                     aes(group = Genotype), 
                     label = "p.format")  + geom_jitter(size = 3) +
  theme(axis.text.y = element_text(angle = 90), legend.position = "none")

Figure.4E-Correlation between log2FC(trancripts) relative to log2FC(proteins)

TP <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/15_Protein/03_Transcripts-protein.csv", header = T)
head(TP)

ggplot(TP,aes(x = Transcripts,y =Protein)) +
  geom_point(size = 3, aes(shape = Group), color = "steelblue") +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")+
  geom_hline(yintercept = 0, color = "black", linewidth = 0.25) +  # X axis at y=0
  geom_vline(xintercept = 0, color = "black", linewidth = 0.25) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "blue", linewidth = 0.5) +
  geom_smooth(method = "lm", se = TRUE, color = "blue", fill = "steelblue", linewidth = 0.75, alpha = 0.15) +
      stat_cor(method = "pearson", 
           label.x = min(TP$Transcripts), 
           label.y = max(TP$Protein),
           size = 5) + theme_minimal() + 
            xlim(-1.5,1.5) + xlab("Transcript log2FC(dus2/Col-0)") + ylab("Protein log2FC(dus2/Col-0)")

ggplot(TP[TP$DHU < 2 & TP$Protein > -1, ],aes(x = DHU,y =Protein)) +
  geom_point(size = 3, aes(shape = Group), color = "tomato") +
  theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")+
  geom_hline(yintercept = 0, color = "black", linewidth = 0.25) +  # X axis at y=0
  geom_vline(xintercept = 0, color = "black", linewidth = 0.25) +
  geom_abline(intercept = 0, slope = -1, linetype = "dashed", color = "brown", linewidth = 0.5) +
  geom_smooth(method = "lm", se = TRUE, color = "darkred", fill = "tomato", linewidth = 0.75, alpha = 0.15) +
      stat_cor(method = "pearson", 
           label.x = min(TP$Transcripts), 
           label.y = max(TP$Protein),
           size = 5) + theme_minimal() + 
            xlim(-1.5,1.5) + xlab("Transcript log2FC(dus2/Col-0)") + ylab("Protein log2FC(dus2/Col-0)")

Figure. 4F-Heatmap of protein and transcripts

Import raw gene counts information

### feature countdata
At_Counts <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/08_FeatureCounts/Arabdiopsis.dus2.hisat2.update-2025.csv", header = TRUE, row.names = 1)

#Convert NAs to 0s
At_Counts [is.na(At_Counts)] = 0 
At_data <- At_Counts[, 2:9]

head(At_data)

Generate TPM score

calculate_tpm <- function(count_matrix, gene_lengths) {
  # Step 1: Normalize counts by gene length (reads per kilobase, RPK)
  rpk <- count_matrix / (gene_lengths / 1000)
  # Step 2: Compute per-sample scaling factor (sum of all RPK values per sample)
  scaling_factors <- colSums(rpk)
  # Step 3: Divide RPK values by the scaling factor and multiply by 1 million
  tpm <- sweep(rpk, 2, scaling_factors, FUN = "/") * 1e6
  
  return(tpm)
}

Transform TPM score

slac.count <- At_Counts
head(slac.count)

gene_lengths <- slac.count$Length
count_matrix <- slac.count[2:9]
head(count_matrix )

tpm_values <- calculate_tpm(count_matrix, gene_lengths)
head(tpm_values)

Import DHU statistics table

DHU.stats <- read.csv("~/Desktop/DHU/02_DHU-boxplot-T2T4.csv", header = T)
DHU.stats$Name <- paste0(DHU.stats$Genotype, "_", DHU.stats$Condition)
head(DHU.stats)

DHU_plot <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/12_DEGs/HeatMap-plot.csv", header = T)
head(DHU_plot)

DHU.stats.gene <- DHU.stats %>%
  dplyr::group_by(Gene,Index2, Name, Genotype, Condition) %>%
  dplyr::summarise(DHU = mean(variant_proportion))

DHU_DMTs <- left_join(DHU_plot[DHU_plot$Group == "Contrl-protein",1,drop = F],
                      DHU.stats.gene, by = "Gene")

### Plot control only
ggplot(DHU_DMTs[DHU_DMTs$Condition == "Control", ], aes(x = Genotype, y = DHU, color = Genotype)) +
  geom_boxplot(outlier.size = 0, outlier.shape = NA) +
  theme_classic() +
  scale_color_manual(values = c("Col-0" = '#999999', DUS2 = '#E69F00')) +
  stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
               geom = "point", shape = 4, size = 3, show.legend = FALSE) +
  geom_jitter(size = 3) +
  xlab("Conditions") + ylab("DHU level") +
  stat_compare_means(method = "t.test", label = "p.format") +
  theme(axis.text.y = element_text(angle = 90), legend.position = "none") + ylim(0,0.015)

Figure.5-Characterization of dus2 stress response

Figure.5A-Survial rate

Plot survival rate

Survival <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/01_Phenotype/Survival_rate.csv", header = T)
Survival$Time <- factor(Survival$Time, levels = c("1", "2", "3", "4", "5", "6",
                                                  "7", "8", "9", "10", "11", "12"))
ggplot(Survival, aes(x=Time, y=rate, group=Genotype, color=Genotype)) + 
  geom_line() +
  geom_point() +
 labs(x="Time for heat treatment (Days)", y = "Survival rate (%)")+
   theme_classic() +
   scale_color_manual(values=c("Col-0" = '#999999',dus2 = '#E69F00')) +
       theme(axis.text.y=element_text(angle=90), legend.position = "none")

Figure.5B- Licor-data comparison

Plot Licor-6800 data

Licor <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/05_Licor/DUS2-Licor-data.csv", header = T)

A_Licor <- ggplot(Licor, aes(x = Genotype, y = A, color = Genotype)) +
            geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
            theme_classic() +
            facet_wrap(~ Condition) +
            scale_color_manual(values = c("Col-0" = '#999999',DUS2 = '#E69F00')) +
            stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                       geom = "point", shape = 4, size = 1,
                       show.legend = FALSE)  + xlab("Conidtions") + ylab("CO2 net assimilation") +
            geom_jitter() +   stat_compare_means(method = "t.test") +
            theme(axis.text.y=element_text(angle=90), legend.position = "none")

gsw_Licor <- ggplot(Licor, aes(x = Genotype, y = gsw, color = Genotype)) +
              geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
              theme_classic() +
              facet_wrap(~ Condition) +
              scale_color_manual(values = c("Col-0" = '#999999',DUS2 = '#E69F00')) +
              stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                         geom = "point", shape = 4, size = 1,
                         show.legend = FALSE)  + xlab("Conidtions") + ylab("Stomata conductence") +
              geom_jitter() +   stat_compare_means(method = "t.test") +
              theme(axis.text.y=element_text(angle=90), legend.position = "none")

plot_grid(A_Licor, gsw_Licor)

Figure.5C- Modtect-DHU comaparison

dus2.counts.freq <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/10_Modtect/Modtect_dus2-D-frequency.csv", header = T)
head(dus2.counts.freq)

ggplot(dus2.counts.freq, aes(x = Condition, y = Mods.freq, color = Genotype)) +
            geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
            theme_classic() +
            facet_wrap(~ Genotype) +
            scale_color_manual(values = c("Col-0" = '#999999',DUS2 = '#E69F00')) +
            stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                       geom = "point", shape = 4, size = 1,
                       show.legend = FALSE)  + xlab("Conidtions") + ylab("DHU level") +
            geom_jitter(size = 3) + stat_compare_means(comparisons = list(c("Control", "Heat")), method = "t.test") +
            theme(axis.text.y=element_text(angle=90), legend.position = "none")

## [1] FALSE

Figure.5D-Upset plot

library(UpSetR)
Upset <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/12_DEGs/Upset_DEGs.csv", header = T)
head(Upset)

Upset_filtered <- Upset[!(Upset$Col_DOWN == 1 & Upset$DUS2_UP == 1), ]
Upset_filtered <- Upset_filtered[!(Upset_filtered$Col_UP == 1 & Upset_filtered$DUS_DOWN == 1), ]

upset(Upset_filtered[, c("Transcripts","DUS2_UP","DUS_DOWN","Col_UP","Col_DOWN")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("DUS2_UP","DUS_DOWN","Col_UP","Col_DOWN"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 1,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

Figure.5F-DHU-Control-dus2-boxplot

Transcripts

DHU_plot <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/12_DEGs/HeatMap-plot.csv", header = T)
head(DHU_plot)

Plot.Zscore <- tpm_values
head(Plot.Zscore)

Plot.Zscore.control <- Plot.Zscore[, c("Col.0_Control_1", "Col.0_Control_2","Col.0_Heat_1","Col.0_Heat_2",
                                       "DUS2_Control_1","DUS2_Control_2","DUS2_Heat_1","DUS2_Heat_2")]
Plot.Zscore.control$Gene <- rownames(Plot.Zscore.control)

DEGs_plot_Z <- left_join(DHU_plot[DHU_plot$Group == "Treatment",1, drop = F],
                         Plot.Zscore.control, by = "Gene")
rownames(DEGs_plot_Z) <- DEGs_plot_Z$Gene

pheatmap(scale_rows(DEGs_plot_Z[,c(2:9)]) %>% na.omit(),
         color=inferno(30),
               clustering_distance_r = "correlation", 
               show_colnames =T,show_rownames = T,cluster_rows = F, cluster_cols = F,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)

DHU level

DHU.stats <- read.csv("~/Desktop/DHU/02_DHU-boxplot-T2T4.csv", header = T)
DHU.stats$Name <- paste0(DHU.stats$Genotype, "_", DHU.stats$Condition)
DHU.stats <- DHU.stats[DHU.stats$variant_proportion < 0.0025,]

DHU.stats.gene <- DHU.stats %>%
  dplyr::group_by(Gene,Index2, Name, Genotype, Condition) %>%
  dplyr::summarise(DHU = mean(variant_proportion))

DHU_DMTs <- left_join(DHU_plot[DHU_plot$Group == "Treatment",1,drop = F],
                      DHU.stats.gene, by = "Gene")

ggplot(DHU_DMTs, aes(x = Condition, y = DHU, color = Genotype)) +
  geom_boxplot(outlier.size = 0, outlier.shape = NA) +
  theme_classic() +
  facet_wrap(~ Genotype) +
  scale_color_manual(values = c("Col-0" = '#999999', DUS2 = '#E69F00')) +
  stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
               geom = "point", shape = 4, size = 3, show.legend = FALSE) +
  geom_jitter(size = 3) +
  xlab("Conditions") + ylab("DHU level") +
  stat_compare_means(method = "t.test", aes(group = Condition), label = "p.format") +
  theme(axis.text.y = element_text(angle = 90), legend.position = "none") + ylim(0,0.002)

Figure.5G-DHU-Control-dus2-boxplot-sorghum-orthologs

DHU.ortholog <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/07_Atlas/DHU-pattern-public-sorghum-figure.csv", header = T)

head(DHU.ortholog)

ggplot(DHU.ortholog[DHU.ortholog$Data == "Range",],
       aes(x = Condition, y = DHU, color = Condition)) +
         geom_boxplot(outlier.size = 0, outlier.shape = 21) +
            theme_classic() + facet_wrap( ~ Genotype) +
             scale_color_manual(values = c(Drought = "#674B95", Control="#DCDD3B")) +
            stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                       geom = "point", shape = 4, size = 3,
                       show.legend = FALSE)  + xlab("Conidtions") + ylab("mean DHU level") +
            geom_jitter(size = 3) + stat_compare_means(method = "t.test") +
            theme(axis.text.y=element_text(angle=90), legend.position = "none")

ggplot(DHU.ortholog[DHU.ortholog$Data == "Drought",],
       aes(x = Condition, y = DHU, color = Condition)) +
            geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
            theme_classic() + facet_wrap( ~ Genotype) +
             scale_color_manual(values = c(Drought = "#674B95", Control="#DCDD3B")) +
            stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
                       geom = "point", shape = 4, size = 3,
                       show.legend = FALSE)  + xlab("Conidtions") + ylab("mean DHU level") +
            geom_jitter(size = 3) + stat_compare_means(method = "t.test") +
            theme(axis.text.y=element_text(angle=90), legend.position = "none")

Sorghum-Range28-Main-Figures

Li’ang Yu, Andrew Nelson, Duke Pauli

2025-07-12

Load packages

Functions

Figure.1-Genetic-phenotypic-diversity

Figure 1D Licor-6800 data

Afternoon dataset

Afternoon dataset plot

Morning dataset

Morning dataset plot

Figure 1F Metabolites profile

Figure.2-Module Selection

Figure.2D-Trait-DHU correlation

Figure.3-Blue module DHU response

Figure.3C- PCA of DHU levels in the module

Figure.3C- Boxplot of DHU level and trancript abundance of 29 transcripts

Figure.3D global DHU level in range28 and the DUS2 transcript abundance

Figure.4-Characterization of DUS2 mutant

Figure.4B-DHU level confirmation of Col-0 and dus2

Figure.4C-Germination rate comparison between Col-0 and dus2

Figure.4D-Transcripts abundace comparison between Col-0 and dus2 of KEGG enriched proteins

Figure.4E-Correlation between log2FC(trancripts) relative to log2FC(proteins)

Figure. 4F-Heatmap of protein and transcripts

Import raw gene counts information

Generate TPM score

Transform TPM score

Import DHU statistics table

Figure.5-Characterization of dus2 stress response

Figure.5A-Survial rate

Plot survival rate

Figure.5B- Licor-data comparison

Plot Licor-6800 data

Figure.5C- Modtect-DHU comaparison

Figure.5D-Upset plot

Figure.5F-DHU-Control-dus2-boxplot

Transcripts

DHU level

Figure.5G-DHU-Control-dus2-boxplot-sorghum-orthologs