library(ggplot2)
library(dplyr)
library(colourvalues)
library(pheatmap)
library(knitr)
library(RColorBrewer)
library(corrplot)
library(PoiClaClu)
library(stats)
library(reshape2)
library(ggpubr)
library(tidyr)
library(cowplot)
library(viridis)
library(Hmisc)
library(stringr)
library(ggfortify)
### Z-score
scale_rows = function(x){
m = apply(x, 1, mean, na.rm = T)
s = apply(x, 1, sd, na.rm = T)
return((x - m) / s)
}
### Summary
data_summary <- function(data, varname, groupnames){
require(plyr)
summary_func <- function(x, col){
c(mean = mean(x[[col]], na.rm=TRUE),
sd = sd(x[[col]], na.rm=TRUE))
}
data_sum<-ddply(data, groupnames, .fun=summary_func,
varname)
data_sum <- rename(data_sum, c("mean" = varname))
return(data_sum)
}
### Correlation
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
Soil <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/01_Module/Soil-moisture.csv", header = T)
df_long <- Soil %>%
pivot_longer(
cols = starts_with("cm_"),
names_to = "Depth",
values_to = "Moisture"
)
# 确保深度顺序正确
df_long$Depth <- factor(
df_long$Depth,
levels = paste0("cm_", seq(10, 70, by = 10))
)
# 作图
ggplot(df_long,
aes(x = Day,
y = Moisture,
color = Depth,
linetype = Treatment,
group = interaction(Depth, Treatment))) +
geom_line(linewidth = 1) +
scale_linetype_manual(
values = c(Control = "solid", Drought = "dashed")
) +
labs(
x = "Day",
y = "Soil moisture",
color = "Soil depth",
linetype = "Treatment"
) +
scale_color_manual(values = c("cm_10" = "grey70","cm_30" = "grey50","cm_50" = "grey30", "cm_70" = "grey10")) +
theme_classic(base_size = 13) +
geom_vline(
xintercept = c(7, 21, 35, 49),
linetype = "dotted",
color = "red",
linewidth = 0.6
)
weight <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Weight.csv")
Root <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Root-score.csv")
head(Root)
Root$Condition <- factor(Root$Condition , levels = c("WW", "WL"))
ggplot(Root, aes(x = Condition, y = height, fill = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
ggplot(Root, aes(x = Condition, y = width, fill = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE)
head(weight)
weight$Treat <- factor(weight$Treat , levels = c("WW", "WL"))
ggplot(weight, aes(x = Treat, y = biomass_total, fill = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
ggplot(weight, aes(x = Treat, y = Three_leaves, fill = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "right")
morning <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-morning.csv")
afternoon <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-afternoon.csv")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ Hmisc::src() masks dplyr::src()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# 2. 定义性状与颜色
#------------------------------------------------
traits <- c("WUEi", "Ci","Tair", "Tleaf", "gsw", "Fv..Fm.", "A", "VPDleaf") # ⚠ 根据实际列名检查
genotype_colors <- c(
A = "#C25759",
B = "#92A5D1",
C = "grey70",
D = "#DAA87C",
E = "#C9DCC4",
F = "#377483"
)
df <- bind_rows(morning, afternoon)
#------------------------------------------------
# 3. 计算各 trait 在 Genotype × Treatment × Time × Period 下的 mean ± SE
#------------------------------------------------
df_summary <- df %>%
pivot_longer(cols = all_of(traits), names_to = "Trait", values_to = "Value") %>%
group_by(Genotype, Condition, Time, Trait) %>%
summarise(
mean = mean(Value, na.rm = TRUE),
se = sd(Value, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
#------------------------------------------------
# 4. 绘图函数:每个 trait 单独画一张包含 WL / WW 的图
#------------------------------------------------
plot_trait <- function(trait_name) {
df_summary[df_summary$Condition == "WL",] %>%
filter(Trait == trait_name) %>%
ggplot(aes(x = Time, y = mean, fill = Genotype)) +
geom_col(position = position_dodge(width = 0.9), width = 0.8) +
geom_errorbar(
aes(ymin = mean - 0.8*se, ymax = mean + 0.8*se),
position = position_dodge(width = 0.9),
width = 0.3
) +
scale_fill_manual(values = genotype_colors) +
labs(
x = "",
y = trait_name
) +
theme_classic(base_size = 14) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
}
#------------------------------------------------
# 5. 生成四个 trait 的 WL/WW 图
#------------------------------------------------
plot_trait("gsw")
plot_trait("A")
plot_trait("Ci")
plot_trait("Tleaf")
plot_trait("Tair")
plot_trait("Fv..Fm.")
plot_trait("VPDleaf")
# Figure 2 Omics data ## Number of DEGs
A_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_A.csv", header =T)
B_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_B.csv", header =T)
C_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_C.csv", header =T)
D_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_D.csv", header =T)
E_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_E.csv", header =T)
F_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_F.csv", header =T)
Combine <- na.omit(rbind(A_Group,B_Group,C_Group,D_Group,E_Group,F_Group))
names(Combine)[8] = "GeneID"
head(Combine, 50)
### Filter differentially expressed genes based on following creteria
donw_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange < -1,]
donw_gene <- donw_gene %>% group_by(Sample) %>% mutate(Down = n())
donw_summary <- unique(donw_gene[,c("Sample","Down")])
up_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange > 1,]
up_gene <- up_gene %>% group_by(Sample) %>% mutate(up = n())
up_summary <- unique(up_gene[,c("Sample","up")])
combind_summary <- left_join(up_summary, donw_summary, by = "Sample")
head(combind_summary, 20)
### Plot line graph of DEGs at differene time
Plot_DEGs <- melt(combind_summary, id.vars = c("Sample"))
plot_summary <- Plot_DEGs %>% separate(Sample, c("Accession", "Time"))
plot_summary$Index <- paste0(plot_summary$Accession, "_", plot_summary$variable)
plot_summary[is.na(plot_summary)] <- 0
ggplot(data=plot_summary, aes(x = Time, y = value, color = Accession, group = Index)) +
geom_point(size = 3, aes(alpha = Accession, shape = variable)) +
geom_line(size = 0.5, aes(group = Index, lty= variable, color = Accession)) +
theme_classic() +
scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90)) +
ylab("Numbers of DEGs")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Oxidative <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/01_Oxidative_clean-20220324-update.csv")
custom_order <- c("polyph", "flav", "ascorbate","glutathione",
"MDA", "LOX", "pro", "GOX","HPR",
"SOD", "AO", "POX", "CAT", "APX", "DHAR", "MDHAR", "GPX","GR", "Trxs","Frxs", "Grxs", "Prxs",
"TAC", "caro")
Oxidative_cor <- Oxidative
rownames(Oxidative_cor) <- Oxidative_cor$Name
Oxidative_cor <- Oxidative_cor[,c(7:30)]
Oxidative_mat <- cor(Oxidative_cor)
Oxidative_remat <- Oxidative_mat[custom_order, custom_order]
Cor_plot1 <- corrplot(Oxidative_remat, method = 'color',
addrect = 4, rect.col = 'blue', rect.lwd = 2,)
colnames(Oxidative)
## [1] "Name" "Genotype" "Condition" "Time" "ID"
## [6] "Rep" "caro" "polyph" "flav" "pro"
## [11] "TAC" "MDA" "LOX" "GOX" "HPR"
## [16] "POX" "CAT" "AO" "APX" "MDHAR"
## [21] "DHAR" "GR" "GPX" "SOD" "Prxs"
## [26] "Grxs" "Trxs" "Frxs" "ascorbate" "glutathione"
Oxidative_ave <- Oxidative %>% group_by(ID) %>% dplyr::summarize(across(
c("polyph", "flav", "ascorbate","glutathione",
"MDA", "LOX", "pro", "GOX","HPR",
"SOD", "AO", "POX", "CAT", "APX", "DHAR", "MDHAR", "GPX","GR", "Trxs","Frxs", "Grxs", "Prxs",
"TAC", "caro"), mean, .names = "mean_{.col}"))
Oxidative_ave_clean <- separate(Oxidative_ave, ID, c("Genotype", "Condition", "Time"))
Oxidative_ave_clean$Index <- paste0(Oxidative_ave_clean$Genotype, "_", Oxidative_ave_clean$Condition)
head(Oxidative_ave_clean)
Oxidative_ave_clean$Condition <- factor(Oxidative_ave_clean$Condition , levels = c("WW", "WL"))
B1 <- ggplot(Oxidative_ave_clean,
aes(x = Time,y =mean_flav, color = Genotype, Group = Index)) +
geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
theme_classic() +
facet_wrap( ~ Condition) +
scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
B2 <- ggplot(Oxidative_ave_clean,
aes(x = Time,y =mean_HPR, color = Genotype, Group = Index)) +
geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
theme_classic() +
facet_wrap( ~ Condition) +
scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
B3 <- ggplot(Oxidative_ave_clean,
aes(x = Time,y =mean_POX, color = Genotype, Group = Index)) +
geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
theme_classic() +
facet_wrap( ~ Condition) +
scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
B4 <- ggplot(Oxidative_ave_clean,
aes(x = Time,y =mean_Frxs, color = Genotype, Group = Index)) +
geom_point(size = 3, aes(alpha = Genotype, shape = Condition)) +
geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Genotype)) +
theme_classic() +
facet_wrap( ~ Condition) +
scale_alpha_manual(values = c(A=1, B=1, C=1, D=1, E=1, F=1)) +
scale_color_manual(values = c(A = "#C25759",B = "#92A5D1",C = "grey70",
D = "#DAA87C",E = "#C9DCC4",F = "#377483")) +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
plot_grid(B1, B2, B3, B4)
Replot <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/01_Module/GS-MM_darkmegenta.csv", header = T)
head(Replot)
### Plot the data
ggplot(Replot, aes(x=MM, y=GS, color=Group, size = Group, alpha = Group, label = GeneID)) +
geom_point() +
scale_alpha_manual(values = c(Fill = 1, A=1, TAIR = 1,
Flav= 1, TAC= 1, Stress = 1, Met = 1)) +
scale_size_manual(values = c(Fill = 1, A=2, TAIR = 2,
Flav= 2, TAC= 2, Stress = 2, Met = 2, TF =3)) +
scale_colour_manual(values=c(Fill = "grey70", A = "#a6c9e1", TAIR = "#e89151",
Flav = "#d1d2e6", TAC = "#f1bda5", Stress = "#1B9E77",
Met = "#fbfa50", TF = "midnightblue")) +
theme_bw() +
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
labs(x=paste0("Module Membership"),
y=paste0("Gene significance")) +
theme(axis.title.x =element_text(size=14),
axis.title.y=element_text(size=14),
axis.text = element_text(size = 12),
axis.text.x = element_text(colour = "black"),
axis.text.y = element_text(colour = "black"),
plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
plot.margin = unit(rep(2,4),'lines')) +
theme(legend.position = 'none') +
geom_hline(aes(yintercept=0.7),colour="black",lwd=0.5,linetype=5)+
geom_vline(aes(xintercept=0.75),colour="black",lwd=0.5,linetype=5)
calculate_tpm <- function(count_matrix, gene_lengths) {
# Step 1: Normalize counts by gene length (reads per kilobase, RPK)
rpk <- count_matrix / (gene_lengths / 1000)
# Step 2: Compute per-sample scaling factor (sum of all RPK values per sample)
scaling_factors <- colSums(rpk)
# Step 3: Divide RPK values by the scaling factor and multiply by 1 million
tpm <- sweep(rpk, 2, scaling_factors, FUN = "/") * 1e6
return(tpm)
}
Range.FC <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-FC.csv", header = T, row.names = 1)
slac.count <- Range.FC
head(slac.count)
gene_lengths <- slac.count$Length
count_matrix <- slac.count[2:97]
head(count_matrix )
tpm_values <- calculate_tpm(count_matrix, gene_lengths)
head(tpm_values)
write.csv(tpm_values, "~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM.csv", quote=FALSE)
Range.TPM <- tpm_values
#Reorder samples orders by colnames
Range.TPM <- Range.TPM[ , order(names(Range.TPM))]
TPM_clean <- data.frame(matrix(ncol = 48, nrow = nrow(Range.TPM)))
Trait <- read.csv("/Users/leon/Documents/Project/Sorghum/1_Phenotype/data/Sorghum_oxidative_stress-Feb2022.csv",
header = T, row.names = 1)
vector <- unique(Trait$ID)
rownames(TPM_clean) <- rownames(Range.TPM)
colnames(TPM_clean) <- vector
#Loop calculation for each accesion average
for (i in 1:length(vector)){
TPM_clean[,grepl(vector[i],colnames(TPM_clean))] <- rowMeans(Range.TPM[,grepl(vector[i],colnames(Range.TPM))])
}
head(TPM_clean)
#write.csv(TPM_clean, "~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM-Mean.csv")
Range.meta <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/01_Range_metadata.csv", header = T)
Range.TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM-Mean.csv", header = T)
#==============================
# 3) TPM → long,并用 Name match
#==============================
gene_col <- intersect(names(Range.TPM), c("gene","Gene","gene_id","GeneID"))[1]
if (is.na(gene_col)) gene_col <- names(Range.TPM)[1]
tpm_long <- Range.TPM %>%
pivot_longer(
cols = -all_of(gene_col),
names_to = "Name",
values_to = "TPM"
) %>%
rename(Gene = all_of(gene_col)) %>%
mutate(Name = as.character(Name))
tpm_anno <- tpm_long %>%
inner_join(Range.meta, by = "Name")
log2fc_range <- tpm_anno %>%
filter(Condition %in% c("WL", "WW")) %>%
select(Gene, Genotype, Time, Condition, TPM) %>%
# WL / WW 展开成两列
pivot_wider(
names_from = Condition,
values_from = TPM
) %>%
# 计算 log2FC
mutate(
log2FC_WL_vs_WW = log2((WL + 1) / (WW + 1)) # pseudocount = 1
) %>%
arrange(Gene, Genotype, Time)
log2fc_range <- log2fc_range %>%
mutate(Time = as.character(gsub("^TP", "Week", Time)))
library(tidyverse)
library(ggpubr)
Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group.csv", header = TRUE)
Gene.select <- Gene.group[Gene.group$class == "A",]
#----------------------------
# 2) subset log2fc_tbl
#----------------------------
plot_df <- log2fc_range %>%
filter(Gene %in% Gene.select$gene)
week_grid <- sort(unique(plot_df$Time))
plot_df <- plot_df %>%
mutate(Genotype = ifelse(Genotype == "A", "A", "Other"))
ggplot(plot_df, aes(x = Time, y = log2FC_WL_vs_WW)) +
geom_boxplot(aes(fill = Genotype), outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A = "#C25759",Other = "grey")) +
stat_summary(fun = mean, aes(group = Genotype), color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
stat_compare_means(aes(group = Genotype),method = "wilcox.test", label = "p.signif") +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none") + ylim(-3,3)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_compare_means()`).
Range.TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/Range28-TPM-Mean.csv", header = T)
CDF.range <- Range.TPM[Range.TPM$Transcripts == "SORBI_3002G421900",]
CDF.range.plot <- CDF.range %>%
melt() %>% separate(variable, c("Accession", "Condition", "Time"))
## Using Transcripts as id variables
CDF.range.plot$Index <- paste0(CDF.range.plot$Accession, "_", CDF.range.plot$Condition)
CDF.range.plot$Condition <- factor(
CDF.range.plot$Condition,
levels = c("WW", "WL")
)
ggplot(CDF.range.plot,
aes(x = Time, y = value, color = Accession, group = Index)) +
geom_point(size = 3,
aes(alpha = Accession, shape = Condition)) +
geom_line(size = 0.5,
aes(linetype = Condition, group = Index)) +
facet_wrap(~ Condition, labeller = labeller(Condition = c(WW = "WW", WL = "WL"))) +
theme_classic() +
scale_alpha_manual(values = c(A=1, B=0.5, C=0.5, D=0.5, E=0.5, F=0.5)) +
scale_color_manual(values = c(
A = "#C25759", B = "#92A5D1", C = "grey70",
D = "#DAA87C", E = "#C9DCC4", F = "#377483"
)) +
scale_linetype_manual(values = c(WW = "solid", WL = "dashed")) +
scale_shape_manual(values = c(WW = 16, WL = 17)) +
theme(
axis.text.x = element_text(angle = 0),
axis.text.y = element_text(angle = 90)
) +
ylab("CDF3 Relative expression (TPM)")
library(tidyverse)
Drought_TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/03_sorghum-Leaves-TPM.csv", header= T)
Drought_meta <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/01_sorghum_largeDrought_metadata.csv", header = T)
tpm <- Drought_TPM
meta <- Drought_meta
# ==== 2) 基因列识别(自动)====
gene_col <- intersect(names(tpm), c("gene", "Gene", "gene_id", "GeneID"))[1]
if (is.na(gene_col)) gene_col <- names(tpm)[1]
gene_df <- tpm %>% select(all_of(gene_col))
tpm_mat <- tpm %>% select(-all_of(gene_col))
# ==== 3) 准备 index 向量 ====
index_vec <- unique(as.character(meta$Index))
# ==== 4) 对每个 index:在 TPM 列名中搜索 contains(index),并取均值 ====
tpm_by_index <- map_dfc(index_vec, function(idx) {
# 找到 TPM 中包含该 index 的所有列
cols <- names(tpm_mat)[str_detect(names(tpm_mat), fixed(idx))]
if (length(cols) == 0) {
warning(paste("No TPM columns matched index:", idx))
return(tibble(!!idx := NA_real_))
}
# 对这些列逐行取均值
rowMeans(tpm_mat[, cols, drop = FALSE], na.rm = TRUE) %>%
as_tibble() %>%
setNames(idx)
})
# ==== 5) 合并 gene 列,形成最终 TPM-like dataframe ====
tpm_index_mean <- bind_cols(
gene_df,
tpm_by_index
)
# ==== 6) 查看 & 保存 ====
#tpm_index_mean
#write.csv(tpm_index_mean,"~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/03_sorghum-Leaves-Mean-TPM.csv", row.names = F)
tpm_long <- tpm_index_mean %>%
pivot_longer(
cols = -all_of(gene_col),
names_to = "Index",
values_to = "TPM"
) %>%
rename(gene = all_of(gene_col)) %>%
mutate(index = as.character(Index))
#----------------------------
# 2) 准备 meta(确保唯一)
#----------------------------
meta_use <- meta %>%
distinct(Index, Genotype, Tissue, Week, Condition) %>%
mutate(Index = as.character(Index))
#----------------------------
# 3) merge TPM + meta
#----------------------------
tpm_anno <- tpm_long %>%
left_join(meta_use, by = "Index")
#----------------------------
# 4) PRFD vs Control → log2FC
#----------------------------
log2fc_tbl <- tpm_anno %>%
filter(Condition %in% c("POFD", "Control")) %>%
group_by(gene, Genotype, Tissue, Week, Condition) %>%
summarise(
TPM = mean(TPM, na.rm = TRUE), # 如果一个条件下有多个 index
.groups = "drop"
) %>%
pivot_wider(
names_from = Condition,
values_from = TPM
) %>%
mutate(
log2FC_POFD_vs_Control = log2((POFD + 1) / (Control + 1))
) %>%
arrange(gene, Genotype, Tissue, Week)
#----------------------------
# 5) 结果
#----------------------------
log2fc_tbl
library(tidyverse)
Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group.csv", header = TRUE)
Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/04_Correlation-comp-target.csv", header = TRUE)
Gene.select <- Gene.group[Gene.group$class == "Test",]
#----------------------------
# 2) subset log2fc_tbl
#----------------------------
plot_df <- log2fc_tbl[log2fc_tbl$Week > 9 , ] %>%
filter(gene %in% Gene.select$gene) %>%
filter(!is.na(log2FC_POFD_vs_Control))
#----------------------------
# 3) smooth spline + mean ± sd
#----------------------------
# 原始 week(用于刻度=1)
week_grid <- sort(unique(plot_df$Week))
# dense week(用于画更平滑的曲线)
week_grid_dense <- seq(min(week_grid), max(week_grid), by = 0.1)
# 平滑参数(越大越平滑:0~1)
spar_val <- 0.3
spline_summary <- plot_df %>%
group_by(Genotype) %>%
group_modify(~ {
df <- .x %>% arrange(Week)
spline_pred <- df %>%
group_by(gene) %>%
group_modify(~ {
d <- .x
if (nrow(d) < 4) return(tibble()) # spline 至少需要 4 个点
sp <- smooth.spline(
x = d$Week,
y = d$log2FC_POFD_vs_Control,
spar = spar_val
)
pred <- predict(sp, x = week_grid_dense)
tibble(
Week = pred$x,
yhat = pred$y
)
}) %>%
ungroup()
spline_pred %>%
group_by(Week) %>%
summarise(
mean = mean(yhat, na.rm = TRUE),
sd = sd(yhat, na.rm = TRUE),
.groups = "drop"
)
}) %>%
ungroup()
#----------------------------
# 4) gene-level smooth spline
#----------------------------
gene_spline_df <- plot_df %>%
group_by(Genotype, gene) %>%
group_modify(~ {
d <- .x %>% arrange(Week)
if (nrow(d) < 4) return(tibble())
sp <- smooth.spline(
x = d$Week,
y = d$log2FC_POFD_vs_Control,
spar = spar_val
)
pred <- predict(sp, x = week_grid_dense)
tibble(
Week = pred$x,
yhat = pred$y
)
}) %>%
ungroup()
ggplot(plot_df , aes(x = Genotype, y = log2FC_POFD_vs_Control)) +
geom_boxplot(aes(fill = Genotype), outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(RTX430 = "#e31a1c",BTX642 = "grey")) +
stat_summary(fun = mean, aes(group = Genotype), color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
stat_compare_means(aes(group = Genotype),method = "wilcox.test") +
theme(axis.text.x=element_text(angle=0), axis.text.y=element_text(angle=90), legend.position = "none")
p <- ggplot() +
# gene-level smooth spline(所有 genotype 共用,淡灰)
geom_line(
data = gene_spline_df,
aes(color = Genotype,
x = Week,
y = yhat,
group = interaction(Genotype, gene)
),
linewidth = 0.6,
alpha = 0.15
) +
# mean ± sd ribbon(按 Genotype 上色)
geom_ribbon(
data = spline_summary,
aes(
x = Week,
ymin = mean - sd,
ymax = mean + sd,
fill = Genotype
),
alpha = 0.25,
color = NA
) +
# mean smooth spline(按 Genotype 上色)
geom_line(
data = spline_summary,
aes(
x = Week,
y = mean,
color = Genotype
),
linewidth = 1.2
) +
# X 轴刻度每 1 week
scale_x_continuous(
breaks = seq(min(week_grid), max(week_grid), by = 1),
expand = expansion(mult = c(0.01, 0.01))
) +
# 颜色你可以按论文风格随便换
scale_color_manual(
values = c(
"RTX430" = "#e31a1c",
"BTX642" = "grey20"
)
) +
scale_fill_manual(
values = c(
"RTX430" = "#e31a1c",
"BTX642" = "grey20"
)
) +
theme_classic(base_size = 14) +
theme(
legend.position = "top",
legend.title = element_blank(),
axis.text.y = element_text(angle = 90, vjust = 0.5),
axis.line = element_line(color = "black")
) +
labs(
x = "Week",
y = expression(log[2](POFD / Control))
)
p
# 需要包含:Name, Genotype, Week, Condition(以及可选 Tissue)
meta2 <- meta %>%
rename(
sample_name = Name,
Genotype = Genotype,
Week = Week,
Condition = Condition
# Tissue = Tissue # 如果你也要按 tissue 分组/筛选,就打开并确保列名一致
) %>%
mutate(
sample_name = as.character(sample_name),
Genotype = as.character(Genotype),
Condition = as.character(Condition),
Week = as.numeric(Week)
)
# ========= 2) TPM 转 long,并用 meta$Name 匹配 =========
gene_col <- intersect(names(tpm), c("gene","Gene","gene_id","GeneID"))[1]
if (is.na(gene_col)) gene_col <- names(tpm)[1]
tpm_long <- tpm %>%
pivot_longer(
cols = -all_of(gene_col),
names_to = "sample_name",
values_to = "TPM"
) %>%
rename(gene = all_of(gene_col)) %>%
mutate(sample_name = as.character(sample_name))
tpm_anno <- tpm_long %>%
inner_join(meta2, by = "sample_name") # 只保留 meta 中存在的样本
# ========= 3) 单基因作图函数 =========
plot_single_gene_tpm <- function(gene_id,
weeks = 3:17,
conditions = c("Control","POFD"),
error = c("SE","SD"),
spar_val = 0.6) {
error <- match.arg(error)
df <- tpm_anno %>%
filter(gene == gene_id) %>%
filter(Week %in% weeks) %>%
filter(Condition %in% conditions) %>%
filter(!is.na(TPM))
if (nrow(df) == 0) stop("No data after filtering. Check gene id / Week / Condition names.")
# 每个 Genotype × Week × Condition 计算均值和误差
sum_df <- df %>%
group_by(Genotype, Week, Condition) %>%
summarise(
n = sum(!is.na(TPM)),
mean = mean(TPM, na.rm = TRUE),
sd = sd(TPM, na.rm = TRUE),
se = sd / sqrt(n),
.groups = "drop"
) %>%
mutate(err = ifelse(error == "SE", se, sd))
# spline 预测:每个 Genotype × Condition 单独拟合
week_grid_dense <- seq(min(weeks), max(weeks), by = 0.1)
spline_df <- sum_df %>%
group_by(Genotype, Condition) %>%
group_modify(~{
d <- .x %>% arrange(Week)
if (nrow(d) < 4) return(tibble())
sp <- smooth.spline(
x = d$Week,
y = d$mean,
spar = spar_val
)
pred <- predict(sp, x = week_grid_dense)
tibble(
Week = pred$x,
yhat = pred$y
)
}) %>%
ungroup()
# 作图(无 facet;颜色=Genotype;线型=Condition;点之间连线)
ggplot(sum_df, aes(x = Week, y = mean)) +
# 原始均值点的连线(按 Genotype×Condition)
geom_line(
aes(group = interaction(Genotype, Condition),
color = Genotype,
linetype = Condition),
linewidth = 0.8,
alpha = 0.8
) +
# 均值点
geom_point(
aes(color = Genotype, shape = Condition),
size = 2.6
) +
# error bar
geom_errorbar(
aes(ymin = mean - err, ymax = mean + err,
color = Genotype),
width = 0.15,
linewidth = 0.6,
alpha = 0.9
) +
# 颜色:RTX430 黑;BTX642 红
scale_color_manual(
values = c(
"RTX430" = "#e31a1c",
"BTX642" = "black"
)
) +
# 线型:用来区分 PRFD vs Control(你可按喜好调换)
scale_linetype_manual(
values = c(
"Control" = "solid",
"POFD" = "dashed"
)
) +
theme_classic(base_size = 14) +
theme(
legend.title = element_blank(),
axis.line = element_line(color = "black")
) +
labs(
title = gene_id,
x = "Week",
y = "TPM"
) + xlim(10,17)
}
# ========= 4) 调用示例 =========
p1 <- plot_single_gene_tpm("SORBI_3002G421900", error = "SE", spar_val = 0.6)
p1
## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 16 rows containing missing values or values outside the scale range
## (`geom_point()`).
annotation_colors = list(
Time = c(TP1="#1F2E82", TP3="grey", TP5="#E22859", TP7="#1D88BD"),
Accession = c(A = "#C25759",B = "#92A5D1",C = "grey70",D = "#DAA87C",E = "#C9DCC4",F = "#377483"),
Condition = c(WL = "#674B95", WW="#DCDD3B")
)
sample_meta_col <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/Heatmap_Meta-cols.csv", header = T, row.names = 1)
head(sample_meta_col)
Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group-plot.csv", header = TRUE)
colnames(Gene.group) <- c("Transcripts", "class")
library(pheatmap)
HeatMap_df <- right_join(Range.TPM, Gene.group[Gene.group$class == "Enrich",] , by = "Transcripts")
rownames(HeatMap_df) <- HeatMap_df$Transcripts
sample_meta_col$Accession <- factor(
sample_meta_col$Accession,
levels = c("A", "B", "C", "D", "E") # ← 你想要的顺序
)
sample_meta_col <- sample_meta_col[order(sample_meta_col$Accession), ]
mat <- HeatMap_df[, 2:49]
mat <- mat[, rownames(sample_meta_col)]
pheatmap(scale_rows(mat) %>% na.omit(),
clustering_distance_r = "correlation",
annotation_col = sample_meta_col,
annotation_colors = annotation_colors,
cutree_cols = 5,
show_colnames =T,show_rownames = T,cluster_rows = F, cluster_cols = F,
fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
annotation_colors_public = list(
Genotype = c(RTX430 = "#e31a1c",BTX642 = "black"),
Condition = c(POFD = "#674B95", Control="#DCDD3B"))
sample_meta_col_public <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/LargeScale-Heatmap_Meta-cols.csv", header = T, row.names = 1)
Gene.group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/02_Gene-group-plot.csv", header = TRUE)
colnames(Gene.group) <- c("Transcripts", "class")
Public.TPM <- read.csv("~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/03_sorghum-Leaves-Mean-TPM.csv", header = T)
Public.TPM.select <- Public.TPM[, c(
"Transcripts",
colnames(Public.TPM)[colnames(Public.TPM) %in% rownames(sample_meta_col_public)]
)]
HeatMap_df <- right_join(Public.TPM.select , Gene.group[Gene.group$class == "Met",] , by = "Transcripts")
rownames(HeatMap_df) <- HeatMap_df$Transcripts
sample_meta_col_public$Genotype <- factor(
sample_meta_col_public$Genotype,
levels = c("RTX430", "BTX642") # ← 你想要的顺序
)
sample_meta_col_public <- sample_meta_col_public [order(sample_meta_col_public $Genotype), ]
mat <- HeatMap_df[,2:31]
mat <- mat[, rownames(sample_meta_col_public)]
pheatmap(scale_rows(mat ) %>% na.omit(),,
clustering_distance_r = "correlation",
annotation_col = sample_meta_col_public[,c("Genotype", "Condition")],
annotation_colors = annotation_colors_public,
show_colnames =T,show_rownames = T,cluster_rows = T, cluster_cols = F,
fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
#write.csv(mat, "~/Library/CloudStorage/Box-Box/Projects/22_ICP/02_Atlas/test2.csv")