library(ggplot2)
library(dplyr)
library(colourvalues)
library(pheatmap)
library(knitr)
library(RColorBrewer)
library(corrplot)
library(PoiClaClu)
library(stats)
library(reshape2)
library(ggpubr)
library(tidyr)
library(cowplot)
library(viridis)
library(Hmisc)
library(stringr)
library(UpSetR)
library(ggfortify)
scale_rows = function(x){
m = apply(x, 1, mean, na.rm = T)
s = apply(x, 1, sd, na.rm = T)
return((x - m) / s)
}
library(ggplot2)
Genotype.SV <- read.csv("/Users/leon/Documents/Project/Sorghum/10_Variants/Combine.stats.csv", header = T)
Genotype.SV$Region <- as.factor(Genotype.SV$Region)
SV1 <- ggplot(Genotype.SV, aes(x="", y=Count, fill=Region)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +
scale_fill_manual(values = c(Downstream="#FAECA8", Intergenic="#CAC0E1",
Intron="grey", Splicing="#1E803D", Upstream="#FFD47F")) + theme_void()
SV1
SV2 <- ggplot(Genotype.SV[Genotype.SV$Region == "Upstream",], aes(x="", y=Count, fill=Genotype)) +
geom_bar(stat="identity", width=1,alpha = 0.8) +
coord_polar("y", start=0) +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF", "Overlap" = "#FAECA8")) + theme_void()
SV3 <- ggplot(Genotype.SV[Genotype.SV$Region == "Intron",], aes(x="", y=Count, fill=Genotype)) +
geom_bar(stat="identity", width=1 , alpha = 0.8) +
coord_polar("y", start=0) +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF", "Overlap" = "#FAECA8")) + theme_void()
SV4 <- ggplot(Genotype.SV[Genotype.SV$Region == "Downstream",], aes(x="", y=Count, fill=Genotype)) +
geom_bar(stat="identity", width=1, , alpha = 0.8) +
coord_polar("y", start=0) +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF", "Overlap" = "#FAECA8")) + theme_void()
SV5 <- ggplot(Genotype.SV[Genotype.SV$Region == "Splicing",], aes(x="", y=Count, fill=Genotype)) +
geom_bar(stat="identity", width=1, alpha = 0.8) +
coord_polar("y", start=0) +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF", "Overlap" = "#FAECA8")) + theme_void()
library(cowplot)
plot_grid(SV2, SV3, SV4, SV5)
# Figure.S2-Metabolites-Microbiome ## Figure S2A and S2B ### Plot
selective metabolites data in boxplots
Met_field <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Metabolites-2024-all.csv", header = T, row.names = 1)
### Under drought
M1 <- ggplot(Met_field[Met_field$Condition == "WL",],
aes(x = Time, y = Sucrose, alpha = Genotype, fill = Genotype, size = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("Drought condition") +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
M2 <- ggplot(Met_field[Met_field$Condition == "WW",],
aes(x = Time, y = Sucrose, alpha = Genotype, fill = Genotype, size = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("Control condition") +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
M3 <- ggplot(Met_field[Met_field$Condition == "WL",],
aes(x = Time, y = chlorogenic.acid.2.1, alpha = Genotype, fill = Genotype, size = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("Drought condition") +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
M4 <- ggplot(Met_field[Met_field$Condition == "WW",],
aes(x = Time, y = chlorogenic.acid.2.1, alpha = Genotype, fill = Genotype, size = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("Control condition") +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
plot_grid(M1, M2, M3, M4)
## Figure S2B ### Process Fungi data
Fungi <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/19_Microbiome/Fungi/2020_ITS_Roots_otu_table.csv", header = T)
Fungi.meta <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/19_Microbiome/Fungi/2020_ITS_Roots_metadata_table.csv", header = T)
Fungi.sample <- Fungi.meta$Index %>% unique()
Fungi.ave <- data.frame(matrix(ncol = 12, nrow = 46))
vector <- Fungi.meta$Index %>% unique()
colnames(Fungi.ave) <- Fungi.sample
rownames(Fungi.ave) <- Fungi$OTSU
for (i in 1:length(vector)){
Fungi.ave[,grepl(vector[i],colnames(Fungi.ave))] <- rowMeans(Fungi[,grepl(vector[i],colnames(Fungi))])
}
head(Fungi.ave)
datTrait_PCA <- scale_rows(Fungi.ave)
datTrait_results <- prcomp(datTrait_PCA, scale = TRUE)
datTrait_results$rotation <- -1*datTrait_results$rotation
datTrait_results$sdev^2 / sum(datTrait_results$sdev^2)
## [1] 1.534542e-01 1.430059e-01 1.162706e-01 1.020929e-01 9.497913e-02
## [6] 7.896863e-02 7.607445e-02 6.614997e-02 6.416320e-02 5.573367e-02
## [11] 4.910745e-02 1.328194e-32
datTrait_result <- data.frame(datTrait_results$rotation)
sdev <- datTrait_results$sdev
variance_explained <- sdev^2
# Calculate the explained variance ratio
explained_variance_ratio <- variance_explained / sum(variance_explained)
datTrait_result$Sample <- rownames(datTrait_result)
datTrait_result <- separate(datTrait_result, Sample, c("Genotype", "Condition"))
### GHenotype effects
datTrait_result$Condition <- factor(datTrait_result$Condition, levels = c("WW", "WL"))
ggplot(datTrait_result, aes(x = PC1, y = PC2, color = factor(Genotype), shape = factor(Condition))) +
geom_point(size = 3,aes(color=factor(Genotype), shape = factor(Condition))) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
xlab(paste0("PC1 15.35%")) +
ylab(paste0("PC2 14.30%")) +
theme_classic() +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
sample_pca <- prcomp(t(scale_rows(Fungi.ave)))
pc_loadings <- sample_pca$rotation
pc_loadings <- pc_loadings %>%
as_tibble(rownames = "Fungi")
# print the result
pc_loadings
top_Fungi <- pc_loadings %>%
# select only the PCs we are interested in
select(Fungi, PC1, PC2) %>%
# convert to a "long" format
pivot_longer(matches("PC"), names_to = "PC", values_to = "loading") %>%
# for each PC
group_by(PC) %>%
# arrange by descending order of loading
arrange(desc(abs(loading))) %>%
# take the 5 top rows
slice(1:5) %>%
# pull the gene column as a vector
pull(Fungi) %>%
# ensure only unique genes are retained
unique()
top_Fungi
## [1] "Otu539" "Otu25" "Otu186" "Otu577" "Otu560" "Otu191" "Otu173" "Otu174"
## [9] "Otu567" "Otu537"
library(patchwork)
##
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
##
## align_plots
top_loadings <- pc_loadings %>%
filter(Fungi %in% top_Fungi)
ggplot(data = top_loadings) +
geom_segment(aes(x = 0, y = 0, xend = PC1, yend = PC2),
arrow = arrow(length = unit(0.1, "in")),
PC2 = "brown", color = "steelblue") +
geom_text(aes(x = PC1, y = PC2, label = Fungi),
nudge_y = 0.005, size = 3) +
scale_x_continuous(expand = c(0.02, 0.02)) +
xlab(paste0("PC1 15.35%")) +
ylab(paste0("PC2 14.30%")) +
theme_classic() +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
## Warning in geom_segment(aes(x = 0, y = 0, xend = PC1, yend = PC2), arrow =
## arrow(length = unit(0.1, : Ignoring unknown parameters: `PC2`
Bac.meta <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/19_Microbiome/Bacteria/2020_16S_Roots_metadata_table.csv", header = T)
Bac <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/19_Microbiome/Bacteria/2020_16S_Roots_otu_table.csv", header = T)
Bac.sample <- Bac.meta$Index %>% unique()
Bac.ave <- data.frame(matrix(ncol = 12, nrow = 515))
vector <- Bac.meta$Index %>% unique()
colnames(Bac.ave) <- Bac.sample
rownames(Bac.ave) <- Bac$OTSU
for (i in 1:length(vector)){
Bac.ave[,grepl(vector[i],colnames(Bac.ave))] <- rowMeans(Bac[,grepl(vector[i],colnames(Bac))])
}
head(Bac.ave)
datTrait_PCA <- scale_rows(Bac.ave)
datTrait_results <- prcomp(datTrait_PCA, scale = TRUE)
datTrait_results$rotation <- -1*datTrait_results$rotation
datTrait_results$sdev^2 / sum(datTrait_results$sdev^2)
## [1] 1.775699e-01 1.085665e-01 1.038095e-01 9.320221e-02 9.077528e-02
## [6] 8.173338e-02 7.731792e-02 7.609412e-02 7.213208e-02 6.265997e-02
## [11] 5.613908e-02 1.380547e-32
datTrait_result <- data.frame(datTrait_results$rotation)
sdev <- datTrait_results$sdev
variance_explained <- sdev^2
# Calculate the explained variance ratio
explained_variance_ratio <- variance_explained / sum(variance_explained)
explained_variance_ratio[1] * 100
## [1] 17.75699
explained_variance_ratio[2] * 100
## [1] 10.85665
explained_variance_ratio[3] * 100
## [1] 10.38095
datTrait_result$Sample <- rownames(datTrait_result)
datTrait_result <- separate(datTrait_result, Sample, c("Genotype", "Condition"))
### GHenotype effects
datTrait_result$Condition <- factor(datTrait_result$Condition, levels = c("WW", "WL"))
ggplot(datTrait_result, aes(x = PC1, y = PC2, color = factor(Genotype), shape = factor(Condition))) +
geom_point(size = 3,aes(color=factor(Genotype), shape = factor(Condition))) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
xlab(paste0("PC1 17.76%")) +
ylab(paste0("PC2 10.86%")) +
theme_classic() +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
sample_pca <- prcomp(t(scale_rows(Bac.ave)))
pc_loadings <- sample_pca$rotation
pc_loadings <- pc_loadings %>%
as_tibble(rownames = "Bacteria")
# print the result
pc_loadings
top_Bacteria <- pc_loadings %>%
# select only the PCs we are interested in
select(Bacteria, PC1, PC2) %>%
# convert to a "long" format
pivot_longer(matches("PC"), names_to = "PC", values_to = "loading") %>%
# for each PC
group_by(PC) %>%
# arrange by descending order of loading
arrange(desc(abs(loading))) %>%
# take the 5 top rows
slice(1:5) %>%
# pull the gene column as a vector
pull(Bacteria) %>%
# ensure only unique genes are retained
unique()
top_Bacteria
## [1] "Otu39" "Otu1077" "Otu16" "Otu33" "Otu1065" "Otu616" "Otu1791"
## [8] "Otu340" "Otu821" "Otu1188"
### Load file
Oxidative <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/5_Phenotype/2_Clean_data/01_Oxidative_clean-20220324-update.csv")
### Calcualte the average
colnames(Oxidative)
## [1] "Name" "Genotype" "Condition" "Time" "ID"
## [6] "Rep" "caro" "polyph" "flav" "pro"
## [11] "TAC" "MDA" "LOX" "GOX" "HPR"
## [16] "POX" "CAT" "AO" "APX" "MDHAR"
## [21] "DHAR" "GR" "GPX" "SOD" "Prxs"
## [26] "Grxs" "Trxs" "Frxs" "ascorbate" "glutathione"
Oxidative_ave <- Oxidative %>% group_by(ID) %>% dplyr::summarize(across(
c("polyph", "flav", "ascorbate","glutathione",
"MDA", "LOX", "pro", "GOX","HPR",
"SOD", "AO", "POX", "CAT", "APX", "DHAR", "MDHAR", "GPX","GR", "Trxs","Frxs", "Grxs", "Prxs",
"TAC", "caro"), mean, .names = "mean_{.col}"))
Oxidative_ave_clean <- separate(Oxidative_ave, ID, c("Genotype", "Condition", "Time"))
Oxidative_ave_clean$Index <- paste0(Oxidative_ave_clean$Genotype, "_", Oxidative_ave_clean$Condition)
head(Oxidative_ave_clean)
Oxidative_ave_clean$Sample <- paste0(Oxidative_ave_clean$Genotype,
"_",Oxidative_ave_clean$Condition,
"_",Oxidative_ave_clean$Time)
datTrait_PCA <- as.data.frame(Oxidative_ave_clean[,4:27])
rownames(datTrait_PCA) <- Oxidative_ave_clean$Sample
PCA.data <- data.frame(datTrait_PCA)
pca_result <- prcomp(PCA.data, scale. = TRUE)
autoplot(pca_result , data = Oxidative_ave_clean, colour = 'Genotype', shape = "Condition",size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
A_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_A.csv", header =T)
B_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_B.csv", header =T)
C_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_C.csv", header =T)
D_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_D.csv", header =T)
E_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_E.csv", header =T)
F_Group <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/DEGs_F.csv", header =T)
Combine <- na.omit(rbind(A_Group,B_Group,C_Group,D_Group,E_Group,F_Group))
names(Combine)[8] = "GeneID"
head(Combine, 6)
### Filter differentially expressed genes based on following creteria
donw_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange < -1,]
donw_gene <- donw_gene %>% dplyr::group_by(Sample) %>% dplyr::mutate(Down = n())
donw_summary <- unique(donw_gene[,c("Sample","Down")])
up_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange > 1,]
up_gene <- up_gene %>% dplyr::group_by(Sample) %>% dplyr::mutate(up = n())
up_summary <- unique(up_gene[,c("Sample","up")])
combind_summary <- left_join(up_summary, donw_summary, by = "Sample")
head(combind_summary, 6)
### Generate the non-duplicated DEGs list
Down_list <- unique(data.frame(GeneID = donw_gene[,"GeneID"]))
Up_list <- unique(data.frame(GeneID = up_gene[,"GeneID"]))
head(Down_list,20)
head(Up_list,20)
### Check sample list
sample_list <- unique(data.frame(Sample = Combine[,"Sample"]))
sample_list
####for down-regulated DEGs
for (i in 1:nrow(sample_list)){
df <- left_join(Down_list,donw_gene[donw_gene$Sample == sample_list[i,1],c("GeneID","log2FoldChange")],by = "GeneID")
df <- as.data.frame(df[,2])
colnames(df) <- sample_list[i,1]
assign(paste0("DEGs_down_",sample_list[i,1]),df)
}
#Combined all dfs
merge_list1 <- lapply(ls(pattern = "DEGs_down_"), get)
down_combine <- bind_cols(Down_list ,merge_list1)
#Replace NA into 0
down_combine[is.na(down_combine)] <- 0
down_combine_upset <- down_combine
#Replace all values not equal with 1
for (i in 2:ncol(down_combine)){
down_combine_upset[,i] <- replace(x = down_combine_upset[,i],
down_combine_upset[,i] != 0.000000 ,
values = 1)
}
### Check the logFC in a combined table
head(down_combine, 6)
#write.csv(down_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/Down.csv")
head(down_combine_upset, 6)
#write.csv(down_combine,"~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/Down.csv")
###Plot by accessions
upset(down_combine_upset[,c("GeneID","A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(down_combine_upset[,c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(down_combine_upset[,c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(down_combine_upset[,c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
### Build dataframe for up-regulated genes
####for down-regulated DEGs
for (i in 1:nrow(sample_list)){
df <- left_join(Up_list,up_gene[up_gene$Sample == sample_list[i,1],c("GeneID","log2FoldChange")],by = "GeneID")
df <- as.data.frame(df[,2])
colnames(df) <- sample_list[i,1]
assign(paste0("DEGs_up_",sample_list[i,1]),df)
}
#Combined all dfs
merge_list2 <- lapply(ls(pattern = "DEGs_up_"), get)
Up_combine <- bind_cols(Up_list ,merge_list2)
#Replace NA into 0
Up_combine[is.na(Up_combine)] <- 0
Up_combine_upset <- Up_combine
#Replace all values not equal with 1
for (i in 2:ncol(Up_combine)){
Up_combine_upset[,i] <- replace(x = Up_combine_upset[,i],
Up_combine_upset[,i] != 0.000000 ,
values = 1)
}
head(Up_combine, 5)
#write.csv(down_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/Down.csv")
head(Up_combine_upset, 5)
#write.csv(Up_combine,"~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/4_DEGs/Up.csv")
###Plot by accessions
upset(Up_combine_upset[,c("GeneID","A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(Up_combine_upset[,c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(Up_combine_upset[,c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(Up_combine_upset[,c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
# Figure.S4-Mods-stats ## Figure S4C Modification correlation
library(ggpubr)
MF <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/RML.csv", header = T)
head(MF)
Sorghum_TPM <- read.csv("/Users/leon/Documents/Project/Sorghum/4_FeatureCounts/Sorghum_20240629_clean_TPM_update96_ave.csv", header = T)
colnames(Sorghum_TPM)[1] <- "Transcripts"
head(Sorghum_TPM)
Sorghum_TPM_melt <- melt(Sorghum_TPM, id.vars = c("Transcripts"))
colnames(Sorghum_TPM_melt) <- c("Transcripts", "Sample", "TPM")
MF_melt <- melt(MF, id.vars = c("Transcripts"))
colnames(MF_melt) <- c("Transcripts", "Sample", "MF")
MF.TPM <- inner_join(Sorghum_TPM_melt,MF_melt, by = c("Transcripts", "Sample"))
MF.TPM <- separate(MF.TPM, Sample, c("Accession", "Condition", "Time"))
head(MF.TPM)
MF.TPM$log2TPM <- log2(MF.TPM$TPM)
MF.TPM$log2MF <- log2(MF.TPM$MF)
MF.TPM %>%
ggplot(aes(x=log2MF, y=log2TPM, color=Accession))+
geom_point(size = 1, alpha = 0.5)+
scale_color_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF"))+
theme_classic()+
stat_smooth(aes(color= Accession), size = 0.5, level = 0.95) +
stat_cor() +
xlab("log2(length normalized modification frequency)") +
ylab("log2(TPM)")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 1568674 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1568674 rows containing non-finite values (`stat_cor()`).
RCM1 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/D-PAM.csv", header = T)
RCM2 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/Y-PAM.csv", header = T)
RCM3 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/i6A-PAM.csv", header = T)
RCM4 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/m1G-PAM.csv", header = T)
RCM5 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/m3C-PAM.csv", header = T)
RCM6 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/m1A-PAM.csv", header = T)
Consensus.ID <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/Consensus-ID.csv", header = T)
RCM1 <- left_join(Consensus.ID, RCM1, by = "Transcripts")
RCM2 <- left_join(Consensus.ID, RCM2, by = "Transcripts")
RCM3 <- left_join(Consensus.ID, RCM3, by = "Transcripts")
RCM4 <- left_join(Consensus.ID, RCM4, by = "Transcripts")
RCM5 <- left_join(Consensus.ID, RCM5, by = "Transcripts")
RCM6 <- left_join(Consensus.ID, RCM6, by = "Transcripts")
RCM_combine <- bind_cols(RCM1, RCM2[,2:49], RCM3[,2:49], RCM4[,2:49], RCM5[,2:49], RCM6[,2:49])
RCM_combine[is.na(RCM_combine)] <- 0
head(RCM_combine, 5)
rownames(RCM_combine) <- RCM_combine$Transcripts
RCM_combine.PAM <- data.frame(t(RCM_combine[2:289]))
RCM_combine.PAM.data <- RCM_combine.PAM
RCM_combine.PAM.data$Sample <- rownames(RCM_combine.PAM.data)
RCM_combine.PAM.data <- separate(RCM_combine.PAM.data, Sample, c("RCM", "Genotype", "Condition", "Time"))
results <- prcomp(RCM_combine.PAM)
MF.PCA_result <- data.frame(results$rotation)
MF.PCA_result$Sample <- rownames(MF.PCA_result)
MF.PCA_result <- separate(MF.PCA_result, Sample, c("RCM", "Genotype", "Condition", "Time"))
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 3715 rows [1, 2, 3, 4, 5,
## 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
### Calcualte the contribution rate
sdev <- results$sdev
variance_explained <- sdev^2
# Calculate the explained variance ratio
explained_variance_ratio <- variance_explained / sum(variance_explained)
explained_variance_ratio[1] * 100
## [1] 16.92986
explained_variance_ratio[2] * 100
## [1] 9.984136
MF.PCA_result$Condition <- factor(MF.PCA_result$Condition , levels = c("WW", "WL"))
MF.PCA_result$Sample <- rownames(MF.PCA_result)
### Plot the PCA
ggplot(MF.PCA_result, aes(x = PC1, y = PC2, color = factor(RCM))) +
geom_point(size = 3,aes(color=factor(RCM), shape = factor(Condition))) +
scale_color_manual(values = c("m1A" = "#c0db82", "m3C" = "#54beaa", "D" = "#eb8e47",
"m1G" = "#29a15c", "i6A" = "grey10", "Y"= "grey")) +
xlab(paste0("PC1 (16.93%)")) +
ylab(paste0("PC2 (9.98%)")) +
theme_classic() +
# geom_text(aes(label = Sample), vjust = -1, hjust = 0.5) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "bottom")
## Warning: Removed 3715 rows containing missing values (`geom_point()`).
ggplot(MF.PCA_result, aes(x = PC1, y = PC2, color = Genotype)) +
geom_point(size = 3,aes(color=factor(Genotype), shape = factor(Condition))) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
xlab(paste0("PC1 (16.93%)")) +
ylab(paste0("PC2 (9.98%)")) +
theme_classic() +
# geom_text(aes(label = Sample), vjust = -1, hjust = 0.5) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
## Warning: Removed 3715 rows containing missing values (`geom_point()`).
MF1 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/D-sites.csv", header = T)
MF2 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/Y-sites.csv", header = T)
MF3 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/i6A-sites.csv", header = T)
MF4 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/m1G-sites.csv", header = T)
MF5 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/m3C-sites.csv", header = T)
MF6 <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/m1A-sites.csv", header = T)
MF1.PCA <- data.frame(t(MF1[,2:49]))
MF1.PCA.data <- MF1.PCA
MF1.PCA.data$Sample <- rownames(MF1.PCA)
MF1.PCA.data <- separate(MF1.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MF1.PCA.data$Condition <- factor(MF1.PCA.data$Condition, levels = c("WW", "WL"))
MF2.PCA <- data.frame(t(MF2[,2:49]))
MF2.PCA.data <- MF2.PCA
MF2.PCA.data$Sample <- rownames(MF2.PCA)
MF2.PCA.data <- separate(MF2.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MF2.PCA.data$Condition <- factor(MF2.PCA.data$Condition, levels = c("WW", "WL"))
MF3.PCA <- data.frame(t(MF3[,2:49]))
MF3.PCA.data <- MF3.PCA
MF3.PCA.data$Sample <- rownames(MF3.PCA)
MF3.PCA.data <- separate(MF3.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MF3.PCA.data$Condition <- factor(MF3.PCA.data$Condition, levels = c("WW", "WL"))
MF4.PCA <- data.frame(t(MF4[,2:49]))
MF4.PCA.data <- MF4.PCA
MF4.PCA.data$Sample <- rownames(MF4.PCA)
MF4.PCA.data <- separate(MF4.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MF4.PCA.data$Condition <- factor(MF4.PCA.data$Condition, levels = c("WW", "WL"))
MF5.PCA <- data.frame(t(MF5[,2:49]))
MF5.PCA.data <- MF5.PCA
MF5.PCA.data$Sample <- rownames(MF5.PCA)
MF5.PCA.data <- separate(MF5.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MF5.PCA.data$Condition <- factor(MF5.PCA.data$Condition, levels = c("WW", "WL"))
MF6.PCA <- data.frame(t(MF6[,2:49]))
MF6.PCA.data <- MF6.PCA
MF6.PCA.data$Sample <- rownames(MF6.PCA)
MF6.PCA.data <- separate(MF6.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MF6.PCA.data$Condition <- factor(MF6.PCA.data$Condition, levels = c("WW", "WL"))
results.MF1 <- prcomp(MF1.PCA , scale = TRUE)
results.MF2 <- prcomp(MF2.PCA , scale = TRUE)
results.MF3 <- prcomp(MF3.PCA , scale = TRUE)
results.MF4 <- prcomp(MF4.PCA , scale = TRUE)
results.MF5 <- prcomp(MF5.PCA , scale = TRUE)
results.MF6 <- prcomp(MF6.PCA , scale = TRUE)
autoplot(results.MF1 , data = MF1.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
autoplot(results.MF2 , data = MF2.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
autoplot(results.MF3 , data = MF3.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
autoplot(results.MF4 , data = MF4.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
autoplot(results.MF5 , data = MF5.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
autoplot(results.MF6 , data = MF6.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
library(ggfortify)
MD <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/MF.csv", header = T, row.names = 1)
MF <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/RMA_dcast.csv", header = T, row.names = 1)
Sorghum_vsd <- read.csv("/Users/leon/Documents/Project/Sorghum/4_FeatureCounts/Zscore_vsd_20230101.csv", header = T)
RCM.list <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/01_Modified-transcripts.csv", header = T)
PAM <- MD
PAM[PAM != 0] <- 1
RNA.score <- right_join(Sorghum_vsd, RCM.list, by = "Transcripts")
MD.PCA <- data.frame(t(MD))
MF.PCA <- data.frame(t(MF))
PAM.PCA <- data.frame(t(PAM))
RNA.score.PCA <- data.frame(t(RNA.score[2:49]))
### Reorganize data
MD.PCA.data <- MD.PCA
MD.PCA.data$Sample <- rownames(MD.PCA)
MD.PCA.data <- separate(MD.PCA.data, Sample, c("Genotype", "Condition", "Time"))
MD.PCA.data$Condition <- factor(MD.PCA.data$Condition, levels = c("WW", "WL"))
MF.PCA.data <- MF.PCA
MF.PCA.data$Sample <- rownames(MF.PCA)
MF.PCA.data <- separate(MF.PCA.data, Sample, c( "Genotype", "Condition", "Time"))
MF.PCA.data$Condition <- factor(MF.PCA.data$Condition, levels = c("WW", "WL"))
PAM.PCA.data <- PAM.PCA
PAM.PCA.data$Sample <- rownames(PAM.PCA)
PAM.PCA.data <- separate(PAM.PCA.data, Sample, c("Genotype", "Condition", "Time"))
PAM.PCA.data$Condition <- factor(PAM.PCA.data$Condition, levels = c("WW", "WL"))
RNA.score.PCA.data <- RNA.score.PCA
RNA.score.PCA.data$Sample <- rownames(RNA.score.PCA)
RNA.score.PCA.data <- separate(RNA.score.PCA.data, Sample, c("Genotype", "Condition", "Time"))
RNA.score.PCA.data$Condition <- factor(RNA.score.PCA.data$Condition, levels = c("WW", "WL"))
results.MD <- prcomp(MD.PCA , scale = TRUE)
results.MF <- prcomp(MF.PCA , scale = TRUE)
results.PAM <- prcomp(PAM.PCA)
results.RNA.score <- prcomp(RNA.score.PCA[, colSums(is.na(RNA.score.PCA)) == 0])
MD.plot <- autoplot(results.MD , data = MD.PCA.data, colour = "Genotype", shape = "Condition", size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
MF.plot <- autoplot(results.MF , data = MF.PCA.data, colour = "Genotype", shape = "Condition",size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
PAM.plot <- autoplot(results.PAM , data = PAM.PCA.data, colour = "Genotype", shape = "Condition",size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
RNA.score.plot <- autoplot(results.RNA.score , data = RNA.score.PCA.data, colour = "Genotype", shape = "Condition",size = 3) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5", C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none") +
geom_text(aes(label = Time), vjust = -1, hjust = 0.5)
plot_grid(PAM.plot, MD.plot, MF.plot, RNA.score.plot)
MF.PCA_result$Condition <- factor(MF.PCA_result$Condition , levels = c("WW", "WL"))
### Plot the PCA
ggplot(MF.PCA_result, aes(x = PC1, y = PC2, color = factor(Genotype))) +
geom_point(size = 3,aes(color=factor(Genotype), shape = factor(Condition))) +
scale_color_manual(values=c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
xlab(paste0("PC1 (51.59%)")) +
ylab(paste0("PC2 (3.22%)")) +
theme_classic() +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "bottom")
## Warning: Removed 3715 rows containing missing values (`geom_point()`).
## Figure S6B Global RCM mean density and frequency
RMA.TPM <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/RMA_cleaned.csv", header = T)
RMA.TPM_select <- RMA.TPM[,c("Accession", "Time","Condition","mean_RMA")]
head(RMA.TPM_select)
RMA.TPM.plot <- RMA.TPM_select %>%
dplyr::group_by(Accession, Time, Condition) %>% summarise(mean_RMA.all = mean(mean_RMA))
RMA.TPM.plot$Index <- paste0(RMA.TPM.plot$Accession, "_", RMA.TPM.plot$Condition)
RMA.TPM_select[,1] %>% unique() %>% length()
## [1] 6
ggplot(RMA.TPM.plot,
aes(x = Time,y = mean_RMA.all, color = Accession, group = Index)) +
geom_point(size = 3, aes(shape = Condition)) +
geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Condition)) +
theme_classic() +
facet_wrap( ~ Condition) +
scale_alpha_manual(values = c(WL=1, WW=1)) +
scale_color_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5))
MF <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/MF.csv", header = T, row.names = 1)
MF_Z <- scale_rows(MF)
MF_Z$Transcripts <- rownames(MF_Z)
MF.Z.melt <- melt(MF_Z, id.vars = c("Transcripts"))
MF.Z.melt <- separate(MF.Z.melt, variable, c("Genotype", "Condition", "Time"))
MF.Z.plot <- MF.Z.melt %>%
group_by(Genotype, Time, Condition) %>% summarise(mean_density = mean(value))
## `summarise()` has grouped output by 'Genotype', 'Time'. You can override using
## the `.groups` argument.
MF.Z.plot$Index <- paste0(MF.Z.plot$Genotype, "_",MF.Z.plot$Condition)
MF.Z.plot$Condition <- factor(MF.Z.plot$Condition, levels = c("WW", "WL"))
head(MF.Z.plot)
ggplot(MF.Z.plot ,
aes(x = Time,y = mean_density, color = Genotype, group = Index)) +
geom_point(size = 3, aes(shape = Condition)) +
geom_line(size = 0.5, aes(group = Index, lty= Condition, alpha = Condition)) +
theme_classic() +
facet_wrap( ~ Condition) +
scale_alpha_manual(values = c(WL=1, WW=1)) +
scale_color_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
RMF_stats <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/RMF_stats.csv", header = T)
head(RMF_stats)
RMF_stats$Condition<- factor(RMF_stats$Condition, levels = c("WW", "WL"))
RMF_stats <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/RMF_stats.csv", header = T)
RMF_stats
my_comparisons <- list( c("A", "B"), c("A", "C"), c("A", "D"),c("A", "E"), c("A", "F"))
ggplot(RMF_stats[RMF_stats$Time == c("TP5", "TP7"),],
aes(x = Genotype, y = Counts, alpha = Genotype, fill = Genotype, size = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(A="#E81B25", B="#76b5c5",
C="grey40", D="#eab676", E="#eeeee4", F="#E6CECF")) +
scale_alpha_manual(values = c(1,0.5,0.5,0.5,0.5,0.5)) +
scale_size_manual(values = c(0.5,0.1,0.1,0.1,0.1,0.1)) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
stat_compare_means(comparisons = my_comparisons, method = "wilcox.test") +
ylab("Numbers of modified sites")
## [1] FALSE
## Figure S7B Number of modified transcrips by conditions
RMF_stats$Condition <- factor(RMF_stats$Condition, levels = c("WW", "WL"))
S5BA <- ggplot(RMF_stats[RMF_stats$Genotype == "A", ], aes(x = Condition, y = Counts, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values=c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") +
stat_compare_means(method = "t.test") + ylim(1000,2000) +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
S5BB <- ggplot(RMF_stats[RMF_stats$Genotype == "B", ], aes(x = Condition, y = Counts, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values=c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") +
stat_compare_means(method = "t.test") + ylim(1000,2000) +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
S5BC <- ggplot(RMF_stats[RMF_stats$Genotype == "C", ], aes(x = Condition, y = Counts, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values=c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") +
stat_compare_means(method = "t.test") + ylim(1000,2000) +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
S5BD <- ggplot(RMF_stats[RMF_stats$Genotype == "D", ], aes(x = Condition, y = Counts, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values=c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") +
stat_compare_means(method = "t.test") + ylim(1000,2000) +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
S5BE <- ggplot(RMF_stats[RMF_stats$Genotype == "E", ], aes(x = Condition, y = Counts, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values=c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") +
stat_compare_means(method = "t.test") + ylim(1000,2000) +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
S5BF <- ggplot(RMF_stats[RMF_stats$Genotype == "F", ], aes(x = Condition, y = Counts, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values=c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") +
stat_compare_means(method = "t.test") + ylim(1000,2000) +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
plot_grid(S5BA, S5BB, S5BC,
S5BD, S5BE, S5BF)
## Figure S7C Modification density
MF.Z.melt.stats <- MF.Z.melt %>% dplyr::group_by(Genotype, Condition, Time) %>%
dplyr::summarise(sample.mean.density = mean(value))
## `summarise()` has grouped output by 'Genotype', 'Condition'. You can override
## using the `.groups` argument.
MF.Z.melt.stats$Condition <- factor(MF.Z.melt.stats$Condition, levels = c("WW", "WL"))
S7BA <- ggplot(MF.Z.melt.stats[MF.Z.melt.stats$Genotype == "A",],
(aes(x=Condition, y= sample.mean.density, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
ylab("Z-score Modification density")+
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("") + ylab("") +
stat_compare_means(method = "t.test") +
geom_jitter() + ylim(-0.3, 0.6) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7BB <- ggplot(MF.Z.melt.stats[MF.Z.melt.stats$Genotype == "B",],
(aes(x=Condition, y= sample.mean.density, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
ylab("Z-score Modification density")+
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("") + ylab("") +
stat_compare_means(method = "t.test") +
geom_jitter() + ylim(-0.3, 0.6) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7BC <- ggplot(MF.Z.melt.stats[MF.Z.melt.stats$Genotype == "C",],
(aes(x=Condition, y= sample.mean.density, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
ylab("Z-score Modification density")+
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("") + ylab("") +
stat_compare_means(method = "t.test") +
geom_jitter() + ylim(-0.3, 0.6) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7BD <- ggplot(MF.Z.melt.stats[MF.Z.melt.stats$Genotype == "D",],
(aes(x=Condition, y= sample.mean.density, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
ylab("Z-score Modification density")+
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("") + ylab("") +
stat_compare_means(method = "t.test") +
geom_jitter() + ylim(-0.3, 0.6) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7BE <- ggplot(MF.Z.melt.stats[MF.Z.melt.stats$Genotype == "E",],
(aes(x=Condition, y= sample.mean.density, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
ylab("Z-score Modification density")+
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("") + ylab("") +
stat_compare_means(method = "t.test") +
geom_jitter() + ylim(-0.3, 0.6) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7BF <- ggplot(MF.Z.melt.stats[MF.Z.melt.stats$Genotype == "F",],
(aes(x=Condition, y= sample.mean.density, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
ylab("Z-score Modification density")+
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("") + ylab("") +
stat_compare_means(method = "t.test") +
geom_jitter() + ylim(-0.3, 0.6) +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
plot_grid(S7BA, S7BB, S7BC, S7BD, S7BE, S7BF)
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
RMA.TPM <- read.csv("/Users/leon/Documents/Project/Sorghum/7_PTM-Pattern/RMA_cleaned.csv", header = T)
RMA.TPM$Condition <- factor(RMA.TPM$Condition , levels = c("WW", "WL"))
head(RMA.TPM)
RMA.TPM.stats <- RMA.TPM %>% dplyr::group_by(Accession, Condition, Time) %>%
dplyr::summarise(sample.mean.frequency = mean(mean_RMA))
## `summarise()` has grouped output by 'Accession', 'Condition'. You can override
## using the `.groups` argument.
RMA.TPM.stats$Condition <- factor(RMA.TPM.stats$Condition, levels = c("WW", "WL"))
S7DA <- ggplot(RMA.TPM.stats[RMA.TPM.stats $Accession == "A",],
(aes(x=Condition, y= sample.mean.frequency, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") + ylim(0.06, 0.08 ) +
stat_compare_means(method = "t.test") +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7DB <- ggplot(RMA.TPM.stats[RMA.TPM.stats $Accession == "B",],
(aes(x=Condition, y= sample.mean.frequency, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") + ylim(0.06, 0.08 ) +
stat_compare_means(method = "t.test") +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7DC <- ggplot(RMA.TPM.stats[RMA.TPM.stats $Accession == "C",],
(aes(x=Condition, y= sample.mean.frequency, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") + ylim(0.06, 0.08 ) +
stat_compare_means(method = "t.test") +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7DD <- ggplot(RMA.TPM.stats[RMA.TPM.stats $Accession == "D",],
(aes(x=Condition, y= sample.mean.frequency, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") + ylim(0.06, 0.08 ) +
stat_compare_means(method = "t.test") +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7DE <- ggplot(RMA.TPM.stats[RMA.TPM.stats $Accession == "E",],
(aes(x=Condition, y= sample.mean.frequency, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") + ylim(0.06, 0.08 ) +
stat_compare_means(method = "t.test") +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
S7DF <- ggplot(RMA.TPM.stats[RMA.TPM.stats $Accession == "F",],
(aes(x=Condition, y= sample.mean.frequency, fill = Condition))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_classic() +
geom_jitter() +
scale_fill_manual(values = c(WL = "#674B95", WW="#DCDD3B")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) +
ylab("") + xlab("") + ylim(0.06, 0.08 ) +
stat_compare_means(method = "t.test") +
theme(axis.text.y = element_text(angle = 90, hjust = 0.5), legend.position = "none")
plot_grid(S7DA,S7DB,S7DC,S7DD,S7DE,S7DF)
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_compare_means()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
TPM.sorghum.atlas <- read.table("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/07_Atlas/01_Sorghum-tissue-atlas/Mccormick/04_sorghum_expression_atlas_tpm.txt", header = T)
Sb.downstream.atlas <- TPM.sorghum.atlas %>%
filter(gene %in% c("SORBI_3006G158100","SORBI_3009G132900","SORBI_3008G102000"))
head(Sb.downstream.atlas)
temp2 <- data.frame(t(Sb.downstream.atlas))
colnames(temp2) <- temp2[1,]
Sb.downstream.atlas.clean <- temp2[-1,]
Sb.downstream.atlas.clean[] <- lapply(Sb.downstream.atlas.clean, as.numeric)
Sb.downstream.atlas.clean.log <- Sb.downstream.atlas.clean
Sb.downstream.atlas.clean.log[] <- lapply(Sb.downstream.atlas.clean.log,
function(x) log2(as.numeric(x)))
plot_1 <- ggscatter(Sb.downstream.atlas.clean, x = "SORBI_3006G158100",
y = "SORBI_3008G102000", color = "black",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Relative transcript abundance of SbDUS2 (TPM)",
ylab = "Relative transcript abundance of SbCRRL (TPM)",
add.params = list(color = "grey50", fill = "lightgray"))
plot_2 <- ggscatter(Sb.downstream.atlas.clean, x = "SORBI_3006G158100",
y = "SORBI_3009G132900", color = "black",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Relative transcript abundance of SbDUS2 (TPM)",
ylab = "Relative transcript abundance of SbPPDK (TPM)",
add.params = list(color = "grey50", fill = "lightgray"))
plot_grid(plot_1, plot_2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
PPDK.DUS2.drought <-read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/07_Atlas/01_Sorghum-tissue-atlas/Varoquax/PPDK.DUS2.drought.csv")
CRRL.DUS2.drought <-read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/07_Atlas/01_Sorghum-tissue-atlas/Varoquax/CRRL.DUS2.drought.csv")
P1 <- ggscatter(PPDK.DUS2.drought, x = "TPM.y", y = "TPM.x", color = "#00436d",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
add.params = list(color = "grey", fill = "lightgray")) +
ylab("Transcript abundance (TPM) of SbPPDK") +
xlab("Transcript abundance (TPM) of SbDUS2")
P2 <- ggscatter(CRRL.DUS2.drought, x = "TPM.y", y = "TPM.x", color = "#00436d",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
add.params = list(color = "grey", fill = "lightgray")) +
ylab("Transcript abundance (TPM) of SbCRRL") +
xlab("Transcript abundance (TPM) of SbDUS2")
plot_grid(P1,P2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
Atdus2.correlation <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/08_FeatureCounts/At-dus2-TPM.hisat2-correlation.csv", header = T, row.names = 1)
head(Atdus2.correlation)
plot_3 <- ggscatter(Atdus2.correlation, x = "AT3G49640",
y = "AT5G21430", color = "black",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Relative transcript abundance of AtDUS2 (TPM)",
ylab = "Relative transcript abundance of AtCRRL (TPM)",
add.params = list(color = "grey50", fill = "lightgray"))
plot_4 <- ggscatter(Atdus2.correlation, x = "AT3G49640",
y = "AT4G15530", color ="black",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Relative transcript abundance of AtDUS2 (TPM)",
ylab = "Relative transcript abundance of AtPPDK (TPM)",
add.params = list(color = "grey50", fill = "lightgray"))
plot_grid(plot_3, plot_4)
klepikova <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/07_Atlas/02_Arabidopsis-tissue-atlas/Klepikova/Plot-correlation.csv", header = T)
plot_5 <- ggscatter(klepikova, x = "AT3G49640",
y = "AT5G21430", color = "black",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Relative transcript abundance of AtDUS2 (TPM)",
ylab = "Relative transcript abundance of AtCRRL (TPM)",
add.params = list(color = "grey50", fill = "lightgray"))
plot_6 <- ggscatter(klepikova, x = "AT3G49640",
y = "AT4G15530", color ="black",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Relative transcript abundance of AtDUS2 (TPM)",
ylab = "Relative transcript abundance of AtPPDK (TPM)",
add.params = list(color = "grey50", fill = "lightgray"))
plot_grid(plot_5, plot_6)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
CRRL.freq <- read.csv("~/Library/CloudStorage/Box-Box/Projects/9_Sorghum/DUS2/10_Modtect/CRRL-freq-boxplot.csv", header = T)
CRRL.freq
ggplot(CRRL.freq, aes(x = Genotype, y = Freq, color = Genotype)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21) +
theme_classic() +
facet_wrap(~ Condition) +
scale_color_manual(values = c("Col-0" = '#999999',DUS2 = '#E69F00')) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 1,
show.legend = FALSE) + xlab("Conidtions") + ylab("Modification frequency of AtCRRL (D sites)") +
geom_jitter() + stat_compare_means(method = "t.test") +
theme(axis.text.y=element_text(angle=90), legend.position = "none")
rm(MF)
rm(MF_melt)
rm(MF.Z.melt)
rm(MF.TPM)
rm(Sorghum.Drought.D.stats)
## Warning in rm(Sorghum.Drought.D.stats): object 'Sorghum.Drought.D.stats' not
## found
rm(Sorghum.Drought.D)
## Warning in rm(Sorghum.Drought.D): object 'Sorghum.Drought.D' not found
rm(Sorghum.Drought.D.clean)
## Warning in rm(Sorghum.Drought.D.clean): object 'Sorghum.Drought.D.clean' not
## found
rm(Sorghum_TPM)
rm(Sorghum_TPM_melt)
rm(Sorghum.Drought.RCM)
## Warning in rm(Sorghum.Drought.RCM): object 'Sorghum.Drought.RCM' not found
rm(Sorghum.Drought.TPM)
## Warning in rm(Sorghum.Drought.TPM): object 'Sorghum.Drought.TPM' not found
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 2937384 156.9 9173021 489.9 NA 17916055 956.9
## Vcells 33957492 259.1 115143564 878.5 16384 143929453 1098.1