library(dplyr)
library(ggplot2)
library(knitr)
library(RColorBrewer)
library(ggpubr)
library(tidyr)
library(cowplot)
library(knitr)
Trait Tables
Trait <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/6_Cotton/10_phenotype/FIBER_yield_and_quality_2019_BLUEs_TIPO_removed.csv", header = T)
rownames(Trait) <- Trait$genotype
head(Trait)
Metabolites_BLUE <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/6_Cotton/10_phenotype/Metabolites_2019_BLUEs.csv", header = T, row.names = 1)
Metabolites_BLUE$genotype <- rownames(Metabolites_BLUE)
head(Metabolites_BLUE)
Gene expression Table
TPM_bind <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/6_Cotton/3_FeatureCounts/Gh_WGCNA-TPM-ave_bind.csv", header = T, row.names = 1)
TPM_corr = as.data.frame(t(TPM_bind))
head(TPM_corr)
Zscore_bind <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/6_Cotton/14_MapMan/turquoise_Zscore.csv", header = T, row.names = 1)
Zscore_corr = as.data.frame(t(Zscore_bind))
head(Zscore_corr)
Gene information List
GenePlot <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/6_Cotton/17_Cistrome/03_DL-prediction/Plot_GeneList.csv", header = T)
head(GenePlot)
### import lint related genes
yield_gene <- GenePlot[GenePlot$Class == "QTLs", ]
yield_gene
for (i in 1:nrow(yield_gene)){
yield_df <- TPM_corr[,yield_gene[i,1], drop = F]
yield_df$Gene <- yield_gene[i,7]
yield_df$Accessions <- rownames(yield_df)
rownames(yield_df) <- NULL
colnames(yield_df) <- c("TPM", "Gene", "genotype")
assign(paste0("yield_QTLs_", yield_gene[i,1]), yield_df)
}
### Merge QTLs into same datafroma
QTLs_combine <- bind_rows(lapply(ls(pattern = "yield_QTLs_"), get))
QTLs_combine$TPM <- as.numeric(QTLs_combine$TPM)
## Warning: NAs introduced by coercion
head(QTLs_combine)
#rm(list=ls(pattern="yield_QTLs_"))
### for QTLs paralogs
QTLs_paralogs <- GenePlot[GenePlot$Class == "QTLs_paralogs", ]
QTLs_paralogs
for (i in 1:nrow(QTLs_paralogs)){
QTLs_paralogs_df <- TPM_corr[,QTLs_paralogs[i,1], drop = F]
QTLs_paralogs_df$Gene <- QTLs_paralogs[i,7]
QTLs_paralogs_df$Accessions <- rownames(QTLs_paralogs_df)
rownames(QTLs_paralogs_df) <- NULL
colnames(QTLs_paralogs_df) <- c("TPM", "Gene", "genotype")
assign(paste0("Paralogs-yield_QTLs_", QTLs_paralogs[i,1]), QTLs_paralogs_df)
}
### Merge QTLs into same datafroma
QTLs_paralogs_combine <- bind_rows(lapply(ls(pattern = "Paralogs-yield_QTLs_"), get))
QTLs_paralogs_combine$TPM <- as.numeric(QTLs_paralogs_combine$TPM)
## Warning: NAs introduced by coercion
head(QTLs_paralogs_combine)
### import lint related genes
yield_gene <- GenePlot[GenePlot$Class == "QTLs", ]
yield_gene
for (i in 1:nrow(yield_gene)){
yield_df <- Zscore_corr[,yield_gene[i,1], drop = F]
yield_df$Gene <- yield_gene[i,7]
yield_df$Accessions <- rownames(yield_df)
rownames(yield_df) <- NULL
colnames(yield_df) <- c("Zscore", "Gene", "genotype")
assign(paste0("Zsore_yieldQTLs-", yield_gene[i,1]), yield_df)
}
### Merge QTLs into same datafroma
QTLs_Zscore <- bind_rows(lapply(ls(pattern = "Zsore_yieldQTLs-"), get))
QTLs_Zscore$Zscore <- as.numeric(QTLs_Zscore$Zscore)
head(QTLs_Zscore)
#rm(list=ls(pattern="yield_QTLs_"))
### Extract values of selected traits
Select_traits <- Trait[,c("genotype", "Lint_yield", "treatment")]
rownames(Select_traits) <- NULL
### combine two sets
Select_Comp <- left_join(QTLs_combine, Select_traits, by = "genotype")
head(Select_Comp)
Select_Zscore <- left_join(QTLs_Zscore, Select_traits, by = "genotype")
head(Select_Zscore)
### combine two sets
QTLs_paralogs_combine_Comp <- left_join(QTLs_paralogs_combine, Select_traits, by = "genotype")
head(QTLs_paralogs_combine_Comp)
Select_Zscore <- left_join(QTLs_Zscore, Select_traits, by = "genotype")
head(Select_Zscore)
Plot_WL <- Select_Comp[Select_Comp$treatment == "WL",]
### Plot each QTLs
for (i in 1:nrow(yield_gene)){
print(ggscatter(Plot_WL[Plot_WL$Gene == yield_gene[i,7],],
x = "TPM", y = "Lint_yield",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = paste0("TPM of ", yield_gene[i,7]), ylab = "Lint yield",
shape = 21, size = 3,
add.params = list(color = "blue", fill = "lightgray")))
}
Plot_WL_paralogs <- QTLs_paralogs_combine_Comp[QTLs_paralogs_combine_Comp$treatment == "WL",]
### Plot each QTLs
for (i in 1:nrow(QTLs_paralogs)){
print(ggscatter(Plot_WL_paralogs[Plot_WL_paralogs$Gene == QTLs_paralogs[i,7],],
x = "TPM", y = "Lint_yield",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = paste0("TPM of ", QTLs_paralogs[i,7]), ylab = "Lint yield",
shape = 21, size = 3,
add.params = list(color = "blue", fill = "lightgray")))
}
### Plot QTLs paralogs and QTLs correlation with TPM side by side
Plot_WL_paralogs$Class <- "QTL_paralogs"
Plot_WL$Class <- "QTL"
### combined two dataset
QTLs_comparison <- bind_rows(Plot_WL_paralogs, Plot_WL)
head(QTLs_comparison)
QTLs_comparison[QTLs_comparison$Gene == "API5",]%>%
ggplot(aes(x=TPM, y=Lint_yield, color=Class))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(QTL_paralogs="steelblue", QTL="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and API5 TPM") +
geom_smooth(method = "lm", aes(color= Class), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of API5") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
QTLs_comparison[QTLs_comparison$Gene == "IPS",]%>%
ggplot(aes(x=TPM, y=Lint_yield, color=Class))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(QTL_paralogs="steelblue", QTL="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and IPS TPM") +
geom_smooth(method = "lm", aes(color= Class), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of IPS") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
QTLs_comparison[QTLs_comparison$Gene == "ABP",]%>%
ggplot(aes(x=TPM, y=Lint_yield, color=Class))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(QTL_paralogs="steelblue", QTL="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and ABP TPM") +
geom_smooth(method = "lm", aes(color= Class), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of ABP") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
QTLs_comparison[QTLs_comparison$Gene == "DBC",]%>%
ggplot(aes(x=TPM, y=Lint_yield, color=Class))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(QTL_paralogs="steelblue", QTL="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and DBC TPM") +
geom_smooth(method = "lm", aes(color= Class), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of DBC") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
QTLs_comparison[QTLs_comparison$Gene == "L7T",]%>%
ggplot(aes(x=TPM, y=Lint_yield, color=Class))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(QTL_paralogs="steelblue", QTL="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and L7T TPM") +
geom_smooth(method = "lm", aes(color= Class), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of L7T") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
Zscore_WL <- Select_Zscore [Select_Zscore$treatment == "WL",]
head(Zscore_WL)
### Plot each QTLs
Zscore_WL %>%
ggplot(aes(x=Zscore, y=Lint_yield, color=Gene))+
geom_point(size = 3, alpha = 0.5)+
scale_color_manual(values = c(IPS="#1B9E77", ABP="tomato", L7T="#E7298A", DBC="steelblue", API5="#E6AB02"))+
theme_bw()+
ggtitle("Correaltion between lint yield and 5 QTLs Zscore") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
xlab("Zscore of lint yield QTLs") +
ylab("Lint yieled (Kg)") +
stat_cor(label.y.npc = "top") +
theme(legend.position = c(0.8, 0.2))
### import lint related genes
TF_gene_all <- GenePlot[GenePlot$Class == "TFs" | GenePlot$Class == "TFs_paralogs", ]
TF_gene_all
for (i in 1:nrow(TF_gene_all)){
TF_df <- TPM_corr[,TF_gene_all[i,1], drop = F]
TF_df$Gene <- TF_gene_all[i,1]
TF_df$Accessions <- rownames(TF_df)
rownames(TF_df) <- NULL
colnames(TF_df) <- c("TPM", "Gene", "genotype")
assign(paste0("yield_TFs_", TF_gene_all[i,1]), TF_df)
}
### Merge QTLs into same datafroma
TFs_combine <- bind_rows(lapply(ls(pattern = "yield_TFs_"), get))
TFs_combine$TPM <- as.numeric(TFs_combine$TPM)
## Warning: NAs introduced by coercion
head(TFs_combine)
TF_gene <- GenePlot[GenePlot$Class == "TFs", ]
TF_gene
for (i in 1:nrow(TF_gene)){
TF_df <- Zscore_corr[,TF_gene[i,1], drop = F]
TF_df$Gene <- TF_gene[i,1]
TF_df$Accessions <- rownames(TF_df)
rownames(TF_df) <- NULL
colnames(TF_df) <- c("Zscore", "Gene", "genotype")
assign(paste0("Zscore_yield_TFs-", TF_gene[i,1]), TF_df)
}
### Merge QTLs into same datafroma
TFs_Zscore <- bind_rows(lapply(ls(pattern = "Zscore_yield_TFs-"), get))
TFs_Zscore$Zscore <- as.numeric(TFs_Zscore$Zscore)
head(TFs_Zscore)
### combine two sets
Select_TF <- left_join(TFs_combine, Select_traits, by = "genotype")
head(Select_TF)
Select_Zscore_TF <- left_join(TFs_Zscore, Select_traits, by = "genotype")
head(Select_Zscore_TF)
Plot_WL_TF <- Select_TF[Select_TF$treatment == "WL",] %>% na.omit()
Plot_WL_TF
### Plot each QTLs
for (i in 1:nrow(TF_gene_all)){
print(ggscatter(Plot_WL_TF[Plot_WL_TF$Gene == TF_gene_all[i,1],],
x = "TPM", y = "Lint_yield",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = paste0("TPM of ", TF_gene_all[i,1]),
ylab = "Lint yield (Kg)",
shape = 21, size = 3,
add.params = list(color = "blue", fill = "lightgray"),
title = paste0(TF_gene_all[i,7], " and lint yield correlation")))
}
HSFB2A
Plot_WL_TF[Plot_WL_TF$Gene == "Gohir.D11G009300" | Plot_WL_TF$Gene =="Gohir.A11G010000",] %>%
ggplot(aes(x=TPM, y=Lint_yield, color=Gene))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(Gohir.D11G009300="steelblue", Gohir.A11G010000="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and HSFB2A TPM") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of HSFB2A") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
## `geom_smooth()` using formula = 'y ~ x'
DREB2C
Plot_WL_TF[Plot_WL_TF$Gene == "Gohir.A13G021700" | Plot_WL_TF$Gene =="Gohir.D13G022300",] %>%
ggplot(aes(x=TPM, y=Lint_yield, color=Gene))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A13G021700="steelblue", Gohir.D13G022300="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhDREB2C TPM") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of GhDREB2C") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
## `geom_smooth()` using formula = 'y ~ x'
HSFB2B
Plot_WL_TF[Plot_WL_TF$Gene == "Gohir.A03G014700" | Plot_WL_TF$Gene =="Gohir.D03G153000" |
Plot_WL_TF$Gene == "Gohir.D08G198000" | Plot_WL_TF$Gene =="Gohir.A08G179400",] %>%
ggplot(aes(x=TPM, y=Lint_yield, color=Gene))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A03G014700="steelblue", Gohir.D03G153000="tomato",
Gohir.D08G198000="black", Gohir.A08G179400="#E6AB02"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhHSFB2B TPM") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of GhHSFB2B") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
## `geom_smooth()` using formula = 'y ~ x'
HSFA6B
Plot_WL_TF[Plot_WL_TF$Gene == "Gohir.A08G064100" | Plot_WL_TF$Gene =="Gohir.D08G072600" ,] %>%
ggplot(aes(x=TPM, y=Lint_yield, color=Gene))+
geom_point(size = 3, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A08G064100="steelblue", Gohir.D08G072600="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhHSFA6B TPM") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("TPM of GhHSFA6B") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
## `geom_smooth()` using formula = 'y ~ x'
Zscore_WL_TF <- Select_Zscore_TF[Select_Zscore_TF$treatment == "WL",]
head(Zscore_WL_TF)
MBFC1
### Plot each QTLs
MBFC_Zscore <- Zscore_WL_TF[Zscore_WL_TF$Gene == "Gohir.A06G061400" | Zscore_WL_TF$Gene == "Gohir.D06G059700", ]
MBFC_Zscore %>%
ggplot(aes(x=Zscore, y=Lint_yield, color=Gene))+
geom_point(size = 2, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A06G061400="steelblue", Gohir.D06G059700="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhMBFC1 Zscore") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("Zscore of TFs") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
DREB2C
### Plot each QTLs
DREB2C_Zscore <- Zscore_WL_TF[Zscore_WL_TF$Gene == "Gohir.A13G021700" | Zscore_WL_TF$Gene == "Gohir.D13G022300", ]
DREB2C_Zscore %>%
ggplot(aes(x=Zscore, y=Lint_yield, color=Gene))+
geom_point(size = 2, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A13G021700="steelblue", Gohir.D13G022300="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhDREB2C Zscore") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("Zscore of TFs") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
HSFA6B
### Plot each QTLs
HSFA6B_Zscore <- Zscore_WL_TF[Zscore_WL_TF$Gene == "Gohir.A08G064100" | Zscore_WL_TF$Gene == "Gohir.D08G072600",]
HSFA6B_Zscore %>%
ggplot(aes(x=Zscore, y=Lint_yield, color=Gene))+
geom_point(size = 2, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A08G064100="steelblue", Gohir.D08G072600="tomato"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhHSFA6B expression") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("Zscore of TFs") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
HSFB2B
### Plot each QTLs
HSF7_Zscore <- Zscore_WL_TF[Zscore_WL_TF$Gene == "Gohir.D03G153000" | Zscore_WL_TF$Gene == "Gohir.D08G198000" |
Zscore_WL_TF$Gene == "Gohir.A08G179400",]
HSF7_Zscore %>%
ggplot(aes(x=Zscore, y=Lint_yield, color=Gene))+
geom_point(size = 2, alpha = 0.7)+
scale_color_manual(values = c(Gohir.A08G179400="steelblue", Gohir.D08G198000="tomato", Gohir.D03G153000 = "black"))+
theme_bw()+
ggtitle("Correaltion between lint yield and GhHSFB2B Zscore") +
geom_smooth(method = "lm", aes(color= Gene), size = 0.5, level = 0.95) +
stat_cor(label.y.npc = "top") +
xlab("Zscore of TFs") +
ylab("Lint yieled (Kg)") +
theme(legend.position = c(0.8, 0.2))
library(Hmisc)
library(dplyr)
library(reshape2)
library(ggpubr)
library(ggVennDiagram)
library(pheatmap)
library(pheatmap)
library(stringr)
library(RColorBrewer)
library(viridis)
library(ggplot2)
library(tidyr)
library(cowplot)
library(knitr)
load("/Users/Leon/OneDrive - Cornell University/Projects/6_Cotton/2_Pipeline/LogFC.RData")
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
LOG <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Ratio_LogFC.csv", header = T)
row.names(LOG) = LOG[,1]
head(LOG)
Ratio <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/LogFC_meta_table.csv", header = T)
row.names(Ratio) = Ratio$Gene
head(Ratio)
df1 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df2 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df3 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df4 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df5 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df6 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df7 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df8 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df9 <- data.frame(matrix(ncol = 7 , nrow = 67413))
df10 <- data.frame(matrix(ncol = 7 , nrow = 67413))
colnames(df1) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df2) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df3) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df4) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df5) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df6) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df7) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df8) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df9) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
colnames(df10) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[1,-1], Ratio[i,-1])))
df1[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[1,-1], Ratio[i,-1])))
sm = summary(lm(Lint_yield ~ get(Ratio[i,1]), temp.df))
df1[i,5] = sm$coefficients[1]
df1[i,6] = sm$coefficients[2]
df1[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[2,-1], Ratio[i,-1])))
df2[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[2,-1], Ratio[i,-1])))
sm = summary(lm(Mic ~ get(Ratio[i,1]), temp.df))
df2[i,5] = sm$coefficients[1]
df2[i,6] = sm$coefficients[2]
df2[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[3,-1], Ratio[i,-1])))
df3[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[3,-1], Ratio[i,-1])))
sm = summary(lm(UHM ~ get(Ratio[i,1]), temp.df))
df3[i,5] = sm$coefficients[1]
df3[i,6] = sm$coefficients[2]
df3[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[4,-1], Ratio[i,-1])))
df4[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[4,-1], Ratio[i,-1])))
sm = summary(lm(UI ~ get(Ratio[i,1]), temp.df))
df4[i,5] = sm$coefficients[1]
df4[i,6] = sm$coefficients[2]
df4[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[5,-1], Ratio[i,-1])))
df5[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[5,-1], Ratio[i,-1])))
sm = summary(lm(Str ~ get(Ratio[i,1]), temp.df))
df5[i,5] = sm$coefficients[1]
df5[i,6] = sm$coefficients[2]
df5[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[6,-1], Ratio[i,-1])))
df6[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[6,-1], Ratio[i,-1])))
sm = summary(lm(Elo ~ get(Ratio[i,1]), temp.df))
df6[i,5] = sm$coefficients[1]
df6[i,6] = sm$coefficients[2]
df6[i,7] = sm$r.squared
}
### for vegetative indices
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[7,-1], Ratio[i,-1])))
df7[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[7,-1], Ratio[i,-1])))
sm = summary(lm(NDVI ~ get(Ratio[i,1]), temp.df))
df7[i,5] = sm$coefficients[1]
df7[i,6] = sm$coefficients[2]
df7[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[8,-1], Ratio[i,-1])))
df8[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[8,-1], Ratio[i,-1])))
sm = summary(lm(sPRI ~ get(Ratio[i,1]), temp.df))
df8[i,5] = sm$coefficients[1]
df8[i,6] = sm$coefficients[2]
df8[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[9,-1], Ratio[i,-1])))
df9[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[9,-1], Ratio[i,-1])))
sm = summary(lm(CRI ~ get(Ratio[i,1]), temp.df))
df9[i,5] = sm$coefficients[1]
df9[i,6] = sm$coefficients[2]
df9[i,7] = sm$r.squared
}
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[10,-1], Ratio[i,-1])))
df10[i,1:4] <- flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[10,-1], Ratio[i,-1])))
sm = summary(lm(WI.NDVI ~ get(Ratio[i,1]), temp.df))
df10[i,5] = sm$coefficients[1]
df10[i,6] = sm$coefficients[2]
df10[i,7] = sm$r.squared
}
### Combined all this datasets
combined <- bind_rows(df1, df2, df3, df4, df5, df6, df7, df8, df9, df10)
head(combine)
##
## 1 function (...)
## 2 {
## 3 lifecycle::deprecate_warn("1.0.0", "combine()", "vctrs::vec_c()")
## 4 args <- list2(...)
## 5 if (length(args) == 1 && is.list(args[[1]])) {
## 6 args <- args[[1]]
combine_filter <- combined[combined$`P-value` < 0.05,]
combine_filter$Type <- "fill"
combine_filter[combine_filter$R < 0,8] = "N correlated"
combine_filter[combine_filter$R > 0,8] = "Pcorrelated"
combine_filter$ABS = abs(combine_filter$Effects)
head(combine_filter)
write.csv(combine_filter,"/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Combined_file.csv", row.names = F, quote = F)
write.csv(combined,"/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Combined-non-filter_file.csv", row.names = F, quote = F)
combine_stats <- combine_filter %>% group_by(row, Type) %>% mutate(counts = n())
combine_stats <- unique(combine_stats[,c(1,8,10)])
combine_stats <- dcast(combine_stats, row~Type,sum)
combine_stats
write.csv(combine_stats,"/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Combined_stats.csv", row.names = F, quote = F)
### Load metabolties Z-score
Zscore <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/11_Metabolties/Zscores_Met_2019_BLUEs_WW_and_WL_plot.csv", header = T)
sample_col <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/11_Metabolties/sample_col.csv", header = T)
sample_row <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/11_Metabolties/sample_row.csv", header = T)
rownames(sample_row) <- sample_row[,1]
rownames(sample_col) <- sample_col[,1]
rownames(Zscore) <- Zscore[,1]
head(Zscore)
head(sample_col)
head(sample_row)
annotation_colors = list(
treatment = c(WL="red", WW="royalblue"),
Group = c(Wild="pink", Commercial="slateblue1", Elite="palegreen"))
Metablites_select <- Zscore[Zscore$Class == "Genotype",3:44]
head(Metablites_select)
pheatmap(Metablites_select,
annotation_col = sample_col[,2:3],
annotation_row = sample_row[sample_row$Type == "Genotype",2:3],
color = inferno(30),
annotation_colors = annotation_colors,
clustering_distance_r = "manhattan",
show_colnames =F,show_rownames = F,
cluster_rows = T, cluster_cols = T,
fontsize_row = 4, fontsize_col = 4,
angle_col = 90, border_color = NA)
annotation_colors = list(
treatment = c(WL="red", WW="royalblue"),
Group = c(Wild="pink", Commercial="slateblue1", Elite="palegreen"))
Metablites_select <- Zscore[Zscore$Class == "Env",3:44]
head(Metablites_select)
pheatmap(Metablites_select,
annotation_col = sample_col[,2:3],
annotation_row = sample_row[sample_row$Type == "Env",2:3],
color = inferno(30),
annotation_colors = annotation_colors,
clustering_distance_r = "manhattan",
show_colnames =F,show_rownames = F,
cluster_rows = T, cluster_cols = T,
fontsize_row = 4, fontsize_col = 4,
angle_col = 90, border_color = NA)
annotation_colors = list(
treatment = c(WL="red", WW="royalblue"),
Group = c(Wild="pink", Commercial="slateblue1", Elite="palegreen"))
Metablites_select <- Zscore[Zscore$Class == "Interaction",3:44]
head(Metablites_select)
pheatmap(Metablites_select,
annotation_col = sample_col[,2:3],
annotation_row = sample_row[sample_row$Type == "Interaction",2:3],
color = inferno(30),
annotation_colors = annotation_colors,
clustering_distance_r = "manhattan",
show_colnames =F,show_rownames = F,
cluster_rows = T, cluster_cols = T,
fontsize_row = 4, fontsize_col = 4,
angle_col = 90, border_color = NA)
Ratio <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Metabolites_WWWL_ratio.csv", header = T)
row.names(Ratio) =Ratio$Metabolites
head(Ratio)
LOG <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Ratio_LogFC.csv", header = T)
row.names(LOG) = LOG[,1]
head(LOG)
PM_combine: Phenotype-metabolites
###Define DF first for
df <- data.frame(matrix(ncol = 7 , nrow = 451))
colnames(df) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
### Loop analysis nested in metabolites
for (n in 1:nrow(LOG)){
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[n,-1], Ratio[i,-1])))
df[i,1:4] = flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[n,-1], Ratio[i,-1])))
sm = summary(lm(get(LOG[n,1]) ~ get(Ratio[i,1]), temp.df))
df[i,5] = sm$coefficients[1]
df[i,6] = sm$coefficients[2]
df[i,7] = sm$r.squared
}
assign(paste0("Metabolites_df",n), df)
}
merge_list <- lapply(ls(pattern = "Metabolites_df"), get)
PM_combine <- bind_rows(merge_list)
PM_combine_clean <- PM_combine[PM_combine$`P-value` < 0.05, ]
head(PM_combine_clean)
write.csv(PM_combine_clean, "/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Phenotype-metabolites.correlation-Feb15-2022.csv", quote = F)
Ratio <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/LogFC_meta_table.csv", header = T)
row.names(Ratio) = Ratio$Gene
head(Ratio)
LOG <- read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Metabolites_WWWL_ratio.csv", header = T)
row.names(LOG) =LOG$Metabolites
head(LOG)
GM_combine: Genotype-metabolites
###Define DF first for
df <- data.frame(matrix(ncol = 7 , nrow = 451))
colnames(df) <- c("row", "column", "R", "P-value", "Intercept", "Effects", "R.square")
### Loop analysis nested in metabolites
for (n in 1:nrow(LOG)){
for (i in 1:(nrow(Ratio))){
res <- rcorr(t(rbind(LOG[n,-1], Ratio[i,-1])))
df[i,1:4] = flattenCorrMatrix(res$r, res$P)
temp.df = data.frame(t(rbind(LOG[n,-1], Ratio[i,-1])))
sm = summary(lm(get(LOG[n,1]) ~ get(Ratio[i,1]), temp.df))
df[i,5] = sm$coefficients[1]
df[i,6] = sm$coefficients[2]
df[i,7] = sm$r.squared
}
assign(paste0("GM_df",n), df)
}
merge_list <- lapply(ls(pattern = "GM_df"), get)
GM_combine <- bind_rows(merge_list)
GM_combine_clean <- GM_combine[GM_combine$`P-value` < 0.05, ]
head(GM_combine_clean)
write.csv(GM_combine_clean, "/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/LogFC_ratio/Gene-metabolites.correlation-Feb15-2022.csv", quote = F)
### PLot the density plot of 10 traits and 451 metabolites
combine_filter = read.csv("/Users/Leon/OneDrive - Cornell University/3_Manuscript/Manuscript8/7_LogFC_ratio/Plot_meta_Feb2022.csv", header = T)
combine_filter$abs.effect = abs(combine_filter$Effects)
head(combine_filter)
### define length function
fun_length <- function(x){
return(data.frame(y=median(x),label= paste0("N=", length(x))))
}
### Compare effects across the three types of phenotype (density plot)
ggplot(data = combine_filter, aes(y=log2(abs.effect), fill = Type))+
geom_density(alpha = 0.2) +
facet_wrap(~ Trait, ncol=3) + theme_bw() +
xlab("Log2 transformed Abosolute value of effects") +
ylab("Density")
### Compare R square across thre types of phenotypes
ggplot(data = combine_filter, aes(x=R.square, color = Type))+
facet_wrap(~ Trait, ncol=3) +
geom_density(alpha = 0.5) + theme_bw() +
xlab("R square of linear regression fit") +
ylab("Density")
### Compare effects across the three types of phenotype (boxplot)
level_order = c("Metabolites", "Production", "Index")
ggplot(data = combine_filter, aes(x = factor(Trait, level = level_order), y = log2(abs.effect)))+
geom_boxplot(alpha = 0.3, outlier.size = 0.02, fill = "grey") +
theme_bw() +
ylab("Log2 transformed value of absolute effect") +
stat_summary(fun = mean, color = "darkred", position = position_dodge(1),
geom = "point", shape = 4, size = 6,
show.legend = F) +
stat_summary(fun.data = fun_length, geom = "text", vjust = -18, size = 3, color = "red") +
stat_compare_means(method = "kruskal") +
theme(legend.position="none")
venn_combine <- list(Meatbolites = combine_filter[combine_filter$Trait == "Metabolites",2] ,
Index = combine_filter[combine_filter$Trait == "Index",2],
Production = combine_filter[combine_filter$Trait == "Production",2])
ggVennDiagram(venn_combine, label_alpha = 2, set_color = "black", color = "red")