Load libraries
library(ggplot2)
library(tidyr)
library(dplyr)
library(ggpubr)
library(corrplot)
library(cowplot)
Load phenotype data
LI1 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-morning.csv", header = T, row.names = 1)
dat_LI1 <- LI1[, 7:18]
LI1_ave <- data.frame(matrix(ncol = 12, nrow = 48))
vector1 <- unique(LI1$ID)
colnames(LI1_ave) <- colnames(dat_LI1 )
rownames(LI1_ave) <- vector1
head(dat_LI1,20)
LIA <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-afternoon.csv", header = T, row.names = 1)
dat_LIA <- LIA[, 7:18]
LIA_ave <- data.frame(matrix(ncol = 12, nrow = 48))
vector1 <- unique(LIA$ID)
colnames(LIA_ave) <- colnames(dat_LIA )
rownames(LIA_ave) <- vector1
head(dat_LIA,20)
Trait <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Oxidative_clean-20220324.csv", header = T, row.names = 1)
datTrait <- Trait[, 6:34]
datTrait_ave <- data.frame(matrix(ncol = 29, nrow = 48))
vector <- unique(Trait$ID)
colnames(datTrait_ave) <- colnames(datTrait)
rownames(datTrait_ave) <- vector
head(datTrait,30)
Calculate the mean score for each phenotype entry
### View vectors been created
vector1
## [1] "A_WW_TP1" "B_WW_TP1" "C_WW_TP1" "D_WW_TP1" "E_WW_TP1" "F_WW_TP1"
## [7] "A_WL_TP1" "B_WL_TP1" "C_WL_TP1" "D_WL_TP1" "E_WL_TP1" "F_WL_TP1"
## [13] "A_WW_TP3" "B_WW_TP3" "C_WW_TP3" "D_WW_TP3" "E_WW_TP3" "F_WW_TP3"
## [19] "A_WL_TP3" "B_WL_TP3" "C_WL_TP3" "D_WL_TP3" "E_WL_TP3" "F_WL_TP3"
## [25] "A_WW_TP5" "B_WW_TP5" "C_WW_TP5" "D_WW_TP5" "E_WW_TP5" "F_WW_TP5"
## [31] "A_WL_TP5" "B_WL_TP5" "C_WL_TP5" "D_WL_TP5" "E_WL_TP5" "F_WL_TP5"
## [37] "A_WW_TP7" "B_WW_TP7" "C_WW_TP7" "D_WW_TP7" "E_WW_TP7" "F_WW_TP7"
## [43] "A_WL_TP7" "B_WL_TP7" "C_WL_TP7" "D_WL_TP7" "E_WL_TP7" "F_WL_TP7"
vector
## [1] "A_WW_TP1" "B_WW_TP1" "C_WW_TP1" "D_WW_TP1" "E_WW_TP1" "F_WW_TP1"
## [7] "A_WL_TP1" "B_WL_TP1" "C_WL_TP1" "D_WL_TP1" "E_WL_TP1" "F_WL_TP1"
## [13] "A_WW_TP3" "B_WW_TP3" "C_WW_TP3" "D_WW_TP3" "E_WW_TP3" "F_WW_TP3"
## [19] "A_WL_TP3" "B_WL_TP3" "C_WL_TP3" "D_WL_TP3" "E_WL_TP3" "F_WL_TP3"
## [25] "A_WW_TP5" "B_WW_TP5" "C_WW_TP5" "D_WW_TP5" "E_WW_TP5" "F_WW_TP5"
## [31] "A_WL_TP5" "B_WL_TP5" "C_WL_TP5" "D_WL_TP5" "E_WL_TP5" "F_WL_TP5"
## [37] "A_WW_TP7" "B_WW_TP7" "C_WW_TP7" "D_WW_TP7" "E_WW_TP7" "F_WW_TP7"
## [43] "A_WL_TP7" "B_WL_TP7" "C_WL_TP7" "D_WL_TP7" "E_WL_TP7" "F_WL_TP7"
### Calculate the average by loop
for (i in 1:length(vector1)){
LI1_ave[grepl(vector1[i],rownames(LI1_ave)),] <- colMeans(dat_LI1[grepl(vector1[i],rownames(dat_LI1)),])
}
for (i in 1:length(vector1)){
LIA_ave[grepl(vector1[i],rownames(LIA_ave)),] <- colMeans(dat_LIA[grepl(vector1[i],rownames(dat_LIA)),])
}
for (i in 1:length(vector)){
datTrait_ave[grepl(vector[i],rownames(datTrait_ave)),] <- colMeans(datTrait[grepl(vector[i],rownames(datTrait)),])
}
Calculate the score for each component under the PCA
LIM_PCA <- data.frame(t(LI1_ave))
results <- prcomp(LIM_PCA, scale = TRUE)
results$rotation <- -1*results$rotation
results$sdev^2 / sum(results$sdev^2)
## [1] 9.881866e-01 1.017782e-02 1.449840e-03 1.087013e-04 4.083482e-05
## [6] 3.227476e-05 3.878055e-06 2.299865e-08 4.743275e-09 1.156701e-10
## [11] 1.050832e-11 1.787578e-33
LIM_result <- data.frame(results$rotation)
LIM_result
write.csv(LIM_result,
"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LIM_PCA.csv")
LIA_PCA <- data.frame(t(LIA_ave))
LIA_results <- prcomp(LIA_PCA, scale = TRUE)
LIA_results$rotation <- -1*LIA_results$rotation
LIA_results$sdev^2 / sum(LIA_results$sdev^2)
## [1] 9.726724e-01 2.275437e-02 3.958922e-03 4.494922e-04 1.260522e-04
## [6] 2.248722e-05 1.616649e-05 7.779885e-08 1.982320e-08 1.832158e-10
## [11] 1.612612e-11 3.133588e-33
LIA_result <- data.frame(results$rotation)
LIA_result
LIA_plot <- data.frame(LIA_results$rotation)
write.csv(LIA_plot,
"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LIA_PCA.csv")
datTrait_PCA <- data.frame(t(datTrait_ave))
datTrait_results <- prcomp(datTrait_PCA, scale = TRUE)
datTrait_results$rotation <- -1*datTrait_results$rotation
datTrait_results$sdev^2 / sum(datTrait_results$sdev^2)
## [1] 9.955661e-01 3.024152e-03 6.446452e-04 4.409515e-04 2.275682e-04
## [6] 4.614399e-05 1.796990e-05 1.462262e-05 5.936261e-06 4.408063e-06
## [11] 3.661380e-06 1.553189e-06 8.505632e-07 5.282590e-07 3.813104e-07
## [16] 1.386509e-07 1.170517e-07 8.963051e-08 5.393089e-08 3.354159e-08
## [21] 2.850385e-08 2.095280e-08 1.668543e-08 9.253554e-09 7.824310e-09
## [26] 6.320954e-10 6.851839e-11 8.527574e-12 1.963408e-31
datTrait_result <- data.frame(datTrait_results$rotation)
datTrait_result
write.csv(datTrait_result,
"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Oxdative_PCA.csv")
Re-load the clean data
weight <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Weight.csv")
Oxidative <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/Oxidative_clean-20220324.csv")
LIM <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-morning.csv")
LIA <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/5_Phenotype/2_Clean_data/LI6800_clean-20220324-afternoon.csv")
Pearson Correlation and clustering of three classes of
phenotype
Oxidative_cor <- Oxidative
rownames(Oxidative_cor) <- Oxidative_cor$Name
Oxidative_cor <- Oxidative_cor[,c(7:35)]
M <- cor(Oxidative_cor)
Cor_plot1 <- corrplot(M, method = 'color', order = 'hclust',
addrect = 6, rect.col = 'blue', rect.lwd = 2,)

weight_cor <- weight
rownames(weight_cor) <- weight_cor$Plant_ID
weight_cor <- weight_cor[,c(6:10)]
M <- cor(weight_cor)
Cor_plot2 <- corrplot(M, method = 'color', order = 'hclust',
addrect = 2, rect.col = 'blue', rect.lwd = 2,)

LIM_cor <- LIM
rownames(LIM_cor) <- LIM_cor$Name
LIM_cor <- LIM_cor[,c(8:19)]
M <- cor(LIM_cor)
Cor_plot3 <- corrplot(M, method = 'color', order = 'hclust',
addrect = 5, rect.col = 'blue', rect.lwd = 2,)

LIA_cor <- LIA
rownames(LIA_cor) <- LIA_cor$Name
LIA_cor <- LIA_cor[,c(8:19)]
M <- cor(LIA_cor)
Cor_plot4 <- corrplot(M, method = 'color', order = 'hclust',
addrect = 5, rect.col = 'blue', rect.lwd = 2,)

Plot the plant weight data
### Plot weight data
P1 <- ggplot(weight, aes(x = Genotype, y = biomass_total, fill = Treat)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.8, color = "grey40") +
theme_bw() +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Treat), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Genotype), method = "anova",label.y = 900)
P2 <- ggplot(weight, aes(x = Genotype, y = biomass_bag, fill = Treat)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.8, color = "grey40") +
theme_bw() +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Treat), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Genotype), method = "anova",label.y = 85)
P3 <- ggplot(weight, aes(x = Genotype, y = root_total, fill = Treat)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.8, color = "grey40") +
theme_bw() +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Treat), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Genotype), method = "anova",label.y = 350)
P4 <- ggplot(weight, aes(x = Genotype, y = bag_total, fill = Treat)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.8, color = "grey40") +
theme_bw() +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Treat), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Genotype), method = "anova",label.y = 40)
P5 <- ggplot(weight, aes(x = Genotype, y = Three_leaves, fill = Treat)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.8, color = "grey40") +
theme_bw() +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Treat), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Genotype), method = "anova",label.y = 12)
plot_grid(P1,P2,P3,P4,P5, labels = c('A', 'B', 'C', 'D', 'E'), label_size = 12)

plot Licor6800 (afternoon) data
data_plot = LIA
M1 <- ggplot(data_plot, aes(x = Time, y = WUEi, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 300)
M2 <- ggplot(data_plot, aes(x = Time, y = A, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 70)
M3 <- ggplot(data_plot, aes(x = Time, y = Ci, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 210)
M4 <- ggplot(data_plot, aes(x = Time, y = gsw, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 1)
M5 <- ggplot(data_plot, aes(x = Time, y = VPDleaf, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 8)
M6 <- ggplot(data_plot, aes(x = Time, y = Fm., fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 2500)
M7 <- ggplot(data_plot, aes(x = Time, y = Fv..Fm., fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 1)
M8 <- ggplot(data_plot, aes(x = Time, y = Fo., fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 850)
M9 <- ggplot(data_plot, aes(x = Time, y = Fv., fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 1650)
M10 <- ggplot(data_plot, aes(x = Time, y = H2O_r, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 50)
M11 <- ggplot(data_plot, aes(x = Time, y = Tair, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 50)
M12 <- ggplot(data_plot, aes(x = Time, y = Tleaf, fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova",label.y = 50)
#plot_grid(M1,M2,M3,M4,M5,M6,M7,M8,M9,M10,M11,M12,
#labels = c('A', 'B', 'C', 'D', 'E',
# 'F', 'G', 'H', 'I', 'J', 'K', 'L'), label_size = 12)
M1

M2

M3

M4

M5

M6

M7

M8

M9

M10

M11

M12

Plot the oxdative stress data phenotype
Oxidative
Oxd_vector <- colnames(Oxidative[,7:35])
for (i in 1:length(Oxd_vector)){
Yplot <- Oxd_vector[i]
print(ggplot(Oxidative, aes(x = Time, y = get(Yplot), fill = Condition)) +
geom_boxplot(outlier.size = 0.3, outlier.shape = 21,
size = 0.5, color = "grey40") +
theme_bw() +
facet_wrap( ~ Genotype) +
geom_jitter(size = 0.6, color = "red") +
scale_fill_manual(values = c(WL="tomato", WW="steelblue")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Condition), method = "t.test", label = "p.signif") +
stat_compare_means(aes(group = Time), method = "anova", label.y = 1.2 * max(Oxidative[,Oxd_vector[i]])) +
ylab(Oxd_vector[i]))
#assign(paste0(Oxd_vector[i], "_plot_OXD"),plot_df)
}
























