Seeds of 420 cowpea landraces collected across 19 northern state and the Genetic Resources Center (GRC) of International Institute of Tropical Agriculture (IITA), Ibadan, were used for this study. The landraces were screened for their response to drought stress at seedling stage using wooden box techniques at the department of Botany, Ahmadu Bello University, Zaria.
The two well characterized drought tolerant (Danila) and sensitive (TVu7778) lines were used as checks to monitor the progress of the drought stress and determine the time of re-irrigation. Data were collected on the plant height (PH), number of trifoliate (TN), leaf senescence score (LS) and stem greenness score (SG) over the course of the experiment. The recovery rate (RR) was assessed after re-irrigation. Soil samples were also collected to determine the soil drying progress during the experiment.
Data were collected by the cowpea research team lead by Solihu Kayode Sakariyahu and Sadam Sulaiman Indabo supervised by Dr. Aliyu E. Ramatu. The data generated was curated and analyzed using the data analysis workflow described below by Solihu Kayode Sakariyahu.
To get started let’s load all the packages we need for this analysis.
library(tidyverse)
library(cowplot)
library(plotrix)
library(ggpubr)
library(agricolae)
library(rstatix)
library(lme4)
library(reshape2)
library(ggfortify)
library(rlang)
library(factoextra)
library(corrplot)
library(Hmisc)
library(RColorBrewer)
library(viridis)
suppressPackageStartupMessages(library("ComplexHeatmap"))
library(dendextend)
library(Hmisc)
library(RColorBrewer)
library(viridis)So, we will first visualize the soil drying kinetics to understand the drought progress over the period of the experiment.
Let’s load the soil drying data.
cowpea_soil_drying <- read.csv("soil_drying.csv")
head(cowpea_soil_drying)This table has all the initial and final weight of the soil before and after drying in the oven. Let’s extract the columns we need for our analysis and make it in long format required for further analysis.
cowpea_soil_drying2 <- cowpea_soil_drying[-5:-10] %>%
pivot_longer(!c("Sampling_time","Day", "Rep","Box"),
names_to = "Sampling_Number", values_to = "value")
# Lets check the data head and names of columns.
head(cowpea_soil_drying2)names(cowpea_soil_drying2)## [1] "Sampling_time" "Day" "Rep" "Box"
## [5] "Sampling_Number" "value"
We are going to check if the data make sense by plotting the rep-wise over time.
# Rep 1
cowpea_soil_drying3 <- cowpea_soil_drying2 %>% filter(Rep == 1)
head(cowpea_soil_drying3)# Rep 2
cowpea_soil_drying3.2 <- cowpea_soil_drying2 %>% filter(Rep == 2)
head(cowpea_soil_drying3.2)Now, let’s take a look at the plots.
# Rep 1
ggplot(cowpea_soil_drying3,
aes(factor(Day, levels = c("D1", "D7", "D13", "D16")),
value, group = Sampling_Number))+
geom_line(aes(colour = Sampling_Number))+
geom_point(aes(colour = Sampling_Number))+
facet_wrap(~Box)#Rep 2
ggplot(cowpea_soil_drying3.2,
aes(factor(Day, levels = c("D1", "D7", "D13", "D16")),
value, group = Sampling_Number))+
geom_line(aes(colour = Sampling_Number))+
geom_point(aes(colour = Sampling_Number))+
facet_wrap(~Box)Since everything looks good, let’s put reps together and summary the data.
cowpea_soil_drying4 <- cowpea_soil_drying3 %>%
group_by(Day, Box) %>%
summarise(weight = mean(value),
se = std.error(value),
SD = sd(value))## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.
cowpea_soil_drying4$Day <- factor(cowpea_soil_drying4$Day,
levels = c("D1", "D7", "D13", "D16"))
head(cowpea_soil_drying4)Here is the final figure of the soil drying progress for all the box used.
# Plot the final figure for the soil drying.
plot1 <- ggplot(cowpea_soil_drying4, aes(Day, weight, group = Box))+
geom_line(aes(color = factor(Box)))+
geom_point(aes(color = factor(Box)))+
geom_errorbar(aes(ymin= weight-se, ymax=weight+se, color = factor(Box)),
width = 0.05)+
scale_y_continuous(expand = c(0,0), limits = c(0, 15),breaks = seq(0, 15, by=2.5))+
theme_minimal(base_size = 15)+
labs(x = "Days after stop of irrigation", y = "Amount of water available (g)",colour = "Box")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plot1#ggsave(plot1, filename = "2022.04.04.Soil_drying_kinetics1.png", bg = "white")Data wrangling for the agronomic and stay green data collected for the drought experiment.
library(lme4)
library(agricolae)
library(reshape2)
library(tidyverse)
library(ggpubr)# Load the data
df.all_trait <- read.csv("Final_2022.03.19.cleaned.data.cowpea.seedlings.KS.csv")
head(df.all_trait)names(df.all_trait)## [1] "Accession" "Box" "Check" "Plot_No" "Rep"
## [6] "No_of_Plant" "D1_TN1" "D1_TN2" "D1_TN3" "D1_TN4"
## [11] "D11_TN1" "D11_TN2" "D11_TN3" "D11_TN4" "D12_PH1"
## [16] "D12_PH2" "D12_PH3" "D12_PH4" "D13_LS1" "D13_LS2"
## [21] "D13_LS3" "D13_LS4" "D14_TN1" "D14_TN2" "D14_TN3"
## [26] "D14_TN4" "D15_PH1" "D15_PH2" "D15_PH3" "D15_PH4"
## [31] "D16_LS1" "D16_LS2" "D16_LS3" "D16_LS4" "D17_TN1"
## [36] "D17_TN2" "D17_TN3" "D17_TN4" "D22_LS1" "D22_LS2"
## [41] "D22_LS3" "D22_LS4" "D2_PH1" "D2_PH2" "D2_PH3"
## [46] "D2_PH4" "D21_SG1" "D21_SG2" "D21_SG3" "D21_SG4"
## [51] "D19_LS1" "D19_LS2" "D19_LS3" "D19_LS4" "D24_SG1"
## [56] "D24_SG2" "D24_SG3" "D24_SG4" "D24_TN1" "D24_TN2"
## [61] "D24_TN3" "D24_TN4" "D25_LS1" "D25_LS2" "D25_LS3"
## [66] "D25_LS4" "D27_SG1" "D27_SG2" "D27_SG3" "D27_SG4"
## [71] "D6_TN1" "D6_TN2" "D6_TN3" "D6_TN4" "D7_PH1"
## [76] "D7_PH2" "D7_PH3" "D7_PH4" "RR_RR"
length(unique(df.all_trait$Accession))## [1] 422
#str(df.all_trait)
#summary(df.all_trait)# pivot longer to rename the variables and arrage the data neatly.
df.all_trait1 <- df.all_trait %>%
pivot_longer(!c("Plot_No","Accession","Check",
"Rep","Box","No_of_Plant"),
names_to = "trait", values_to = "value" ) %>%
filter(value >= 0)
length(unique(df.all_trait1$Accession))## [1] 420
df.all_trait1$Day <- sapply(strsplit(as.character(df.all_trait1$trait), "_"), "[[", 1 )
df.all_trait1$Individual <- sapply( strsplit(as.character(df.all_trait1$trait), "_"), "[[", 2 )
unique(df.all_trait1$Individual)## [1] "TN1" "TN3" "PH1" "PH3" "RR" "TN2" "PH2" "TN4" "LS2" "PH4" "LS1" "LS3"
## [13] "LS4" "SG3" "SG4" "SG1" "SG2"
df.all_trait1$variable[df.all_trait1$Individual==
"PH1"|df.all_trait1$Individual==
"PH2"|df.all_trait1$Individual==
"PH3"|df.all_trait1$Individual
=="PH4"] <- "Plant_height"## Warning: Unknown or uninitialised column: `variable`.
df.all_trait1$variable[df.all_trait1$Individual==
"TN1"|df.all_trait1$Individual==
"TN2"|df.all_trait1$Individual==
"TN3"|df.all_trait1$Individual
=="TN4"] <- "Trifoliate"
df.all_trait1$variable[df.all_trait1$Individual==
"LS1"|df.all_trait1$Individual==
"LS2"|df.all_trait1$Individual==
"LS3"|df.all_trait1$Individual
=="LS4"] <- "Senescence"
df.all_trait1$variable[df.all_trait1$Individual==
"SG1"|df.all_trait1$Individual==
"SG2"|df.all_trait1$Individual==
"SG3"|df.all_trait1$Individual==
"SG4"] <- "Stem_green"
df.all_trait1$variable[df.all_trait1$Individual==
"RR"] <- "Recovery"
head(df.all_trait1)# pivot longer to average the traits per plot and remove NAs.
df.all_trait2 <- df.all_trait1 %>%
group_by(Plot_No,Accession,Check,Rep,Box, No_of_Plant, Day, variable) %>%
summarise(value = mean(value, na.rm =TRUE))## `summarise()` has grouped output by 'Plot_No', 'Accession', 'Check', 'Rep',
## 'Box', 'No_of_Plant', 'Day'. You can override using the `.groups` argument.
df.all_trait2$heading <- paste(df.all_trait2$variable, df.all_trait2$Day, sep = "_")
head(df.all_trait2)df.all_trait3 <- df.all_trait2 %>%
pivot_wider(c("Plot_No","Accession","Check","Rep",
"Box", "No_of_Plant"), names_from = heading, values_from = value)
head(df.all_trait3)The BLUP script was adapted from https://github.com/mighster/BLUPs_Heritability/blob/master/BLUP_Tutorial.R
# setup the dataframe for BLUP analysis for the data collected.
head(df.all_trait3)colnames(df.all_trait3)## [1] "Plot_No" "Accession" "Check" "Rep"
## [5] "Box" "No_of_Plant" "Trifoliate_D1" "Plant_height_D2"
## [9] "Trifoliate_D6" "Plant_height_D7" "Recovery_RR" "Trifoliate_D14"
## [13] "Trifoliate_D11" "Plant_height_D12" "Senescence_D13" "Plant_height_D15"
## [17] "Trifoliate_D17" "Senescence_D22" "Senescence_D16" "Senescence_D19"
## [21] "Stem_green_D21" "Stem_green_D24" "Trifoliate_D24" "Senescence_D25"
## [25] "Stem_green_D27"
# Make columns as independent variables as factors
df.all_trait3$Plot_No=as.factor(df.all_trait3$Plot_No)
df.all_trait3$Accession=as.factor(df.all_trait3$Accession)
df.all_trait3$Check = as.factor(df.all_trait3$Check)
df.all_trait3$Box=as.factor(df.all_trait3$Box)
df.all_trait3$Rep=as.factor(df.all_trait3$Rep)
length(unique(df.all_trait3$Accession))## [1] 420
# Create empty data frame for BLUP output
DataOutput <- data.frame(matrix(vector(),420,1, dimnames=list(c(), c("Accession"))))
# Fill empty dataframe with 1-420 so that the cbind will work later on.
DataOutput$Accession <- unique(df.all_trait3[,2]) #fill in Entry numbers
DataOutput$Row <- c(1:420)
DataOutput$Name <- unique(sort(df.all_trait3$Accession))
# This empty dataframe is for variance components
DataVarComp <- data.frame()
DataVarCompOutput <- data.frame()
HeritabilityData <- data.frame()
# This empty dataframe is for dropped variance components
drops <- c("var1","var2","sdcor") colnames(df.all_trait3)## [1] "Plot_No" "Accession" "Check" "Rep"
## [5] "Box" "No_of_Plant" "Trifoliate_D1" "Plant_height_D2"
## [9] "Trifoliate_D6" "Plant_height_D7" "Recovery_RR" "Trifoliate_D14"
## [13] "Trifoliate_D11" "Plant_height_D12" "Senescence_D13" "Plant_height_D15"
## [17] "Trifoliate_D17" "Senescence_D22" "Senescence_D16" "Senescence_D19"
## [21] "Stem_green_D21" "Stem_green_D24" "Trifoliate_D24" "Senescence_D25"
## [25] "Stem_green_D27"
str(df.all_trait3[,7:ncol(df.all_trait3)])## tibble [861 × 19] (S3: tbl_df/tbl/data.frame)
## $ Trifoliate_D1 : num [1:861] 0 0 0 0 0 0 0 0 1.75 0 ...
## $ Plant_height_D2 : num [1:861] 1.5 1.05 1.65 3.5 3.13 ...
## $ Trifoliate_D6 : num [1:861] 0 NA 0.5 0.5 1 1 1 1 2 0 ...
## $ Plant_height_D7 : num [1:861] 0.5 NA 1.75 4.2 5 ...
## $ Recovery_RR : num [1:861] 0 0 0 0 0 0 0 0 0 0 ...
## $ Trifoliate_D14 : num [1:861] NA 0 NA 0.5 1 ...
## $ Trifoliate_D11 : num [1:861] NA NA 1 0.5 1 ...
## $ Plant_height_D12: num [1:861] NA NA 1.39 4.12 4.5 ...
## $ Senescence_D13 : num [1:861] NA NA 1 2.5 2.5 ...
## $ Plant_height_D15: num [1:861] NA NA 1.3 4.17 3.75 ...
## $ Trifoliate_D17 : num [1:861] NA NA 0 0.25 1 ...
## $ Senescence_D22 : num [1:861] NA NA 5 4.5 3 ...
## $ Senescence_D16 : num [1:861] NA NA NA 3.75 3 5 4 4 3.25 NA ...
## $ Senescence_D19 : num [1:861] NA NA NA 4.67 2.5 ...
## $ Stem_green_D21 : num [1:861] NA NA NA 1 1 1 0 1 0.25 NA ...
## $ Stem_green_D24 : num [1:861] NA NA NA 1 1 0 0 1 0 NA ...
## $ Trifoliate_D24 : num [1:861] NA NA NA 0.333 1 ...
## $ Senescence_D25 : num [1:861] NA NA NA 5 5 5 5 4 NA NA ...
## $ Stem_green_D27 : num [1:861] NA NA NA 0 0 0 0 0 0 NA ...
# Take only the columns with numerical data.
colnum=c(7:ncol(df.all_trait3))Our model for the mixed model with accessions, rep, box and box by accessions interaction treated as random-effects distinguished by vertical bars or pipes (|)
# Forward loop for the BLUPs
for (i in 1:19){ # This second loop runs through each TRAIT, one at a time.
#set the current [i] column as x
x=colnum[i]
# Sets the current column header as the trait name.
trait=colnames(df.all_trait3)[x]
# Making a copy of the dataframe that we can use for our analysis
df1 <- df.all_trait3
# Renaming the trait variable as "y" for the model analysis below.
colnames(df1)[x]="y"
# Here is our random effects mixed model
model <- lmer(y~ (1|Accession) +(1|Rep) + (1|Box) + (1|Box:Accession), df1)
summary(model)
# This calculates `estimated variances` between random-effects terms in a mixed-effects model.
varComp <- as.data.frame(VarCorr(model,comp="vcov"))
# `coef` extracts model coefficients from lmer objects returned by modeling functions.
blup = coef(model)$Accession
# rename the BLUP column by the trait in our loop
colnames(blup) <- trait
# Ammends our dataframe with the new BLUP column for the new trait.
blupdataframe <- data.frame(blup) %>% rownames_to_column()
DataOutputnew <- merge(DataOutput[2:3],blupdataframe, by.x = "Name", by.y = "rowname", all = T)
DataOutput <- cbind(DataOutput,DataOutputnew)
# Modify variance component by deleting columns in variance component dataframe that we don't need.
varComp<-varComp[ , !(names(varComp) %in% drops)]
# Set the trait name from the loop
varComp$Trait<-trait
# Add columns to existing dataframe
DataVarComp <- rbind(DataVarComp,varComp)
}## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
# Re-arrange our variance components dataframe so that we can run the heritability script.
DataVarCompOutput <- reshape(DataVarComp, idvar = "Trait",
timevar = "grp", direction = "wide")
# The estimate of broad sense heritability based on the formula described by Falconer & Mackay, 1996.
nrep = 2
HeritabilityData <- ((DataVarCompOutput[,3])) /
(((DataVarCompOutput[,3])) + (((DataVarCompOutput[,6])) / (nrep)))*100# Combine the heritability to the variance component data frame.
DataVarCompOutput <- cbind(DataVarCompOutput,HeritabilityData)
DataVarCompOutput[1:10,]# Save output to csv
# Output that variance component
#write.csv(DataVarCompOutput,"2022.04.01.VarianceComponents_cowpea.csv", row.names = F)
names(DataOutput)## [1] "Accession" "Row" "Name" "Name"
## [5] "Row" "Trifoliate_D1" "Name" "Row"
## [9] "Plant_height_D2" "Name" "Row" "Trifoliate_D6"
## [13] "Name" "Row" "Plant_height_D7" "Name"
## [17] "Row" "Recovery_RR" "Name" "Row"
## [21] "Trifoliate_D14" "Name" "Row" "Trifoliate_D11"
## [25] "Name" "Row" "Plant_height_D12" "Name"
## [29] "Row" "Senescence_D13" "Name" "Row"
## [33] "Plant_height_D15" "Name" "Row" "Trifoliate_D17"
## [37] "Name" "Row" "Senescence_D22" "Name"
## [41] "Row" "Senescence_D16" "Name" "Row"
## [45] "Senescence_D19" "Name" "Row" "Stem_green_D21"
## [49] "Name" "Row" "Stem_green_D24" "Name"
## [53] "Row" "Trifoliate_D24" "Name" "Row"
## [57] "Senescence_D25" "Name" "Row" "Stem_green_D27"
# Let's extract the columns we need for summary.
DataOutput.final <- DataOutput[c(3,6,9,12,15,18,21,24,27,30,33,36,39,
42,45,48,51,54,57,60)]
names(DataOutput.final)## [1] "Name" "Trifoliate_D1" "Plant_height_D2" "Trifoliate_D6"
## [5] "Plant_height_D7" "Recovery_RR" "Trifoliate_D14" "Trifoliate_D11"
## [9] "Plant_height_D12" "Senescence_D13" "Plant_height_D15" "Trifoliate_D17"
## [13] "Senescence_D22" "Senescence_D16" "Senescence_D19" "Stem_green_D21"
## [17] "Stem_green_D24" "Trifoliate_D24" "Senescence_D25" "Stem_green_D27"
head(DataOutput.final)# Output the blup for each landrace
#write.csv(DataOutput.final,"2022.04.01.Blupforlandraces.csv", row.names = F)###### Summary of BLUP output for landraces
meandata <- DataOutput.final[-1]
dfBLUPsummary1 <- meandata%>%
dplyr::summarise_each(funs(min(., na.rm = TRUE)))%>%
rownames_to_column() %>%
mutate(rowname = recode(rowname, "1" = 'Min'))## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## Please use `across()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas:
##
## # Simple named list:
## list(mean = mean, median = median)
##
## # Auto named with `tibble::lst()`:
## tibble::lst(mean, median)
##
## # Using lambdas
## list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
dfBLUPsummary2 <- meandata %>%
dplyr::summarise_each(funs(max(., na.rm = TRUE)))%>%
rownames_to_column() %>%
mutate(rowname = recode(rowname, "1" = 'Max'))
dfBLUPsummary3 <- meandata %>%
dplyr::summarise_each(funs(sd(., na.rm = TRUE)))%>%
rownames_to_column() %>%
mutate(rowname = recode(rowname, "1" = 'SD'))
dfBLUPsummary4 <- meandata %>%
dplyr::summarise_each(funs(mean(., na.rm = TRUE))) %>%
rownames_to_column() %>%
mutate(rowname = recode(rowname, "1" = 'Mean'))
dfBLUPsummary5 <- meandata %>%
dplyr::summarise_each(funs(median(., na.rm = TRUE)))%>%
rownames_to_column() %>%
mutate(rowname = recode(rowname, "1" = 'Median'))
DataOutputfinalsummary <- rbind(dfBLUPsummary1, dfBLUPsummary2,
dfBLUPsummary3, dfBLUPsummary4, dfBLUPsummary5)
DataOutputfinalsummary1 <- setNames(data.frame(t(DataOutputfinalsummary[,-1])),
DataOutputfinalsummary[,1])
DataOutputfinalsummary1#write.csv(DataOutputfinalsummary1,"2022.04.01.BLUPsummary_cowpea.csv")# Plant Height (PH)
names(DataOutput.final)## [1] "Name" "Trifoliate_D1" "Plant_height_D2" "Trifoliate_D6"
## [5] "Plant_height_D7" "Recovery_RR" "Trifoliate_D14" "Trifoliate_D11"
## [9] "Plant_height_D12" "Senescence_D13" "Plant_height_D15" "Trifoliate_D17"
## [13] "Senescence_D22" "Senescence_D16" "Senescence_D19" "Stem_green_D21"
## [17] "Stem_green_D24" "Trifoliate_D24" "Senescence_D25" "Stem_green_D27"
DataOutput1 <- DataOutput.final[, c(1,3,5,9,11)] %>% na.omit() %>%
pivot_longer(!c("Name"),
names_to = "Height", values_to = "value")
unique(DataOutput1$Height)## [1] "Plant_height_D2" "Plant_height_D7" "Plant_height_D12" "Plant_height_D15"
DataOutput1$Height <- recode(DataOutput1$Height, Plant_height_D2 = "D2",
Plant_height_D7 = "D7", Plant_height_D12 = "D12",
Plant_height_D15 = "D15") %>%
factor(levels = c("D2", "D7", "D12", "D15"))
df.summary <- DataOutput1 %>% group_by(Height) %>%
summarise(Max = max(value, na.rm = TRUE))
df.summarylsd=LSD.test(aov(value~Height,data=DataOutput1), "Height", group=T)
sig.letters <- lsd$groups[order(lsd$groups$value,decreasing = F), ]
plantheightplot <- ggplot(DataOutput1, aes(Height, value))+
geom_violin(aes(fill = Height))+
geom_boxplot( width = 0.1)+
geom_text(data=df.summary,size = 4,
aes(x = Height, y = 0.9 + Max,label = sig.letters$groups),vjust=0)+
stat_compare_means(aes(Height, value),
data = DataOutput1, method = "anova", label.y = 19.5)+
scale_y_continuous(expand = c(0,0),
limits = c(0, 20),breaks = seq(0, 20, by=2))+
theme_minimal(base_size = 15)+
labs(x = "Days after last watering", y = "Plant height (cm)",colour = "")+
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plantheightplot#ggsave(plantheightplot, filename = "2022.04.01.plantheightplot.png", bg ="white")# Density plot for Trifoliate Number (TN)
names(DataOutput.final)## [1] "Name" "Trifoliate_D1" "Plant_height_D2" "Trifoliate_D6"
## [5] "Plant_height_D7" "Recovery_RR" "Trifoliate_D14" "Trifoliate_D11"
## [9] "Plant_height_D12" "Senescence_D13" "Plant_height_D15" "Trifoliate_D17"
## [13] "Senescence_D22" "Senescence_D16" "Senescence_D19" "Stem_green_D21"
## [17] "Stem_green_D24" "Trifoliate_D24" "Senescence_D25" "Stem_green_D27"
DataOutput2 <- DataOutput.final[, c(1:2,4,7,8,12,18)] %>% na.omit() %>%
pivot_longer(!c("Name"),
names_to = "Trifoliate", values_to = "value")
DataOutput2$Trifoliate <- recode(DataOutput2$Trifoliate, Trifoliate_D1 = "D1",
Trifoliate_D6 = "D6", Trifoliate_D11 = "D11",
Trifoliate_D14 = "D14",
Trifoliate_D17 = "D17", Trifoliate_D24 = "D24") %>%
factor(levels = c("D1", "D6", "D11", "D14", "D17", "D24"))
p1 <- ggplot(DataOutput2, aes(value))+
geom_density(aes(fill = Trifoliate), alpha = 0.8)+
scale_y_continuous(expand = c(0,0), limits = c(0, 45),breaks = seq(0, 45, by=5))+
theme_minimal(base_size = 15)+
labs(x = "Average number of trifoliate", fill = "")+
theme(legend.position = "right",axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
p1# Leaf senescence score (LS) distribution
names(DataOutput.final)## [1] "Name" "Trifoliate_D1" "Plant_height_D2" "Trifoliate_D6"
## [5] "Plant_height_D7" "Recovery_RR" "Trifoliate_D14" "Trifoliate_D11"
## [9] "Plant_height_D12" "Senescence_D13" "Plant_height_D15" "Trifoliate_D17"
## [13] "Senescence_D22" "Senescence_D16" "Senescence_D19" "Stem_green_D21"
## [17] "Stem_green_D24" "Trifoliate_D24" "Senescence_D25" "Stem_green_D27"
DataOutput3 <- DataOutput.final[, c(1,10,13:15,19)] %>% na.omit() %>%
pivot_longer(!c("Name"),
names_to = "Leaf_score", values_to = "value")
unique(DataOutput3$Leaf_score)## [1] "Senescence_D13" "Senescence_D22" "Senescence_D16" "Senescence_D19"
## [5] "Senescence_D25"
DataOutput3$Leaf_score <- recode(DataOutput3$Leaf_score, Senescence_D13 = "D13",
Senescence_D16 = "D16", Senescence_D19 = "D19",
Senescence_D22 = "D22", Senescence_D25 = "D25") %>%
factor(levels = c("D13", "D16", "D19", "D22", "D25"))
p2 <- ggplot(DataOutput3, aes(value))+
geom_density(aes(fill = Leaf_score), alpha = 0.8)+
scale_y_continuous(expand = c(0,0), limits = c(0, 8),breaks = seq(0, 8, by=1))+
theme_minimal(base_size = 15)+
labs(x = "Leaf senescence score ", fill = "")+
theme(legend.position = "right",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
p2# Stem greenness (SG) distrbution
names(DataOutput.final)## [1] "Name" "Trifoliate_D1" "Plant_height_D2" "Trifoliate_D6"
## [5] "Plant_height_D7" "Recovery_RR" "Trifoliate_D14" "Trifoliate_D11"
## [9] "Plant_height_D12" "Senescence_D13" "Plant_height_D15" "Trifoliate_D17"
## [13] "Senescence_D22" "Senescence_D16" "Senescence_D19" "Stem_green_D21"
## [17] "Stem_green_D24" "Trifoliate_D24" "Senescence_D25" "Stem_green_D27"
DataOutput4 <- DataOutput.final[, c(1,16,17,20)] %>% na.omit() %>%
pivot_longer(!c("Name"),
names_to = "stem", values_to = "value")
DataOutput4$stem <- recode(DataOutput4$stem,Stem_green_D21 = "D21",
Stem_green_D24 = "D24", Stem_green_D27 = "D27") %>%
factor(levels = c("D21","D24","D27"))
p3<- ggplot(DataOutput4, aes(value))+
geom_density(aes(fill = stem), alpha = 0.8)+
scale_y_continuous(expand = c(0,0),
limits = c(0, 25),breaks = seq(0, 25, by=2.5))+
theme_minimal(base_size = 15)+
labs(x = "Stem greenness score ", fill = "")+
theme(legend.position = "right",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
p3# Recovery rate (RR)
DataOutput5 <- DataOutput.final[, c(1,6)] %>% na.omit()
p4 <- ggplot(DataOutput5, aes(Recovery_RR))+
geom_histogram(fill="#619CFF", colour="black", bins = 5)+
scale_y_continuous(expand = c(0,0), limits = c(0, 400),breaks = seq(0, 400, by=25))+
labs(x = "Recovery rate (%)")+
theme_minimal(base_size = 14) + theme(legend.position = "none")
p4#ggsave(p4, filename = "2022.04.01.RRPlot.png", bg = "white")# Putting all the plots together
#FInalplot <- plot_grid(p1, p2, p3, labels = "AUTO", ncol = 1)
#FInalplot
#ggsave(FInalplot, filename = "2022.04.01.StemTriLeaf2.png", bg = "white",
#width = 8.42 , height = 10.82)# General linear model (analysis of variance) for the trait evaluated.
write.csv(df.all_trait3, "2022.04.01.df.all_trait3.csv", row.names = F)
df_all_trait3 <- read.csv("2022.04.01.df.all_trait3.csv")
head(df_all_trait3)AVz <- rep(NA, ncol(df_all_trait3))
sink("2022.04.01.ANOVA_table_for_cowpea_seedling_droughtnew.txt")
for (k in 7:ncol(df_all_trait3)){
column <- names(df_all_trait3[k])
AVz <- summary(aov(df_all_trait3[,k] ~ Rep + Check + Box*Accession, df_all_trait3))
model <- aov(df_all_trait3[,k] ~ Rep + Check + Box*Accession, df_all_trait3)
min1 <- min(df_all_trait3[,k], na.rm = TRUE)
max1 <- max(df_all_trait3[,k], na.rm = TRUE)
}
sink()
AVz## Df Sum Sq Mean Sq F value Pr(>F)
## Rep 1 0.24 0.2351 2.498 0.11903
## Check 1 0.34 0.3413 3.625 0.06150 .
## Box 1 1.10 1.0997 11.681 0.00111 **
## Accession 405 45.56 0.1125 1.195 0.19449
## Box:Accession 260 27.51 0.1058 1.124 0.29494
## Residuals 63 5.93 0.0941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 129 observations deleted due to missingness
# Let's check our dataset for the multivariate analysis
names(df_all_trait3)## [1] "Plot_No" "Accession" "Check" "Rep"
## [5] "Box" "No_of_Plant" "Trifoliate_D1" "Plant_height_D2"
## [9] "Trifoliate_D6" "Plant_height_D7" "Recovery_RR" "Trifoliate_D14"
## [13] "Trifoliate_D11" "Plant_height_D12" "Senescence_D13" "Plant_height_D15"
## [17] "Trifoliate_D17" "Senescence_D22" "Senescence_D16" "Senescence_D19"
## [21] "Stem_green_D21" "Stem_green_D24" "Trifoliate_D24" "Senescence_D25"
## [25] "Stem_green_D27"
df3 <- df_all_trait3[, c(2,7:25)] %>%
pivot_longer(!c(Accession),names_to = "trait", values_to = "value")
df3$trait <- recode(df3$trait,
Plant_height_D2 = "PH_D2", Plant_height_D7 = "PH_D7",
Plant_height_D12 = "PH_D12",Plant_height_D15 = "PH_D15",
Trifoliate_D1 = "TN_D1",Trifoliate_D6 = "TN_D6",
Trifoliate_D11 = "TN_D11",Trifoliate_D14 = "TN_D14",
Trifoliate_D17 = "TN_D17",Trifoliate_D24 = "TN_D24",
Senescence_D13 = "LS_D13",Senescence_D16 = "LS_D16",
Senescence_D19 = "LS_D19",Senescence_D22 = "LS_D22",
Senescence_D25 = "LS_D25",Stem_green_D21 = "SG_D21",
Stem_green_D24 = "SG_D24", Stem_green_D27 = "SG_D27",
Recovery_RR = "RR")# Mean summary of the data per accessions
df4 <- df3 %>%
group_by(Accession, trait) %>%
dplyr::summarise_each(funs(mean(., na.rm = T))) %>%
pivot_wider(Accession, names_from = trait, values_from = value)%>%
na.omit()df5 <- column_to_rownames(df4, var = "Accession")
pca <- df5 %>%
scale()%>%## scale the variables
prcomp()## print out a summary
summary(pca)## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.3468 1.8330 1.34215 1.11038 1.01315 0.92748 0.83969
## Proportion of Variance 0.2899 0.1768 0.09481 0.06489 0.05402 0.04528 0.03711
## Cumulative Proportion 0.2899 0.4667 0.56151 0.62641 0.68043 0.72571 0.76281
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.81823 0.77617 0.71099 0.69535 0.67590 0.63351 0.5977
## Proportion of Variance 0.03524 0.03171 0.02661 0.02545 0.02404 0.02112 0.0188
## Cumulative Proportion 0.79805 0.82976 0.85636 0.88181 0.90586 0.92698 0.9458
## PC15 PC16 PC17 PC18 PC19
## Standard deviation 0.58364 0.51256 0.46322 0.33691 0.31411
## Proportion of Variance 0.01793 0.01383 0.01129 0.00597 0.00519
## Cumulative Proportion 0.96371 0.97754 0.98883 0.99481 1.00000
#pca$rotation# Plot the loadings to see the major trait contribution to PC1 and PC2.
pcaloadings <- pca$rotation %>%
as.data.frame() %>%
mutate(variables = rownames(.)) %>%
gather(PC,loading,PC1:PC2) %>%
ggplot(aes(loading, variables, fill = loading > 0)) +
geom_col() + geom_text(aes(label= (round(loading, 2))),color="black", size=2)+
facet_wrap(~PC, scales = "free_y") +
labs(x = "Value of loading",y = NULL, fill = "Positive?")+
theme_minimal(base_size = 13)+ scale_fill_brewer(palette = "Set1")
pcaloadings#ggsave("2022.04.01.droughtpcaloadings.png", pcaloadings, height = 5, width = 9, bg = "white")# Plot the PCA with first with autoplot to have glance on how it will look like
autoplot(pca, data = df5,
frame = TRUE, frame.type = 'norm',
label = TRUE, label.size = 3, loadings = TRUE,
loadings.colour = 'blue',loadings.label = TRUE,
loadings.label.size = 3)# Plot the contribution of pca component using factoextra function-fviz_screeplot
screeplots <- fviz_screeplot(pca, addlabels = TRUE, title ="")+
labs(x = "Principal component")+ theme_minimal(base_size = 15)
screeplots# Now, the actual PCA plot with biplot and loadings.
drought_biplot <- fviz_pca_biplot(pca, col.ind = "black", geom = c("point"))+
theme_minimal(base_size = 15)+
labs(title = "", x = "PC1 (29.0%)", y = "PC2 (17.7%)")
drought_biplot#ggsave("2022.04.01.drought_biplot_biplot33.png", drought_biplot,height = 6, width = 9, bg = "white")# Next, let take a look at the variable contribution to the PCs
variableplot <- fviz_pca_var(pca, col.var="contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE)+ # Avoid text overlapping
labs(x = "PC1 (29.0%)", y = "PC2 (17.7%)", title = "") +theme_minimal(base_size = 15)
variableplot# Better view of the contributions of variables to PC1.
fviz_contrib(pca, choice = "var", axes = 1, top = 19)# Better view of the contributions of variables to PC2
fviz_contrib(pca, choice = "var", axes = 2, top = 19)# Let's perform k-means clustering on our PCA analysis to identify major clusters.
set.seed(123)
km.res <- kmeans(scale(df5), 3)
table(km.res$cluster)##
## 1 2 3
## 68 183 150
# We can now visualize the PCA plus the k-mean clustering.
clusterplot <- fviz_cluster(km.res, geom = "point",data = df5,
palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07"),
ggtheme = theme_minimal(), main = "") +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed")+
labs(title = "", x = "PC1 (29.1%)", y = "PC2 (18.3%)")
clusterplot# We can now putting everything together to form one figure with multiple panels using plot_grid.
#bottom_row <- plot_grid(clusterplot, variableplot, labels = c('B', 'C'), label_size = 22)
#bottom_row
#completepcaplot <- plot_grid(screeplots, bottom_row, labels = c('A', ''), label_size = 22, ncol = 1)
#completepcaplot
#ggsave("2022.04.01.drought_biplot_PCA.png", completepcaplot,height = 10, width = 12, bg = "white")# So next, we want to visualize the cluster for their differences in plant height,
# RR and other traits using the BLUP value.
# Put the cluster information in a dataframe.
CLuster_data <- data.frame(km.res$cluster) %>% rownames_to_column()
# Merge the cluster with the BLUP data for the landraces
Clustervisual <- merge(CLuster_data, DataOutput.final,
by.x = "rowname",by.y = "Name")
Clustervisual$km.res.cluster <- factor(Clustervisual$km.res.cluster)
# Plant height variation among the clusters at 15 days after drought imposition.
plotPH_D15 <- ggplot(Clustervisual, aes(km.res.cluster, Plant_height_D15))+
geom_boxplot(aes(colour = km.res.cluster), width = 0.4)+
stat_compare_means(aes(km.res.cluster, Plant_height_D15),
data = Clustervisual, method = "anova")+
geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
width = 0.1)+
labs(x = "", y = "Plant height")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plotPH_D15# Variation in number of trifoliate among the clusters at 24 days after last irrigation.
plotTN_D24 <- ggplot(Clustervisual, aes(km.res.cluster, Trifoliate_D24))+
geom_boxplot(aes(colour = km.res.cluster), width = 0.4) +
stat_compare_means(aes(km.res.cluster, Trifoliate_D24),
data = Clustervisual, method = "anova")+
geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
width = 0.1)+
labs(x = "", y = "Number of trifoliate")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plotTN_D24# Variation in leaf senescence score among the clusters at 25 days after last watering.
plotLS_D25 <- ggplot(Clustervisual, aes(km.res.cluster, Senescence_D25))+
geom_boxplot(aes(colour = km.res.cluster), width = 0.4) +
stat_compare_means(aes(km.res.cluster, Senescence_D25),
data = Clustervisual, method = "anova")+
geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
width = 0.1)+
labs(x = "", y = "Leaf senescence score")+
theme_minimal(base_size = 15)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plotLS_D25# Variation in stem greenness score among the clusters at 27 days after drought imposition.
plotSG_D27 <- ggplot(Clustervisual, aes(km.res.cluster, Stem_green_D27))+
geom_boxplot(aes(colour = km.res.cluster), width = 0.4) +
stat_compare_means(aes(km.res.cluster, Stem_green_D27),
data = Clustervisual, method = "anova")+
geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
width = 0.1)+
labs(x = "Cluster", y = "Stem greenness score")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plotSG_D27# Variation in recovery rate among the clusters at 27 days after drought imposition.
plotRRbox <- ggplot(Clustervisual, aes(km.res.cluster, Recovery_RR))+
geom_boxplot(aes(colour = km.res.cluster)) +
stat_compare_means(aes(km.res.cluster,
Recovery_RR),data = Clustervisual, method = "anova")+
geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster))+
theme_minimal(base_size = 15)
plotRRbox# Variation in recovery rate among the clusters at 27 days after drought imposition.
plotRR <- ggplot(Clustervisual, aes(km.res.cluster,
Recovery_RR, fill = km.res.cluster)) +
stat_summary(fun = mean, geom = "bar", colour = "black")+
labs(x = "Cluster", y = "Recovery rate (%) ")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
plotRR# Let's put everything together as a figure with multiple panels.
#completepcaclusterplotblup <- plot_grid(plotPH_D15, plotTN_D24, plotLS_D25,
#plotSG_D27, plotRR, labels = "AUTO", label_size = 18, ncol = 2)
#completepcaclusterplotblup
#ggsave("2022.04.01.completepcaclusterplotblup.png", completepcaclusterplotblup,
#height = 14, width = 12, bg = "white")# Cluster dendrogram and heatmap based on the seedling drought response traits.
set.seed(123)
row_dend = as.dendrogram(hclust(dist(df5)))
row_dend = color_branches(row_dend, k = 5) # `color_branches()` returns a dendrogram object
dendrogramclusters <- cutree(row_dend,k=5)
table(dendrogramclusters)## dendrogramclusters
## 1 2 3 4 5
## 252 131 4 11 3
# Scaled Heatmap
dm <- dimnames(df5)
mat <- t(apply(df5,1,scale)) # z-scale by row
dimnames(mat) <- dm
args <- commandArgs(T)
#png("2022.03.19.Heatmap_Dendrogram_Scaled.png", units="in", width=8, height=10, res=360)
Heatmap(mat, cluster_rows = row_dend,
heatmap_legend_param = list(title = "Scaled"),
row_labels = "nrow", show_row_names = F)#dev.off()# Hierarchical clustering and visualization of clusters for their difference in plant height,
#recovery rate and so on.
# Put the cluster information in a dataframe.
Dendo_data <- data.frame(dendrogramclusters) %>% rownames_to_column()
# Merge the cluster with the BLUP data for the landraces
Dendo_datavisual <- merge(Dendo_data, DataOutput.final, by.x = "rowname",by.y = "Name")
Dendo_datavisual$dendrogramclusters <- factor(Dendo_datavisual$dendrogramclusters)
# Plant height variation among the clusters at 15 days after drought imposition.
DendoplotPH_D15 <- ggplot(Dendo_datavisual,
aes(dendrogramclusters, Plant_height_D15))+
geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) +
stat_compare_means(aes(dendrogramclusters, Plant_height_D15),
data = Clustervisual, method = "anova")+
geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
labs(x = "", y = "Plant height")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
DendoplotPH_D15# Variation in number of trifoliate among the clusters at 24 days after last irrigation.
DendoplotTN_D24 <- ggplot(Dendo_datavisual,
aes(dendrogramclusters, Trifoliate_D24))+
geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) +
stat_compare_means(aes(dendrogramclusters,Trifoliate_D24),
data = Dendo_datavisual, method = "anova")+
geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
labs(x = "", y = "Number of trifoliate")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
DendoplotTN_D24# Variation in leaf senescence score among the clusters at 25 days after last watering.
DendoplotLS_D25 <- ggplot(Dendo_datavisual,
aes(dendrogramclusters, Senescence_D25))+
geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) +
stat_compare_means(aes(dendrogramclusters, Senescence_D25),
data = Dendo_datavisual, method = "anova")+
geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
labs(x = "", y = "Leaf senescence score")+
theme_minimal(base_size = 15)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
DendoplotLS_D25# Variation in stem greenness score among the clusters at 27 days after drought imposition.
DendoplotSG_D27 <- ggplot(Dendo_datavisual,
aes(dendrogramclusters, Stem_green_D27))+
geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) +
stat_compare_means(aes(dendrogramclusters, Stem_green_D27),
data = Dendo_datavisual, method = "anova")+
geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
labs(x = "Cluster", y = "Stem greenness score")+
theme_minimal(base_size = 15)+
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
DendoplotSG_D27# Variation in recovery rate among the clusters at 27 days after drought imposition.
DendoplotRRbox <- ggplot(Dendo_datavisual, aes(dendrogramclusters, Recovery_RR))+
geom_boxplot(aes(colour = dendrogramclusters))+
stat_compare_means(aes(dendrogramclusters, Recovery_RR),
data = Dendo_datavisual, method = "anova")+
geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters))+
theme_minimal(base_size = 15)
DendoplotRRbox# Variation in recovery rate among the clusters at days after re-irrigation.
DendoplotRR <- ggplot(Dendo_datavisual, aes(dendrogramclusters,
Recovery_RR, fill = dendrogramclusters)) +
stat_summary(fun = mean, geom = "bar", colour = "black")+
labs(x = "Cluster", y = "Recovery rate (%) ")+
theme_minimal(base_size = 15) +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
DendoplotRR# Let's put everything together as a figure with multiple panels.
#Dendocompletepcaclusterplotblup <- plot_grid(DendoplotPH_D15, DendoplotTN_D24, DendoplotLS_D25,
#DendoplotSG_D27, DendoplotRR, labels = "AUTO", label_size = 18, ncol = 2)
#Dendocompletepcaclusterplotblup
#ggsave("2022.04.01.Dendocompletepcaclusterplotblup.png", Dendocompletepcaclusterplotblup,
#height = 14, width = 12, bg = "white")# So, let check the relationship between the traits using Pearson's correlation.
merged_cor3 <- as.matrix(df4[-1])
M<-rcorr(merged_cor3)
diag(M$r) = NA
diag(M$P) = NA
#png("MarkdownCorrelation.matrix.drought.cowpea1.png", units="cm", width=20, height=16, res=360)
par(mar=c(1,1,1,1))
corrplot(M$r, type = "upper", p.mat = M$P, insig = "label_sig",
sig.level = c(.001, .01, .05), pch.cex = .9, pch.col = "black",
na.label = "x", method = "color", tl.col = "black", tl.pos = "tp",
tl.cex = 0.7, diag=T, col=brewer.pal(n=10, name="PuOr"), mar = c(1, 1, 1, 1))
#col=brewer.pal(n=10, name="PuOr") #viridis_pal()(10)
corrplot(M$r, add = TRUE, type = "lower", method = "number",
col = "black", diag = FALSE, tl.pos = "n", cl.pos = "n",
number.cex = 0.5, number.digits = 2)#dev.off()