Cowpea seedling drought experiment in the screenhouse

Experimental Design

Seeds of 420 cowpea landraces collected across 19 northern state and the Genetic Resources Center (GRC) of International Institute of Tropical Agriculture (IITA), Ibadan, were used for this study. The landraces were screened for their response to drought stress at seedling stage using wooden box techniques at the department of Botany, Ahmadu Bello University, Zaria.

The two well characterized drought tolerant (Danila) and sensitive (TVu7778) lines were used as checks to monitor the progress of the drought stress and determine the time of re-irrigation. Data were collected on the plant height (PH), number of trifoliate (TN), leaf senescence score (LS) and stem greenness score (SG) over the course of the experiment. The recovery rate (RR) was assessed after re-irrigation. Soil samples were also collected to determine the soil drying progress during the experiment.

Data were collected by the cowpea research team lead by Solihu Kayode Sakariyahu and Sadam Sulaiman Indabo supervised by Dr. Aliyu E. Ramatu. The data generated was curated and analyzed using the data analysis workflow described below by Solihu Kayode Sakariyahu.

Data analysis pipeline

To get started let’s load all the packages we need for this analysis.

library(tidyverse)
library(cowplot)
library(plotrix)
library(ggpubr)
library(agricolae)
library(rstatix)
library(lme4)
library(reshape2)
library(ggfortify)
library(rlang)
library(factoextra)
library(corrplot)
library(Hmisc)
library(RColorBrewer)
library(viridis)
suppressPackageStartupMessages(library("ComplexHeatmap"))
library(dendextend)
library(Hmisc)
library(RColorBrewer)
library(viridis)

Soil Dry Kinetics during seedling drought imposition

So, we will first visualize the soil drying kinetics to understand the drought progress over the period of the experiment.

Let’s load the soil drying data.

cowpea_soil_drying <- read.csv("soil_drying.csv")
head(cowpea_soil_drying)

This table has all the initial and final weight of the soil before and after drying in the oven. Let’s extract the columns we need for our analysis and make it in long format required for further analysis.

cowpea_soil_drying2 <- cowpea_soil_drying[-5:-10] %>% 
  pivot_longer(!c("Sampling_time","Day", "Rep","Box"),
               names_to = "Sampling_Number", values_to = "value")

# Lets check the data head and names of columns.
head(cowpea_soil_drying2)

names(cowpea_soil_drying2)

## [1] "Sampling_time"   "Day"             "Rep"             "Box"            
## [5] "Sampling_Number" "value"

We are going to check if the data make sense by plotting the rep-wise over time.

# Rep 1
cowpea_soil_drying3 <- cowpea_soil_drying2 %>% filter(Rep == 1)

head(cowpea_soil_drying3)

# Rep 2
cowpea_soil_drying3.2 <- cowpea_soil_drying2 %>% filter(Rep == 2)

head(cowpea_soil_drying3.2)

Now, let’s take a look at the plots.

# Rep 1

ggplot(cowpea_soil_drying3, 
       aes(factor(Day, levels = c("D1", "D7", "D13", "D16")), 
           value, group = Sampling_Number))+
  geom_line(aes(colour = Sampling_Number))+
  geom_point(aes(colour = Sampling_Number))+
  facet_wrap(~Box)

#Rep 2

ggplot(cowpea_soil_drying3.2, 
       aes(factor(Day, levels = c("D1", "D7", "D13", "D16")), 
           value, group = Sampling_Number))+
  geom_line(aes(colour = Sampling_Number))+
  geom_point(aes(colour = Sampling_Number))+
  facet_wrap(~Box)

Since everything looks good, let’s put reps together and summary the data.

cowpea_soil_drying4 <- cowpea_soil_drying3 %>%
  group_by(Day, Box) %>%
  summarise(weight = mean(value),
            se = std.error(value),
            SD = sd(value))

## `summarise()` has grouped output by 'Day'. You can override using the `.groups`
## argument.

cowpea_soil_drying4$Day <- factor(cowpea_soil_drying4$Day,
                                  levels = c("D1", "D7", "D13", "D16"))

head(cowpea_soil_drying4)

Here is the final figure of the soil drying progress for all the box used.

# Plot the final figure for the soil drying.
plot1 <- ggplot(cowpea_soil_drying4, aes(Day, weight, group = Box))+
  geom_line(aes(color = factor(Box)))+
  geom_point(aes(color = factor(Box)))+
  geom_errorbar(aes(ymin= weight-se, ymax=weight+se, color = factor(Box)),
                width = 0.05)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 15),breaks = seq(0, 15, by=2.5))+
  theme_minimal(base_size = 15)+
  labs(x = "Days after stop of irrigation", y = "Amount of water available (g)",colour = "Box")+
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plot1

#ggsave(plot1, filename = "2022.04.04.Soil_drying_kinetics1.png", bg = "white")

Analysis of drought tolerance traits

Data wrangling for the agronomic and stay green data collected for the drought experiment.

library(lme4)
library(agricolae)
library(reshape2)
library(tidyverse)
library(ggpubr)

# Load the data

df.all_trait <- read.csv("Final_2022.03.19.cleaned.data.cowpea.seedlings.KS.csv")

head(df.all_trait)

names(df.all_trait)

##  [1] "Accession"   "Box"         "Check"       "Plot_No"     "Rep"        
##  [6] "No_of_Plant" "D1_TN1"      "D1_TN2"      "D1_TN3"      "D1_TN4"     
## [11] "D11_TN1"     "D11_TN2"     "D11_TN3"     "D11_TN4"     "D12_PH1"    
## [16] "D12_PH2"     "D12_PH3"     "D12_PH4"     "D13_LS1"     "D13_LS2"    
## [21] "D13_LS3"     "D13_LS4"     "D14_TN1"     "D14_TN2"     "D14_TN3"    
## [26] "D14_TN4"     "D15_PH1"     "D15_PH2"     "D15_PH3"     "D15_PH4"    
## [31] "D16_LS1"     "D16_LS2"     "D16_LS3"     "D16_LS4"     "D17_TN1"    
## [36] "D17_TN2"     "D17_TN3"     "D17_TN4"     "D22_LS1"     "D22_LS2"    
## [41] "D22_LS3"     "D22_LS4"     "D2_PH1"      "D2_PH2"      "D2_PH3"     
## [46] "D2_PH4"      "D21_SG1"     "D21_SG2"     "D21_SG3"     "D21_SG4"    
## [51] "D19_LS1"     "D19_LS2"     "D19_LS3"     "D19_LS4"     "D24_SG1"    
## [56] "D24_SG2"     "D24_SG3"     "D24_SG4"     "D24_TN1"     "D24_TN2"    
## [61] "D24_TN3"     "D24_TN4"     "D25_LS1"     "D25_LS2"     "D25_LS3"    
## [66] "D25_LS4"     "D27_SG1"     "D27_SG2"     "D27_SG3"     "D27_SG4"    
## [71] "D6_TN1"      "D6_TN2"      "D6_TN3"      "D6_TN4"      "D7_PH1"     
## [76] "D7_PH2"      "D7_PH3"      "D7_PH4"      "RR_RR"

length(unique(df.all_trait$Accession))

## [1] 422

#str(df.all_trait)
#summary(df.all_trait)

Data wrangling and curation

# pivot longer to rename the variables and arrage the data neatly.

df.all_trait1 <- df.all_trait %>% 
  pivot_longer(!c("Plot_No","Accession","Check",
                  "Rep","Box","No_of_Plant"),
               names_to = "trait", values_to = "value" ) %>% 
  filter(value >= 0)

length(unique(df.all_trait1$Accession))

## [1] 420

df.all_trait1$Day <- sapply(strsplit(as.character(df.all_trait1$trait), "_"), "[[", 1 ) 
df.all_trait1$Individual <- sapply( strsplit(as.character(df.all_trait1$trait), "_"), "[[", 2 )

unique(df.all_trait1$Individual)

##  [1] "TN1" "TN3" "PH1" "PH3" "RR"  "TN2" "PH2" "TN4" "LS2" "PH4" "LS1" "LS3"
## [13] "LS4" "SG3" "SG4" "SG1" "SG2"

df.all_trait1$variable[df.all_trait1$Individual==
                         "PH1"|df.all_trait1$Individual==
                         "PH2"|df.all_trait1$Individual==
                         "PH3"|df.all_trait1$Individual
                       =="PH4"] <- "Plant_height"

## Warning: Unknown or uninitialised column: `variable`.

df.all_trait1$variable[df.all_trait1$Individual==
                         "TN1"|df.all_trait1$Individual==
                         "TN2"|df.all_trait1$Individual==
                         "TN3"|df.all_trait1$Individual
                       =="TN4"] <- "Trifoliate"

df.all_trait1$variable[df.all_trait1$Individual==
                         "LS1"|df.all_trait1$Individual==
                         "LS2"|df.all_trait1$Individual==
                         "LS3"|df.all_trait1$Individual
                       =="LS4"] <- "Senescence"

df.all_trait1$variable[df.all_trait1$Individual==
                         "SG1"|df.all_trait1$Individual==
                         "SG2"|df.all_trait1$Individual==
                         "SG3"|df.all_trait1$Individual==
                         "SG4"] <- "Stem_green"

df.all_trait1$variable[df.all_trait1$Individual==
                         "RR"] <- "Recovery"


head(df.all_trait1)

# pivot longer to average the traits per plot and remove NAs.

df.all_trait2 <- df.all_trait1 %>% 
  group_by(Plot_No,Accession,Check,Rep,Box, No_of_Plant, Day, variable) %>%
  summarise(value = mean(value, na.rm =TRUE))

## `summarise()` has grouped output by 'Plot_No', 'Accession', 'Check', 'Rep',
## 'Box', 'No_of_Plant', 'Day'. You can override using the `.groups` argument.

df.all_trait2$heading <- paste(df.all_trait2$variable, df.all_trait2$Day, sep = "_")

head(df.all_trait2)

df.all_trait3 <- df.all_trait2 %>% 
  pivot_wider(c("Plot_No","Accession","Check","Rep",
                "Box", "No_of_Plant"), names_from = heading, values_from = value)

head(df.all_trait3)

Linear mixed model

The BLUP script was adapted from https://github.com/mighster/BLUPs_Heritability/blob/master/BLUP_Tutorial.R

# setup the dataframe for BLUP analysis for the data collected.

head(df.all_trait3)

colnames(df.all_trait3)

##  [1] "Plot_No"          "Accession"        "Check"            "Rep"             
##  [5] "Box"              "No_of_Plant"      "Trifoliate_D1"    "Plant_height_D2" 
##  [9] "Trifoliate_D6"    "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"  
## [13] "Trifoliate_D11"   "Plant_height_D12" "Senescence_D13"   "Plant_height_D15"
## [17] "Trifoliate_D17"   "Senescence_D22"   "Senescence_D16"   "Senescence_D19"  
## [21] "Stem_green_D21"   "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"  
## [25] "Stem_green_D27"

# Make columns as independent variables as factors

df.all_trait3$Plot_No=as.factor(df.all_trait3$Plot_No)

df.all_trait3$Accession=as.factor(df.all_trait3$Accession)

df.all_trait3$Check = as.factor(df.all_trait3$Check)

df.all_trait3$Box=as.factor(df.all_trait3$Box)

df.all_trait3$Rep=as.factor(df.all_trait3$Rep)

length(unique(df.all_trait3$Accession))

## [1] 420

# Create empty data frame for BLUP output

DataOutput <- data.frame(matrix(vector(),420,1, dimnames=list(c(), c("Accession"))))

# Fill empty dataframe with 1-420 so that the cbind will work later on.

DataOutput$Accession <- unique(df.all_trait3[,2]) #fill in Entry numbers

DataOutput$Row <- c(1:420)

DataOutput$Name <- unique(sort(df.all_trait3$Accession))


# This empty dataframe is for variance components

DataVarComp <- data.frame()

DataVarCompOutput <- data.frame()

HeritabilityData <- data.frame()

# This empty dataframe is for dropped variance components

drops <- c("var1","var2","sdcor")

colnames(df.all_trait3)

##  [1] "Plot_No"          "Accession"        "Check"            "Rep"             
##  [5] "Box"              "No_of_Plant"      "Trifoliate_D1"    "Plant_height_D2" 
##  [9] "Trifoliate_D6"    "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"  
## [13] "Trifoliate_D11"   "Plant_height_D12" "Senescence_D13"   "Plant_height_D15"
## [17] "Trifoliate_D17"   "Senescence_D22"   "Senescence_D16"   "Senescence_D19"  
## [21] "Stem_green_D21"   "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"  
## [25] "Stem_green_D27"

str(df.all_trait3[,7:ncol(df.all_trait3)])

## tibble [861 × 19] (S3: tbl_df/tbl/data.frame)
##  $ Trifoliate_D1   : num [1:861] 0 0 0 0 0 0 0 0 1.75 0 ...
##  $ Plant_height_D2 : num [1:861] 1.5 1.05 1.65 3.5 3.13 ...
##  $ Trifoliate_D6   : num [1:861] 0 NA 0.5 0.5 1 1 1 1 2 0 ...
##  $ Plant_height_D7 : num [1:861] 0.5 NA 1.75 4.2 5 ...
##  $ Recovery_RR     : num [1:861] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Trifoliate_D14  : num [1:861] NA 0 NA 0.5 1 ...
##  $ Trifoliate_D11  : num [1:861] NA NA 1 0.5 1 ...
##  $ Plant_height_D12: num [1:861] NA NA 1.39 4.12 4.5 ...
##  $ Senescence_D13  : num [1:861] NA NA 1 2.5 2.5 ...
##  $ Plant_height_D15: num [1:861] NA NA 1.3 4.17 3.75 ...
##  $ Trifoliate_D17  : num [1:861] NA NA 0 0.25 1 ...
##  $ Senescence_D22  : num [1:861] NA NA 5 4.5 3 ...
##  $ Senescence_D16  : num [1:861] NA NA NA 3.75 3 5 4 4 3.25 NA ...
##  $ Senescence_D19  : num [1:861] NA NA NA 4.67 2.5 ...
##  $ Stem_green_D21  : num [1:861] NA NA NA 1 1 1 0 1 0.25 NA ...
##  $ Stem_green_D24  : num [1:861] NA NA NA 1 1 0 0 1 0 NA ...
##  $ Trifoliate_D24  : num [1:861] NA NA NA 0.333 1 ...
##  $ Senescence_D25  : num [1:861] NA NA NA 5 5 5 5 4 NA NA ...
##  $ Stem_green_D27  : num [1:861] NA NA NA 0 0 0 0 0 0 NA ...

# Take only the columns with numerical data.
colnum=c(7:ncol(df.all_trait3))

Our model for the mixed model with accessions, rep, box and box by accessions interaction treated as random-effects distinguished by vertical bars or pipes (|)

# Forward loop for the BLUPs

for (i in 1:19){ # This second loop runs through each TRAIT, one at a time.
  
  #set the current [i] column as x
  x=colnum[i] 
  
  # Sets the current column header as the trait name.
  trait=colnames(df.all_trait3)[x]
  
  # Making a copy of the dataframe that we can use for our analysis
  df1 <- df.all_trait3
  
  # Renaming the trait variable as "y" for the model analysis below.
  colnames(df1)[x]="y"
  
  # Here is our random effects mixed model
  model <- lmer(y~  (1|Accession) +(1|Rep) + (1|Box) + (1|Box:Accession), df1)
  
  summary(model)
  
  # This calculates `estimated variances` between random-effects terms in a mixed-effects model.
  varComp <- as.data.frame(VarCorr(model,comp="vcov"))
  
  # `coef` extracts model coefficients from lmer objects returned by modeling functions.
  blup = coef(model)$Accession
  
  # rename the BLUP column by the trait in our loop
  colnames(blup) <- trait
  
  # Ammends our dataframe with the new BLUP column for the new trait.
  blupdataframe <- data.frame(blup) %>% rownames_to_column()
  
  DataOutputnew <- merge(DataOutput[2:3],blupdataframe, by.x = "Name", by.y = "rowname", all = T)
  
  DataOutput <- cbind(DataOutput,DataOutputnew) 
 
  # Modify variance component by deleting columns in variance component dataframe that we don't need.
  varComp<-varComp[ , !(names(varComp) %in% drops)]
  
  # Set the trait name from the loop
  varComp$Trait<-trait
  
  # Add columns to existing dataframe
  DataVarComp <- rbind(DataVarComp,varComp) 
}

## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')

# Re-arrange our variance components dataframe so that we can run the heritability script.

DataVarCompOutput <- reshape(DataVarComp, idvar = "Trait",
                             timevar = "grp", direction = "wide")

# The estimate of broad sense heritability based on the formula described by Falconer & Mackay, 1996.

nrep = 2

HeritabilityData <- ((DataVarCompOutput[,3])) /
  (((DataVarCompOutput[,3])) + (((DataVarCompOutput[,6])) / (nrep)))*100

# Combine  the heritability to the variance component data frame.

DataVarCompOutput <- cbind(DataVarCompOutput,HeritabilityData)

DataVarCompOutput[1:10,]

# Save output to csv

# Output that variance component

#write.csv(DataVarCompOutput,"2022.04.01.VarianceComponents_cowpea.csv", row.names = F)

names(DataOutput)

##  [1] "Accession"        "Row"              "Name"             "Name"            
##  [5] "Row"              "Trifoliate_D1"    "Name"             "Row"             
##  [9] "Plant_height_D2"  "Name"             "Row"              "Trifoliate_D6"   
## [13] "Name"             "Row"              "Plant_height_D7"  "Name"            
## [17] "Row"              "Recovery_RR"      "Name"             "Row"             
## [21] "Trifoliate_D14"   "Name"             "Row"              "Trifoliate_D11"  
## [25] "Name"             "Row"              "Plant_height_D12" "Name"            
## [29] "Row"              "Senescence_D13"   "Name"             "Row"             
## [33] "Plant_height_D15" "Name"             "Row"              "Trifoliate_D17"  
## [37] "Name"             "Row"              "Senescence_D22"   "Name"            
## [41] "Row"              "Senescence_D16"   "Name"             "Row"             
## [45] "Senescence_D19"   "Name"             "Row"              "Stem_green_D21"  
## [49] "Name"             "Row"              "Stem_green_D24"   "Name"            
## [53] "Row"              "Trifoliate_D24"   "Name"             "Row"             
## [57] "Senescence_D25"   "Name"             "Row"              "Stem_green_D27"

# Let's extract the columns we need for summary.

DataOutput.final <- DataOutput[c(3,6,9,12,15,18,21,24,27,30,33,36,39,
                                    42,45,48,51,54,57,60)]
names(DataOutput.final)

##  [1] "Name"             "Trifoliate_D1"    "Plant_height_D2"  "Trifoliate_D6"   
##  [5] "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"   "Trifoliate_D11"  
##  [9] "Plant_height_D12" "Senescence_D13"   "Plant_height_D15" "Trifoliate_D17"  
## [13] "Senescence_D22"   "Senescence_D16"   "Senescence_D19"   "Stem_green_D21"  
## [17] "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"   "Stem_green_D27"

head(DataOutput.final)

# Output the blup for each landrace

#write.csv(DataOutput.final,"2022.04.01.Blupforlandraces.csv", row.names = F)

###### Summary of BLUP output for landraces

meandata <- DataOutput.final[-1]

dfBLUPsummary1 <- meandata%>% 
  dplyr::summarise_each(funs(min(., na.rm = TRUE)))%>% 
  rownames_to_column() %>% 
  mutate(rowname = recode(rowname, "1" = 'Min'))

## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## Please use `across()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

dfBLUPsummary2 <- meandata %>% 
  dplyr::summarise_each(funs(max(., na.rm = TRUE)))%>% 
  rownames_to_column() %>% 
  mutate(rowname = recode(rowname, "1" = 'Max'))

dfBLUPsummary3 <- meandata %>% 
  dplyr::summarise_each(funs(sd(., na.rm = TRUE)))%>% 
  rownames_to_column() %>% 
  mutate(rowname = recode(rowname, "1" = 'SD'))

dfBLUPsummary4 <- meandata %>% 
  dplyr::summarise_each(funs(mean(., na.rm = TRUE))) %>% 
  rownames_to_column() %>% 
  mutate(rowname = recode(rowname, "1" = 'Mean'))

dfBLUPsummary5 <- meandata %>% 
  dplyr::summarise_each(funs(median(., na.rm = TRUE)))%>% 
  rownames_to_column() %>% 
  mutate(rowname = recode(rowname, "1" = 'Median'))


DataOutputfinalsummary <- rbind(dfBLUPsummary1, dfBLUPsummary2,
                                dfBLUPsummary3, dfBLUPsummary4, dfBLUPsummary5)

DataOutputfinalsummary1 <- setNames(data.frame(t(DataOutputfinalsummary[,-1])),
                                    DataOutputfinalsummary[,1])

DataOutputfinalsummary1

#write.csv(DataOutputfinalsummary1,"2022.04.01.BLUPsummary_cowpea.csv")

Visualizing the distribution and variability of the traits using the BLUP values

# Plant Height (PH)

names(DataOutput.final)

##  [1] "Name"             "Trifoliate_D1"    "Plant_height_D2"  "Trifoliate_D6"   
##  [5] "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"   "Trifoliate_D11"  
##  [9] "Plant_height_D12" "Senescence_D13"   "Plant_height_D15" "Trifoliate_D17"  
## [13] "Senescence_D22"   "Senescence_D16"   "Senescence_D19"   "Stem_green_D21"  
## [17] "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"   "Stem_green_D27"

DataOutput1 <- DataOutput.final[, c(1,3,5,9,11)] %>% na.omit() %>%
  pivot_longer(!c("Name"), 
               names_to = "Height", values_to = "value")

unique(DataOutput1$Height)

## [1] "Plant_height_D2"  "Plant_height_D7"  "Plant_height_D12" "Plant_height_D15"

DataOutput1$Height <- recode(DataOutput1$Height, Plant_height_D2 = "D2", 
                             Plant_height_D7 = "D7", Plant_height_D12 = "D12",
                             Plant_height_D15 = "D15") %>% 
  factor(levels = c("D2", "D7", "D12", "D15"))

df.summary <- DataOutput1 %>% group_by(Height) %>%
  summarise(Max = max(value, na.rm = TRUE))

df.summary

lsd=LSD.test(aov(value~Height,data=DataOutput1), "Height", group=T)

sig.letters <- lsd$groups[order(lsd$groups$value,decreasing = F), ]

plantheightplot <- ggplot(DataOutput1, aes(Height, value))+
  geom_violin(aes(fill = Height))+
  geom_boxplot( width = 0.1)+
  geom_text(data=df.summary,size = 4, 
            aes(x = Height, y = 0.9 + Max,label = sig.letters$groups),vjust=0)+
  stat_compare_means(aes(Height,  value),
                     data = DataOutput1, method = "anova", label.y = 19.5)+
  scale_y_continuous(expand = c(0,0),
                     limits = c(0, 20),breaks = seq(0, 20, by=2))+
  theme_minimal(base_size = 15)+
  labs(x = "Days after last watering", y = "Plant height (cm)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plantheightplot

#ggsave(plantheightplot, filename = "2022.04.01.plantheightplot.png", bg ="white")

# Density plot for Trifoliate Number (TN)

names(DataOutput.final)

##  [1] "Name"             "Trifoliate_D1"    "Plant_height_D2"  "Trifoliate_D6"   
##  [5] "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"   "Trifoliate_D11"  
##  [9] "Plant_height_D12" "Senescence_D13"   "Plant_height_D15" "Trifoliate_D17"  
## [13] "Senescence_D22"   "Senescence_D16"   "Senescence_D19"   "Stem_green_D21"  
## [17] "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"   "Stem_green_D27"

DataOutput2 <- DataOutput.final[, c(1:2,4,7,8,12,18)] %>% na.omit() %>%
  pivot_longer(!c("Name"),
               names_to = "Trifoliate", values_to = "value")

DataOutput2$Trifoliate <- recode(DataOutput2$Trifoliate, Trifoliate_D1 = "D1",
                                 Trifoliate_D6 = "D6", Trifoliate_D11 = "D11",
                                 Trifoliate_D14 = "D14",
                                 Trifoliate_D17 = "D17", Trifoliate_D24 = "D24") %>%
  factor(levels = c("D1", "D6", "D11", "D14", "D17", "D24"))

p1 <- ggplot(DataOutput2, aes(value))+
  geom_density(aes(fill = Trifoliate), alpha = 0.8)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 45),breaks = seq(0, 45, by=5))+
  theme_minimal(base_size = 15)+
  labs(x = "Average number of trifoliate", fill = "")+
  theme(legend.position = "right",axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

p1

# Leaf senescence score (LS) distribution

names(DataOutput.final)

##  [1] "Name"             "Trifoliate_D1"    "Plant_height_D2"  "Trifoliate_D6"   
##  [5] "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"   "Trifoliate_D11"  
##  [9] "Plant_height_D12" "Senescence_D13"   "Plant_height_D15" "Trifoliate_D17"  
## [13] "Senescence_D22"   "Senescence_D16"   "Senescence_D19"   "Stem_green_D21"  
## [17] "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"   "Stem_green_D27"

DataOutput3 <- DataOutput.final[, c(1,10,13:15,19)] %>% na.omit() %>%
  pivot_longer(!c("Name"),
               names_to = "Leaf_score", values_to = "value")

unique(DataOutput3$Leaf_score)

## [1] "Senescence_D13" "Senescence_D22" "Senescence_D16" "Senescence_D19"
## [5] "Senescence_D25"

DataOutput3$Leaf_score <- recode(DataOutput3$Leaf_score, Senescence_D13 = "D13",
                                 Senescence_D16 = "D16", Senescence_D19 = "D19", 
                                 Senescence_D22 = "D22", Senescence_D25 = "D25") %>% 
  factor(levels = c("D13", "D16", "D19", "D22", "D25"))

p2 <- ggplot(DataOutput3, aes(value))+
  geom_density(aes(fill = Leaf_score), alpha = 0.8)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 8),breaks = seq(0, 8, by=1))+
  theme_minimal(base_size = 15)+
  labs(x = "Leaf senescence score ", fill = "")+
  theme(legend.position = "right",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

p2

# Stem greenness (SG) distrbution

names(DataOutput.final)

##  [1] "Name"             "Trifoliate_D1"    "Plant_height_D2"  "Trifoliate_D6"   
##  [5] "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"   "Trifoliate_D11"  
##  [9] "Plant_height_D12" "Senescence_D13"   "Plant_height_D15" "Trifoliate_D17"  
## [13] "Senescence_D22"   "Senescence_D16"   "Senescence_D19"   "Stem_green_D21"  
## [17] "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"   "Stem_green_D27"

DataOutput4 <- DataOutput.final[, c(1,16,17,20)] %>% na.omit() %>%
  pivot_longer(!c("Name"),
               names_to = "stem", values_to = "value")

DataOutput4$stem <- recode(DataOutput4$stem,Stem_green_D21 = "D21", 
                           Stem_green_D24 = "D24", Stem_green_D27 = "D27") %>%
  factor(levels = c("D21","D24","D27"))

p3<- ggplot(DataOutput4, aes(value))+
  geom_density(aes(fill = stem), alpha = 0.8)+
  scale_y_continuous(expand = c(0,0),
                     limits = c(0, 25),breaks = seq(0, 25, by=2.5))+
  theme_minimal(base_size = 15)+
  labs(x = "Stem greenness score ", fill = "")+
  theme(legend.position = "right",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

p3

# Recovery rate (RR)

DataOutput5 <- DataOutput.final[, c(1,6)] %>% na.omit()

p4 <- ggplot(DataOutput5, aes(Recovery_RR))+
  geom_histogram(fill="#619CFF", colour="black", bins = 5)+ 
  scale_y_continuous(expand = c(0,0), limits = c(0, 400),breaks = seq(0, 400, by=25))+
  labs(x = "Recovery rate (%)")+
  theme_minimal(base_size = 14) + theme(legend.position = "none")

p4

#ggsave(p4, filename = "2022.04.01.RRPlot.png", bg = "white")

# Putting all the plots together

#FInalplot <- plot_grid(p1, p2, p3, labels = "AUTO", ncol = 1)

#FInalplot

#ggsave(FInalplot, filename = "2022.04.01.StemTriLeaf2.png", bg = "white",
#width = 8.42 , height = 10.82)

General linear model (Analysis of variance)

# General linear model (analysis of variance) for the trait evaluated.

write.csv(df.all_trait3, "2022.04.01.df.all_trait3.csv", row.names = F)

df_all_trait3 <- read.csv("2022.04.01.df.all_trait3.csv")

head(df_all_trait3)

AVz <- rep(NA, ncol(df_all_trait3))

sink("2022.04.01.ANOVA_table_for_cowpea_seedling_droughtnew.txt")

for (k in 7:ncol(df_all_trait3)){
  
  column <- names(df_all_trait3[k])
  
  AVz <- summary(aov(df_all_trait3[,k] ~ Rep + Check + Box*Accession, df_all_trait3))
  
  model <- aov(df_all_trait3[,k] ~ Rep + Check + Box*Accession, df_all_trait3)
  
  min1 <- min(df_all_trait3[,k], na.rm = TRUE)
  
  max1 <- max(df_all_trait3[,k], na.rm = TRUE)
}

sink()

AVz

##                Df Sum Sq Mean Sq F value  Pr(>F)   
## Rep             1   0.24  0.2351   2.498 0.11903   
## Check           1   0.34  0.3413   3.625 0.06150 . 
## Box             1   1.10  1.0997  11.681 0.00111 **
## Accession     405  45.56  0.1125   1.195 0.19449   
## Box:Accession 260  27.51  0.1058   1.124 0.29494   
## Residuals      63   5.93  0.0941                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 129 observations deleted due to missingness

Multivariate analysis

# Let's check our dataset for the multivariate analysis

names(df_all_trait3)

##  [1] "Plot_No"          "Accession"        "Check"            "Rep"             
##  [5] "Box"              "No_of_Plant"      "Trifoliate_D1"    "Plant_height_D2" 
##  [9] "Trifoliate_D6"    "Plant_height_D7"  "Recovery_RR"      "Trifoliate_D14"  
## [13] "Trifoliate_D11"   "Plant_height_D12" "Senescence_D13"   "Plant_height_D15"
## [17] "Trifoliate_D17"   "Senescence_D22"   "Senescence_D16"   "Senescence_D19"  
## [21] "Stem_green_D21"   "Stem_green_D24"   "Trifoliate_D24"   "Senescence_D25"  
## [25] "Stem_green_D27"

df3 <- df_all_trait3[, c(2,7:25)] %>% 
  pivot_longer(!c(Accession),names_to = "trait", values_to = "value")

df3$trait <- recode(df3$trait,
                    Plant_height_D2 = "PH_D2", Plant_height_D7 = "PH_D7",
                    Plant_height_D12 = "PH_D12",Plant_height_D15 = "PH_D15",
                    Trifoliate_D1 = "TN_D1",Trifoliate_D6 = "TN_D6",
                    Trifoliate_D11 = "TN_D11",Trifoliate_D14 = "TN_D14",
                    Trifoliate_D17 = "TN_D17",Trifoliate_D24 = "TN_D24",
                    Senescence_D13 = "LS_D13",Senescence_D16 = "LS_D16",
                    Senescence_D19 = "LS_D19",Senescence_D22 = "LS_D22",
                    Senescence_D25 = "LS_D25",Stem_green_D21 = "SG_D21",
                    Stem_green_D24 = "SG_D24", Stem_green_D27 = "SG_D27",
                    Recovery_RR = "RR")

# Mean summary of the data per accessions

df4 <- df3 %>%
  group_by(Accession, trait) %>% 
  dplyr::summarise_each(funs(mean(., na.rm = T))) %>% 
  pivot_wider(Accession, names_from = trait, values_from = value)%>% 
  na.omit()

Principal Component Analysis (PCA) and K-mean clustering

df5 <- column_to_rownames(df4, var = "Accession")

pca <- df5 %>% 
  scale()%>%## scale the variables
  prcomp()## print out a summary

summary(pca)

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3468 1.8330 1.34215 1.11038 1.01315 0.92748 0.83969
## Proportion of Variance 0.2899 0.1768 0.09481 0.06489 0.05402 0.04528 0.03711
## Cumulative Proportion  0.2899 0.4667 0.56151 0.62641 0.68043 0.72571 0.76281
##                            PC8     PC9    PC10    PC11    PC12    PC13   PC14
## Standard deviation     0.81823 0.77617 0.71099 0.69535 0.67590 0.63351 0.5977
## Proportion of Variance 0.03524 0.03171 0.02661 0.02545 0.02404 0.02112 0.0188
## Cumulative Proportion  0.79805 0.82976 0.85636 0.88181 0.90586 0.92698 0.9458
##                           PC15    PC16    PC17    PC18    PC19
## Standard deviation     0.58364 0.51256 0.46322 0.33691 0.31411
## Proportion of Variance 0.01793 0.01383 0.01129 0.00597 0.00519
## Cumulative Proportion  0.96371 0.97754 0.98883 0.99481 1.00000

#pca$rotation

# Plot the loadings to see the major trait contribution to PC1 and PC2.

pcaloadings <- pca$rotation %>%
  as.data.frame() %>%
  mutate(variables = rownames(.)) %>%
  gather(PC,loading,PC1:PC2) %>%
  ggplot(aes(loading, variables, fill = loading > 0)) +
  geom_col() + geom_text(aes(label= (round(loading, 2))),color="black", size=2)+
  facet_wrap(~PC, scales = "free_y") +
  labs(x = "Value of loading",y = NULL, fill = "Positive?")+
  theme_minimal(base_size = 13)+ scale_fill_brewer(palette = "Set1")

pcaloadings

#ggsave("2022.04.01.droughtpcaloadings.png", pcaloadings, height = 5, width = 9, bg = "white")

# Plot the PCA with first with autoplot to have glance on how it will look like 

autoplot(pca, data = df5,
         frame = TRUE, frame.type = 'norm',
         label = TRUE, label.size = 3, loadings = TRUE,
         loadings.colour = 'blue',loadings.label = TRUE,
         loadings.label.size = 3)

# Plot the contribution of pca component using factoextra function-fviz_screeplot

screeplots <- fviz_screeplot(pca, addlabels = TRUE, title ="")+
  labs(x = "Principal component")+ theme_minimal(base_size = 15)

screeplots

# Now, the actual PCA plot with biplot and loadings.

drought_biplot <- fviz_pca_biplot(pca, col.ind = "black", geom = c("point"))+
  theme_minimal(base_size = 15)+
  labs(title = "", x = "PC1 (29.0%)", y = "PC2 (17.7%)")

drought_biplot

#ggsave("2022.04.01.drought_biplot_biplot33.png", drought_biplot,height = 6, width = 9, bg = "white")

# Next, let take a look at the variable contribution to the PCs

variableplot <- fviz_pca_var(pca, col.var="contrib",
                             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                             repel = TRUE)+ # Avoid text overlapping
  labs(x = "PC1 (29.0%)", y = "PC2 (17.7%)", title = "") +theme_minimal(base_size = 15)

variableplot

# Better view of the contributions of variables to PC1.

fviz_contrib(pca, choice = "var", axes = 1, top = 19)

# Better view of the contributions of variables to PC2

fviz_contrib(pca, choice = "var", axes = 2, top = 19)

# Let's perform k-means clustering on our PCA analysis to identify major clusters.

set.seed(123)

km.res <- kmeans(scale(df5), 3)

table(km.res$cluster)

## 
##   1   2   3 
##  68 183 150

# We can now visualize the PCA plus the k-mean clustering.

clusterplot <- fviz_cluster(km.res, geom = "point",data = df5,
             palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07"),
             ggtheme = theme_minimal(), main = "") +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_vline(xintercept = 0, linetype = "dashed")+
  labs(title = "", x = "PC1 (29.1%)", y = "PC2 (18.3%)")

clusterplot

# We can now putting everything together to form one figure with multiple panels using plot_grid.

#bottom_row <- plot_grid(clusterplot, variableplot, labels = c('B', 'C'), label_size = 22)

#bottom_row

#completepcaplot <- plot_grid(screeplots, bottom_row, labels = c('A', ''), label_size = 22, ncol = 1)

#completepcaplot

#ggsave("2022.04.01.drought_biplot_PCA.png", completepcaplot,height = 10, width = 12, bg = "white")

# So next, we want to visualize the cluster for their differences in plant height,
# RR and other traits using the BLUP value.

# Put the cluster information in a dataframe.

CLuster_data <- data.frame(km.res$cluster) %>% rownames_to_column()

# Merge the cluster with the BLUP data for the landraces

Clustervisual <- merge(CLuster_data, DataOutput.final,
                       by.x = "rowname",by.y = "Name")

Clustervisual$km.res.cluster <- factor(Clustervisual$km.res.cluster)

# Plant height variation among the clusters at 15 days after drought imposition.

plotPH_D15 <- ggplot(Clustervisual, aes(km.res.cluster, Plant_height_D15))+
  geom_boxplot(aes(colour = km.res.cluster), width = 0.4)+ 
  stat_compare_means(aes(km.res.cluster, Plant_height_D15),
                     data = Clustervisual, method = "anova")+
  geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
              width = 0.1)+
  labs(x = "", y = "Plant height")+
  theme_minimal(base_size = 15) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plotPH_D15

# Variation in number of trifoliate  among the clusters at 24 days after last irrigation.

plotTN_D24 <- ggplot(Clustervisual, aes(km.res.cluster, Trifoliate_D24))+
  geom_boxplot(aes(colour = km.res.cluster), width = 0.4) + 
  stat_compare_means(aes(km.res.cluster,  Trifoliate_D24),
                     data = Clustervisual, method = "anova")+
  geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
              width = 0.1)+
  labs(x = "", y = "Number of trifoliate")+
  theme_minimal(base_size = 15) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plotTN_D24

# Variation in leaf senescence score among the clusters at 25 days after last watering.

plotLS_D25 <- ggplot(Clustervisual, aes(km.res.cluster, Senescence_D25))+
  geom_boxplot(aes(colour = km.res.cluster), width = 0.4) + 
  stat_compare_means(aes(km.res.cluster,  Senescence_D25),
                     data = Clustervisual, method = "anova")+
  geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
              width = 0.1)+
  labs(x = "", y = "Leaf senescence score")+
  theme_minimal(base_size = 15)+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plotLS_D25

# Variation in stem greenness score among the clusters at 27 days after drought imposition.

plotSG_D27 <- ggplot(Clustervisual, aes(km.res.cluster, Stem_green_D27))+
  geom_boxplot(aes(colour = km.res.cluster), width = 0.4) + 
  stat_compare_means(aes(km.res.cluster,  Stem_green_D27),
                     data = Clustervisual, method = "anova")+
  geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster),
              width = 0.1)+
  labs(x = "Cluster", y = "Stem greenness score")+
  theme_minimal(base_size = 15) + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plotSG_D27

# Variation in recovery rate among the clusters at 27 days after drought imposition.

plotRRbox <- ggplot(Clustervisual, aes(km.res.cluster, Recovery_RR))+
  geom_boxplot(aes(colour = km.res.cluster)) + 
  stat_compare_means(aes(km.res.cluster,
                         Recovery_RR),data = Clustervisual, method = "anova")+
  geom_jitter(aes(colour = km.res.cluster, shape = km.res.cluster))+
  theme_minimal(base_size = 15)

plotRRbox

# Variation in recovery rate among the clusters at 27 days after drought imposition.

plotRR <- ggplot(Clustervisual, aes(km.res.cluster,
                                    Recovery_RR, fill = km.res.cluster)) +
  stat_summary(fun = mean, geom = "bar", colour = "black")+
  labs(x = "Cluster", y = "Recovery rate (%) ")+
  theme_minimal(base_size = 15) + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

plotRR

# Let's put everything together as a figure with multiple panels.

#completepcaclusterplotblup <- plot_grid(plotPH_D15, plotTN_D24, plotLS_D25,
#plotSG_D27, plotRR, labels = "AUTO", label_size = 18, ncol = 2)

#completepcaclusterplotblup

#ggsave("2022.04.01.completepcaclusterplotblup.png", completepcaclusterplotblup,
#height = 14, width = 12, bg = "white")

Hierchirchal Clustering analysis and heatmap

# Cluster dendrogram and heatmap based on the seedling drought response traits.

set.seed(123)

row_dend = as.dendrogram(hclust(dist(df5)))

row_dend = color_branches(row_dend, k = 5) # `color_branches()` returns a dendrogram object

dendrogramclusters <- cutree(row_dend,k=5)

table(dendrogramclusters)

## dendrogramclusters
##   1   2   3   4   5 
## 252 131   4  11   3

# Scaled Heatmap

dm <- dimnames(df5)

mat <- t(apply(df5,1,scale)) # z-scale by row

dimnames(mat) <- dm

args <- commandArgs(T)

#png("2022.03.19.Heatmap_Dendrogram_Scaled.png", units="in", width=8, height=10, res=360)
Heatmap(mat, cluster_rows = row_dend,
        heatmap_legend_param = list(title = "Scaled"),
        row_labels = "nrow", show_row_names = F)

#dev.off()

# Hierarchical clustering and visualization of clusters for their difference in plant height,
#recovery rate and so on.

# Put the cluster information in a dataframe.

Dendo_data <- data.frame(dendrogramclusters) %>% rownames_to_column()

# Merge the cluster with the BLUP data for the landraces

Dendo_datavisual <- merge(Dendo_data, DataOutput.final, by.x = "rowname",by.y = "Name")

Dendo_datavisual$dendrogramclusters <- factor(Dendo_datavisual$dendrogramclusters)

# Plant height variation among the clusters at 15 days after drought imposition.

DendoplotPH_D15 <- ggplot(Dendo_datavisual, 
                          aes(dendrogramclusters, Plant_height_D15))+
  geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) + 
  stat_compare_means(aes(dendrogramclusters,  Plant_height_D15),
                     data = Clustervisual, method = "anova")+
  geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
  labs(x = "", y = "Plant height")+
  theme_minimal(base_size = 15) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

DendoplotPH_D15

# Variation in number of trifoliate  among the clusters at 24 days after last irrigation.

DendoplotTN_D24 <- ggplot(Dendo_datavisual,
                          aes(dendrogramclusters, Trifoliate_D24))+
  geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) + 
  stat_compare_means(aes(dendrogramclusters,Trifoliate_D24),
                     data = Dendo_datavisual, method = "anova")+ 
  geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
  labs(x = "", y = "Number of trifoliate")+
  theme_minimal(base_size = 15) + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

DendoplotTN_D24

# Variation in leaf senescence score among the clusters at 25 days after last watering.

DendoplotLS_D25 <- ggplot(Dendo_datavisual,
                          aes(dendrogramclusters, Senescence_D25))+
  geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) + 
  stat_compare_means(aes(dendrogramclusters, Senescence_D25),
                     data = Dendo_datavisual, method = "anova")+ 
  geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
  labs(x = "", y = "Leaf senescence score")+
  theme_minimal(base_size = 15)+ 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

DendoplotLS_D25

# Variation in stem greenness score among the clusters at 27 days after drought imposition.

DendoplotSG_D27 <- ggplot(Dendo_datavisual,
                          aes(dendrogramclusters, Stem_green_D27))+
  geom_boxplot(aes(colour = dendrogramclusters), width = 0.4) + 
  stat_compare_means(aes(dendrogramclusters, Stem_green_D27),
                     data = Dendo_datavisual, method = "anova")+
  geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters), width = 0.1)+
  labs(x = "Cluster", y = "Stem greenness score")+
  theme_minimal(base_size = 15)+ 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

DendoplotSG_D27

# Variation in recovery rate among the clusters at 27 days after drought imposition.

DendoplotRRbox <- ggplot(Dendo_datavisual, aes(dendrogramclusters, Recovery_RR))+
  geom_boxplot(aes(colour = dendrogramclusters))+ 
  stat_compare_means(aes(dendrogramclusters,  Recovery_RR),
                     data = Dendo_datavisual, method = "anova")+
  geom_jitter(aes(colour = dendrogramclusters, shape = dendrogramclusters))+
  theme_minimal(base_size = 15)

DendoplotRRbox

# Variation in recovery rate among the clusters at days after re-irrigation.
DendoplotRR <- ggplot(Dendo_datavisual, aes(dendrogramclusters,
                                            Recovery_RR, fill = dendrogramclusters)) +
  stat_summary(fun = mean, geom = "bar", colour = "black")+
  labs(x = "Cluster", y = "Recovery rate (%) ")+
  theme_minimal(base_size = 15) + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

DendoplotRR

# Let's put everything together as a figure with multiple panels.

#Dendocompletepcaclusterplotblup <- plot_grid(DendoplotPH_D15, DendoplotTN_D24, DendoplotLS_D25,
#DendoplotSG_D27, DendoplotRR, labels = "AUTO", label_size = 18, ncol = 2)

#Dendocompletepcaclusterplotblup

#ggsave("2022.04.01.Dendocompletepcaclusterplotblup.png", Dendocompletepcaclusterplotblup,
#height = 14, width = 12, bg = "white")

Correlation analysis

# So, let check the relationship between the traits using Pearson's correlation.

merged_cor3 <- as.matrix(df4[-1])

M<-rcorr(merged_cor3)

diag(M$r) = NA

diag(M$P) = NA

#png("MarkdownCorrelation.matrix.drought.cowpea1.png", units="cm", width=20, height=16, res=360)

par(mar=c(1,1,1,1))

corrplot(M$r, type = "upper", p.mat = M$P, insig = "label_sig",
         sig.level = c(.001, .01, .05), pch.cex = .9, pch.col = "black",
         na.label = "x", method = "color", tl.col = "black", tl.pos = "tp",
         tl.cex = 0.7, diag=T, col=brewer.pal(n=10, name="PuOr"), mar = c(1, 1, 1, 1))
#col=brewer.pal(n=10, name="PuOr") #viridis_pal()(10)
corrplot(M$r, add = TRUE, type = "lower", method = "number",
         col = "black", diag = FALSE, tl.pos = "n", cl.pos = "n",
         number.cex = 0.5, number.digits = 2)

#dev.off()