This notebook describes the analysis of rice landraces phenotypic and yield related data to explore the level of genetic diversity. The rice landraces were collected from farmers at harvest across northern states of Nigeria.

# Load all the packages required for this analysis
library(tidyverse)
library(agricolae)
library(cowplot)
library(ggpubr)
library(plotly)
library(rstudioapi)
library(ggmap)
library(ape)
library(ggdendro)
library(viridis)
library(RColorBrewer)
library(ggfortify)
library(WGCNA)
library(corrplot)
library(Hmisc)
library(factoextra)

Map of Landraces collection site

Let’s load the gps coordinate data of the sites where the landraces where collected.

# Load data into R

rice_gps <- read.csv("gps.csv", header = TRUE,
                     row.names = "Landraces")

head(rice_gps)

Next, we will plot this position on the map of Nigeria to spatially visualize the distribution of the landraces across different ecological zones of Nigeria.

# plot the map using ggmap

map_ngr <- c(left = 2, bottom = 3, right = 15, top = 16)
newmap <- get_stamenmap(map_ngr, zoom = 6, maptype = 'toner-hybrid') %>% ggmap()

## Source : http://tile.stamen.com/toner-hybrid/6/32/29.png

## Source : http://tile.stamen.com/toner-hybrid/6/33/29.png

## Source : http://tile.stamen.com/toner-hybrid/6/34/29.png

## Source : http://tile.stamen.com/toner-hybrid/6/32/30.png

## Source : http://tile.stamen.com/toner-hybrid/6/33/30.png

## Source : http://tile.stamen.com/toner-hybrid/6/34/30.png

## Source : http://tile.stamen.com/toner-hybrid/6/32/31.png

## Source : http://tile.stamen.com/toner-hybrid/6/33/31.png

## Source : http://tile.stamen.com/toner-hybrid/6/34/31.png

# Use ggplot to lay the gps coordinate on the map

nigeria_map1 <-  newmap + 
  geom_point(data = rice_gps, aes(Longitude,Latitude),
             colour = "red", alpha = 0.7, size = 5)+
  xlab("Longitude") + ylab("Latitude")+
  theme_bw(base_size = 20)

nigeria_map1

#Visualize the interactive map using ggplotly

ggplotly(nigeria_map1)

# Save map to directory using ggsave
#ggsave(nigeria_map1, filename = "nigeria_map_red2.png",
#height = 16, width = 14)

Phenotypic characterization of Nigerian rice landraces using agromorphological data

Let’s load the phenotypic data.

# Load data into R

rice_landraces_data <- read.csv("rice_landraces_genetic_diversity.csv",
                                header = T)

head(rice_landraces_data)

Analysis phenotypic variability

Genotypic variability

Examining the genotypic variability of the landraces using agro-morphological data.

# Compute the analysis of variance for rice landraces

names(rice_landraces_data)

##  [1] "SN"       "Landrace" "State"    "Town"     "Zone"     "Block"   
##  [7] "DE"       "PH"       "PL"       "Nlpp"     "Ntpp"     "Dff"     
## [13] "Nppp"     "Nfgpp"    "Nugpp"    "Ntg"      "sw"       "GY"

unique(rice_landraces_data$Landrace)

##  [1] "Jamila-Yl"          "Bakin Yar China-Ba" "Tasama"            
##  [4] "Jamila-Ba"          "Futia-12"           "Lete/Viu"          
##  [7] "Mass/Osi"           "Miruwa"             "Soppi"             
## [10] "Cdi"                "Dan Koydo"          "Bakin Iri - Bn"    
## [13] "Jan Iri - Bn"       "Jamila-Gb"          "Chaina-Gb"         
## [16] "Cp-Gb"              "Yar Das-Jg"         "Jamila-Jg"         
## [19] "Mai Zabuwa/Biro"    "Mai Adda/Kilaki"    "Jamila-Zrx"        
## [22] "Mai Allura"         "Yar Nupawa"         "Frajalam"          
## [25] "Doguwar Carolea"    "Mai Zabuwa Giwa "   "Yar Dashe"         
## [28] "Gajere Carolea"     "Yar Telak"          "Yar Yiginaye"      
## [31] "Yar Kura"           "Doguwar Jamila"     "Fijo"              
## [34] "Yar Dan Hassan"     "Farar Jellof"       "Farar Jana"        
## [37] "Jap"                "Santana(Yar Ruwa)"  "Farar Ja"          
## [40] "Jaka"               "Yar Maaji"          "Shatika"           
## [43] "Yar Gidan Yarima"   "Bolaga"             "Jamila-Kt"         
## [46] "Wacot"              "2Bc"                "Yar Mamman"        
## [49] "Yar Kalage"         "Jan Iri-Kb"         "Bakin Iri-Kb"      
## [52] "Bayawure"           "Yar China-Kb"       "Iresi Tsarigi"     
## [55] "Sipi-Nsw"           "Water Proof"        "Biruwa"            
## [58] "Koro-Koro"          "Dantudu"            "Sipi-Ng"           
## [61] "Wati"               "Jamila-Ng"          "Jamila Plt"        
## [64] "Ba Ingila"          "Yar Zaiti"          "Yar Kabori"        
## [67] "O-Tu"               "Jaton Mini"         "Dan Kaushi"        
## [70] "Faro-Plt"           "Faro-Jlg"           "Yar Dirya"

rice_landraces_data$Landrace <- as.factor(rice_landraces_data$Landrace)
rice_landraces_data$Block <- as.factor(rice_landraces_data$Block)
rice_landraces_data$Zone <- as.factor(rice_landraces_data$Zone)
rice_landraces_data$DE <- as.numeric(rice_landraces_data$DE)


AVz <- rep(NA, ncol(rice_landraces_data))
sink("markdown_2022.04.06.ANOVA_table_for_rice_Landraces.txt")
for (i in 7:ncol(rice_landraces_data)){
  column <- names(rice_landraces_data[i])
  AVz <- summary(aov(rice_landraces_data[,i] ~ Block + Landrace, data = rice_landraces_data))
  model <- aov(rice_landraces_data[,i] ~ Block + Landrace, data = rice_landraces_data)
  out <- LSD.test(rice_landraces_data[,i], rice_landraces_data$Landrace, AVz[[1]]$Df[2], AVz[[1]]$`Mean Sq`[2])
  min1 <- min(rice_landraces_data[,i])
  max1 <- max(rice_landraces_data[,i])
  print(column)
  print(AVz)
  print(out$statistics)
  print(min1)
  print(max1)
}

## [1] "DE"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2   4.75  2.3750   5.169  0.00681 ** 
## Landrace     71  81.83  1.1526   2.508 1.71e-06 ***
## Residuals   142  65.25  0.4595                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   1.152582 71 5.305556 20.23509 1.993943 1.747846
## [1] 4
## [1] 8
## [1] "PH"
##              Df Sum Sq Mean Sq F value  Pr(>F)    
## Block         2    257   128.4   5.834 0.00367 ** 
## Landrace     71  33882   477.2  21.679 < 2e-16 ***
## Residuals   142   3126    22.0                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   477.2143 71 70.97685 30.77797 1.993943 35.56509
## [1] 43
## [1] 98
## [1] "PL"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2      5    2.29   0.224      0.8    
## Landrace     71   3243   45.68   4.454 1.98e-14 ***
## Residuals   142   1456   10.26                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df    Mean       CV  t.value      LSD
##   45.67968 71 22.4256 30.13821 1.993943 11.00345
## [1] 13
## [1] 40
## [1] "Nlpp"
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Block         2      7    3.37   0.408  0.666    
## Landrace     71   3543   49.90   6.042 <2e-16 ***
## Residuals   142   1173    8.26                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   49.89508 71 17.47685 40.41715 1.993943 11.49995
## [1] 8
## [1] 40
## [1] "Ntpp"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2   2.29  1.1435   1.971    0.143    
## Landrace     71 160.77  2.2644   3.903 2.65e-12 ***
## Residuals   142  82.38  0.5801                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value     LSD
##   2.264411 71 5.449074 27.61562 1.993943 2.44988
## [1] 4
## [1] 9
## [1] "Dff"
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Block         2    353   176.4   1.419  0.245    
## Landrace     71  49632   699.0   5.621 <2e-16 ***
## Residuals   142  17660   124.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df    Mean      CV  t.value      LSD
##   699.0451 71 98.0463 26.9663 1.993943 43.04471
## [1] 58
## [1] 144
## [1] "Nppp"
##              Df Sum Sq Mean Sq F value  Pr(>F)    
## Block         2    3.0   1.504   5.283 0.00612 ** 
## Landrace     71  458.3   6.456  22.671 < 2e-16 ***
## Residuals   142   40.4   0.285                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   MSerror Df     Mean       CV  t.value      LSD
##   6.45561 71 5.413704 46.93255 1.993943 4.136527
## [1] 3
## [1] 12
## [1] "Nfgpp"
##              Df Sum Sq Mean Sq F value  Pr(>F)    
## Block         2    184    92.0    5.60 0.00456 ** 
## Landrace     71  37326   525.7   31.98 < 2e-16 ***
## Residuals   142   2334    16.4                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   525.7173 71 16.71319 137.1882 1.993943 37.32874
## [1] 1
## [1] 65
## [1] "Nugpp"
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Block         2    130    65.0   1.728  0.181    
## Landrace     71  30543   430.2  11.435 <2e-16 ***
## Residuals   142   5342    37.6                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   430.1772 71 34.38546 60.31826 1.993943 33.76687
## [1] 6.33
## [1] 79
## [1] "Ntg"
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Block         2     89    44.7   1.407  0.248    
## Landrace     71  35163   495.3  15.582 <2e-16 ***
## Residuals   142   4513    31.8                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   495.2513 71 51.13384 43.52155 1.993943 36.23097
## [1] 24.33
## [1] 92
## [1] "sw"
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Block         2  0.001 0.00032   0.633  0.532    
## Landrace     71 14.579 0.20534 401.162 <2e-16 ***
## Residuals   142  0.073 0.00051                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##     MSerror Df     Mean       CV  t.value       LSD
##   0.2053417 71 2.535185 17.87429 1.993943 0.7377441
## [1] 1.9
## [1] 3.1
## [1] "GY"
##              Df Sum Sq Mean Sq F value Pr(>F)    
## Block         2    2.8   1.378   4.163 0.0175 *  
## Landrace     71  804.3  11.328  34.236 <2e-16 ***
## Residuals   142   47.0   0.331                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV  t.value      LSD
##   11.32778 71 2.390139 140.8151 1.993943 5.479483
## [1] 0.11
## [1] 10.56

sink()

Agro-ecological variability

Next, let examining the variability across ecological zones using agro-morphological data.

# Compute the analysis of variance for  zones of collections
AVz <- rep(NA, ncol(rice_landraces_data))
sink("markdown_2022.04.06.ANOVA_table_for_rice_Collection_Zone.txt")
for (i in 7:ncol(rice_landraces_data)){
  column <- names(rice_landraces_data[i])
  AVz <- summary(aov(rice_landraces_data[,i] ~ Block + Zone, data = rice_landraces_data))
  model <- aov(rice_landraces_data[,i] ~ Block + Zone, data = rice_landraces_data)
  out <- LSD.test(rice_landraces_data[,i], rice_landraces_data$Zone, AVz[[1]]$Df[2], AVz[[1]]$`Mean Sq`[2])
  print(column)
  print(AVz)
  print(out$statistics)
}

## [1] "DE"
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Block         2   4.75  2.3750   3.442 0.0338 *
## Zone          4   2.88  0.7205   1.044 0.3853  
## Residuals   209 144.20  0.6900                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##     MSerror Df     Mean       CV
##   0.7205305  4 5.305556 15.99909
## [1] "PH"
##              Df Sum Sq Mean Sq F value Pr(>F)
## Block         2    257   128.4   0.751  0.473
## Zone          4   1265   316.2   1.849  0.121
## Residuals   209  35743   171.0               
##    MSerror Df     Mean       CV
##   316.1821  4 70.97685 25.05255
## [1] "PL"
##              Df Sum Sq Mean Sq F value Pr(>F)
## Block         2      5    2.29   0.106  0.900
## Zone          4    160   40.12   1.847  0.121
## Residuals   209   4539   21.72               
##    MSerror Df    Mean       CV
##   40.11581  4 22.4256 28.24319
## [1] "Nlpp"
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## Block         2      7    3.37   0.160 0.85262   
## Zone          4    307   76.66   3.634 0.00692 **
## Residuals   209   4409   21.09                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   MSerror Df     Mean       CV
##    76.655  4 17.47685 50.09646
## [1] "Ntpp"
##              Df Sum Sq Mean Sq F value  Pr(>F)   
## Block         2   2.29   1.144   1.056 0.34983   
## Zone          4  16.75   4.187   3.865 0.00473 **
## Residuals   209 226.41   1.083                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV
##   4.186693  4 5.449074 37.55025
## [1] "Dff"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2    353   176.4   0.629    0.534    
## Zone          4   8634  2158.5   7.691 8.45e-06 ***
## Residuals   209  58659   280.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df    Mean       CV
##   2158.457  4 98.0463 47.38496
## [1] "Nppp"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2    3.0   1.504   0.754    0.472    
## Zone          4   82.0  20.503  10.282 1.28e-07 ***
## Residuals   209  416.8   1.994                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV
##   20.50322  4 5.413704 83.64049
## [1] "Nfgpp"
##              Df Sum Sq Mean Sq F value Pr(>F)
## Block         2    184   92.05   0.499  0.608
## Zone          4   1115  278.66   1.511  0.200
## Residuals   209  38545  184.43               
##    MSerror Df     Mean       CV
##   278.6563  4 16.71319 99.87918
## [1] "Nugpp"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2    130    65.0   0.419 0.657969    
## Zone          4   3501   875.2   5.649 0.000246 ***
## Residuals   209  32384   154.9                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##   MSerror Df     Mean       CV
##   875.215  4 34.38546 86.03645
## [1] "Ntg"
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Block         2     89    44.7   0.266    0.767    
## Zone          4   4542  1135.5   6.755 3.94e-05 ***
## Residuals   209  35134   168.1                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV
##   1135.479  4 51.13384 65.89936
## [1] "sw"
##              Df Sum Sq Mean Sq F value Pr(>F)
## Block         2  0.001 0.00032   0.005  0.995
## Zone          4  0.118 0.02955   0.425  0.791
## Residuals   209 14.534 0.06954               
##      MSerror Df     Mean       CV
##   0.02955107  4 2.535185 6.780737
## [1] "GY"
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Block         2    2.8   1.378   0.353 0.7028  
## Zone          4   36.2   9.042   2.318 0.0582 .
## Residuals   209  815.1   3.900                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##    MSerror Df     Mean       CV
##   9.041922  4 2.390139 125.8077

sink()

Let’s visualize the traits variability among the ecological zones

# Plot of trait distribution across the agromorphological zones

names(rice_landraces_data)

##  [1] "SN"       "Landrace" "State"    "Town"     "Zone"     "Block"   
##  [7] "DE"       "PH"       "PL"       "Nlpp"     "Ntpp"     "Dff"     
## [13] "Nppp"     "Nfgpp"    "Nugpp"    "Ntg"      "sw"       "GY"

df.summary <- rice_landraces_data %>% group_by(Zone) %>%
  summarise(Max = max(DE, na.rm = TRUE))

df.summary

PlotDE <- ggplot(rice_landraces_data, aes(Zone, DE))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes( color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  #scale_fill_grey(start = 0.8, end = 0.1) + 
  #scale_colour_grey(start = 0.8, end = 0.1)+
  stat_compare_means(aes(Zone,  DE),data = rice_landraces_data,
                     method = "anova", label.y = 8)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 9),
                     breaks = seq(0, 9, by=1))+
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Days to emergence",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotDE

PlotPH <- ggplot(rice_landraces_data, aes(Zone, PH))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  stat_compare_means(aes(Zone,  PH),data = rice_landraces_data,
                     method = "anova", label.y = 105)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 110),breaks = seq(0, 110, by=10))+
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Plant height (cm)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotPH

PlotPL <- ggplot(rice_landraces_data, aes(Zone, PL))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  PL),data = rice_landraces_data,
                     method = "anova", label.y = 42.5)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 45),breaks = seq(0, 45, by=5))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Panicle length (cm)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotPL

PlotNlpp <- ggplot(rice_landraces_data, aes(Zone, Nlpp))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes( color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Nlpp),data = rice_landraces_data,
                     method = "anova", label.y = 42.5)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 45),breaks = seq(0, 45, by=5))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number leaves of per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNlpp

PlotNtpp <- ggplot(rice_landraces_data, aes(Zone, Ntpp))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Ntpp),data = rice_landraces_data,
                     method = "anova", label.y = 9.5)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 10),breaks = seq(0, 10, by=1))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number tiller per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNtpp

PlotDff <- ggplot(rice_landraces_data, aes(Zone, Dff))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Dff),data = rice_landraces_data,
                     method = "anova", label.y = 150)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 160),breaks = seq(0, 160, by=20))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Days to 50% flowering",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotDff

PlotNppp <- ggplot(rice_landraces_data, aes(Zone, Nppp))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Nppp),data = rice_landraces_data,
                     method = "anova", label.y = 14)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 15),breaks = seq(0, 15, by=3))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of panicle per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNppp

PlotNfgpp <- ggplot(rice_landraces_data, aes(Zone, Nfgpp))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Nfgpp),data = rice_landraces_data,
                     method = "anova", label.y = 68)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 70),breaks = seq(0, 70, by=10))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of filled grain per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNfgpp

PlotNugpp <- ggplot(rice_landraces_data, aes(Zone, Nugpp))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Nugpp),data = rice_landraces_data,
                     method = "anova", label.y = 77)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 80),breaks = seq(0, 80, by=10))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of unfilled grain per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNugpp

PlotNtg <- ggplot(rice_landraces_data, aes(Zone, Ntg))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  Ntg),data = rice_landraces_data,
                     method = "anova", label.y = 95)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 100),breaks = seq(0, 100, by=10))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of panicle per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNtg

Plotsw <- ggplot(rice_landraces_data, aes(Zone, sw))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  sw),data = rice_landraces_data,
                     method = "anova", label.y = 3.25)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 3.5),breaks = seq(0, 3.5, by=0.5))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  theme_bw(base_size = 16)+
  labs(x = "Ecological zones", y = "100 seed weight (g)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

Plotsw

PlotGY <- ggplot(rice_landraces_data, aes(Zone, GY))+
  geom_violin(aes(fill = Zone))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = Zone), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(Zone,  GY),data = rice_landraces_data,
                     method = "anova", label.y = 10)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 12),breaks = seq(0, 12, by=4))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  #scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Grain yield (g per plant)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotGY

# Pot all the plots together to form a single figure with multiple panels.
#allplot <- plot_grid(PlotDE, PlotPH,PlotPL,PlotNlpp,PlotNtpp,PlotDff,
#                     PlotNppp, PlotNfgpp,PlotNugpp,PlotNtg,Plotsw,PlotGY, 
#                     ncol = 3)
#allplot

#ggsave(allplot, filename = "2022.04.06.allplot1.png",
#       height = 18, width = 14, bg ="white")

#ggsave(plantheightplot, filename = "2022.04.01.plantheightplot.png", bg ="white")

Multivariate analysis

library(ggfortify)
# summarise the data for per landrace for all the variables

Rice_landrace_means <- rice_landraces_data[c(-1,-3:-6)] %>%
  group_by(Landrace) %>% 
  summarise_each(funs(mean))

## Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
## Please use `across()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

head(Rice_landrace_means)

Rice_matrix <- column_to_rownames(Rice_landrace_means,
                                  var = "Landrace") %>% as.matrix()

head(Rice_matrix)

##                          DE       PH       PL     Nlpp     Ntpp       Dff
## 2Bc                4.333333 79.00000 16.66667 22.00000 6.333333 126.33333
## Ba Ingila          5.666667 76.33333 24.94333 19.00000 5.333333  80.00000
## Bakin Iri - Bn     5.666667 88.66667 28.47667 14.00000 5.333333  88.66667
## Bakin Iri-Kb       4.666667 64.00000 17.66667 12.66667 5.000000  98.66667
## Bakin Yar China-Ba 5.333333 54.66667 20.65000 14.66667 6.333333  94.00000
## Bayawure           5.000000 63.66667 22.15667 12.00000 5.666667  85.66667
##                        Nppp     Nfgpp    Nugpp      Ntg       sw        GY
## 2Bc                3.666667  2.000000 49.00000 51.00000 2.700000 0.2166667
## Ba Ingila          6.890000 16.113333 40.11000 56.22333 2.300000 2.8433333
## Bakin Iri - Bn     7.443333 11.000000 30.00000 41.00000 2.700000 2.1766667
## Bakin Iri-Kb       6.666667  4.223333 33.78000 38.00333 2.800000 0.7900000
## Bakin Yar China-Ba 3.776667 12.556667 37.99667 50.55333 2.766667 1.4066667
## Bayawure           8.776667  5.223333 31.66667 36.55667 2.600000 1.2666667

Principal component analysis

pca <- Rice_matrix %>%
  scale()%>% ## scale the variables
  prcomp() ## print out a summary

summary(pca)

## Importance of components:
##                           PC1    PC2    PC3    PC4     PC5     PC6     PC7
## Standard deviation     1.6506 1.3635 1.3215 1.2076 1.08892 0.96776 0.84589
## Proportion of Variance 0.2271 0.1549 0.1455 0.1215 0.09881 0.07805 0.05963
## Cumulative Proportion  0.2271 0.3820 0.5275 0.6490 0.74784 0.82588 0.88551
##                           PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.7373 0.66613 0.58921 0.19783 0.01497
## Proportion of Variance 0.0453 0.03698 0.02893 0.00326 0.00002
## Cumulative Proportion  0.9308 0.96779 0.99672 0.99998 1.00000

#pca$rotation

#pca$x

pcaloadings <- pca$rotation %>%
  as.data.frame() %>%
  mutate(variables = rownames(.)) %>%
  gather(PC,loading,PC1:PC2) %>%
  ggplot(aes(loading, variables, fill = loading > 0)) +
  geom_col() + geom_text(aes(label= (round(loading, 2))),
                         color="black", size=2)+
  facet_wrap(~PC, scales = "free_y") + scale_fill_brewer( palette = "Dark2")+
  labs(x = "Value of loading",y = NULL, fill = "Positive?")+
  theme_bw(base_size = 16)

pcaloadings

#ggsave("2021.12.28.pcaloadings.png", pcaloadings, height = 5,
#       width = 9, bg = "white")

pca11 <- autoplot(pca, data = Rice_landrace_means,frame = TRUE,
                  frame.type = 'norm', label = TRUE, label.size = 3, loadings = TRUE,
                  loadings.colour = 'blue', loadings.label = TRUE, loadings.label.size = 3)

ricepca <- pca11 + theme_minimal()

ricepca

# Load the zones of collection file and merge with th pca$x

zone_of_collection <- read.delim("landraces_zone_of_collection.txt")

pca_landraces <- data.frame(Landrace = rownames(pca$x), pca$x)

pointdata <- merge(pca_landraces,zone_of_collection, by="Landrace")

pointdata$Zone <- as.factor(pointdata$Zone)

head(pointdata)

rice.screeplot <- fviz_screeplot(pca, addlabels = TRUE, barfill = "steelblue",
  barcolor = "black", ncp = 12, title ="")+
  labs(x = "Principal component")+ theme_bw(base_size = 15)

rice.screeplot

rice_biplot <- fviz_pca_biplot(pca, geom = c( "text"),
                               col.ind = "black", col.var = "red")+
  geom_point(data = pointdata,
             aes(PC1, PC2, color = Zone, shape = Zone), size=2)+
  scale_color_brewer(palette="Dark2") +
  scale_shape_manual(values=c(15,16,17,18,19))+
  labs(title = "", x = "PC1 (22.7%)", y = "PC2 (15.5%)")+
  theme_bw()+ theme(legend.position = "bottom")
  
rice_biplot

#ggsave("2021.12.30.rice_biplot_n1.png", rice_biplot,
#       height = 8, width = 14, bg = "white")

K-mean clustering analysis on PCA data

set.seed(123)

km.res <- kmeans(scale(Rice_matrix), 4)

table(km.res$cluster)

## 
##  1  2  3  4 
## 20 21 13 18

clusterplot <- fviz_cluster(km.res, geom = "point",data = Rice_matrix,
                            palette = c("#00AFBB","#2E9FDF", "#E7B800", "#FC4E07"),
                            ggtheme = theme_minimal(), main = "")+
  labs(title = "", x = "PC1 (22.7%)", y = "PC2 (15.5%)")+
  geom_hline(yintercept = 0, linetype = "dashed") + 
  geom_vline(xintercept = 0, linetype = "dashed")

clusterplot

# Better view of the contributions of variables to PC1.

fviz_contrib(pca, choice = "var", axes = 1, top = 19)

# Better view of the contributions of variables to PC2

fviz_contrib(pca, choice = "var", axes = 2, top = 19)

variableplot <- fviz_pca_var(pca, col.var="contrib",
                             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                             repel = TRUE)+ # Avoid text overlapping
  labs(x = "PC1 (22.7%)", y = "PC2 (15.5%)", title = "")+
  theme_minimal(base_size = 15)

variableplot

# So next, we want to visualize the cluster for their differences in evaluated agro-morphological data.

# Put the cluster information in a dataframe.

CLuster_data <- data.frame(km.res$cluster) %>% rownames_to_column()

# Merge the cluster with the BLUP data for the landraces

Clustervisual <- merge(CLuster_data, rice_landraces_data,
                       by.x = "rowname",by.y = "Landrace")

Clustervisual$km.res.cluster <- factor(Clustervisual$km.res.cluster)

head(Clustervisual)

# Plot of trait distribution across the agromorphological zones
library(agricolae)
library(ggpubr)
names(Clustervisual)

##  [1] "rowname"        "km.res.cluster" "SN"             "State"         
##  [5] "Town"           "Zone"           "Block"          "DE"            
##  [9] "PH"             "PL"             "Nlpp"           "Ntpp"          
## [13] "Dff"            "Nppp"           "Nfgpp"          "Nugpp"         
## [17] "Ntg"            "sw"             "GY"

df.summary <- Clustervisual %>% group_by(km.res.cluster) %>%
  summarise(Max = max(DE, na.rm = TRUE))

df.summary

PlotDE <- ggplot(Clustervisual, aes(km.res.cluster, DE))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes( color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  stat_compare_means(aes(km.res.cluster,  DE),
                     data = Clustervisual, method = "anova", label.y = 8)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 9),breaks = seq(0, 9, by=1))+
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Days to emergence",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotDE

PlotPH <- ggplot(Clustervisual, aes(km.res.cluster, PH))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  stat_compare_means(aes(km.res.cluster,  PH),
                     data = Clustervisual, method = "anova", label.y = 105)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 110),breaks = seq(0, 110, by=10))+
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Plant height (cm)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotPH

PlotPL <- ggplot(Clustervisual, aes(km.res.cluster, PL))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  PL),
                     data = Clustervisual, method = "anova", label.y = 42.5)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 45),breaks = seq(0, 45, by=5))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Panicle length (cm)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotPL

PlotNlpp <- ggplot(Clustervisual, aes(km.res.cluster, Nlpp))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes( color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Nlpp),
                     data = Clustervisual, method = "anova", label.y = 42.5)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 45),breaks = seq(0, 45, by=5))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number leaves of per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNlpp

PlotNtpp <- ggplot(Clustervisual, aes(km.res.cluster, Ntpp))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Ntpp),
                     data = Clustervisual, method = "anova", label.y = 9.5)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 10),breaks = seq(0, 10, by=1))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number tiller per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNtpp

PlotDff <- ggplot(Clustervisual, aes(km.res.cluster, Dff))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Dff),
                     data = Clustervisual, method = "anova", label.y = 150)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 160),breaks = seq(0, 160, by=20))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Days to 50% flowering",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotDff

PlotNppp <- ggplot(Clustervisual, aes(km.res.cluster, Nppp))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Nppp),
                     data = Clustervisual, method = "anova", label.y = 14)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 15),breaks = seq(0, 15, by=3))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of panicle per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNppp

PlotNfgpp <- ggplot(Clustervisual, aes(km.res.cluster, Nfgpp))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Nfgpp),
                     data = Clustervisual, method = "anova", label.y = 68)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 70),breaks = seq(0, 70, by=10))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of filled grain per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNfgpp

PlotNugpp <- ggplot(Clustervisual, aes(km.res.cluster, Nugpp))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Nugpp),
                     data = Clustervisual, method = "anova", label.y = 77)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 80),breaks = seq(0, 80, by=10))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of unfilled grain per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNugpp

PlotNtg <- ggplot(Clustervisual, aes(km.res.cluster, Ntg))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  Ntg),
                     data = Clustervisual, method = "anova", label.y = 95)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 100),breaks = seq(0, 100, by=10))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  theme_bw(base_size = 16)+
  labs(x = "", y = "Number of panicle per plant",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotNtg

Plotsw <- ggplot(Clustervisual, aes(km.res.cluster, sw))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  sw),
                     data = Clustervisual, method = "anova", label.y = 3.25)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 3.5),breaks = seq(0, 3.5, by=0.5))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2")+
  theme_bw(base_size = 16)+
  labs(x = "Ecological km.res.clusters", y = "100 seed weight (g)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

Plotsw

PlotGY <- ggplot(Clustervisual, aes(km.res.cluster, GY))+
  geom_violin(aes(fill = km.res.cluster))+
  geom_boxplot( width = 0.1)+
  geom_point(aes(color = km.res.cluster), 
             position = position_jitter(width = 0.15), size = 1, alpha = 0.5)+
  stat_compare_means(aes(km.res.cluster,  GY),
                     data = Clustervisual, method = "anova", label.y = 10)+
  scale_y_continuous(expand = c(0,0), limits = c(0, 12),breaks = seq(0, 12, by=4))+
  scale_fill_brewer(palette="Dark2") + scale_colour_brewer(palette="Dark2") +
  #scale_x_discrete(labels = NULL)+
  theme_bw(base_size = 16)+
  labs(x = "", y = "Grain yield (g per plant)",colour = "")+
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

PlotGY

# Pot all the plots together to form a single figure with multiple panels.
#allplot <- plot_grid(PlotDE, PlotPH,PlotPL,PlotNlpp,PlotNtpp,PlotDff,
#                     PlotNppp, PlotNfgpp,PlotNugpp,PlotNtg,Plotsw,PlotGY, 
#                     ncol = 3)
#allplot

#ggsave(allplot, filename = "2022.04.06.allplot1.png",
#       height = 18, width = 14, bg ="white")

#ggsave(plantheightplot, filename = "2022.04.01.plantheightplot.png", bg ="white")

Hierarchical clustering analysis

set.seed(123)
#png("2021.12.30.Ricevariability_wgcna_2.png", units="in", width=16.5, height=7.5, res=1000)

fit <- hclust(dist(scale(Rice_matrix), method = "euclidean"), method="ward.D2")

groups  <- cutree(fit, 4)

table(groups)

## groups
##  1  2  3  4 
## 32  8 22 10

cluster_landraces <- data.frame(Landrace = rownames(Rice_matrix), Rice_matrix)
Rice_lanrace_matrix_cluster <- merge(cluster_landraces, zone_of_collection, by="Landrace")
Rice_lanrace_matrix_cluster <- column_to_rownames(Rice_lanrace_matrix_cluster, var = "Landrace")

manmade <- as.numeric(as.factor(Rice_lanrace_matrix_cluster$Zone))

plotDendroAndColors(fit, cbind(Clusters=labels2colors(groups), Zone=labels2colors(manmade)))

## Warning in cbind(Clusters = labels2colors(groups), Zone =
## labels2colors(manmade)): number of rows of result is not a multiple of vector
## length (arg 2)

#dev.off()

Correlation analysis

M <-rcorr(Rice_matrix)

diag(M$r) = NA
diag(M$P) = NA

#png("CorrelationplotlowerriceUpper.png", units="cm", width=16, height=16, res=360)
corrplot(M$r, p.mat = M$P, type="lower",insig = "label_sig",
sig.level = c(0.001, 0.01, 0.05), pch.cex = 0.9, pch.col = "black",
na.label = "x", tl.col = "black", tl.pos = "tp",
tl.cex = 0.7, col=brewer.pal(n=10, name="PuOr"), mar = c(1, 1, 1, 1))

corrplot(M$r, add = TRUE, type = "upper", method = "number",
col = "black", diag = FALSE, tl.pos = "n", cl.pos = "n",
number.cex = 0.5, number.digits = 2)

#dev.off()

Rice landraces phenotypic diversity