This notebook described the pipeline to produce cluster dendrogram of 72 rice landraces from Nigeria agro-ecological zones based on 19 SSR genotypic markers.

Analysis Workflow

Let’s load the required libraries to get started.

library(ggdendro)
library(ggplot2)

Genetic distance

The genetic distance between the acessions were estimated based on the SSR markers.

# Load the SSR distance matrix
geneticdiversity <- read.csv("ssrmarkermatrix1.csv", row.names = 1)
row.names(geneticdiversity)
##  [1] "2Bc"                "Ba Ingila"          "Bakin Iri-Kb"      
##  [4] "Bakin Iri - Bn"     "Bakin Yar China-Ba" "Bayawure"          
##  [7] "Biruwa"             "Bolaga"             "Cdi"               
## [10] "Chaina-Gb"          "Cp-Gb"              "Dan Kaushi"        
## [13] "Dan Koydo"          "Dantudu"            "Doguwar Carolea"   
## [16] "Doguwar Jamila"     "Farar Ja"           "Farar Jana"        
## [19] "Farar Jellof"       "Faro-Plt"           "Faro-Jlg"          
## [22] "Fijo"               "Frajalam"           "Futia-12"          
## [25] "Gajere Carolea"     "Iresi Tsarigi"      "Jaka"              
## [28] "Jamila-Kt"          "Jamila Plt"         "Jamila-Ba"         
## [31] "Jamila-Gb"          "Jamila-Jg"          "Jamila-Ng"         
## [34] "Jamila-Yl"          "Jamila-Zrx"         "Jan Iri-Kb"        
## [37] "Jan Iri - Bn"       "Jap"                "Jaton Mini"        
## [40] "Koro-Koro"          "Lete/Viu"           "Mai Adda/Kilaki"   
## [43] "Mai Allura"         "Mai Zabuwa Giwa"    "Mai Zabuwa/Biro"   
## [46] "Mass/Osi"           "Miruwa"             "O-Tu"              
## [49] "Santana(Yar Ruwa)"  "Shatika"            "Sipi-Nsw"          
## [52] "Sipi-Ng"            "Soppi"              "Tasama"            
## [55] "Wacot"              "Water Proof"        "Wati"              
## [58] "Yar China-Kb"       "Yar Dan Hassan"     "Yar Dashe"         
## [61] "Yar Das-Jg"         "Yar Dirya"          "Yar Gidan Yarima"  
## [64] "Yar Kabori"         "Yar Kalage"         "Yar Kura"          
## [67] "Yar Maaji"          "Yar Mamman"         "Yar Nupawa"        
## [70] "Yar Telak"          "Yar Yiginaye"       "Yar Zaiti"
head(geneticdiversity)

Next, we let’s load the file containing the list of landraces and their agro-ecologies of collection. So, we will merge the two dataset latter, to examining if the clustering following geographical zones of collection.

# Load the zones of collection file and merge

zone_of_collection <- read.csv("landraces_zone_of_collection.csv")

head(zone_of_collection)

These function are required to plot the fan dendrogram and to customized it. Check this link below for the functions: https://atrebas.github.io/post/2019-06-08-lightweight-dendrograms/

# Functions for creating fan dendrogram.

dendro_data_k <- function(hc, k) {
  hcdata    <-  ggdendro::dendro_data(hc, type = "rectangle")
  seg       <-  hcdata$segments
  labclust  <-  cutree(hc, k)[hc$order]
  segclust  <-  rep(0L, nrow(seg))
  heights   <-  sort(hc$height, decreasing = TRUE)
  height    <-  mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
  
  for (i in 1:k) {
    xi      <-  hcdata$labels$x[labclust == i]
    idx1    <-  seg$x    >= min(xi) & seg$x    <= max(xi)
    idx2    <-  seg$xend >= min(xi) & seg$xend <= max(xi)
    idx3    <-  seg$yend < height
    idx     <-  idx1 & idx2 & idx3
    segclust[idx] <- i
  }
  
  idx                    <-  which(segclust == 0L)
  segclust[idx]          <-  segclust[idx + 1L]
  hcdata$segments$clust  <-  segclust
  hcdata$segments$line   <-  as.integer(segclust < 1L)
  hcdata$labels$clust    <-  labclust
  
  hcdata
}

set_labels_params <- function(nbLabels,
                              direction = c("tb", "bt", "lr", "rl"),
                              fan       = FALSE) {
  if (fan) {
    angle       <-  360 / nbLabels * 1:nbLabels + 90
    idx         <-  angle >= 90 & angle <= 270
    angle[idx]  <-  angle[idx] + 180
    hjust       <-  rep(0, nbLabels)
    hjust[idx]  <-  1
  } else {
    angle       <-  rep(0, nbLabels)
    hjust       <-  0
    if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
    if (direction %in% c("tb", "rl")) { hjust <- 1 }
  }
  list(angle = angle, hjust = hjust, vjust = 0.5)
}

plot_ggdendro <- function(hcdata,
                          direction   = c("lr", "rl", "tb", "bt"),
                          fan         = FALSE,
                          scale.color = NULL,
                          branch.size = 1,
                          label.size  = 3,
                          nudge.label = 0.01,
                          expand.y    = 0.1) {
  
  direction <- match.arg(direction) # if fan = FALSE
  ybreaks   <- pretty(segment(hcdata)$y, n = 5)
  ymax      <- max(segment(hcdata)$y)
  
  ## branches
  p <- ggplot() +
    geom_segment(data         =  segment(hcdata),
                 aes(x        =  x,
                     y        =  y,
                     xend     =  xend,
                     yend     =  yend,
                     linetype =  factor(line),
                     colour   =  factor(clust)),
                 lineend      =  "round",
                 show.legend  =  FALSE,
                 size         =  branch.size)
  
  ## orientation
  if (fan) {
    p <- p +
      coord_polar(direction = -1) +
      scale_x_continuous(breaks = NULL,
                         limits = c(0, nrow(label(hcdata)))) +
      scale_y_reverse(breaks = ybreaks)
  } else {
    p <- p + scale_x_continuous(breaks = NULL)
    if (direction %in% c("rl", "lr")) {
      p <- p + coord_flip()
    }
    if (direction %in% c("bt", "lr")) {
      p <- p + scale_y_reverse(breaks = ybreaks)
    } else {
      p <- p + scale_y_continuous(breaks = ybreaks)
      nudge.label <- -(nudge.label)
    }
  }
  
  # labels
  labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
  hcdata$labels$angle <- labelParams$angle
  
  p <- p +
    geom_text(data        =  label(hcdata),
              aes(x       =  x,
                  y       =  y,
                  label   =  label,
                  colour  =  factor(clust),
                  angle   =  angle),
              vjust       =  labelParams$vjust,
              hjust       =  labelParams$hjust,
              nudge_y     =  ymax * nudge.label,
              size        =  label.size,
              show.legend =  FALSE)
  
  # colors and limits
  if (!is.null(scale.color)) {
    p <- p + scale_color_manual(values = scale.color)
  }
  
  ylim <- -round(ymax * expand.y, 1)
  p    <- p + expand_limits(y = ylim)
  
  p
}

Here, were are going to creating the genetic clustering based on the hclust function from base R using the complete method.

#cols <- viridis_pal()(5)

dd <- dist(scale(geneticdiversity), method = "euclidean")

hc <- hclust(dd, method = "complete")

hcdata <- dendro_data_k(hc, 3)

Finaly, we can put everything together to generated the circular dendrogram with discrete geographical location overlayyed.

par(cex=0.6,mar=c(0,0,0,0))
p <- plot_ggdendro(hcdata,
                   fan         = TRUE,
                   #scale.color = cols,
                   label.size  = 1.90,
                   nudge.label = 0.15,
                   expand.y    = 0.95,
                   branch.size = 0.3)+ theme_void()

plotfandendro <- p + geom_point(data = zone_of_collection, 
                                aes(x = match(Landrace, hcdata$labels$label),
                                    y = -1.4, fill = as.factor(Ecological.zone.of.cultivation), color = as.factor(Ecological.zone.of.cultivation), shape = as.factor(Ecological.zone.of.cultivation)),
                                size     = 1.5,
                                show.legend = FALSE)+
  scale_color_manual(values = c("#33638DFF","#440154FF",  "#55C667FF", "#FDE725FF", "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E")) +
  scale_shape_manual(values = c(15, 16, 17, 18, 19))

plotfandendro

#ggsave(plotfandendro, filename = "2022.04.08.SSRFandendro_newfinalshape2.png", height = 4.0, width = 4.0, unit = "in",bg = "white", dpi = 360)