This notebook described the pipeline to produce cluster dendrogram of 72 rice landraces from Nigeria agro-ecological zones based on 19 SSR genotypic markers.
Let’s load the required libraries to get started.
library(ggdendro)
library(ggplot2)
The genetic distance between the acessions were estimated based on the SSR markers.
# Load the SSR distance matrix
geneticdiversity <- read.csv("ssrmarkermatrix1.csv", row.names = 1)
row.names(geneticdiversity)
## [1] "2Bc" "Ba Ingila" "Bakin Iri-Kb"
## [4] "Bakin Iri - Bn" "Bakin Yar China-Ba" "Bayawure"
## [7] "Biruwa" "Bolaga" "Cdi"
## [10] "Chaina-Gb" "Cp-Gb" "Dan Kaushi"
## [13] "Dan Koydo" "Dantudu" "Doguwar Carolea"
## [16] "Doguwar Jamila" "Farar Ja" "Farar Jana"
## [19] "Farar Jellof" "Faro-Plt" "Faro-Jlg"
## [22] "Fijo" "Frajalam" "Futia-12"
## [25] "Gajere Carolea" "Iresi Tsarigi" "Jaka"
## [28] "Jamila-Kt" "Jamila Plt" "Jamila-Ba"
## [31] "Jamila-Gb" "Jamila-Jg" "Jamila-Ng"
## [34] "Jamila-Yl" "Jamila-Zrx" "Jan Iri-Kb"
## [37] "Jan Iri - Bn" "Jap" "Jaton Mini"
## [40] "Koro-Koro" "Lete/Viu" "Mai Adda/Kilaki"
## [43] "Mai Allura" "Mai Zabuwa Giwa" "Mai Zabuwa/Biro"
## [46] "Mass/Osi" "Miruwa" "O-Tu"
## [49] "Santana(Yar Ruwa)" "Shatika" "Sipi-Nsw"
## [52] "Sipi-Ng" "Soppi" "Tasama"
## [55] "Wacot" "Water Proof" "Wati"
## [58] "Yar China-Kb" "Yar Dan Hassan" "Yar Dashe"
## [61] "Yar Das-Jg" "Yar Dirya" "Yar Gidan Yarima"
## [64] "Yar Kabori" "Yar Kalage" "Yar Kura"
## [67] "Yar Maaji" "Yar Mamman" "Yar Nupawa"
## [70] "Yar Telak" "Yar Yiginaye" "Yar Zaiti"
head(geneticdiversity)
Next, we let’s load the file containing the list of landraces and their agro-ecologies of collection. So, we will merge the two dataset latter, to examining if the clustering following geographical zones of collection.
# Load the zones of collection file and merge
zone_of_collection <- read.csv("landraces_zone_of_collection.csv")
head(zone_of_collection)
These function are required to plot the fan dendrogram and to customized it. Check this link below for the functions: https://atrebas.github.io/post/2019-06-08-lightweight-dendrograms/
# Functions for creating fan dendrogram.
dendro_data_k <- function(hc, k) {
hcdata <- ggdendro::dendro_data(hc, type = "rectangle")
seg <- hcdata$segments
labclust <- cutree(hc, k)[hc$order]
segclust <- rep(0L, nrow(seg))
heights <- sort(hc$height, decreasing = TRUE)
height <- mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
for (i in 1:k) {
xi <- hcdata$labels$x[labclust == i]
idx1 <- seg$x >= min(xi) & seg$x <= max(xi)
idx2 <- seg$xend >= min(xi) & seg$xend <= max(xi)
idx3 <- seg$yend < height
idx <- idx1 & idx2 & idx3
segclust[idx] <- i
}
idx <- which(segclust == 0L)
segclust[idx] <- segclust[idx + 1L]
hcdata$segments$clust <- segclust
hcdata$segments$line <- as.integer(segclust < 1L)
hcdata$labels$clust <- labclust
hcdata
}
set_labels_params <- function(nbLabels,
direction = c("tb", "bt", "lr", "rl"),
fan = FALSE) {
if (fan) {
angle <- 360 / nbLabels * 1:nbLabels + 90
idx <- angle >= 90 & angle <= 270
angle[idx] <- angle[idx] + 180
hjust <- rep(0, nbLabels)
hjust[idx] <- 1
} else {
angle <- rep(0, nbLabels)
hjust <- 0
if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
if (direction %in% c("tb", "rl")) { hjust <- 1 }
}
list(angle = angle, hjust = hjust, vjust = 0.5)
}
plot_ggdendro <- function(hcdata,
direction = c("lr", "rl", "tb", "bt"),
fan = FALSE,
scale.color = NULL,
branch.size = 1,
label.size = 3,
nudge.label = 0.01,
expand.y = 0.1) {
direction <- match.arg(direction) # if fan = FALSE
ybreaks <- pretty(segment(hcdata)$y, n = 5)
ymax <- max(segment(hcdata)$y)
## branches
p <- ggplot() +
geom_segment(data = segment(hcdata),
aes(x = x,
y = y,
xend = xend,
yend = yend,
linetype = factor(line),
colour = factor(clust)),
lineend = "round",
show.legend = FALSE,
size = branch.size)
## orientation
if (fan) {
p <- p +
coord_polar(direction = -1) +
scale_x_continuous(breaks = NULL,
limits = c(0, nrow(label(hcdata)))) +
scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_x_continuous(breaks = NULL)
if (direction %in% c("rl", "lr")) {
p <- p + coord_flip()
}
if (direction %in% c("bt", "lr")) {
p <- p + scale_y_reverse(breaks = ybreaks)
} else {
p <- p + scale_y_continuous(breaks = ybreaks)
nudge.label <- -(nudge.label)
}
}
# labels
labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
hcdata$labels$angle <- labelParams$angle
p <- p +
geom_text(data = label(hcdata),
aes(x = x,
y = y,
label = label,
colour = factor(clust),
angle = angle),
vjust = labelParams$vjust,
hjust = labelParams$hjust,
nudge_y = ymax * nudge.label,
size = label.size,
show.legend = FALSE)
# colors and limits
if (!is.null(scale.color)) {
p <- p + scale_color_manual(values = scale.color)
}
ylim <- -round(ymax * expand.y, 1)
p <- p + expand_limits(y = ylim)
p
}
Here, were are going to creating the genetic clustering based on the hclust function from base R using the complete method.
#cols <- viridis_pal()(5)
dd <- dist(scale(geneticdiversity), method = "euclidean")
hc <- hclust(dd, method = "complete")
hcdata <- dendro_data_k(hc, 3)
Finaly, we can put everything together to generated the circular dendrogram with discrete geographical location overlayyed.
par(cex=0.6,mar=c(0,0,0,0))
p <- plot_ggdendro(hcdata,
fan = TRUE,
#scale.color = cols,
label.size = 1.90,
nudge.label = 0.15,
expand.y = 0.95,
branch.size = 0.3)+ theme_void()
plotfandendro <- p + geom_point(data = zone_of_collection,
aes(x = match(Landrace, hcdata$labels$label),
y = -1.4, fill = as.factor(Ecological.zone.of.cultivation), color = as.factor(Ecological.zone.of.cultivation), shape = as.factor(Ecological.zone.of.cultivation)),
size = 1.5,
show.legend = FALSE)+
scale_color_manual(values = c("#33638DFF","#440154FF", "#55C667FF", "#FDE725FF", "#1B9E77", "#D95F02", "#7570B3", "#E7298A", "#66A61E")) +
scale_shape_manual(values = c(15, 16, 17, 18, 19))
plotfandendro
#ggsave(plotfandendro, filename = "2022.04.08.SSRFandendro_newfinalshape2.png", height = 4.0, width = 4.0, unit = "in",bg = "white", dpi = 360)